Skip to content

BaseTarget

A collection of BBOT events that represent a scan target.

Uses a RadixTarget internally for fast scope lookups, and layers on BBOT-specific parsing (events, URLs, emails, host:port) and hashing.

This class is inherited by all three components of the BBOT target:
  • Target
  • Blacklist
  • Seeds
Source code in bbot/scanner/target.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
class BaseTarget:
    """
    A collection of BBOT events that represent a scan target.

    Uses a RadixTarget internally for fast scope lookups, and layers on
    BBOT-specific parsing (events, URLs, emails, host:port) and hashing.

    This class is inherited by all three components of the BBOT target:
        - Target
        - Blacklist
        - Seeds

    Attributes:
        accept_target_types: Seed target types this collection will store;
            seeds of any other type are skipped with a warning in add().
        strict_scope: Value passed to the underlying RadixTarget; it also
            perturbs the hash so strict and non-strict targets differ.
        event_seeds: Set of every parsed seed accepted by add().
    """

    accept_target_types = ["TARGET"]

    def __init__(self, *targets, strict_scope=False, acl_mode=False):
        """Build the target from zero or more raw inputs.

        String inputs have comments stripped and are dropped when blank;
        non-string inputs are kept if truthy.
        """
        # strip comments and ignore blank targets
        targets = [stripped for t in targets if (stripped := (strip_comments(t).strip() if isinstance(t, str) else t))]
        self.strict_scope = strict_scope
        self._rt = RadixTarget(strict_scope=strict_scope, acl_mode=acl_mode)
        self.event_seeds = set()
        if targets:
            self.add(targets)

    @property
    def inputs(self):
        """Set of the `input` attribute of every stored event seed."""
        return {e.input for e in self.event_seeds}

    @property
    def hosts(self):
        """Set of the hosts currently held by the radix tree."""
        return set(self._rt.hosts)

    @property
    def hash(self):
        """Hash of the target as 8 big-endian (signed) bytes.

        Starts from the radix tree's hash and flips the lowest bit when
        strict_scope is set, so the strictness setting is part of the identity.
        """
        h = self._rt.hash
        if self.strict_scope:
            h ^= 1
        return h.to_bytes(8, "big", signed=True)

    def get(self, event, **kwargs):
        """Look up a host in the radix tree.

        Accepts events, URLs, emails, host:port strings, IPs, CIDRs, and hostnames.
        Returns the stored data for the matching host, or None.

        Keyword Args:
            raise_error (bool): When True, raise KeyError if no host could be
                derived from `event`. A lookup that simply misses still
                returns whatever the radix tree returns.

        Raises:
            KeyError: If `raise_error` is set and `event` yields no host.
        """
        raise_error = kwargs.get("raise_error", False)
        host_str = _host_str(event)
        if host_str is None:
            if raise_error:
                raise KeyError(f"Host not found: '{event}'")
            return None
        return self._rt.get(host_str)

    def add(self, targets, data=None):
        """Parse and store one or more targets.

        Args:
            targets: A single target or a list/set/tuple of targets. Items may
                be raw values (parsed with EventSeed) or pre-parsed seeds.
            data: Optional payload to store for each host; when None, the
                event seed itself is stored.
        """
        if not isinstance(targets, (list, set, tuple)):
            targets = [targets]
        event_seeds = set()
        for target in targets:
            # accept pre-parsed EventSeed objects to avoid expensive re-parsing
            if isinstance(target, BaseEventSeed):
                event_seed = target
            else:
                event_seed = EventSeed(target)
            if event_seed._target_type not in self.accept_target_types:
                log.warning(f"Invalid target type for {self.__class__.__name__}: {event_seed.type}")
                continue
            event_seeds.add(event_seed)

        # sort by host size to ensure consistency
        event_seeds = sorted(event_seeds, key=lambda e: (0, 0) if not e.host else host_size_key(str(e.host)))
        for event_seed in event_seeds:
            self.event_seeds.add(event_seed)
            # Some event seeds (e.g. ORG_STUB, USERNAME, BLACKLIST_REGEX) are not host-based and have
            # host == None. These are still useful as parsed target entries, but cannot always be
            # represented in the underlying RadixTarget tree, which expects a concrete host.
            # Subclasses like ScanBlacklist may still need to see these entries (for regex handling,
            # etc.), so we always call self._add() and let the subclass decide whether to forward to
            # the radix layer.
            self._add(event_seed.host, data=(event_seed if data is None else data))

    def _add(self, host, data):
        """Insert a host into the radix tree.

        The radix tree cannot handle host == None, but some subclasses (e.g. ScanBlacklist)
        need to receive non-host-based entries such as BLACKLIST_REGEX. BaseTarget.add()
        always calls self._add(); this default implementation safely ignores hostless
        entries while still delegating normal hosts to the radix tree.
        """
        if host is None:
            return
        self._rt.insert(str(host), data=data)

    # RFC 2317 classless reverse delegation names contain "/" in their labels
    # (e.g. "207.128/25.38.186.64.in-addr.arpa").  These are valid DNS wire
    # names but fail hostname validation.  Hickory-DNS may also backslash-
    # escape the slash in presentation format ("128\/25").
    _rfc2317_re = re.compile(r"[/\\].*\.in-addr\.arpa$|[/\\].*\.ip6\.arpa$", re.IGNORECASE)

    def _make_event_seed(self, target, raise_error=False):
        """Parse `target` into an EventSeed, tolerating invalid input.

        Returns:
            The EventSeed, or None when validation fails and `raise_error`
            is False (the failure is logged instead).

        Raises:
            KeyError: If validation fails and `raise_error` is True.
        """
        try:
            return EventSeed(target)
        except ValidationError as e:
            msg = f"Invalid target: '{target}'"
            if raise_error:
                # chain explicitly so the validation failure stays visible as the cause
                raise KeyError(msg) from e
            if self._rfc2317_re.search(str(target)):
                # expected noise, not worth a warning
                log.verbose(f"Skipping RFC 2317 classless delegation name: '{target}'")
            else:
                import traceback

                log.warning(msg)
                log.trace("".join(traceback.format_stack()))
            return None

    def __contains__(self, other):
        """Membership test.

        Another BaseTarget is contained when every one of its hosts resolves
        in this target. Any other value is looked up with get(); a ValueError
        or TypeError from the lookup counts as "not contained".
        """
        if isinstance(other, BaseTarget):
            for h in other.hosts:
                if self.get(str(h)) is None:
                    return False
            return True
        try:
            return self.get(other) is not None
        except (ValueError, TypeError):
            return False

    def __iter__(self):
        """Iterate over the stored event seeds."""
        yield from self.event_seeds

    def __len__(self):
        """Size reported by the radix tree (hostless seeds are not counted)."""
        return len(self._rt)

    def __bool__(self):
        """True when the radix tree is non-empty or any seed is stored."""
        return bool(len(self._rt)) or bool(self.event_seeds)

    def __getstate__(self):
        """Pickle only the seeds and settings; the radix tree is rebuilt on load."""
        return {
            "event_seeds": self.event_seeds,
            "strict_scope": self.strict_scope,
            "acl_mode": self._rt._acl_mode,
        }

    def __setstate__(self, state):
        """Recreate the radix tree and re-insert every pickled seed."""
        self.strict_scope = state["strict_scope"]
        self._rt = RadixTarget(strict_scope=state["strict_scope"], acl_mode=state["acl_mode"])
        self.event_seeds = set()
        for event_seed in state["event_seeds"]:
            self.event_seeds.add(event_seed)
            self._add(event_seed.host, data=event_seed)

    def __eq__(self, other):
        """Targets are equal when their hashes match; objects without a hash never match."""
        return self.hash == getattr(other, "hash", None)

    def __hash__(self):
        return hash(self.hash)

get

get(event, **kwargs)

Look up a host in the radix tree.

Accepts events, URLs, emails, host:port strings, IPs, CIDRs, and hostnames. Returns the stored data for the matching host, or None.

Source code in bbot/scanner/target.py
87
88
89
90
91
92
93
94
95
96
97
98
99
def get(self, event, **kwargs):
    """Resolve `event` to a host and fetch its entry from the radix tree.

    `event` may be an event, URL, email, host:port string, IP, CIDR, or hostname.
    The radix tree's result is returned as-is. When no host can be derived,
    None is returned unless `raise_error=True` was passed, in which case a
    KeyError is raised.
    """
    key = _host_str(event)
    if key is not None:
        return self._rt.get(key)
    # no usable host: either fail loudly or report "not found"
    if kwargs.get("raise_error", False):
        raise KeyError(f"Host not found: '{event}'")
    return None

ScanSeeds

Bases: BaseTarget

Initial events used to seed a scan.

These are the seeds specified by the user, e.g. via -s on the CLI. If no seeds were specified, the targets (-t) are copied here.

Source code in bbot/scanner/target.py
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
class ScanSeeds(BaseTarget):
    """
    The events a scan starts from.

    Seeds come from the user (e.g. `-s` on the CLI). When none are given,
    the targets (`-t`) are copied here instead.
    """

    def get(self, event, single=True, **kwargs):
        """Look up the seeds stored for a host.

        With `single=True` (the default) one arbitrary member of the stored
        set is returned; with `single=False` the whole stored value is returned.
        Empty or missing results are returned unchanged.
        """
        found = super().get(event, **kwargs)
        if not (found and single):
            return found
        return next(iter(found))

    def _add(self, host, data):
        """
        Store `data` under `host`, keeping every seed that shares the host.

        Unlike the base implementation, each node holds a set of events, so
        evilcorp.com:80 and https://evilcorp.com can coexist as separate
        seeds even though their host is identical. Falsy hosts are ignored.
        """
        if not host:
            return
        bucket = self.get(str(host), raise_error=False, single=False)
        if bucket is None:
            bucket = {data}
        else:
            bucket.add(data)
        super()._add(host, data=bucket)

    @property
    def hash(self):
        """Hash of the seeds, derived from their event data rather than hosts."""
        seed_data = sorted(str(seed.data) for seed in self.event_seeds)
        digest = _fnv1a_64(seed_data)
        return digest.to_bytes(8, "big")

hash property

hash

Seeds get hashed by event data, not by hosts.

ScanTarget

Bases: ACLTarget

A collection of BBOT events that represent a scan's targets.

Source code in bbot/scanner/target.py
253
254
255
256
257
258
class ScanTarget(ACLTarget):
    """
    The set of BBOT events that make up a scan's targets.

    Adds nothing of its own; all behavior comes from ACLTarget.
    """

ScanBlacklist

Bases: ACLTarget

A collection of BBOT events that represent a scan's blacklist.

Source code in bbot/scanner/target.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
class ScanBlacklist(ACLTarget):
    """
    The set of BBOT events that make up a scan's blacklist.

    Besides host entries, the blacklist keeps a set of compiled regexes
    (from BLACKLIST_REGEX seeds) that are matched against looked-up values.
    """

    accept_target_types = ["TARGET", "BLACKLIST"]

    def __init__(self, *args, **kwargs):
        # must exist before the parent constructor runs, since it is created
        # ahead of any _add() call made during construction
        self.blacklist_regexes = set()
        super().__init__(*args, **kwargs)

    def __setstate__(self, state):
        # same ordering as in __init__: reset the regex set, then restore
        self.blacklist_regexes = set()
        super().__setstate__(state)

    def get(self, host, **kwargs):
        """
        Check a string or IP against the blacklist (hosts first, then regexes).

        Only IPs and strings are accepted, which keeps regex matching simple.
        Returns the parent lookup's result on a host match, the host on a regex
        match, and None otherwise (or raises KeyError if `raise_error=True`).
        """
        if not (is_ip_type(host) or isinstance(host, str)):
            raise ValueError(f"Invalid target type for {self.__class__.__name__}: {type(host)}")
        raise_error = kwargs.get("raise_error", False)
        # step 1: parse the input so we know its host and the text to regex-match
        try:
            seed = self._make_event_seed(host, raise_error=raise_error)
            if seed is None:
                to_match = str(host)
            else:
                host = seed.host
                to_match = seed.data
        except ValidationError:
            to_match = str(host)
        # step 2: host-based lookup
        hit = super().get(host)
        if hit is not None:
            return hit
        # step 3: regex-based lookup
        if any(rx.search(to_match) for rx in self.blacklist_regexes):
            return host
        if raise_error:
            raise KeyError(f"Host not found: '{host}'")
        return None

    def _add(self, host, data):
        """Record regex entries locally and forward host entries to the parent."""
        is_regex = getattr(data, "type", "") == "BLACKLIST_REGEX"
        if is_regex:
            pattern = re.compile(data.data)
            self.blacklist_regexes.add(pattern)
        if host is None:
            return
        super()._add(host, data)

    @property
    def hash(self):
        """Hash covering both the blacklisted hosts and the regex patterns."""
        tree_hash = self._rt.hash
        regex_hash = _fnv1a_64(sorted(rx.pattern for rx in self.blacklist_regexes))
        # mask to 64 bits so the value always fits in 8 unsigned bytes
        combined = (tree_hash ^ regex_hash) & 0xFFFFFFFFFFFFFFFF
        return combined.to_bytes(8, "big")

    def __len__(self):
        """Number of radix-tree entries plus number of regexes."""
        host_count = len(self._rt)
        return host_count + len(self.blacklist_regexes)

    def __bool__(self):
        return len(self) != 0

hash property

hash

Blacklist hash includes both hosts and regex patterns.

get

get(host, **kwargs)

Blacklists only accept IPs or strings. This is cleaner since we need to search for regex patterns.

Source code in bbot/scanner/target.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
def get(self, host, **kwargs):
    """
    Check a string or IP against the blacklist (hosts first, then regexes).

    Only IPs and strings are accepted, which keeps regex matching simple.
    Returns the parent lookup's result on a host match, the host on a regex
    match, and None otherwise (or raises KeyError if `raise_error=True`).
    """
    if not (is_ip_type(host) or isinstance(host, str)):
        raise ValueError(f"Invalid target type for {self.__class__.__name__}: {type(host)}")
    raise_error = kwargs.get("raise_error", False)
    # step 1: parse the input so we know its host and the text to regex-match
    try:
        seed = self._make_event_seed(host, raise_error=raise_error)
        if seed is None:
            to_match = str(host)
        else:
            host = seed.host
            to_match = seed.data
    except ValidationError:
        to_match = str(host)
    # step 2: host-based lookup
    hit = super().get(host)
    if hit is not None:
        return hit
    # step 3: regex-based lookup
    if any(rx.search(to_match) for rx in self.blacklist_regexes):
        return host
    if raise_error:
        raise KeyError(f"Host not found: '{host}'")
    return None

BBOTTarget

A convenient abstraction of a scan target that contains three subtargets:
  • seeds
  • target
  • blacklist

Provides high-level functions like in_scope(), which includes both target and blacklist checks.

Source code in bbot/scanner/target.py
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
class BBOTTarget:
    """
    A convenient abstraction of a scan target that contains three subtargets:
        - seeds
        - target
        - blacklist

    Provides high-level functions like in_scope(), which includes both target and blacklist checks.
    """

    def __init__(self, seeds=None, target=None, blacklist=None, strict_scope=False):
        """Build the three subtargets.

        Args:
            seeds: Iterable of seed inputs, or None to reuse the target's seeds.
            target: Iterable of target inputs (may be None/empty).
            blacklist: Iterable of blacklist inputs (may be None/empty).
            strict_scope: Passed to the target and seeds subtargets.
        """
        self.strict_scope = strict_scope
        # remembered so `json` can tell explicit seeds from copied ones
        self._orig_seeds = seeds

        target_list = list(target) if target else []
        self.target = ScanTarget(*target_list, strict_scope=strict_scope)

        # Seeds are only copied from target if target is defined but seeds are NOT defined
        # Pass pre-parsed event_seeds to avoid expensive re-parsing of every target string
        if seeds is None:
            seeds = list(self.target.event_seeds)
        self.seeds = ScanSeeds(*list(seeds), strict_scope=strict_scope)

        blacklist_list = list(blacklist) if blacklist else []
        self.blacklist = ScanBlacklist(*blacklist_list)

    @property
    def json(self):
        """JSON-friendly summary: sorted inputs, strict_scope, and hex hashes.

        The "seeds" key is only present when seeds were passed explicitly.
        """
        j = {
            "target": sorted(self.target.inputs),
            "blacklist": sorted(self.blacklist.inputs),
            "strict_scope": self.strict_scope,
            "hash": self.hash.hex(),
            "seed_hash": self.seeds.hash.hex(),
            "target_hash": self.target.hash.hex(),
            "blacklist_hash": self.blacklist.hash.hex(),
            "scope_hash": self.scope_hash.hex(),
        }
        if self._orig_seeds is not None:
            j["seeds"] = sorted(self.seeds.inputs)
        return j

    @property
    def hash(self):
        """Concatenated hashes of seeds, target, and blacklist (in that order)."""
        return b"".join(t.hash for t in (self.seeds, self.target, self.blacklist))

    @property
    def scope_hash(self):
        """Concatenated hashes of target and blacklist only (seeds excluded)."""
        return b"".join(t.hash for t in (self.target, self.blacklist))

    def in_scope(self, host):
        """
        Check whether a hostname, url, IP, etc. is in scope.
        Accepts either events or string data.

        This method checks both target AND blacklist.
        A host is in-scope if it is in the target AND not blacklisted.

        Note: This is different from `in_target()` which only checks the target.
        - `in_target()`: checks if host is in the target
        - `in_scope()`: checks if host is in the target AND not blacklisted

        Examples:
            Check if a URL is in scope:
            >>> preset.in_scope("http://www.evilcorp.com")
            True
        """
        # the blacklist is consulted first, so it always wins over the target
        if self.blacklisted(host):
            return False
        return self.in_target(host)

    def blacklisted(self, host):
        """
        Check whether a hostname, url, IP, etc. is blacklisted.

        Note that `host` can be a hostname, IP address, CIDR, email address, or any BBOT `Event` with the `host` attribute.

        NOTE(review): ScanBlacklist.get() raises ValueError for anything that is
        neither a string nor an IP type, so passing an Event here appears to
        raise rather than return a boolean -- confirm against callers.

        Args:
            host (str or IPAddress or Event): The host to check against the blacklist

        Examples:
            Check if a URL's host is blacklisted:
            >>> preset.blacklisted("http://www.evilcorp.com")
            True
        """
        return self.blacklist.get(host) is not None

    def in_target(self, host):
        """
        Check whether a hostname, url, IP, etc. is in the target.

        This method ONLY checks the target, NOT the blacklist.
        Use `in_scope()` to check both target AND blacklist.

        Note that `host` can be a hostname, IP address, CIDR, email address, or any BBOT `Event` with the `host` attribute.

        Args:
            host (str or IPAddress or Event): The host to check against the target

        Examples:
            Check if a URL's host is in target:
            >>> preset.in_target("http://www.evilcorp.com")
            True
        """
        return self.target.get(host) is not None

    def __eq__(self, other):
        """Equal when the combined hashes match.

        Objects without a `hash` attribute compare unequal instead of raising
        AttributeError.

        NOTE: because only __eq__ is defined, Python leaves instances of this
        class unhashable.
        """
        return self.hash == getattr(other, "hash", None)

    async def generate_children(self, ssl_verify=False):
        """
        Generate children for the target, for seed types that expand into other seed types.
        E.g. ASN targets are expanded into their constituent IP ranges.

        Args:
            ssl_verify: Forwarded to each seed's `_generate_children()`.
        """
        # If the user explicitly set a narrower target scope than their seeds
        # (e.g. `-t evilcorp.com -s AS1234`), don't widen the target with expanded seeds
        has_explicit_scope = set(self.target.inputs) != set(self.seeds.inputs)

        # Expand seeds first (iterate a snapshot, since add() mutates the set)
        for event_seed in list(self.seeds.event_seeds):
            children = await event_seed._generate_children(ssl_verify=ssl_verify)
            for child in children:
                self.seeds.add(child)

        # Also expand blacklist event seeds (like ASN targets)
        for event_seed in list(self.blacklist.event_seeds):
            children = await event_seed._generate_children(ssl_verify=ssl_verify)
            for child in children:
                self.blacklist.add(child)

        # Widen target scope to include expanded seed hosts (e.g. IP ranges from ASN),
        # but only when seeds and target were originally the same
        if not has_explicit_scope:
            expanded_seed_hosts = set(self.seeds.hosts)
            for host in expanded_seed_hosts:
                if host not in self.target:
                    self.target.add(host)

blacklisted

blacklisted(host)

Check whether a hostname, url, IP, etc. is blacklisted.

Note that host can be a hostname, IP address, CIDR, email address, or any BBOT Event with the host attribute.

Parameters:

  • host (str or IPAddress or Event) –

    The host to check against the blacklist

Examples:

Check if a URL's host is blacklisted:

>>> preset.blacklisted("http://www.evilcorp.com")
True
Source code in bbot/scanner/target.py
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
def blacklisted(self, host):
    """
    Report whether a hostname, url, IP, etc. matches the blacklist.

    `host` is handed straight to the blacklist's `get()`; any non-None
    result counts as blacklisted.

    Args:
        host (str or IPAddress or Event): The host to check against the blacklist

    Returns:
        bool: True if the blacklist lookup returned something, else False.

    Examples:
        Check if a URL's host is blacklisted:
        >>> preset.blacklisted("http://www.evilcorp.com")
        True
    """
    match = self.blacklist.get(host)
    return match is not None

generate_children async

generate_children(ssl_verify=False)

Generate children for the target, for seed types that expand into other seed types. E.g. ASN targets are expanded into their constituent IP ranges.

Source code in bbot/scanner/target.py
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
async def generate_children(self, ssl_verify=False):
    """
    Expand seeds that stand for other seeds (e.g. an ASN becomes its IP ranges).

    Seeds are expanded first, then blacklist entries. Afterwards the target is
    widened with the expanded seed hosts, but only if target and seeds started
    out with identical inputs.
    """
    # Identical inputs mean the user did not narrow the target explicitly
    # (as in `-t evilcorp.com -s AS1234`), so widening is allowed.
    may_widen_target = not (set(self.target.inputs) != set(self.seeds.inputs))

    # Seeds first, blacklist second; snapshot each set because add() mutates it.
    for subtarget in (self.seeds, self.blacklist):
        for seed in list(subtarget.event_seeds):
            for child in await seed._generate_children(ssl_verify=ssl_verify):
                subtarget.add(child)

    if not may_widen_target:
        return

    # Pull any newly discovered seed hosts (e.g. IP ranges from an ASN) into the target.
    for seed_host in set(self.seeds.hosts):
        if seed_host not in self.target:
            self.target.add(seed_host)

in_scope

in_scope(host)

Check whether a hostname, url, IP, etc. is in scope. Accepts either events or string data.

This method checks both target AND blacklist. A host is in-scope if it is in the target AND not blacklisted.

Note: This is different from in_target(), which only checks the target. in_target() checks whether the host is in the target; in_scope() checks whether the host is in the target AND not blacklisted.

Examples:

Check if a URL is in scope:

>>> preset.in_scope("http://www.evilcorp.com")
True
Source code in bbot/scanner/target.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
def in_scope(self, host):
    """
    Decide whether a hostname, url, IP, etc. is in scope.

    The blacklist is consulted first: a blacklisted host is never in scope.
    Otherwise the answer is whatever `in_target()` reports.

    Compare with `in_target()`, which ignores the blacklist entirely:
    - `in_target()`: host is in the target
    - `in_scope()`: host is in the target AND not blacklisted

    Examples:
        Check if a URL is in scope:
        >>> preset.in_scope("http://www.evilcorp.com")
        True
    """
    return False if self.blacklisted(host) else self.in_target(host)

in_target

in_target(host)

Check whether a hostname, url, IP, etc. is in the target.

This method ONLY checks the target, NOT the blacklist. Use in_scope() to check both target AND blacklist.

Note that host can be a hostname, IP address, CIDR, email address, or any BBOT Event with the host attribute.

Parameters:

  • host (str or IPAddress or Event) –

    The host to check against the target

Examples:

Check if a URL's host is in target:

>>> preset.in_target("http://www.evilcorp.com")
True
Source code in bbot/scanner/target.py
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
def in_target(self, host):
    """
    Report whether a hostname, url, IP, etc. is in the target.

    Only the target is consulted here, never the blacklist; use
    `in_scope()` when both should be taken into account.

    `host` is handed straight to the target's `get()`; any non-None
    result counts as a match.

    Args:
        host (str or IPAddress or Event): The host to check against the target

    Returns:
        bool: True if the target lookup returned something, else False.

    Examples:
        Check if a URL's host is in target:
        >>> preset.in_target("http://www.evilcorp.com")
        True
    """
    match = self.target.get(host)
    return match is not None