Skip to content

BaseTarget

Bases: RadixTarget

A collection of BBOT events that represent a scan target.

The purpose of this class is to hold a potentially huge target list in a space-efficient way, while allowing lightning fast scope lookups.

This class is inherited by all three components of the BBOT target:
  • Whitelist
  • Blacklist
  • Seeds
Source code in bbot/scanner/target.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class BaseTarget(RadixTarget):
    """
    A collection of BBOT events that represent a scan target.

    The purpose of this class is to hold a potentially huge target list in a space-efficient way,
    while allowing lightning fast scope lookups.

    This class is inherited by all three components of the BBOT target:
        - Whitelist
        - Blacklist
        - Seeds

    Attributes:
        accept_target_types: Event seed target types that add() will accept; seeds of any
            other type are skipped with a warning.
        event_seeds: Set of every event seed that has been added to this target.
    """

    accept_target_types = ["TARGET"]

    def __init__(self, *targets, **kwargs):
        """
        Args:
            *targets: Initial targets. Strings are stripped of surrounding whitespace.
            **kwargs: Passed through unchanged to RadixTarget.__init__().
        """
        # ignore blank targets (sometimes happens as a symptom of .splitlines())
        targets = [stripped for t in targets if (stripped := (t.strip() if isinstance(t, str) else t))]
        # created before super().__init__() so it exists if the parent constructor
        # populates the target — presumably via add(); verify against RadixTarget
        self.event_seeds = set()
        super().__init__(*targets, **kwargs)

    @property
    def inputs(self):
        """The set of `input` values of all event seeds in this target."""
        return {e.input for e in self.event_seeds}

    def get(self, event, **kwargs):
        """
        Here we override RadixTarget's get() method, which normally only accepts hosts, to also accept events for convenience.

        Args:
            event: An event, an event seed, an IP / network, a DNS name, or any other
                string that can be parsed into an event seed.
            **kwargs: Forwarded to RadixTarget.get(). `raise_error` (default False) is also
                read here to decide between raising and logging when no host is available.

        Returns:
            Whatever RadixTarget.get() returns for the resolved host, or None when the
            input is invalid or has no host and `raise_error` is False.

        Raises:
            ValueError: If `event` is not one of the supported types.
            KeyError: If `raise_error` is True and the input is invalid or has no host.
        """
        host = None
        raise_error = kwargs.get("raise_error", False)
        # if it's already an event or event seed, use its host
        if is_event(event) or isinstance(event, BaseEventSeed):
            host = event.host
        # save resources by checking if the event is an IP or DNS name
        elif is_ip(event, include_network=True) or is_dns_name(event):
            host = event
        # if it's a string, autodetect its type and parse out its host
        elif isinstance(event, str):
            event_seed = self._make_event_seed(event, raise_error=raise_error)
            # _make_event_seed() returns None (after logging a warning) when the string is
            # invalid and raise_error is False; bail out instead of dereferencing None
            if event_seed is None:
                return None
            host = event_seed.host
            if not host:
                return None
        else:
            raise ValueError(f"Invalid target type for {self.__class__.__name__}: {type(event)}")
        if not host:
            msg = f"Host not found: '{event}'"
            if raise_error:
                raise KeyError(msg)
            else:
                log.warning(msg)
                return None
        results = super().get(host, **kwargs)
        return results

    def _make_event_seed(self, target, raise_error=False):
        """
        Build an EventSeed from `target`.

        Args:
            target: The raw target to parse.
            raise_error: If True, an invalid target raises KeyError; otherwise a warning
                is logged.

        Returns:
            The EventSeed, or None if `target` is invalid and `raise_error` is False.

        Raises:
            KeyError: If `target` fails validation and `raise_error` is True.
        """
        try:
            return EventSeed(target)
        except ValidationError as e:
            msg = f"Invalid target: '{target}'"
            if raise_error:
                raise KeyError(msg) from e
            log.warning(msg)
            return None

    def add(self, targets, data=None):
        """
        Add one or more targets.

        Args:
            targets: A single target, or a list / set / tuple of targets. Each one is
                converted to an EventSeed.
            data: Optional payload stored for every added host. When None, the event
                seed itself is stored.
        """
        if not isinstance(targets, (list, set, tuple)):
            targets = [targets]
        event_seeds = set()
        for target in targets:
            event_seed = EventSeed(target)
            if event_seed._target_type not in self.accept_target_types:
                log.warning(f"Invalid target type for {self.__class__.__name__}: {event_seed.type}")
                continue
            event_seeds.add(event_seed)

        # sort by host size to ensure consistency
        event_seeds = sorted(event_seeds, key=lambda e: ((0, 0) if not e.host else host_size_key(e.host)))
        for event_seed in event_seeds:
            self.event_seeds.add(event_seed)
            self._add(event_seed.host, data=(event_seed if data is None else data))

    def __iter__(self):
        """Iterate over the event seeds in this target."""
        yield from self.event_seeds

get

get(event, **kwargs)

Here we override RadixTarget's get() method, which normally only accepts hosts, to also accept events for convenience.

Source code in bbot/scanner/target.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def get(self, event, **kwargs):
    """
    Here we override RadixTarget's get() method, which normally only accepts hosts, to also accept events for convenience.

    Args:
        event: An event, an event seed, an IP / network, a DNS name, or any other
            string that can be parsed into an event seed.
        **kwargs: Forwarded to the parent get(). `raise_error` (default False) is also
            read here to decide between raising and logging when no host is available.

    Returns:
        Whatever the parent get() returns for the resolved host, or None when the
        input yields no usable host and `raise_error` is False.

    Raises:
        ValueError: If `event` is not one of the supported types.
        KeyError: If `raise_error` is True and no host could be determined.
    """
    host = None
    raise_error = kwargs.get("raise_error", False)
    # if it's already an event or event seed, use its host
    if is_event(event) or isinstance(event, BaseEventSeed):
        host = event.host
    # save resources by checking if the event is an IP or DNS name
    elif is_ip(event, include_network=True) or is_dns_name(event):
        host = event
    # if it's a string, autodetect its type and parse out its host
    elif isinstance(event, str):
        event_seed = self._make_event_seed(event, raise_error=raise_error)
        # the seed factory may hand back None instead of raising when raise_error is
        # False; bail out rather than dereferencing None
        if event_seed is None:
            return None
        host = event_seed.host
        if not host:
            return None
    else:
        raise ValueError(f"Invalid target type for {self.__class__.__name__}: {type(event)}")
    if not host:
        msg = f"Host not found: '{event}'"
        if raise_error:
            raise KeyError(msg)
        else:
            log.warning(msg)
            return None
    results = super().get(host, **kwargs)
    return results

ScanSeeds

Bases: BaseTarget

Initial events used to seed a scan.

These are the targets specified by the user, e.g. via -t on the CLI.

Source code in bbot/scanner/target.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
class ScanSeeds(BaseTarget):
    """
    The initial events that kick off a scan.

    They come straight from the user-supplied targets, e.g. the `-t` option on the CLI.
    """

    def get(self, event, single=True, **kwargs):
        """
        Look up the seed(s) stored for `event`.

        Args:
            event: Anything accepted by the parent class's get().
            single: When True and the lookup produced a non-empty result, return only
                the first item yielded by iterating over it; otherwise return the
                lookup result unchanged.
            **kwargs: Forwarded to the parent class's get().
        """
        matches = super().get(event, **kwargs)
        if not (matches and single):
            return matches
        return next(iter(matches))

    def _add(self, host, data):
        """
        Store `data` under `host`, keeping every entry that shares the same host.

        Unlike the base implementation, the value kept for a host is a set, so that
        e.g. evilcorp.com:80 and https://evilcorp.com can coexist as separate seeds
        even though they resolve to the same host.

        Nothing is stored when `host` is falsy.
        """
        if not host:
            return
        try:
            # raise_error=True turns a missing host into a KeyError, which is the
            # signal to start a brand-new set for it
            seeds_for_host = self.get(host, raise_error=True, single=False)
            seeds_for_host.add(data)
        except KeyError:
            seeds_for_host = {data}
        super()._add(host, data=seeds_for_host)

    def _hash_value(self):
        """Return the sorted, encoded string form of each seed's data (seeds hash by event data)."""
        encoded = [str(seed.data).encode() for seed in self.event_seeds]
        encoded.sort()
        return encoded

ScanWhitelist

Bases: ACLTarget

A collection of BBOT events that represent a scan's whitelist.

Source code in bbot/scanner/target.py
143
144
145
146
147
148
class ScanWhitelist(ACLTarget):
    """
    The whitelist of a scan, held as a collection of BBOT events.

    Adds no behavior of its own; everything is inherited from ACLTarget.
    """

ScanBlacklist

Bases: ACLTarget

A collection of BBOT events that represent a scan's blacklist.

Source code in bbot/scanner/target.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
class ScanBlacklist(ACLTarget):
    """
    A collection of BBOT events that represent a scan's blacklist.

    In addition to hosts, a blacklist can hold compiled regular expressions
    (`blacklist_regexes`) which are searched when a host lookup finds nothing.
    """

    accept_target_types = ["TARGET", "BLACKLIST"]

    def __init__(self, *args, **kwargs):
        # created before super().__init__() so it exists if the parent constructor
        # ends up calling _add() — presumably it does; verify against the parent class
        self.blacklist_regexes = set()
        super().__init__(*args, **kwargs)

    def get(self, host, **kwargs):
        """
        Blacklists only accept IPs or strings. This is cleaner since we need to search for regex patterns.

        Args:
            host: An IP type or a string.
            **kwargs: Only `raise_error` (default False) is read.

        Returns:
            The result of the host lookup if there is one; otherwise the host if any
            blacklist regex matches; otherwise None.

        Raises:
            ValueError: If `host` is neither an IP type nor a string.
            KeyError: If nothing matched and `raise_error` is True.
        """
        if not (is_ip_type(host) or isinstance(host, str)):
            raise ValueError(f"Invalid target type for {self.__class__.__name__}: {type(host)}")
        raise_error = kwargs.get("raise_error", False)
        # first, check event's host against blacklist
        try:
            # Always ask for an exception here: an unparseable value must not abort the
            # lookup (nor log a warning), because it may still match a blacklist regex.
            # KeyError is what the seed factory raises for invalid input; ValidationError
            # is kept in case an override lets it through.
            event_seed = self._make_event_seed(host, raise_error=True)
        except (KeyError, ValidationError):
            event_seed = None
        if event_seed is None:
            # not a valid target; fall back to matching the raw value against the regexes
            to_match = str(host)
        else:
            host = event_seed.host
            to_match = event_seed.data
        try:
            event_result = super().get(host, raise_error=True)
        except KeyError:
            event_result = None
        if event_result is not None:
            return event_result
        # next, check event's host against regexes
        for regex in self.blacklist_regexes:
            if regex.search(to_match):
                return host
        if raise_error:
            raise KeyError(f"Host not found: '{host}'")
        return None

    def _add(self, host, data):
        """
        Register `data` under `host`; BLACKLIST_REGEX entries are also compiled and
        stored in `blacklist_regexes`. Entries whose host is None are not passed to
        the parent _add().
        """
        if getattr(data, "type", "") == "BLACKLIST_REGEX":
            self.blacklist_regexes.add(re.compile(data.data))
        if host is not None:
            super()._add(host, data)

    def _hash_value(self):
        """Return the encoded hosts followed by the encoded regex patterns."""
        # regexes are included in blacklist hash
        # blacklist_regexes is a set, so sort the patterns to keep the order deterministic
        regex_patterns = sorted(str(r.pattern).encode() for r in self.blacklist_regexes)
        hosts = [str(h).encode() for h in self.sorted_hosts]
        return hosts + regex_patterns

    def __len__(self):
        """Number of entries reported by the parent class plus the number of regexes."""
        return super().__len__() + len(self.blacklist_regexes)

    def __bool__(self):
        """True if the blacklist contains at least one host or regex."""
        return bool(len(self))

get

get(host, **kwargs)

Blacklists only accept IPs or strings. This is cleaner since we need to search for regex patterns.

Source code in bbot/scanner/target.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def get(self, host, **kwargs):
    """
    Blacklists only accept IPs or strings. This is cleaner since we need to search for regex patterns.

    Args:
        host: An IP type or a string.
        **kwargs: Only `raise_error` (default False) is read.

    Returns:
        The result of the host lookup if there is one; otherwise the host if any
        blacklist regex matches; otherwise None.

    Raises:
        ValueError: If `host` is neither an IP type nor a string.
        KeyError: If nothing matched and `raise_error` is True.
    """
    if not (is_ip_type(host) or isinstance(host, str)):
        raise ValueError(f"Invalid target type for {self.__class__.__name__}: {type(host)}")
    raise_error = kwargs.get("raise_error", False)
    # first, check event's host against blacklist
    try:
        # Always ask for an exception here: an unparseable value must not abort the
        # lookup (nor log a warning), because it may still match a blacklist regex.
        # KeyError is what the seed factory is expected to raise for invalid input;
        # ValidationError is kept in case an override lets it through.
        event_seed = self._make_event_seed(host, raise_error=True)
    except (KeyError, ValidationError):
        event_seed = None
    if event_seed is None:
        # not a valid target; fall back to matching the raw value against the regexes
        to_match = str(host)
    else:
        host = event_seed.host
        to_match = event_seed.data
    try:
        event_result = super().get(host, raise_error=True)
    except KeyError:
        event_result = None
    if event_result is not None:
        return event_result
    # next, check event's host against regexes
    for regex in self.blacklist_regexes:
        if regex.search(to_match):
            return host
    if raise_error:
        raise KeyError(f"Host not found: '{host}'")
    return None

BBOTTarget

A convenient abstraction of a scan target that contains three subtargets:
  • seeds
  • whitelist
  • blacklist

Provides high-level functions like in_scope(), which includes both whitelist and blacklist checks.

Source code in bbot/scanner/target.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
class BBOTTarget:
    """
    A convenient abstraction of a scan target that contains three subtargets:
        - seeds
        - whitelist
        - blacklist

    Provides high-level functions like in_scope(), which includes both whitelist and blacklist checks.
    """

    def __init__(self, *seeds, whitelist=None, blacklist=None, strict_scope=False):
        """
        Args:
            *seeds: Targets used to seed the scan.
            whitelist: Iterable of whitelist entries. When None, the seeds' hosts are used.
            blacklist: Iterable of blacklist entries. When None, the blacklist is empty.
            strict_scope: Stored on the instance and passed as `strict_dns_scope` to the
                seeds and the whitelist (not to the blacklist).
        """
        self.strict_scope = strict_scope
        self.seeds = ScanSeeds(*seeds, strict_dns_scope=strict_scope)
        if whitelist is None:
            whitelist = self.seeds.hosts
        self.whitelist = ScanWhitelist(*whitelist, strict_dns_scope=strict_scope)
        if blacklist is None:
            blacklist = []
        self.blacklist = ScanBlacklist(*blacklist)

    @property
    def json(self):
        """A dict summary of the target: sorted inputs, strict_scope, and hex-encoded hashes."""
        return {
            "seeds": sorted(self.seeds.inputs),
            "whitelist": sorted(self.whitelist.inputs),
            "blacklist": sorted(self.blacklist.inputs),
            "strict_scope": self.strict_scope,
            "hash": self.hash.hex(),
            "seed_hash": self.seeds.hash.hex(),
            "whitelist_hash": self.whitelist.hash.hex(),
            "blacklist_hash": self.blacklist.hash.hex(),
            "scope_hash": self.scope_hash.hex(),
        }

    @property
    def hash(self):
        """SHA-1 digest (bytes) over the seeds, whitelist and blacklist hashes, in that order."""
        sha1_hash = sha1()
        for target in (self.seeds, self.whitelist, self.blacklist):
            sha1_hash.update(target.hash)
        return sha1_hash.digest()

    @property
    def scope_hash(self):
        """SHA-1 digest (bytes) over the whitelist and blacklist hashes only; seeds are ignored."""
        sha1_hash = sha1()
        # Consider only the hash values of the whitelist and blacklist
        for target in (self.whitelist, self.blacklist):
            sha1_hash.update(target.hash)
        return sha1_hash.digest()

    def in_scope(self, host):
        """
        Check whether a hostname, url, IP, etc. is in scope.

        A host is in scope when it is whitelisted and not blacklisted. Both checks are
        always performed (blacklist first). This method applies no scope-distance
        shortcut of its own.

        Args:
            host: The value to check; passed unchanged to blacklisted() and whitelisted().

        Examples:
            Check if a URL is in scope:
            >>> preset.in_scope("http://www.evilcorp.com")
            True
        """
        blacklisted = self.blacklisted(host)
        whitelisted = self.whitelisted(host)
        return whitelisted and not blacklisted

    def blacklisted(self, host):
        """
        Check whether a hostname, url, IP, etc. is blacklisted.

        Implemented as a membership test (`in`) against the blacklist.

        NOTE(review): ScanBlacklist.get() rejects anything that is not an IP type or a
        string; whether Event objects are usable here depends on how membership is
        implemented by the blacklist's parent class — confirm.

        Args:
            host (str or IPAddress or Event): The host to check against the blacklist

        Examples:
            Check if a URL's host is blacklisted:
            >>> preset.blacklisted("http://www.evilcorp.com")
            True
        """
        return host in self.blacklist

    def whitelisted(self, host):
        """
        Check whether a hostname, url, IP, etc. is whitelisted.

        Implemented as a membership test (`in`) against the whitelist.

        Args:
            host (str or IPAddress or Event): The host to check against the whitelist

        Examples:
            Check if a URL's host is whitelisted:
            >>> preset.whitelisted("http://www.evilcorp.com")
            True
        """
        return host in self.whitelist

    def __eq__(self, other):
        """
        Two targets are equal when their combined hashes are equal.

        Objects without a `hash` attribute are not comparable; NotImplemented is
        returned so Python falls back to its default handling (`==` gives False)
        instead of raising AttributeError.
        """
        other_hash = getattr(other, "hash", None)
        if other_hash is None:
            return NotImplemented
        return self.hash == other_hash

blacklisted

blacklisted(host)

Check whether a hostname, url, IP, etc. is blacklisted.

Note that host can be a hostname, IP address, CIDR, email address, or any BBOT Event with the host attribute.

Parameters:

  • host (str or IPAddress or Event) –

    The host to check against the blacklist

Examples:

Check if a URL's host is blacklisted:

>>> preset.blacklisted("http://www.evilcorp.com")
True
Source code in bbot/scanner/target.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
def blacklisted(self, host):
    """
    Report whether a hostname, url, IP, etc. is on the blacklist.

    This is a plain membership test against `self.blacklist`.

    Args:
        host (str or IPAddress or Event): The value to look up in the blacklist

    Returns:
        The result of `host in self.blacklist`.

    Examples:
        Check if a URL's host is blacklisted:
        >>> preset.blacklisted("http://www.evilcorp.com")
        True
    """
    blacklist = self.blacklist
    return host in blacklist

in_scope

in_scope(host)

Check whether a hostname, url, IP, etc. is in scope. Accepts either events or string data.

Checks whitelist and blacklist. If host is an event and its scope distance is zero, it will automatically be considered in-scope.

Examples:

Check if a URL is in scope:

>>> preset.in_scope("http://www.evilcorp.com")
True
Source code in bbot/scanner/target.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
def in_scope(self, host):
    """
    Decide whether a hostname, url, IP, etc. is in scope.

    The value is in scope when it is whitelisted and not blacklisted. Both
    checks are always run, blacklist first, and `host` is passed to them unchanged.

    Examples:
        Check if a URL is in scope:
        >>> preset.in_scope("http://www.evilcorp.com")
        True
    """
    is_blocked = self.blacklisted(host)
    is_allowed = self.whitelisted(host)
    return is_allowed and not is_blocked

whitelisted

whitelisted(host)

Check whether a hostname, url, IP, etc. is whitelisted.

Note that host can be a hostname, IP address, CIDR, email address, or any BBOT Event with the host attribute.

Parameters:

  • host (str or IPAddress or Event) –

    The host to check against the whitelist

Examples:

Check if a URL's host is whitelisted:

>>> preset.whitelisted("http://www.evilcorp.com")
True
Source code in bbot/scanner/target.py
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
def whitelisted(self, host):
    """
    Report whether a hostname, url, IP, etc. is on the whitelist.

    This is a plain membership test against `self.whitelist`.

    Args:
        host (str or IPAddress or Event): The value to look up in the whitelist

    Returns:
        The result of `host in self.whitelist`.

    Examples:
        Check if a URL's host is whitelisted:
        >>> preset.whitelisted("http://www.evilcorp.com")
        True
    """
    whitelist = self.whitelist
    return host in whitelist