diff --git a/subbrute/__init__.py b/subbrute/__init__.py index e69de29..49cb630 100644 --- a/subbrute/__init__.py +++ b/subbrute/__init__.py @@ -0,0 +1,11 @@ +"""subbrute package initializer. + +This exposes the subbrute module at package level so code that does +`from subbrute import subbrute` continues to work when the package is +installed or used from a development checkout. +""" + +# Re-export the subbrute module so `from subbrute import subbrute` works +from . import subbrute + +__all__ = ["subbrute"] diff --git a/sublist3r.py b/sublist3r.py index 760e5ce..9625568 100755 --- a/sublist3r.py +++ b/sublist3r.py @@ -3,7 +3,12 @@ # Sublist3r v1.0 # By Ahmed Aboul-Ela - twitter.com/aboul3la +# Define your Virustotal API key +vt_apikey = 'YOUR API KEY' + + # modules in standard library +from bs4 import BeautifulSoup import re import sys import os @@ -72,7 +77,7 @@ def no_color(): def banner(): - print("""%s + print(r"""%s ____ _ _ _ _ _____ / ___| _ _| |__ | (_)___| |_|___ / _ __ \___ \| | | | '_ \| | / __| __| |_ \| '__| @@ -152,7 +157,7 @@ class enumratorBase(object): self.silent = silent self.verbose = verbose self.headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36', + 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.8', 'Accept-Encoding': 'gzip', @@ -180,7 +185,7 @@ class enumratorBase(object): def get_response(self, response): if response is None: - return 0 + return '' # return empty string if there was an error instead of 0 return response.text if hasattr(response, "text") else response.content def check_max_subdomains(self, count): @@ -273,17 +278,24 @@ class enumratorBaseThreaded(multiprocessing.Process, enumratorBase): class GoogleEnum(enumratorBaseThreaded): def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True): subdomains = subdomains or [] - base_url = "https://google.com/search?q={query}&btnG=Search&hl=en-US&biw=&bih=&gbv=1&start={page_no}&filter=0" + base_url = "https://google.com/search?q={query}&btnG=Search&hl=en-US&biw=1366&bih=768&gbv=1&start={page_no}&filter=0" self.engine_name = "Google" self.MAX_DOMAINS = 11 self.MAX_PAGES = 200 super(GoogleEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose) self.q = q + self.headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.8', + 'Cache-Control': 'max-age=0', + 'Connection': 'keep-alive' + } return def extract_domains(self, resp): links_list = list() - link_regx = re.compile('(.*?)<\/cite>') + link_regx = re.compile(r'(.*?)<\/cite>') try: links_list = link_regx.findall(resp) for link in links_list: @@ -300,14 +312,14 @@ class GoogleEnum(enumratorBaseThreaded): return links_list def check_response_errors(self, resp): - if (type(resp) is str or type(resp) is unicode) and 'Our systems have detected unusual traffic' in resp: + if isinstance(resp, str) and 'Our systems have detected unusual traffic' in resp: self.print_(R + "[!] Error: Google probably now is blocking our requests" + W) self.print_(R + "[~] Finished now the Google Enumeration ..." + W) return False return True def should_sleep(self): - time.sleep(5) + time.sleep(random.uniform(5, 10)) # Random delay between 5-10 seconds return def generate_query(self): @@ -340,7 +352,7 @@ class YahooEnum(enumratorBaseThreaded): links2 = link_regx2.findall(resp) links_list = links + links2 for link in links_list: - link = re.sub("<(\/)?b>", "", link) + link = re.sub(r"<(\/)?b>", "", link) if not link.startswith('http'): link = "http://" + link subdomain = urlparse.urlparse(link).netloc @@ -384,7 +396,7 @@ class AskEnum(enumratorBaseThreaded): def extract_domains(self, resp): links_list = list() - link_regx = re.compile('

(.*?)

') + link_regx = re.compile('

(.*?)

') #error fixed try: links_list = link_regx.findall(resp) for link in links_list: @@ -436,7 +448,7 @@ class BingEnum(enumratorBaseThreaded): links_list = links + links2 for link in links_list: - link = re.sub('<(\/)?strong>||<|>', '', link) + link = re.sub(r'<(\/)?strong>||<|>', '', link) if not link.startswith('http'): link = "http://" + link subdomain = urlparse.urlparse(link).netloc @@ -637,15 +649,18 @@ class DNSdumpster(enumratorBaseThreaded): return self.get_response(resp) def get_csrftoken(self, resp): - csrf_regex = re.compile('', re.S) - token = csrf_regex.findall(resp)[0] - return token.strip() + soup = BeautifulSoup(resp, features="html.parser") + token = soup.find('input', {'name': 'b_3cbc62d931a69e74b2c856f1a_532c46ab39'}) + if token: + return token['value'].strip() + else: + raise ValueError("[!] Error: Unable to retrieve CSRF token from DNSdumpster") def enumerate(self): self.lock = threading.BoundedSemaphore(value=70) resp = self.req('GET', self.base_url) token = self.get_csrftoken(resp) - params = {'csrfmiddlewaretoken': token, 'targetip': self.domain} + params = {'b_3cbc62d931a69e74b2c856f1a_532c46ab39': token, 'targetip': self.domain} post_resp = self.req('POST', self.base_url, params) self.extract_domains(post_resp) for subdomain in self.subdomains: @@ -655,7 +670,9 @@ class DNSdumpster(enumratorBaseThreaded): return self.live_subdomains def extract_domains(self, resp): - tbl_regex = re.compile('<\/a>Host Records.*?(.*?)', re.S) + if not isinstance(resp, str): + return [] + tbl_regex = re.compile(r'<\/a>Host Records.*?(.*?)', re.S) link_regex = re.compile('(.*?)
', re.S) links = [] try: @@ -672,32 +689,42 @@ class DNSdumpster(enumratorBaseThreaded): self.subdomains.append(subdomain.strip()) return links - class Virustotal(enumratorBaseThreaded): def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True): subdomains = subdomains or [] - base_url = 'https://www.virustotal.com/ui/domains/{domain}/subdomains' + base_url = 'https://www.virustotal.com/api/v3/domains/{domain}/subdomains' self.engine_name = "Virustotal" self.q = q super(Virustotal, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose) self.url = self.base_url.format(domain=self.domain) + + # Virustotal requires specific headers to bypass the bot detection: + self.headers["X-Tool"] = "vt-ui-main" + self.headers["X-VT-Anti-Abuse-Header"] = "hm" + self.headers["Accept-Ianguage"] = self.headers["Accept-Language"] + return - # the main send_req need to be rewritten def send_req(self, url): try: - resp = self.session.get(url, headers=self.headers, timeout=self.timeout) + global vt_apikey + headers = dict(self.headers) + headers['x-apikey'] = vt_apikey + resp = self.session.get(url, headers=headers, timeout=self.timeout) except Exception as e: self.print_(e) resp = None return self.get_response(resp) - # once the send_req is rewritten we don't need to call this function, the stock one should be ok def enumerate(self): while self.url != '': resp = self.send_req(self.url) - resp = json.loads(resp) + if isinstance(resp, str): # Ensure the response is a string before parsing as JSON + resp = json.loads(resp) + else: + self.print_(R + "[!] Error: Unexpected response format" + W) + break if 'error' in resp: self.print_(R + "[!] Error: Virustotal probably now is blocking our requests" + W) break @@ -709,7 +736,7 @@ class Virustotal(enumratorBaseThreaded): return self.subdomains def extract_domains(self, resp): - #resp is already parsed as json + # resp is already parsed as JSON try: for i in resp['data']: if i['type'] == 'domain': @@ -724,6 +751,7 @@ class Virustotal(enumratorBaseThreaded): pass + class ThreatCrowd(enumratorBaseThreaded): def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True): subdomains = subdomains or [] @@ -895,7 +923,7 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e enable_bruteforce = True # Validate domain - domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$") + domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$") if not domain_check.match(domain): if not silent: print(R + "Error: Please enter a valid domain" + W)