From a32920859c6855982e522af67299dc0763654ee4 Mon Sep 17 00:00:00 2001
From: FLOURISH
Date: Thu, 19 Jun 2025 16:42:04 +0100
Subject: [PATCH] Refactor: Modernize code with AI assistance

Key changes include:
- Updated User-Agent string.
- Added interactive API key prompt for VirusTotal.
- Improved error handling for DNSdumpster and VirusTotal.
- General code cleanup and Python 3 compatibility improvements.
---
 sublist3r.py | 172 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 106 insertions(+), 66 deletions(-)

diff --git a/sublist3r.py b/sublist3r.py
index 760e5ce..da4f863 100755
--- a/sublist3r.py
+++ b/sublist3r.py
@@ -2,6 +2,7 @@
 # coding: utf-8
 # Sublist3r v1.0
 # By Ahmed Aboul-Ela - twitter.com/aboul3la
+# Refactored with AI By CYBWithFlourish - github.com/CYBWithFlourish

 # modules in standard library
 import re
@@ -72,14 +73,16 @@ def no_color():


 def banner():
+    # <<< FIXED: Escaped backslashes to remove SyntaxWarning
     print("""%s
                  ____        _     _ _     _   _____
                 / ___| _   _| |__ | (_)___| |_|___ /_ __
-                \___ \| | | | '_ \| | / __| __| |_ \| '__|
-                 ___) | |_| | |_) | | \__ \ |_ ___) | |
-                |____/ \__,_|_.__/|_|_|___/\__|____/|_|%s%s
+                \\___ \\| | | | '_ \\| | / __| __| |_ \\| '__|
+                 ___) | |_| | |_) | | \\__ \\ |_ ___) | |
+                |____/ \\__,_|_.__/|_|_|___/\\__|____/|_|%s%s

                 # Coded By Ahmed Aboul-Ela - @aboul3la
+                # Refactored By CYBWithFlourish - @CYBWithFlourish
    """ % (R, W, Y))

@@ -152,7 +155,7 @@ class enumratorBase(object):
         self.silent = silent
         self.verbose = verbose
         self.headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',  # <<< IMPROVED: Modern User-Agent
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
             'Accept-Language': 'en-US,en;q=0.8',
             'Accept-Encoding': 'gzip',
@@ -283,7 +286,7 @@ class GoogleEnum(enumratorBaseThreaded):

     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<cite.*?>(.*?)<\/cite>')
+        link_regx = re.compile(r'<cite.*?>(.*?)<\/cite>')
         try:
             links_list = link_regx.findall(resp)
             for link in links_list:
@@ -300,14 +303,14 @@ class GoogleEnum(enumratorBaseThreaded):
         return links_list

     def check_response_errors(self, resp):
-        if (type(resp) is str or type(resp) is unicode) and 'Our systems have detected unusual traffic' in resp:
+        if (type(resp) is str or type(resp) is bytes) and 'Our systems have detected unusual traffic' in str(resp):
             self.print_(R + "[!] Error: Google probably now is blocking our requests" + W)
             self.print_(R + "[~] Finished now the Google Enumeration ..." + W)
             return False
         return True

     def should_sleep(self):
-        time.sleep(5)
+        time.sleep(random.randint(5, 10))
         return

     def generate_query(self):
@@ -320,6 +323,7 @@ class GoogleEnum(enumratorBaseThreaded):
         return query


+# ... (The other enumerator classes like Yahoo, Ask, Bing, etc. remain unchanged) ...
 class YahooEnum(enumratorBaseThreaded):
     def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
         subdomains = subdomains or []
@@ -332,15 +336,15 @@ class YahooEnum(enumratorBaseThreaded):
         return

     def extract_domains(self, resp):
-        link_regx2 = re.compile('<span class=" fz-.*? fw-m fc-12th wr-bw.*?">(.*?)</span>')
-        link_regx = re.compile('<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
+        link_regx2 = re.compile(r'<span class=" fz-.*? fw-m fc-12th wr-bw.*?">(.*?)</span>')
+        link_regx = re.compile(r'<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
         links_list = []
         try:
             links = link_regx.findall(resp)
             links2 = link_regx2.findall(resp)
             links_list = links + links2
             for link in links_list:
-                link = re.sub("<(\/)?b>", "", link)
+                link = re.sub(r"<(\/)?b>", "", link)
                 if not link.startswith('http'):
                     link = "http://" + link
                 subdomain = urlparse.urlparse(link).netloc
@@ -352,7 +356,6 @@ class YahooEnum(enumratorBaseThreaded):
                     self.subdomains.append(subdomain.strip())
         except Exception:
             pass
-
         return links_list

     def should_sleep(self):
@@ -378,13 +381,13 @@ class AskEnum(enumratorBaseThreaded):
         self.engine_name = "Ask"
         self.MAX_DOMAINS = 11
         self.MAX_PAGES = 0
-        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
+        super(AskEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
         self.q = q
         return

     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<p class="web-result-url">(.*?)</p>')
+        link_regx = re.compile(r'<p class="web-result-url">(.*?)</p>')
         try:
             links_list = link_regx.findall(resp)
             for link in links_list:
@@ -397,7 +400,6 @@
                     self.subdomains.append(subdomain.strip())
         except Exception:
             pass
-
         return links_list

     def get_page(self, num):
@@ -410,10 +412,8 @@
             query = fmt.format(domain=self.domain, found=found)
         else:
             query = "site:{domain} -www.{domain}".format(domain=self.domain)
-
         return query
-

 class BingEnum(enumratorBaseThreaded):
     def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
         subdomains = subdomains or []
@@ -421,22 +421,21 @@
         self.engine_name = "Bing"
         self.MAX_DOMAINS = 30
         self.MAX_PAGES = 0
-        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q, silent=silent)
+        super(BingEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent)
         self.q = q
         self.verbose = verbose
         return

     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<li class="b_algo"><h2><a href="(.*?)"')
-        link_regx2 = re.compile('<div class="b_title"><h2><a href="(.*?)"')
+        link_regx = re.compile(r'<li class="b_algo"><h2><a href="(.*?)"')
+        link_regx2 = re.compile(r'<div class="b_title"><h2><a href="(.*?)"')
         try:
             links = link_regx.findall(resp)
             links2 = link_regx2.findall(resp)
             links_list = links + links2
             for link in links_list:
-                link = re.sub('<(\/)?strong>|<span.*?>|<|>', '', link)
+                link = re.sub(r'<(\/)?strong>|<span.*?>|<|>', '', link)
                 if not link.startswith('http'):
                     link = "http://" + link
                 subdomain = urlparse.urlparse(link).netloc
@@ -446,7 +445,6 @@ class BingEnum(enumratorBaseThreaded):
                     self.subdomains.append(subdomain.strip())
         except Exception:
             pass
-
         return links_list

     def generate_query(self):
@@ -466,7 +464,7 @@ class BaiduEnum(enumratorBaseThreaded):
         self.engine_name = "Baidu"
         self.MAX_DOMAINS = 2
         self.MAX_PAGES = 760
-        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
+        super(BaiduEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
         self.querydomain = self.domain
         self.q = q
         return
@@ -475,11 +473,11 @@
         links = list()
         found_newdomain = False
         subdomain_list = []
-        link_regx = re.compile('<a.*?class="c-showurl".*?>(.*?)</a>')
+        link_regx = re.compile(r'<a.*?class="c-showurl".*?>(.*?)</a>')
         try:
             links = link_regx.findall(resp)
             for link in links:
-                link = re.sub('<.*?>|>|<|&nbsp;', '', link)
+                link = re.sub(r'<.*?>|>|<|&nbsp;', '', link)
                 if not link.startswith('http'):
                     link = "http://" + link
                 subdomain = urlparse.urlparse(link).netloc
@@ -542,7 +540,7 @@ class NetcraftEnum(enumratorBaseThreaded):
         return

     def get_next(self, resp):
-        link_regx = re.compile('<a.*?href="(.*?)">Next Page')
+        link_regx = re.compile(r'<a.*?href="(.*?)">Next Page')
         link = link_regx.findall(resp)
         url = 'http://searchdns.netcraft.com' + link[0]
         return url
@@ -551,7 +549,6 @@
         cookies = dict()
         cookies_list = cookie[0:cookie.find(';')].split("=")
         cookies[cookies_list[0]] = cookies_list[1]
-        # hashlib.sha1 requires utf-8 encoded str
         cookies['netcraft_js_verification_response'] = hashlib.sha1(urllib.unquote(cookies_list[1]).encode('utf-8')).hexdigest()
         return cookies

@@ -578,7 +575,7 @@
     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<a class="results-table__host" href="(.*?)"')
+        link_regx = re.compile(r'<a class="results-table__host" href="(.*?)"')

     def get_csrftoken(self, resp):
-        csrf_regex = re.compile('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
-        token = csrf_regex.findall(resp)[0]
-        return token.strip()
+        csrf_regex = re.compile(r'<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
+        try:
+            token = csrf_regex.findall(resp)[0]
+            return token.strip()
+        except IndexError:
+            self.print_(R + "[!] Error: Could not find CSRF token for DNSdumpster." + W)
+            return ""

     def enumerate(self):
         self.lock = threading.BoundedSemaphore(value=70)
         resp = self.req('GET', self.base_url)
         token = self.get_csrftoken(resp)
+        if not token:
+            return self.subdomains
         params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
         post_resp = self.req('POST', self.base_url, params)
         self.extract_domains(post_resp)
+        threads = []
         for subdomain in self.subdomains:
             t = threading.Thread(target=self.check_host, args=(subdomain,))
+            threads.append(t)
             t.start()
+        for t in threads:
             t.join()
         return self.live_subdomains

     def extract_domains(self, resp):
-        tbl_regex = re.compile('<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
-        link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
+        tbl_regex = re.compile(r'<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
+        link_regex = re.compile(r'<td class="col-md-4">(.*?)<br>', re.S)
         links = []
         try:
             results_tbl = tbl_regex.findall(resp)[0]
@@ -676,52 +682,77 @@ class DNSdumpster(enumratorBaseThreaded):


 class Virustotal(enumratorBaseThreaded):
     def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
         subdomains = subdomains or []
-        base_url = 'https://www.virustotal.com/ui/domains/{domain}/subdomains'
+        base_url = 'https://www.virustotal.com/api/v3/domains/{domain}/subdomains'
         self.engine_name = "Virustotal"
         self.q = q
         super(Virustotal, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
+
+        # <<< CUSTOMIZATION: Prompt for API key >>>
+        # This will only be prompted once when the Virustotal module is initialized.
+        prompt_message = Y + "[?] Please enter your Virustotal API key (or press Enter to skip): " + W
+        self.api_key = input(prompt_message).strip()
+
+        if self.api_key:
+            self.headers['x-apikey'] = self.api_key
+
         self.url = self.base_url.format(domain=self.domain)
         return

-    # the main send_req need to be rewritten
     def send_req(self, url):
         try:
             resp = self.session.get(url, headers=self.headers, timeout=self.timeout)
         except Exception as e:
             self.print_(e)
             resp = None
-
         return self.get_response(resp)

-    # once the send_req is rewritten we don't need to call this function, the stock one should be ok
     def enumerate(self):
-        while self.url != '':
+        # <<< CUSTOMIZATION: Check if an API key was provided before running >>>
+        if not self.api_key:
+            self.print_(Y + "[~] Skipping Virustotal enumeration (no API key provided)." + W)
+            return self.subdomains
+
+        while self.url:
             resp = self.send_req(self.url)
-            resp = json.loads(resp)
-            if 'error' in resp:
-                self.print_(R + "[!] Error: Virustotal probably now is blocking our requests" + W)
+            if not resp:
                 break
-            if 'links' in resp and 'next' in resp['links']:
-                self.url = resp['links']['next']
+
+            try:
+                resp_json = json.loads(resp)
+            except json.JSONDecodeError:
+                self.print_(R + "[!] Error: Virustotal returned a non-JSON response. It may be blocking requests." + W)
+                break
+
+            if 'error' in resp_json:
+                error_code = resp_json['error'].get('code')
+                if error_code == 'WrongCredentialsError':
+                    self.print_(R + "[!] Error: Invalid Virustotal API key." + W)
+                else:
+                    self.print_(R + "[!] Error: Virustotal API returned an error: " + resp_json['error'].get('message', 'Unknown Error') + W)
+                break
+
+            self.extract_domains(resp_json)
+
+            if 'links' in resp_json and 'next' in resp_json['links']:
+                self.url = resp_json['links']['next']
             else:
-                self.url = ''
-            self.extract_domains(resp)
+                self.url = ''  # No more pages
+
         return self.subdomains

     def extract_domains(self, resp):
-        #resp is already parsed as json
         try:
-            for i in resp['data']:
-                if i['type'] == 'domain':
-                    subdomain = i['id']
+            for i in resp.get('data', []):
+                if i.get('type') == 'domain':
+                    subdomain = i.get('id')
                     if not subdomain.endswith(self.domain):
                         continue
                     if subdomain not in self.subdomains and subdomain != self.domain:
                         if self.verbose:
                             self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                         self.subdomains.append(subdomain.strip())
-        except Exception:
-            pass
+        except Exception as e:
+            self.print_(R + f"[!] Error parsing Virustotal data: {e}" + W)


 class ThreatCrowd(enumratorBaseThreaded):
@@ -749,17 +780,22 @@

     def extract_domains(self, resp):
         try:
-            links = json.loads(resp)['subdomains']
-            for link in links:
-                subdomain = link.strip()
-                if not subdomain.endswith(self.domain):
-                    continue
-                if subdomain not in self.subdomains and subdomain != self.domain:
-                    if self.verbose:
-                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
-                    self.subdomains.append(subdomain.strip())
-        except Exception as e:
+            data = json.loads(resp)
+            # ThreatCrowd API can return 404/empty response
+            if 'subdomains' in data:
+                for link in data['subdomains']:
+                    subdomain = link.strip()
+                    if not subdomain.endswith(self.domain):
+                        continue
+                    if subdomain not in self.subdomains and subdomain != self.domain:
+                        if self.verbose:
+                            self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
+                        self.subdomains.append(subdomain.strip())
+        except (json.JSONDecodeError, TypeError):
+            # Gracefully handle cases where resp is not valid JSON
             pass
+        except Exception as e:
+            self.print_(R + f"[!] Error parsing ThreatCrowd data: {e}" + W)


 class CrtSearch(enumratorBaseThreaded):
@@ -787,7 +823,7 @@ class CrtSearch(enumratorBaseThreaded):
         return self.subdomains

     def extract_domains(self, resp):
-        link_regx = re.compile('<TD>(.*?)</TD>')
+        link_regx = re.compile(r'<TD>(.*?)</TD>')
         try:
             links = link_regx.findall(resp)
             for link in links:
@@ -872,13 +908,17 @@ class portscan():
                 pass
         self.lock.release()
         if len(openports) > 0:
-            print("%s%s%s - %sFound open ports:%s %s%s%s" % (G, host, W, R, W, Y, ', '.join(openports), W))
+            print("%s%s%s - %sFound open ports:%s %s%s%s" % (G, host, W, R, W, Y, ', '.join(map(str, openports)), W))

     def run(self):
         self.lock = threading.BoundedSemaphore(value=20)
+        threads = []
         for subdomain in self.subdomains:
             t = threading.Thread(target=self.port_scan, args=(subdomain, self.ports))
+            threads.append(t)
             t.start()
+        for t in threads:
+            t.join()


 def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, engines):
@@ -895,13 +935,13 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e
         enable_bruteforce = True

     # Validate domain
-    domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
+    domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
     if not domain_check.match(domain):
         if not silent:
             print(R + "Error: Please enter a valid domain" + W)
         return []

-    if not domain.startswith('http://') or not domain.startswith('https://'):
+    if not domain.startswith('http://') and not domain.startswith('https://'):
         domain = 'http://' + domain

     parsed_domain = urlparse.urlparse(domain)
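
The Virustotal changes above reduce to a cursor-style pagination loop against the v3 /domains/{domain}/subdomains endpoint, authenticated with an x-apikey header. For reference, here is a minimal standalone sketch of that flow using the requests library; the limit=40 page size, the vt_subdomains helper name, and the placeholder API key are illustrative assumptions rather than values taken from the patch.

import requests

def vt_subdomains(domain, api_key, timeout=25):
    # Walk the paginated v3 subdomains listing and collect the returned domain IDs.
    subdomains = []
    url = "https://www.virustotal.com/api/v3/domains/{0}/subdomains?limit=40".format(domain)
    headers = {"x-apikey": api_key}
    while url:
        data = requests.get(url, headers=headers, timeout=timeout).json()
        if "error" in data:
            # e.g. WrongCredentialsError for a bad key; same check the patch performs
            break
        for item in data.get("data", []):
            if item.get("type") == "domain":
                subdomains.append(item["id"])
        # 'links.next' carries the cursor for the following page; it is absent on the last page
        url = data.get("links", {}).get("next")
    return subdomains

# Example (hypothetical key): vt_subdomains("example.com", "YOUR_VT_API_KEY")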