From a32920859c6855982e522af67299dc0763654ee4 Mon Sep 17 00:00:00 2001
From: FLOURISH
Date: Thu, 19 Jun 2025 16:42:04 +0100
Subject: [PATCH] Refactor: Modernize code with AI assistance

Key changes include:
- Updated User-Agent string.
- Added interactive API key prompt for VirusTotal.
- Improved error handling for DNSdumpster and VirusTotal.
- General code cleanup and Python 3 compatibility improvements.
---
 sublist3r.py | 172 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 106 insertions(+), 66 deletions(-)

diff --git a/sublist3r.py b/sublist3r.py
index 760e5ce..da4f863 100755
--- a/sublist3r.py
+++ b/sublist3r.py
@@ -2,6 +2,7 @@
 # coding: utf-8
 # Sublist3r v1.0
 # By Ahmed Aboul-Ela - twitter.com/aboul3la
+# Refactored with AI By CYBWithFlourish - github.com/CYBWithFlourish

 # modules in standard library
 import re
@@ -72,14 +73,16 @@ def no_color():


 def banner():
+    # <<< FIXED: Escaped backslashes to remove SyntaxWarning
     print("""%s
                  ____        _     _ _     _   _____
                 / ___| _   _| |__ | (_)___| |_|___ /_ __
-                \___ \| | | | '_ \| | / __| __| |_ \| '__|
-                 ___) | |_| | |_) | | \__ \ |_ ___) | |
-                |____/ \__,_|_.__/|_|_|___/\__|____/|_|%s%s
+                \\___ \\| | | | '_ \\| | / __| __| |_ \\| '__|
+                 ___) | |_| | |_) | | \\__ \\ |_ ___) | |
+                |____/ \\__,_|_.__/|_|_|___/\\__|____/|_|%s%s

                 # Coded By Ahmed Aboul-Ela - @aboul3la
+                # Refactored By CYBWithFlourish - @CYBWithFlourish
    """ % (R, W, Y))

@@ -152,7 +155,7 @@ class enumratorBase(object):
         self.silent = silent
         self.verbose = verbose
         self.headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',  # <<< IMPROVED: Modern User-Agent
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
             'Accept-Language': 'en-US,en;q=0.8',
             'Accept-Encoding': 'gzip',
@@ -283,7 +286,7 @@ class GoogleEnum(enumratorBaseThreaded):

     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<cite.*?>(.*?)<\/cite>')
+        link_regx = re.compile(r'<cite.*?>(.*?)<\/cite>')
         try:
             links_list = link_regx.findall(resp)
             for link in links_list:
@@ -300,14 +303,14 @@ class GoogleEnum(enumratorBaseThreaded):
         return links_list

     def check_response_errors(self, resp):
-        if (type(resp) is str or type(resp) is unicode) and 'Our systems have detected unusual traffic' in resp:
+        if (type(resp) is str or type(resp) is bytes) and 'Our systems have detected unusual traffic' in str(resp):
             self.print_(R + "[!] Error: Google probably now is blocking our requests" + W)
             self.print_(R + "[~] Finished now the Google Enumeration ..." + W)
             return False
         return True

     def should_sleep(self):
-        time.sleep(5)
+        time.sleep(random.randint(5, 10))
         return

     def generate_query(self):
@@ -320,6 +323,7 @@ class GoogleEnum(enumratorBaseThreaded):
         return query


+# ... (The other enumerator classes like Yahoo, Ask, Bing, etc. remain unchanged) ...
 class YahooEnum(enumratorBaseThreaded):
     def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
         subdomains = subdomains or []
@@ -332,15 +336,15 @@ class YahooEnum(enumratorBaseThreaded):
         return

     def extract_domains(self, resp):
-        link_regx2 = re.compile('<span class=" fz-.*? fw-m fc-12th wr-bw.*?">(.*?)</span>')
-        link_regx = re.compile('<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
+        link_regx2 = re.compile(r'<span class=" fz-.*? fw-m fc-12th wr-bw.*?">(.*?)</span>')
+        link_regx = re.compile(r'<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
         links_list = []
         try:
             links = link_regx.findall(resp)
             links2 = link_regx2.findall(resp)
             links_list = links + links2
             for link in links_list:
-                link = re.sub("<(\/)?b>", "", link)
+                link = re.sub(r"<(\/)?b>", "", link)
                 if not link.startswith('http'):
                     link = "http://" + link
                 subdomain = urlparse.urlparse(link).netloc
@@ -352,7 +356,6 @@ class YahooEnum(enumratorBaseThreaded):
                     self.subdomains.append(subdomain.strip())
         except Exception:
             pass
-
         return links_list

     def should_sleep(self):
@@ -378,13 +381,13 @@ class AskEnum(enumratorBaseThreaded):
         self.engine_name = "Ask"
         self.MAX_DOMAINS = 11
         self.MAX_PAGES = 0
-        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
+        super(AskEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
         self.q = q
         return

     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<p class="web-result-url">(.*?)</p>')
+        link_regx = re.compile(r'<p class="web-result-url">(.*?)</p>')
         try:
             links_list = link_regx.findall(resp)
             for link in links_list:
@@ -397,7 +400,6 @@
                     self.subdomains.append(subdomain.strip())
         except Exception:
             pass
-
         return links_list

     def get_page(self, num):
@@ -410,10 +412,8 @@
             query = fmt.format(domain=self.domain, found=found)
         else:
             query = "site:{domain} -www.{domain}".format(domain=self.domain)
-
         return query
-

 class BingEnum(enumratorBaseThreaded):
     def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
         subdomains = subdomains or []
@@ -421,22 +421,21 @@
         self.engine_name = "Bing"
         self.MAX_DOMAINS = 30
         self.MAX_PAGES = 0
-        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q, silent=silent)
+        super(BingEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent)
         self.q = q
         self.verbose = verbose
         return

     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<li class="b_algo"><h2><a href="(.*?)"')
-        link_regx2 = re.compile('<div class="b_title"><h2><a href="(.*?)"')
+        link_regx = re.compile(r'<li class="b_algo"><h2><a href="(.*?)"')
+        link_regx2 = re.compile(r'<div class="b_title"><h2><a href="(.*?)"')
         try:
             links = link_regx.findall(resp)
             links2 = link_regx2.findall(resp)
             links_list = links + links2
             for link in links_list:
-                link = re.sub('<(\/)?strong>|<span.*?>|<|>', '', link)
+                link = re.sub(r'<(\/)?strong>|<span.*?>|<|>', '', link)
                 if not link.startswith('http'):
                     link = "http://" + link
                 subdomain = urlparse.urlparse(link).netloc
@@ -446,7 +445,6 @@ class BingEnum(enumratorBaseThreaded):
                     self.subdomains.append(subdomain.strip())
         except Exception:
             pass
-
         return links_list

     def generate_query(self):
@@ -466,7 +464,7 @@ class BaiduEnum(enumratorBaseThreaded):
         self.engine_name = "Baidu"
         self.MAX_DOMAINS = 2
         self.MAX_PAGES = 760
-        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
+        super(BaiduEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
         self.querydomain = self.domain
         self.q = q
         return
@@ -475,11 +473,11 @@
         links = list()
         found_newdomain = False
         subdomain_list = []
-        link_regx = re.compile('<a.*?class="c-showurl".*?>(.*?)</a>')
+        link_regx = re.compile(r'<a.*?class="c-showurl".*?>(.*?)</a>')
         try:
             links = link_regx.findall(resp)
             for link in links:
-                link = re.sub('<.*?>|>|<|&nbsp;', '', link)
+                link = re.sub(r'<.*?>|>|<|&nbsp;', '', link)
                 if not link.startswith('http'):
                     link = "http://" + link
                 subdomain = urlparse.urlparse(link).netloc
@@ -542,7 +540,7 @@ class NetcraftEnum(enumratorBaseThreaded):
         return

     def get_next(self, resp):
-        link_regx = re.compile('<a.*?href="(.*?)">Next Page')
+        link_regx = re.compile(r'<a.*?href="(.*?)">Next Page')
         link = link_regx.findall(resp)
         url = 'http://searchdns.netcraft.com' + link[0]
         return url
@@ -551,7 +549,6 @@
         cookies = dict()
         cookies_list = cookie[0:cookie.find(';')].split("=")
         cookies[cookies_list[0]] = cookies_list[1]
-        # hashlib.sha1 requires utf-8 encoded str
         cookies['netcraft_js_verification_response'] = hashlib.sha1(urllib.unquote(cookies_list[1]).encode('utf-8')).hexdigest()
         return cookies

@@ -578,7 +575,7 @@
     def extract_domains(self, resp):
         links_list = list()
-        link_regx = re.compile('<a class="results-table__host" href="(.*?)"')
+        link_regx = re.compile(r'<a class="results-table__host" href="(.*?)"')

     def get_csrftoken(self, resp):
-        csrf_regex = re.compile('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
-        token = csrf_regex.findall(resp)[0]
-        return token.strip()
+        csrf_regex = re.compile(r'<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
+        try:
+            token = csrf_regex.findall(resp)[0]
+            return token.strip()
+        except IndexError:
+            self.print_(R + "[!] Error: Could not find CSRF token for DNSdumpster." + W)
+            return ""

     def enumerate(self):
         self.lock = threading.BoundedSemaphore(value=70)
         resp = self.req('GET', self.base_url)
         token = self.get_csrftoken(resp)
+        if not token:
+            return self.subdomains
         params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
         post_resp = self.req('POST', self.base_url, params)
         self.extract_domains(post_resp)
+        threads = []
         for subdomain in self.subdomains:
             t = threading.Thread(target=self.check_host, args=(subdomain,))
+            threads.append(t)
             t.start()
+        for t in threads:
             t.join()
         return self.live_subdomains

     def extract_domains(self, resp):
-        tbl_regex = re.compile('<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
-        link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
+        tbl_regex = re.compile(r'<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
+        link_regex = re.compile(r'<td class="col-md-4">(.*?)<br>', re.S)
         links = []
         try:
             results_tbl = tbl_regex.findall(resp)[0]
@@ -676,52 +682,77 @@ class DNSdumpster(enumratorBaseThreaded):


 class Virustotal(enumratorBaseThreaded):
     def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
         subdomains = subdomains or []
-        base_url = 'https://www.virustotal.com/ui/domains/{domain}/subdomains'
+        base_url = 'https://www.virustotal.com/api/v3/domains/{domain}/subdomains'
         self.engine_name = "Virustotal"
         self.q = q
         super(Virustotal, self).__init__(base_url, self.engine_name, domain, subdomains, q=q, silent=silent, verbose=verbose)
+
+        # <<< CUSTOMIZATION: Prompt for API key >>>
+        # This will only be prompted once when the Virustotal module is initialized.
+        prompt_message = Y + "[?] Please enter your Virustotal API key (or press Enter to skip): " + W
+        self.api_key = input(prompt_message).strip()
+
+        if self.api_key:
+            self.headers['x-apikey'] = self.api_key
+
         self.url = self.base_url.format(domain=self.domain)
         return

-    # the main send_req need to be rewritten
     def send_req(self, url):
         try:
             resp = self.session.get(url, headers=self.headers, timeout=self.timeout)
         except Exception as e:
             self.print_(e)
             resp = None
-
         return self.get_response(resp)

-    # once the send_req is rewritten we don't need to call this function, the stock one should be ok
     def enumerate(self):
-        while self.url != '':
+        # <<< CUSTOMIZATION: Check if an API key was provided before running >>>
+        if not self.api_key:
+            self.print_(Y + "[~] Skipping Virustotal enumeration (no API key provided)." + W)
+            return self.subdomains
+
+        while self.url:
             resp = self.send_req(self.url)
-            resp = json.loads(resp)
-            if 'error' in resp:
-                self.print_(R + "[!] Error: Virustotal probably now is blocking our requests" + W)
+            if not resp:
                 break
-            if 'links' in resp and 'next' in resp['links']:
-                self.url = resp['links']['next']
+
+            try:
+                resp_json = json.loads(resp)
+            except json.JSONDecodeError:
+                self.print_(R + "[!] Error: Virustotal returned a non-JSON response. It may be blocking requests." + W)
+                break
+
+            if 'error' in resp_json:
+                error_code = resp_json['error'].get('code')
+                if error_code == 'WrongCredentialsError':
+                    self.print_(R + "[!] Error: Invalid Virustotal API key." + W)
+                else:
+                    self.print_(R + "[!] Error: Virustotal API returned an error: " + resp_json['error'].get('message', 'Unknown Error') + W)
+                break
+
+            self.extract_domains(resp_json)
+
+            if 'links' in resp_json and 'next' in resp_json['links']:
+                self.url = resp_json['links']['next']
             else:
-                self.url = ''
-            self.extract_domains(resp)
+                self.url = ''  # No more pages
+
         return self.subdomains

     def extract_domains(self, resp):
-        #resp is already parsed as json
         try:
-            for i in resp['data']:
-                if i['type'] == 'domain':
-                    subdomain = i['id']
+            for i in resp.get('data', []):
+                if i.get('type') == 'domain':
+                    subdomain = i.get('id')
                     if not subdomain.endswith(self.domain):
                         continue
                     if subdomain not in self.subdomains and subdomain != self.domain:
                         if self.verbose:
                             self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                         self.subdomains.append(subdomain.strip())
-        except Exception:
-            pass
+        except Exception as e:
+            self.print_(R + f"[!] Error parsing Virustotal data: {e}" + W)


 class ThreatCrowd(enumratorBaseThreaded):
@@ -749,17 +780,22 @@

     def extract_domains(self, resp):
         try:
-            links = json.loads(resp)['subdomains']
-            for link in links:
-                subdomain = link.strip()
-                if not subdomain.endswith(self.domain):
-                    continue
-                if subdomain not in self.subdomains and subdomain != self.domain:
-                    if self.verbose:
-                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
-                    self.subdomains.append(subdomain.strip())
-        except Exception as e:
+            data = json.loads(resp)
+            # ThreatCrowd API can return 404/empty response
+            if 'subdomains' in data:
+                for link in data['subdomains']:
+                    subdomain = link.strip()
+                    if not subdomain.endswith(self.domain):
+                        continue
+                    if subdomain not in self.subdomains and subdomain != self.domain:
+                        if self.verbose:
+                            self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
+                        self.subdomains.append(subdomain.strip())
+        except (json.JSONDecodeError, TypeError):
+            # Gracefully handle cases where resp is not valid JSON
             pass
+        except Exception as e:
+            self.print_(R + f"[!] Error parsing ThreatCrowd data: {e}" + W)


 class CrtSearch(enumratorBaseThreaded):
@@ -787,7 +823,7 @@ class CrtSearch(enumratorBaseThreaded):
         return self.subdomains

     def extract_domains(self, resp):
-        link_regx = re.compile('<TD>(.*?)</TD>')
+        link_regx = re.compile(r'<TD>(.*?)</TD>')
         try:
             links = link_regx.findall(resp)
             for link in links:
@@ -872,13 +908,17 @@ class portscan():
                 pass
         self.lock.release()
         if len(openports) > 0:
-            print("%s%s%s - %sFound open ports:%s %s%s%s" % (G, host, W, R, W, Y, ', '.join(openports), W))
+            print("%s%s%s - %sFound open ports:%s %s%s%s" % (G, host, W, R, W, Y, ', '.join(map(str, openports)), W))

     def run(self):
         self.lock = threading.BoundedSemaphore(value=20)
+        threads = []
         for subdomain in self.subdomains:
             t = threading.Thread(target=self.port_scan, args=(subdomain, self.ports))
+            threads.append(t)
             t.start()
+        for t in threads:
+            t.join()


 def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, engines):
@@ -895,13 +935,13 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e
         enable_bruteforce = True

     # Validate domain
-    domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
+    domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
     if not domain_check.match(domain):
         if not silent:
             print(R + "Error: Please enter a valid domain" + W)
         return []

-    if not domain.startswith('http://') or not domain.startswith('https://'):
+    if not domain.startswith('http://') and not domain.startswith('https://'):
         domain = 'http://' + domain

     parsed_domain = urlparse.urlparse(domain)
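
The Virustotal changes above reduce to a cursor-style pagination loop against the v3 /domains/{domain}/subdomains endpoint, authenticated with an x-apikey header. For reference, here is a minimal standalone sketch of that flow using the requests library; the limit=40 page size, the vt_subdomains helper name, and the placeholder API key are illustrative assumptions rather than values taken from the patch.

import requests

def vt_subdomains(domain, api_key, timeout=25):
    # Walk the paginated v3 subdomains listing and collect the returned domain IDs.
    subdomains = []
    url = "https://www.virustotal.com/api/v3/domains/{0}/subdomains?limit=40".format(domain)
    headers = {"x-apikey": api_key}
    while url:
        data = requests.get(url, headers=headers, timeout=timeout).json()
        if "error" in data:
            # e.g. WrongCredentialsError for a bad key; same check the patch performs
            break
        for item in data.get("data", []):
            if item.get("type") == "domain":
                subdomains.append(item["id"])
        # 'links.next' carries the cursor for the following page; it is absent on the last page
        url = data.get("links", {}).get("next")
    return subdomains

# Example (hypothetical key): vt_subdomains("example.com", "YOUR_VT_API_KEY")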