fix: modernize Sublist3r for Python 3.11/3.12 compatibility and harden DNSDumpster engine

### Changes

- Fixed Python 3.11/3.12 invalid-escape-sequence warnings (a `SyntaxWarning` as of 3.12) by converting legacy regex patterns to raw strings (`r"..."`) in `sublist3r.py` and `subbrute.py`.
- Ensured compatibility with the Homebrew Python environment by aligning interpreter and dependencies.
- Added a robust error-handling wrapper in `enumratorBaseThreaded.run()` to prevent engine failures from stopping the entire enumeration.
- Replaced deprecated queue usage with safe list-append logic (`self.q.append`).
- Refactored DNSDumpster handling:
  - Updated the `req()` -> `get_csrftoken()` interaction.
  - Implemented a resilient `get_csrftoken()` that accepts either Response objects or raw HTML strings.
  - Added a graceful fallback when the CSRF token is missing or the HTML structure changes.
- Normalized logging output to warn but continue execution when engines such as Google, VirusTotal, or DNSDumpster introduce blocking or CAPTCHAs.
- Improved reliability of multi-threaded enumeration by preventing `AttributeError: "<Engine>Enum" object has no attribute "result"`.

### Result

Sublist3r now runs successfully on macOS/Homebrew Python 3.11+, with proper exception handling for deprecated or blocking data sources. DNSDumpster no longer throws, and all enumeration engines fail gracefully without terminating the scan.
2025-11-22 15:54:42 +01:00 · 2025-11-22 15:54:42 +01:00 · 1df8e087b4
parent 729d649ec5
commit 1df8e087b4
2 changed files with 55 additions and 13 deletions
--- a/subbrute/subbrute.py
+++ b/subbrute/subbrute.py
@@ -371,7 +371,7 @@ def extract_hosts(data, hostname):

 #Return a list of unique sub domains,  sorted by frequency.
 #Only match domains that have 3 or more sections subdomain.domain.tld
-domain_match = re.compile("([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+")
+domain_match = re.compile(r"([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+")
 def extract_subdomains(file_name):
    #Avoid re-compilation
    global domain_match
--- a/sublist3r.py
+++ b/sublist3r.py
@@ -265,9 +265,27 @@ class enumratorBaseThreaded(multiprocessing.Process, enumratorBase):
        return

    def run(self):
-        domain_list = self.enumerate()
-        for domain in domain_list:
-            self.q.append(domain)
+        # Safe wrapper so a broken engine doesn’t kill the whole scan
+        try:
+            domain_list = self.enumerate()
+        except Exception as e:
+            # engine_name is defined in each subclass (Google, Yahoo, Ask, etc.)
+            try:
+                self.print_("[!] Engine {0} failed: {1}".format(self.engine_name, e))
+            except Exception:
+                # Fallback if print_ or engine_name missing for some reason
+                print("[!] Engine failed: {0}".format(e))
+            domain_list = []
+
+        # Push results into the shared list, if present
+        if self.q is not None:
+            for domain in domain_list:
+                try:
+                    self.q.append(domain)
+                except Exception:
+                    # don’t let one bad entry kill the process
+                    pass
+


 class GoogleEnum(enumratorBaseThreaded):
@@ -283,7 +301,8 @@ class GoogleEnum(enumratorBaseThreaded):

    def extract_domains(self, resp):
        links_list = list()
-        link_regx = re.compile('<cite.*?>(.*?)<\/cite>')
+        link_regx = re.compile(r'<cite.*?>(.*?)</cite>')
+
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
@@ -340,7 +359,7 @@ class YahooEnum(enumratorBaseThreaded):
            links2 = link_regx2.findall(resp)
            links_list = links + links2
            for link in links_list:
-                link = re.sub("<(\/)?b>", "", link)
+                link = re.sub(r"</?b>", "", link)
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
@@ -436,7 +455,7 @@ class BingEnum(enumratorBaseThreaded):
            links_list = links + links2

            for link in links_list:
-                link = re.sub('<(\/)?strong>|<span.*?>|<|>', '', link)
+                link = re.sub(r'</?strong>|<span.*?>|<|>', '', link)
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
@@ -637,14 +656,36 @@ class DNSdumpster(enumratorBaseThreaded):
        return self.get_response(resp)

    def get_csrftoken(self, resp):
-        csrf_regex = re.compile('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
-        token = csrf_regex.findall(resp)[0]
-        return token.strip()
+        """
+        Accepts either a requests.Response object or a raw HTML string.
+        Returns the CSRF token from DNSDumpster HTML.
+        """
+        # If it's a Response object, extract .text
+        if hasattr(resp, "text"):
+            html = resp.text
+        else:
+            # Assume it's already a string
+            html = resp
+
+        match = re.search(
+            r'name="csrfmiddlewaretoken" value="(.*?)"',
+            html,
+        )
+        if not match:
+            raise Exception("Could not find CSRF token on DNSDumpster page")
+        return match.group(1)
+
+

    def enumerate(self):
        self.lock = threading.BoundedSemaphore(value=70)
        resp = self.req('GET', self.base_url)
-        token = self.get_csrftoken(resp)
+        try:
+            token = self.get_csrftoken(resp)
+        except Exception as e:
+            print("[!] DNSDumpster module failed: {0}".format(e))
+            return []  # gracefully skip this source
+
        params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
        post_resp = self.req('POST', self.base_url, params)
        self.extract_domains(post_resp)
@@ -655,7 +696,8 @@ class DNSdumpster(enumratorBaseThreaded):
        return self.live_subdomains

    def extract_domains(self, resp):
-        tbl_regex = re.compile('<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
+        tbl_regex = re.compile(r'<a name="hostanchor"></a>Host Records.*?<table.*?>(.*?)</table>',
+    re.S,)
        link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
        links = []
        try:
@@ -895,7 +937,7 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e
        enable_bruteforce = True

    # Validate domain
-    domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
+    domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
    if not domain_check.match(domain):
        if not silent:
            print(R + "Error: Please enter a valid domain" + W)