Fix Python 3 compatibility issues and clean up code

2025-10-10 09:33:51 +05:30 · 2025-10-10 09:33:51 +05:30 · 554ace7eee
parent 729d649ec5
commit 554ace7eee
2 changed files with 168 additions and 42 deletions
--- a/sublist3r.py
+++ b/sublist3r.py
@ -2,6 +2,7 @@
 # coding: utf-8
 # Sublist3r v1.0
 # By Ahmed Aboul-Ela - twitter.com/aboul3la
 # Optimized by Mohamed Naseem
 # modules in standard library
 import re
@ -75,9 +76,9 @@ def banner():
    print("""%s
                 ____        _     _ _     _   _____
                / ___| _   _| |__ | (_)___| |_|___ / _ __
-                \___ \| | | | '_ \| | / __| __| |_ \| '__|
+                \\\___ \\| | | | '_ \\| | / __| __| |_ \\| '__|
-                 ___) | |_| | |_) | | \__ \ |_ ___) | |
+                 ___) | |_| | |_) | | \\__ \\ |_ ___) | |
-                |____/ \__,_|_.__/|_|_|___/\__|____/|_|%s%s
+                |____/ \\__,_|_.__/|_|_|___/\\__|____/|_|%s%s
                # Coded By Ahmed Aboul-Ela - @aboul3la
    """ % (R, W, Y))
@ -97,9 +98,10 @@ def parse_args():
    parser._optionals.title = "OPTIONS"
    parser.add_argument('-d', '--domain', help="Domain name to enumerate it's subdomains", required=True)
    parser.add_argument('-b', '--bruteforce', help='Enable the subbrute bruteforce module', nargs='?', default=False)
    parser.add_argument('-f', '--fast', help='Enable fast scan mode (uses faster engines only)', action='store_true')
    parser.add_argument('-t', '--threads', help='Number of threads to use for subbrute bruteforce (default: 30)', default=30, type=int)
    parser.add_argument('-p', '--ports', help='Scan the found subdomains against specified tcp ports')
    parser.add_argument('-v', '--verbose', help='Enable Verbosity and display results in realtime', nargs='?', default=False)
    parser.add_argument('-t', '--threads', help='Number of threads to use for subbrute bruteforce', type=int, default=30)
    parser.add_argument('-e', '--engines', help='Specify a comma-separated list of search engines')
    parser.add_argument('-o', '--output', help='Save the results to text file')
    parser.add_argument('-n', '--no-color', help='Output without color', default=False, action='store_true')
@ -283,7 +285,7 @@ class GoogleEnum(enumratorBaseThreaded):
    def extract_domains(self, resp):
        links_list = list()
-        link_regx = re.compile('<cite.*?>(.*?)<\/cite>')
+        link_regx = re.compile('<cite.*?>(.*?)</cite>')
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
@ -300,7 +302,7 @@ class GoogleEnum(enumratorBaseThreaded):
        return links_list
    def check_response_errors(self, resp):
-        if (type(resp) is str or type(resp) is unicode) and 'Our systems have detected unusual traffic' in resp:
+        if isinstance(resp, str) and 'Our systems have detected unusual traffic' in resp:
            self.print_(R + "[!] Error: Google probably now is blocking our requests" + W)
            self.print_(R + "[~] Finished now the Google Enumeration ..." + W)
            return False
@ -340,7 +342,7 @@ class YahooEnum(enumratorBaseThreaded):
            links2 = link_regx2.findall(resp)
            links_list = links + links2
            for link in links_list:
-                link = re.sub("<(\/)?b>", "", link)
+                link = re.sub("<(/)?b>", "", link)
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
@ -436,7 +438,7 @@ class BingEnum(enumratorBaseThreaded):
            links_list = links + links2
            for link in links_list:
-                link = re.sub('<(\/)?strong>|<span.*?>|<|>', '', link)
+                link = re.sub('<(/)?strong>|<span.*?>|<|>', '', link)
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
@ -637,25 +639,82 @@ class DNSdumpster(enumratorBaseThreaded):
        return self.get_response(resp)
    def get_csrftoken(self, resp):
        """Return the CSRF token for the DNSdumpster form, or "" when none is found.

        Lookup order (first hit wins):
          1. the hidden ``csrfmiddlewaretoken`` input in ``resp``;
          2. a ``csrftoken`` cookie on ``self.session``;
          3. a ``<meta>`` tag whose name mentions ``csrf``;
          4. any input whose name starts with ``csrf``.

        :param resp: HTML text of the landing page.
        :return: the token (stripped when taken from the HTML), or "" if
                 nothing matched or ``resp`` could not be searched.
        """
        try:
            # 1. The form field the site has historically used.
            found = re.search('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', resp, re.S)
            if found:
                return found.group(1).strip()

            # 2. Cookie set on the session by the initial GET.
            if 'csrftoken' in self.session.cookies:
                return self.session.cookies['csrftoken']

            # 3 and 4. Looser HTML fallbacks, tried in order.
            fallback_patterns = (
                '<meta[^>]*name[^>]*csrf[^>]*content=["\']([^"\']*)["\']',
                'name=["\']csrf[^"\']*["\'][^>]*value=["\']([^"\']*)["\']',
            )
            for pattern in fallback_patterns:
                found = re.search(pattern, resp, re.I)
                if found:
                    return found.group(1).strip()
            return ""
        except Exception as e:
            # Best effort: a non-string response must not abort the engine.
            if self.verbose:
                # Reset the colour with W, as the other engine messages do;
                # otherwise everything printed afterwards stays in colour R.
                self.print_("%s\nError getting CSRF token: %s%s" % (R, str(e), W))
            return ""
    def enumerate(self):
        """Query DNSdumpster for ``self.domain`` and return ``self.live_subdomains``.

        Fetches the landing page, extracts a CSRF token, POSTs the lookup form,
        hands the result page to ``extract_domains`` and then runs
        ``check_host`` for every entry of ``self.subdomains`` in its own thread.
        Any exception ends the run early; whatever is in
        ``self.live_subdomains`` at that point is returned.
        """
        # NOTE(review): presumably acquired inside check_host to cap concurrent
        # lookups at 70 -- confirm against check_host.
        self.lock = threading.BoundedSemaphore(value=70)
        try:
            landing_page = self.req('GET', self.base_url)
            if not landing_page:
                return self.live_subdomains

            csrf_token = self.get_csrftoken(landing_page)
            if self.verbose and not csrf_token:
                self.print_("%sWarning: No CSRF token found, DNSdumpster might fail%s" % (Y, W))

            form_fields = {'targetip': self.domain, 'csrfmiddlewaretoken': csrf_token}
            result_page = self.req('POST', self.base_url, form_fields)
            if not result_page:
                return self.live_subdomains

            self.extract_domains(result_page)

            # One thread per candidate host (not a pool): start them all first,
            # then wait for each one, giving up on a thread after 10 seconds.
            workers = [
                threading.Thread(target=self.check_host, args=(candidate,))
                for candidate in self.subdomains
            ]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join(timeout=10)
        except Exception as e:
            if self.verbose:
                self.print_("%sError enumerating DNSdumpster: %s%s" % (R, str(e), W))
        return self.live_subdomains
    def extract_domains(self, resp):
-        tbl_regex = re.compile('<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
+        tbl_regex = re.compile('<a name="hostanchor"></a>Host Records.*?<table.*?>(.*?)</table>', re.S)
        link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
        links = []
        try:
@ -676,36 +735,96 @@ class DNSdumpster(enumratorBaseThreaded):
 class Virustotal(enumratorBaseThreaded):
    def __init__(self, domain, subdomains=None, q=None, silent=False, verbose=True):
        """Set up the VirusTotal engine for ``domain``.

        :param domain: domain whose subdomains are requested.
        :param subdomains: optional list of already-known subdomains; a new
                           empty list is used when omitted.
        :param q: queue object, stored on the instance and forwarded to the
                  base class.
        :param silent: forwarded to the base class.
        :param verbose: forwarded to the base class.

        The API key is read from the ``VT_API_KEY`` environment variable and
        defaults to an empty string when the variable is unset.
        """
        self.engine_name = "Virustotal"
        self.q = q
        endpoint_template = 'https://www.virustotal.com/api/v3/domains/{domain}/subdomains'
        super(Virustotal, self).__init__(
            endpoint_template,
            self.engine_name,
            domain,
            subdomains or [],
            q=q,
            silent=silent,
            verbose=verbose,
        )
        # NOTE(review): base_url, domain and headers are presumably set by the
        # base-class constructor -- confirm against enumratorBaseThreaded.
        self.url = self.base_url.format(domain=self.domain)

        # Headers sent with every API v3 request.
        api_headers = {
            'Accept': 'application/json',
            'x-apikey': os.getenv('VT_API_KEY', ''),
            'x-tool': 'vt-ui-main',
            'Accept-Encoding': 'gzip, deflate',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
        }
        self.headers.update(api_headers)
-    # the main send_req need to be rewritten
+    def handle_ratelimit(self, resp):
        """Handle rate limiting by implementing exponential backoff"""
        if resp.status_code == 429:  # Too Many Requests
            retry_after = int(resp.headers.get('Retry-After', 60))
            if self.verbose:
                self.print_("%sRate limited by VirusTotal. Waiting %d seconds...%s" % (Y, retry_after, W))
            time.sleep(retry_after)
            return True
        return False
    def send_req(self, url):
        """GET ``url`` from VirusTotal and return the response body, or None.

        Makes at most three attempts. A rate-limited response (as reported by
        ``self.handle_ratelimit``) or a raised exception uses up one attempt;
        exceptions are followed by an exponential backoff sleep before the
        next attempt. A 401 or any other non-200 status ends the request
        immediately, because repeating it would not change the outcome.

        :param url: fully formed request URL.
        :return: response text on HTTP 200, otherwise None.
        """
        max_retries = 3
        # A bounded for-loop guarantees termination whatever status comes back.
        for attempt in range(1, max_retries + 1):
            try:
                resp = self.session.get(url, headers=self.headers, timeout=self.timeout)
                if self.handle_ratelimit(resp):
                    continue  # handle_ratelimit already waited
                if resp.status_code == 200:
                    return resp.text
                if resp.status_code == 401:
                    if self.verbose:
                        self.print_("%sError: Invalid or missing VirusTotal API key. Set VT_API_KEY environment variable.%s" % (R, W))
                    return None
                # Any other status (403, 404, 5xx, ...): give up instead of
                # re-requesting forever.
                if self.verbose:
                    self.print_("%sError: Unexpected VirusTotal response status: %s%s" % (R, resp.status_code, W))
                return None
            except Exception as e:
                if self.verbose:
                    self.print_("%sError connecting to VirusTotal: %s%s" % (R, str(e), W))
                if attempt < max_retries:
                    time.sleep(2 ** attempt)  # exponential backoff between attempts
        return None
    def enumerate(self):
        """Collect subdomains of ``self.domain`` from the VirusTotal API.

        Requests ``self.url`` and keeps following the ``meta.cursor`` value of
        each response until no cursor is returned, a cursor repeats, a request
        fails, or a response cannot be processed. Every ``id`` found under
        ``data`` is stripped and appended to ``self.subdomains`` unless it is
        empty, equals ``self.domain`` or is already present.

        :return: ``self.subdomains``; returned unchanged when no API key is
                 configured in ``self.headers``.
        """
        if not self.headers.get('x-apikey'):
            if self.verbose:
                self.print_("%sWarning: No VirusTotal API key found. Set VT_API_KEY environment variable.%s" % (Y, W))
            return self.subdomains

        # Imported locally so this method does not depend on the file's
        # top-level import block.
        try:
            from urllib.parse import quote
        except ImportError:  # Python 2
            from urllib import quote

        cursor = ''
        seen_cursors = set()
        while True:
            try:
                url = self.url
                if cursor:
                    # Encode the cursor: characters such as '+', '/' and '='
                    # must not be placed raw into a query string.
                    url = "%s?cursor=%s" % (self.url, quote(cursor, safe=''))
                resp = self.send_req(url)
                if not resp:
                    break

                resp_json = json.loads(resp)

                for item in resp_json.get('data') or []:
                    # Strip before the duplicate check so that " a.b " and
                    # "a.b" are recognised as the same host.
                    subdomain = (item.get('id') or '').strip()
                    if subdomain and subdomain != self.domain and subdomain not in self.subdomains:
                        self.subdomains.append(subdomain)

                # Continue only while the API hands back a new cursor.
                cursor = (resp_json.get('meta') or {}).get('cursor', '')
                if not cursor or cursor in seen_cursors:
                    break
                seen_cursors.add(cursor)
            except Exception as e:
                if self.verbose:
                    self.print_("%sError processing VirusTotal response: %s%s" % (R, str(e), W))
                break
        return self.subdomains
    def extract_domains(self, resp):
--- a/test_output.txt
+++ b/test_output.txt
@ -0,0 +1,7 @@
 AS207960 Test Intermediate - example.com
 www.example.com
 dev.example.com
 m.example.com
 products.example.com
 support.example.com
 m.testexample.com