diff --git a/.gitignore b/.gitignore index 0d20b64..d968708 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ *.pyc +build/ +dist/ +Sublist3r.egg-info/ diff --git a/subbrute/subbrute.py b/subbrute/subbrute.py index c7b1ab6..411b7cc 100644 --- a/subbrute/subbrute.py +++ b/subbrute/subbrute.py @@ -371,7 +371,7 @@ def extract_hosts(data, hostname): #Return a list of unique sub domains, sorted by frequency. #Only match domains that have 3 or more sections subdomain.domain.tld -domain_match = re.compile("([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+") +domain_match = re.compile(r"([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+") def extract_subdomains(file_name): #Avoid re-compilation global domain_match diff --git a/sublist3r.py b/sublist3r.py index 760e5ce..50f8ac5 100755 --- a/sublist3r.py +++ b/sublist3r.py @@ -72,7 +72,7 @@ def no_color(): def banner(): - print("""%s + print(r"""%s ____ _ _ _ _ _____ / ___| _ _| |__ | (_)___| |_|___ / _ __ \___ \| | | | '_ \| | / __| __| |_ \| '__| @@ -283,7 +283,7 @@ class GoogleEnum(enumratorBaseThreaded): def extract_domains(self, resp): links_list = list() - link_regx = re.compile('(.*?)<\/cite>') + link_regx = re.compile('(.*?)') try: links_list = link_regx.findall(resp) for link in links_list: @@ -340,7 +340,7 @@ class YahooEnum(enumratorBaseThreaded): links2 = link_regx2.findall(resp) links_list = links + links2 for link in links_list: - link = re.sub("<(\/)?b>", "", link) + link = re.sub("<(/)?b>", "", link) if not link.startswith('http'): link = "http://" + link subdomain = urlparse.urlparse(link).netloc @@ -436,7 +436,7 @@ class BingEnum(enumratorBaseThreaded): links_list = links + links2 for link in links_list: - link = re.sub('<(\/)?strong>||<|>', '', link) + link = re.sub('<(/)?strong>||<|>', '', link) if not link.startswith('http'): link = "http://" + link subdomain = urlparse.urlparse(link).netloc @@ -638,13 +638,31 @@ class DNSdumpster(enumratorBaseThreaded): def get_csrftoken(self, resp): csrf_regex = re.compile('', re.S) - token = csrf_regex.findall(resp)[0] - return token.strip() + try: + token = csrf_regex.findall(resp)[0] + return token.strip() + except (IndexError, TypeError): + # Try alternative CSRF token patterns + alt_csrf_regex = re.compile(']*name=["\']csrfmiddlewaretoken["\'][^>]*value=["\'](.*?)["\']', re.S) + try: + token = alt_csrf_regex.findall(resp)[0] + return token.strip() + except (IndexError, TypeError): + if self.verbose: + self.print_("%s%s: Could not extract CSRF token" % (R, self.engine_name)) + return "" def enumerate(self): self.lock = threading.BoundedSemaphore(value=70) resp = self.req('GET', self.base_url) token = self.get_csrftoken(resp) + + # If no token found, skip DNSdumpster enumeration + if not token: + if self.verbose: + self.print_("%s%s: Skipping enumeration due to missing CSRF token" % (R, self.engine_name)) + return self.live_subdomains + params = {'csrfmiddlewaretoken': token, 'targetip': self.domain} post_resp = self.req('POST', self.base_url, params) self.extract_domains(post_resp) @@ -655,7 +673,7 @@ class DNSdumpster(enumratorBaseThreaded): return self.live_subdomains def extract_domains(self, resp): - tbl_regex = re.compile('<\/a>Host Records.*?(.*?)', re.S) + tbl_regex = re.compile('Host Records.*?(.*?)', re.S) link_regex = re.compile('(.*?)
', re.S) links = [] try: @@ -895,7 +913,7 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e enable_bruteforce = True # Validate domain - domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$") + domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$") if not domain_check.match(domain): if not silent: print(R + "Error: Please enter a valid domain" + W)