fix: modernize Sublist3r for Python 3.11/3.12 compatibility and harden DNSDumpster engine

### Changes
- Fixed Python 3.11/3.12 SyntaxWarnings by converting legacy regex patterns
  to raw strings (r"...") in sublist3r.py and subbrute.py.
- Ensured compatibility with Homebrew Python environment by aligning
  interpreter and dependencies.
- Added robust error-handling wrapper in enumratorBaseThreaded.run()
  to prevent engine failures from stopping the entire enumeration.
- Guarded the shared result-list append (self.q.append): results are pushed
  only when self.q is present, and a failing entry no longer kills the process.
- Refactored DNSDumpster handling:
  - Updated how the result of req() is handed to get_csrftoken().
  - Implemented a resilient get_csrftoken() that accepts either Response
    objects or raw HTML strings.
  - Added a graceful fallback when the CSRF token is missing or the HTML
    structure changes.
- Normalized logging output to warn but continue execution when engines
  such as Google, VirusTotal, or DNSDumpster introduce blocking or CAPTCHAs.
- Improved reliability of multi-threaded enumeration by preventing
  AttributeError: "<Engine>Enum" object has no attribute "result".

### Result
Sublist3r now runs successfully on macOS/Homebrew Python 3.11+, with
proper exception handling for deprecated or blocking data sources.
DNSDumpster no longer throws an unhandled exception when its CSRF token
cannot be found, and all enumeration engines fail gracefully without
terminating the scan.
This commit is contained in:
Muhammed.m.Abdelkader 2025-11-22 15:54:42 +01:00
parent 729d649ec5
commit 1df8e087b4
2 changed files with 55 additions and 13 deletions

View File

@@ -371,7 +371,7 @@ def extract_hosts(data, hostname):
#Return a list of unique sub domains, sorted by frequency. #Return a list of unique sub domains, sorted by frequency.
#Only match domains that have 3 or more sections subdomain.domain.tld #Only match domains that have 3 or more sections subdomain.domain.tld
domain_match = re.compile("([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+") domain_match = re.compile(r"([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+")
def extract_subdomains(file_name): def extract_subdomains(file_name):
#Avoid re-compilation #Avoid re-compilation
global domain_match global domain_match

View File

@@ -265,9 +265,27 @@ class enumratorBaseThreaded(multiprocessing.Process, enumratorBase):
return return
def run(self): def run(self):
# Safe wrapper so a broken engine doesn't kill the whole scan
try:
domain_list = self.enumerate() domain_list = self.enumerate()
except Exception as e:
# engine_name is defined in each subclass (Google, Yahoo, Ask, etc.)
try:
self.print_("[!] Engine {0} failed: {1}".format(self.engine_name, e))
except Exception:
# Fallback if print_ or engine_name missing for some reason
print("[!] Engine failed: {0}".format(e))
domain_list = []
# Push results into the shared list, if present
if self.q is not None:
for domain in domain_list: for domain in domain_list:
try:
self.q.append(domain) self.q.append(domain)
except Exception:
# don't let one bad entry kill the process
pass
class GoogleEnum(enumratorBaseThreaded): class GoogleEnum(enumratorBaseThreaded):
@@ -283,7 +301,8 @@ class GoogleEnum(enumratorBaseThreaded):
def extract_domains(self, resp): def extract_domains(self, resp):
links_list = list() links_list = list()
link_regx = re.compile('<cite.*?>(.*?)<\/cite>') link_regx = re.compile(r'<cite.*?>(.*?)</cite>')
try: try:
links_list = link_regx.findall(resp) links_list = link_regx.findall(resp)
for link in links_list: for link in links_list:
@@ -340,7 +359,7 @@ class YahooEnum(enumratorBaseThreaded):
links2 = link_regx2.findall(resp) links2 = link_regx2.findall(resp)
links_list = links + links2 links_list = links + links2
for link in links_list: for link in links_list:
link = re.sub("<(\/)?b>", "", link) link = re.sub(r"</?b>", "", link)
if not link.startswith('http'): if not link.startswith('http'):
link = "http://" + link link = "http://" + link
subdomain = urlparse.urlparse(link).netloc subdomain = urlparse.urlparse(link).netloc
@@ -436,7 +455,7 @@ class BingEnum(enumratorBaseThreaded):
links_list = links + links2 links_list = links + links2
for link in links_list: for link in links_list:
link = re.sub('<(\/)?strong>|<span.*?>|<|>', '', link) link = re.sub(r'</?strong>|<span.*?>|<|>', '', link)
if not link.startswith('http'): if not link.startswith('http'):
link = "http://" + link link = "http://" + link
subdomain = urlparse.urlparse(link).netloc subdomain = urlparse.urlparse(link).netloc
@@ -637,14 +656,36 @@ class DNSdumpster(enumratorBaseThreaded):
return self.get_response(resp) return self.get_response(resp)
def get_csrftoken(self, resp): def get_csrftoken(self, resp):
csrf_regex = re.compile('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S) """
token = csrf_regex.findall(resp)[0] Accepts either a requests.Response object or a raw HTML string.
return token.strip() Returns the CSRF token from DNSDumpster HTML.
"""
# If it's a Response object, extract .text
if hasattr(resp, "text"):
html = resp.text
else:
# Assume it's already a string
html = resp
match = re.search(
r'name="csrfmiddlewaretoken" value="(.*?)"',
html,
)
if not match:
raise Exception("Could not find CSRF token on DNSDumpster page")
return match.group(1)
def enumerate(self): def enumerate(self):
self.lock = threading.BoundedSemaphore(value=70) self.lock = threading.BoundedSemaphore(value=70)
resp = self.req('GET', self.base_url) resp = self.req('GET', self.base_url)
try:
token = self.get_csrftoken(resp) token = self.get_csrftoken(resp)
except Exception as e:
print("[!] DNSDumpster module failed: {0}".format(e))
return [] # gracefully skip this source
params = {'csrfmiddlewaretoken': token, 'targetip': self.domain} params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
post_resp = self.req('POST', self.base_url, params) post_resp = self.req('POST', self.base_url, params)
self.extract_domains(post_resp) self.extract_domains(post_resp)
@@ -655,7 +696,8 @@ class DNSdumpster(enumratorBaseThreaded):
return self.live_subdomains return self.live_subdomains
def extract_domains(self, resp): def extract_domains(self, resp):
tbl_regex = re.compile('<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S) tbl_regex = re.compile(r'<a name="hostanchor"></a>Host Records.*?<table.*?>(.*?)</table>',
re.S,)
link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S) link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
links = [] links = []
try: try:
@@ -895,7 +937,7 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e
enable_bruteforce = True enable_bruteforce = True
# Validate domain # Validate domain
domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$") domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
if not domain_check.match(domain): if not domain_check.match(domain):
if not silent: if not silent:
print(R + "Error: Please enter a valid domain" + W) print(R + "Error: Please enter a valid domain" + W)