fix: modernize Sublist3r for Python 3.11/3.12 compatibility and harden DNSDumpster engine
### Changes
- Fixed invalid-escape-sequence warnings (DeprecationWarning on Python 3.11,
SyntaxWarning on Python 3.12) by converting legacy regex patterns to raw
strings (r"...") in sublist3r.py and subbrute.py.
- Ensured compatibility with Homebrew Python environment by aligning
interpreter and dependencies.
- Added robust error-handling wrapper in enumratorBaseThreaded.run()
to prevent engine failures from stopping the entire enumeration.
- Kept the existing shared-list push (self.q.append) but guarded it: results are
only appended when self.q is not None, and a failing append no longer kills the
engine process.
- Refactored DNSDumpster handling:
  - Updated req() -> get_csrftoken() interaction.
  - Implemented resilient get_csrftoken() that accepts either Response
    objects or raw HTML strings.
  - Added graceful fallback when CSRF token is missing or HTML structure
    changes.
- Normalized logging output to warn but continue execution when engines
such as Google, VirusTotal, or DNSDumpster introduce blocking or CAPTCHAs.
- Improved reliability of multi-threaded enumeration by preventing
AttributeError: "<Engine>Enum" object has no attribute "result".
### Result
Sublist3r now runs successfully on macOS/Homebrew Python 3.11+, with
proper exception handling for deprecated or blocking data sources.
DNSDumpster no longer throws an unhandled exception when its CSRF token cannot
be found, and all enumeration engines fail gracefully without terminating the scan.
This commit is contained in:
parent
729d649ec5
commit
1df8e087b4
|
|
@ -371,7 +371,7 @@ def extract_hosts(data, hostname):
|
||||||
|
|
||||||
#Return a list of unique sub domains, sorted by frequency.
|
#Return a list of unique sub domains, sorted by frequency.
|
||||||
#Only match domains that have 3 or more sections subdomain.domain.tld
|
#Only match domains that have 3 or more sections subdomain.domain.tld
|
||||||
domain_match = re.compile("([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+")
|
domain_match = re.compile(r"([a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*)+")
|
||||||
def extract_subdomains(file_name):
|
def extract_subdomains(file_name):
|
||||||
#Avoid re-compilation
|
#Avoid re-compilation
|
||||||
global domain_match
|
global domain_match
|
||||||
|
|
|
||||||
66
sublist3r.py
66
sublist3r.py
|
|
@ -265,9 +265,27 @@ class enumratorBaseThreaded(multiprocessing.Process, enumratorBase):
|
||||||
return
|
return
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
domain_list = self.enumerate()
|
# Safe wrapper so a broken engine doesn’t kill the whole scan
|
||||||
for domain in domain_list:
|
try:
|
||||||
self.q.append(domain)
|
domain_list = self.enumerate()
|
||||||
|
except Exception as e:
|
||||||
|
# engine_name is defined in each subclass (Google, Yahoo, Ask, etc.)
|
||||||
|
try:
|
||||||
|
self.print_("[!] Engine {0} failed: {1}".format(self.engine_name, e))
|
||||||
|
except Exception:
|
||||||
|
# Fallback if print_ or engine_name missing for some reason
|
||||||
|
print("[!] Engine failed: {0}".format(e))
|
||||||
|
domain_list = []
|
||||||
|
|
||||||
|
# Push results into the shared list, if present
|
||||||
|
if self.q is not None:
|
||||||
|
for domain in domain_list:
|
||||||
|
try:
|
||||||
|
self.q.append(domain)
|
||||||
|
except Exception:
|
||||||
|
# don’t let one bad entry kill the process
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleEnum(enumratorBaseThreaded):
|
class GoogleEnum(enumratorBaseThreaded):
|
||||||
|
|
@ -283,7 +301,8 @@ class GoogleEnum(enumratorBaseThreaded):
|
||||||
|
|
||||||
def extract_domains(self, resp):
|
def extract_domains(self, resp):
|
||||||
links_list = list()
|
links_list = list()
|
||||||
link_regx = re.compile('<cite.*?>(.*?)<\/cite>')
|
link_regx = re.compile(r'<cite.*?>(.*?)</cite>')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
links_list = link_regx.findall(resp)
|
links_list = link_regx.findall(resp)
|
||||||
for link in links_list:
|
for link in links_list:
|
||||||
|
|
@ -340,7 +359,7 @@ class YahooEnum(enumratorBaseThreaded):
|
||||||
links2 = link_regx2.findall(resp)
|
links2 = link_regx2.findall(resp)
|
||||||
links_list = links + links2
|
links_list = links + links2
|
||||||
for link in links_list:
|
for link in links_list:
|
||||||
link = re.sub("<(\/)?b>", "", link)
|
link = re.sub(r"</?b>", "", link)
|
||||||
if not link.startswith('http'):
|
if not link.startswith('http'):
|
||||||
link = "http://" + link
|
link = "http://" + link
|
||||||
subdomain = urlparse.urlparse(link).netloc
|
subdomain = urlparse.urlparse(link).netloc
|
||||||
|
|
@ -436,7 +455,7 @@ class BingEnum(enumratorBaseThreaded):
|
||||||
links_list = links + links2
|
links_list = links + links2
|
||||||
|
|
||||||
for link in links_list:
|
for link in links_list:
|
||||||
link = re.sub('<(\/)?strong>|<span.*?>|<|>', '', link)
|
link = re.sub(r'</?strong>|<span.*?>|<|>', '', link)
|
||||||
if not link.startswith('http'):
|
if not link.startswith('http'):
|
||||||
link = "http://" + link
|
link = "http://" + link
|
||||||
subdomain = urlparse.urlparse(link).netloc
|
subdomain = urlparse.urlparse(link).netloc
|
||||||
|
|
@ -637,14 +656,36 @@ class DNSdumpster(enumratorBaseThreaded):
|
||||||
return self.get_response(resp)
|
return self.get_response(resp)
|
||||||
|
|
||||||
def get_csrftoken(self, resp):
|
def get_csrftoken(self, resp):
|
||||||
csrf_regex = re.compile('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
|
"""
|
||||||
token = csrf_regex.findall(resp)[0]
|
Accepts either a requests.Response object or a raw HTML string.
|
||||||
return token.strip()
|
Returns the CSRF token from DNSDumpster HTML.
|
||||||
|
"""
|
||||||
|
# If it's a Response object, extract .text
|
||||||
|
if hasattr(resp, "text"):
|
||||||
|
html = resp.text
|
||||||
|
else:
|
||||||
|
# Assume it's already a string
|
||||||
|
html = resp
|
||||||
|
|
||||||
|
match = re.search(
|
||||||
|
r'name="csrfmiddlewaretoken" value="(.*?)"',
|
||||||
|
html,
|
||||||
|
)
|
||||||
|
if not match:
|
||||||
|
raise Exception("Could not find CSRF token on DNSDumpster page")
|
||||||
|
return match.group(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def enumerate(self):
|
def enumerate(self):
|
||||||
self.lock = threading.BoundedSemaphore(value=70)
|
self.lock = threading.BoundedSemaphore(value=70)
|
||||||
resp = self.req('GET', self.base_url)
|
resp = self.req('GET', self.base_url)
|
||||||
token = self.get_csrftoken(resp)
|
try:
|
||||||
|
token = self.get_csrftoken(resp)
|
||||||
|
except Exception as e:
|
||||||
|
print("[!] DNSDumpster module failed: {0}".format(e))
|
||||||
|
return [] # gracefully skip this source
|
||||||
|
|
||||||
params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
|
params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
|
||||||
post_resp = self.req('POST', self.base_url, params)
|
post_resp = self.req('POST', self.base_url, params)
|
||||||
self.extract_domains(post_resp)
|
self.extract_domains(post_resp)
|
||||||
|
|
@ -655,7 +696,8 @@ class DNSdumpster(enumratorBaseThreaded):
|
||||||
return self.live_subdomains
|
return self.live_subdomains
|
||||||
|
|
||||||
def extract_domains(self, resp):
|
def extract_domains(self, resp):
|
||||||
tbl_regex = re.compile('<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
|
tbl_regex = re.compile(r'<a name="hostanchor"></a>Host Records.*?<table.*?>(.*?)</table>',
|
||||||
|
re.S,)
|
||||||
link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
|
link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
|
||||||
links = []
|
links = []
|
||||||
try:
|
try:
|
||||||
|
|
@ -895,7 +937,7 @@ def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, e
|
||||||
enable_bruteforce = True
|
enable_bruteforce = True
|
||||||
|
|
||||||
# Validate domain
|
# Validate domain
|
||||||
domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
|
domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
|
||||||
if not domain_check.match(domain):
|
if not domain_check.match(domain):
|
||||||
if not silent:
|
if not silent:
|
||||||
print(R + "Error: Please enter a valid domain" + W)
|
print(R + "Error: Please enter a valid domain" + W)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue