#!/usr/bin/env python
# SubList3r v0.1
# By Ahmed Aboul-Ela - twitter.com/aboul3la

import re
import sys
import os
import argparse
import time
import requests
import urlparse
import urllib
import hashlib
import multiprocessing
import threading
import dns.resolver

from subbrute import subbrute
from collections import Counter

# Console Colors
G = '\033[92m'  # green
Y = '\033[93m'  # yellow
B = '\033[94m'  # blue
R = '\033[91m'  # red
W = '\033[0m'   # white


def banner():
    print """%s
                 ____        _     _ _     _   _____
                / ___| _   _| |__ | (_)___| |_|___ / _ __
                \___ \| | | | '_ \| | / __| __| |_ \| '__|
                 ___) | |_| | |_) | | \__ \ |_ ___) | |
                |____/ \__,_|_.__/|_|_|___/\__|____/|_|%s%s

                # Fast Subdomains Enumeration tool using Search Engines and BruteForce
                # Coded By Ahmed Aboul-Ela - @aboul3la
                # Special Thanks to Ibrahim Mosaad - @ibrahim_mosaad for his contributions%s
    """ % (R, W, Y, W)


def parser_error(errmsg):
    banner()
    print "Usage: python "+sys.argv[0]+" [Options] use -h for help"
    print R+"Error: "+errmsg+W
    sys.exit()


def parse_args():
    # parse the arguments
    parser = argparse.ArgumentParser(epilog='\tExample: \r\npython '+sys.argv[0]+" -d google.com")
    parser.error = parser_error
    parser._optionals.title = "OPTIONS"
    parser.add_argument('-d', '--domain', help="Domain name to enumerate its subdomains", required=True)
    parser.add_argument('-b', '--bruteforce', help='Enable the subbrute bruteforce module', nargs='?', default=False)
    parser.add_argument('-v', '--verbose', help='Enable verbosity and display results in realtime', nargs='?', default=False)
    parser.add_argument('-t', '--threads', help='Number of threads to use for subbrute bruteforce', type=int, default=10)
    parser.add_argument('-o', '--output', help='Save the results to a text file')
    return parser.parse_args()


def write_file(filename, subdomains):
    # saving subdomains results to output file
    print "%s[-] Saving results to file: %s%s%s%s" % (Y, W, R, filename, W)
    f = open(str(filename), 'wb')
    for subdomain in subdomains:
        f.write(subdomain+"\r\n")
    f.close()


class enumratorBase(object):
    def __init__(self, base_url, engine_name, domain, subdomains=[]):
        self.domain = urlparse.urlparse(domain).netloc
        self.session = requests.Session()
        self.subdomains = []
        self.timeout = 10
        self.base_url = base_url
        self.engine_name = engine_name
        self.print_banner()

    def print_banner(self):
        """ subclasses can override this if they want a fancy banner :) """
        print G+"[-] Searching now in %s.." % (self.engine_name)+W
        return

    def send_req(self, query, page_no=1):
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive'
        }
        url = self.base_url.format(query=query, page_no=page_no)
        try:
            resp = self.session.get(url, headers=headers, timeout=self.timeout)
        except Exception as e:
            print e
            raise
        return resp.text

    def check_max_subdomains(self, count):
        if self.MAX_DOMAINS == 0:
            return False
        if count >= self.MAX_DOMAINS:
            return True
        else:
            return False

    def check_max_pages(self, num):
        if self.MAX_PAGES == 0:
            return False
        if num >= self.MAX_PAGES:
            return True
        else:
            return False

    # Override
    def extract_domains(self, resp):
        """ child class should override this function """
        return

    # Override
    def check_response_errors(self, resp):
        """ child class should override this function
            The function should return True if there are no errors and False otherwise
        """
        return True

    def should_sleep(self):
        """ some enumerators require sleeping to avoid bot detection, like the Google enumerator """
        return

    def generate_query(self):
        """ child class should override this function """
        return

    def get_page(self, num):
        """ child classes that use a different pagination counter should override this function """
        return num+10

    def enumerate(self, altquery=False):
        flag = True
        page_no = 0
        prev_links = []
        prev_subdomains = []
        retries = 0

        while flag:
            query = self.generate_query()
            count = query.count(self.domain)  # finding the number of subdomains found so far

            # if we reached the maximum number of subdomains in the search query,
            # then we should move over the pages
            if self.check_max_subdomains(count):
                page_no = self.get_page(page_no)

            if self.check_max_pages(page_no):  # maximum pages for Google to avoid getting blocked
                return self.subdomains
            resp = self.send_req(query, page_no)

            # check if any error occurred
            if not self.check_response_errors(resp):
                return self.subdomains
            links = self.extract_domains(resp)

            # if the hyperlinks of the previous page are similar to the current one,
            # then maybe we have reached the last page
            if links == prev_links:
                retries += 1
                page_no = self.get_page(page_no)

                # make another retry, maybe it isn't the last page
                if retries >= 3:
                    return self.subdomains

            prev_links = links
            self.should_sleep()

        return self.subdomains


class enumratorBaseThreaded(multiprocessing.Process, enumratorBase):
    def __init__(self, base_url, engine_name, domain, subdomains=[], q=None, lock=threading.Lock()):
        enumratorBase.__init__(self, base_url, engine_name, domain, subdomains)
        multiprocessing.Process.__init__(self)
        self.lock = lock
        self.q = q
        return

    def run(self):
        domain_list = self.enumerate()
        self.q.put(domain_list)


class GoogleEnum(enumratorBaseThreaded):
    def __init__(self, domain, subdomains=[], q=None):
        base_url = "https://google.com/search?q={query}&btnG=Search&hl=en-US&biw=&bih=&gbv=1&start={page_no}&filter=0"
        self.engine_name = "Google"
        self.MAX_DOMAINS = 11
        self.MAX_PAGES = 200
        super(GoogleEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q)
        self.q = q
        return

    def extract_domains(self, resp):
        link_regx = re.compile('<cite>(.*?)<\/cite>')
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                link = re.sub('<span.*>', '', link)
                if not link.startswith('http'):
                    link = "http://"+link
                subdomain = urlparse.urlparse(link).netloc
                if subdomain not in self.subdomains and subdomain != self.domain and subdomain != '':
                    if verbose:
                        print "%s%s: %s%s" % (R, self.engine_name, W, subdomain)
                    self.subdomains.append(subdomain)
        except Exception as e:
            pass
        return links_list

    def check_response_errors(self, resp):
        if 'Our systems have detected unusual traffic' in resp:
            print R+"Error: Google is probably blocking our requests now"+W
            print R+"Exiting..."+W
            return False
        return True

    def should_sleep(self):
        time.sleep(5)
        return

    def generate_query(self):
        if len(self.subdomains) > 0:
            query = "site:{domain} -www.{domain} -{found}".format(domain=self.domain, found=' -'.join(self.subdomains[:self.MAX_DOMAINS-2]))
        else:
            query = "site:{domain} -www.{domain}".format(domain=self.domain)
        return query


class YahooEnum(enumratorBaseThreaded):
    def __init__(self, domain, subdomains=[], q=None):
        base_url = "https://search.yahoo.com/search?p={query}&b={page_no}"
        self.engine_name = "Yahoo"
        self.MAX_DOMAINS = 10
        self.MAX_PAGES = 0
        super(YahooEnum, self).__init__(base_url, self.engine_name, domain, subdomains, q=q)
        self.q = q
        return

    def extract_domains(self, resp):
        link_regx2 = re.compile('<span class=" fz-.*? fw-m fc-12th wr-bw.*?">(.*?)</span>')
        link_regx = re.compile('<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
        try:
            links = link_regx.findall(resp)
            links2 = link_regx2.findall(resp)
            links_list = links+links2
            for link in links_list:
                link = re.sub("<(\/)?b>", "", link)
                if not link.startswith('http'):
                    link = "http://"+link
                subdomain = urlparse.urlparse(link).netloc
                if not subdomain.endswith(self.domain):
                    continue
                if subdomain not in self.subdomains and subdomain != self.domain and subdomain != '':
                    if verbose:
                        print "%s%s: %s%s" % (R, self.engine_name, W, subdomain)
                    self.subdomains.append(subdomain)
        except Exception as e:
            pass
        return links_list

    def should_sleep(self):
        return

    def get_page(self, num):
        return num+10

    def generate_query(self):
        if len(self.subdomains) > 0:
            query = "site:{domain} -domain:www.{domain} -domain:{found}".format(domain=self.domain, found=' -domain:'.join(self.subdomains[:77]))
        else:
            query = "site:{domain}".format(domain=self.domain)
        return query


class AskEnum(enumratorBaseThreaded):
    def __init__(self, domain, subdomains=[], q=None):
        base_url = 'http://www.ask.com/web?q={query}&page={page_no}&qid=8D6EE6BF52E0C04527E51F64F22C4534&o=0&l=dir&qsrc=998&qo=pagination'
        self.engine_name = "Ask"
        self.MAX_DOMAINS = 11
        self.MAX_PAGES = 0
        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q)
        self.q = q
        return

    def extract_domains(self, resp):
        link_regx = re.compile('<p class="web-result-url">(.*?)</p>')
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                if not link.startswith('http'):
                    link = "http://"+link
                subdomain = urlparse.urlparse(link).netloc
                if subdomain not in self.subdomains and subdomain != self.domain:
                    if verbose:
                        print "%s%s: %s%s" % (R, self.engine_name, W, subdomain)
                    self.subdomains.append(subdomain)
        except Exception as e:
            pass
        return links_list

    def get_page(self, num):
        return num+1

    def generate_query(self):
        if len(self.subdomains) > 0:
            query = "site:{domain} -www.{domain} -{found}".format(domain=self.domain, found=' -'.join(self.subdomains[:self.MAX_DOMAINS]))
        else:
            query = "site:{domain} -www.{domain}".format(domain=self.domain)
        return query


class BingEnum(enumratorBaseThreaded):
    def __init__(self, domain, subdomains=[], q=None):
        base_url = 'https://www.bing.com/search?q={query}&go=Submit&first={page_no}'
        self.engine_name = "Bing"
        self.MAX_DOMAINS = 30
        self.MAX_PAGES = 0
        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q)
        self.q = q
        return

    def extract_domains(self, resp):
        link_regx = re.compile('<li class="b_algo"><h2><a href="(.*?)"')
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                link = re.sub('<strong>|</strong>|<|>', '', link)
                if not link.startswith('http'):
                    link = "http://"+link
                subdomain = urlparse.urlparse(link).netloc
                if subdomain not in self.subdomains and subdomain != self.domain:
                    if verbose:
                        print "%s%s: %s%s" % (R, self.engine_name, W, subdomain)
                    self.subdomains.append(subdomain)
        except Exception as e:
            pass
        return links_list

    def generate_query(self):
        if len(self.subdomains) > 0:
            query = "domain:{domain} -www.{domain} -{found}".format(domain=self.domain, found=' -'.join(self.subdomains[:self.MAX_DOMAINS]))
        else:
            query = "domain:{domain} -www.{domain}".format(domain=self.domain)
        return query


class BaiduEnum(enumratorBaseThreaded):
    def __init__(self, domain, subdomains=[], q=None):
        base_url = 'http://www.baidu.com/s?pn={page_no}&wd={query}'
        self.engine_name = "Baidu"
        self.MAX_DOMAINS = 2
        self.MAX_PAGES = 760
        enumratorBaseThreaded.__init__(self, base_url, self.engine_name, domain, subdomains, q=q)
        self.querydomain = self.domain
        self.q = q
        return

    def extract_domains(self, resp):
        found_newdomain = False
        subdomain_list = []
        link_regx = re.compile('<a.*?class="c-showurl".*?>(.*?)</a>')
        try:
            links = link_regx.findall(resp)
            for link in links:
                link = re.sub('<.*?>|>|<|&nbsp;', '', link)
                if not link.startswith('http'):
                    link = "http://"+link
                subdomain = urlparse.urlparse(link).netloc
                if subdomain.endswith(self.domain):
                    subdomain_list.append(subdomain)
                    if subdomain not in self.subdomains and subdomain != self.domain:
                        found_newdomain = True
                        if verbose:
                            print "%s%s: %s%s" % (R, self.engine_name, W, subdomain)
                        self.subdomains.append(subdomain)
        except Exception as e:
            pass
        # if this page produced no new subdomains, exclude the most frequent ones from the next query
        if not found_newdomain and len(subdomain_list) != 0:
            self.querydomain = self.findsubs(subdomain_list)
        return links

    def findsubs(self, subdomains):
        # return the two most frequent subdomains in the list
        count = Counter(subdomains)
        subdomain1 = max(count, key=count.get)
        count.pop(subdomain1, "None")
        if len(count) > 0:
            subdomain2 = max(count, key=count.get)
        else:
            subdomain2 = ''
        return (subdomain1, subdomain2)

    def check_response_errors(self, resp):
        return True

    def should_sleep(self):
        return

    def generate_query(self):
        if len(self.subdomains) > 0 and self.querydomain != self.domain:
            query = "site:{domain} -site:{found} ".format(domain=self.domain, found=' -site:'.join(self.querydomain))
        else:
            query = "site:{domain}".format(domain=self.domain)
        return query


class NetcraftEnum(multiprocessing.Process):
    def __init__(self, domain, subdomains=[], q=None, lock=threading.Lock()):
        self.base_url = 'http://searchdns.netcraft.com/?restriction=site+ends+with&host={domain}'
        self.domain = urlparse.urlparse(domain).netloc
        self.subdomains = []
        self.session = requests.Session()
        self.engine_name = "Netcraft"
        multiprocessing.Process.__init__(self)
        self.lock = lock
        self.q = q
        self.timeout = 10
        self.print_banner()
        return

    def run(self):
        domain_list = self.enumerate()
        self.q.put(domain_list)
        return

    def print_banner(self):
        print G+"[-] Searching now in %s.." % (self.engine_name)+W
        return

    def req(self, url, cookies=dict()):
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/40.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
        }
        try:
            resp = self.session.get(url, headers=headers, timeout=self.timeout, cookies=cookies)
        except Exception as e:
            print e
            raise
        return resp

    def get_next(self, resp):
        link_regx = re.compile('<A href="(.*?)"><b>Next page</b></a>')
        link = link_regx.findall(resp)
        link = re.sub('host=.*?%s' % self.domain, 'host=%s' % self.domain, link[0])
        url = 'http://searchdns.netcraft.com'+link
        return url

    def create_cookies(self, cookie):
        # Netcraft expects the sha1 hash of the js verification cookie to be sent back
        cookies = dict()
        cookies_list = cookie[0:cookie.find(';')].split("=")
        cookies[cookies_list[0]] = cookies_list[1]
        cookies['netcraft_js_verification_response'] = hashlib.sha1(urllib.unquote(cookies_list[1])).hexdigest()
        return cookies

    def enumerate(self):
        start_url = self.base_url.format(domain='example.com')
        resp = self.req(start_url)
        cookies = self.create_cookies(resp.headers['set-cookie'])
        url = self.base_url.format(domain=self.domain)
        while True:
            resp = self.req(url, cookies).text
            self.extract_domains(resp)
            if not 'Next page' in resp:
                return self.subdomains
            url = self.get_next(resp)

    def extract_domains(self, resp):
        link_regx = re.compile('<a href="http://toolbar.netcraft.com/site_report\?url=(.*?)">')
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                subdomain = urlparse.urlparse(link).netloc
                if not subdomain.endswith(self.domain):
                    continue
                if subdomain not in self.subdomains and subdomain != self.domain and subdomain != '':
                    if verbose:
                        print "%s%s: %s%s" % (R, self.engine_name, W, subdomain)
                    self.subdomains.append(subdomain)
        except Exception as e:
            pass
        return links_list


class DNSdumpster(multiprocessing.Process):
    def __init__(self, domain, subdomains=[], q=None, lock=threading.Lock()):
        self.base_url = 'https://dnsdumpster.com/'
        self.domain = urlparse.urlparse(domain).netloc
        self.subdomains = []
        self.session = requests.Session()
        self.engine_name = "DNSdumpster"
        multiprocessing.Process.__init__(self)
        self.lock = lock
        self.q = q
        self.timeout = 10
        self.print_banner()
        return

    def run(self):
        domain_list = self.enumerate()
        self.q.put(domain_list)
        return

    def print_banner(self):
        print G+"[-] Searching now in %s.." % (self.engine_name)+W
        return

    def check_host(self, host):
        # verify the candidate subdomain actually resolves before reporting it
        is_valid = False
        Resolver = dns.resolver.Resolver()
        Resolver.nameservers = ['8.8.8.8', '8.8.4.4']
        try:
            ip = Resolver.query(host, 'A')[0].to_text()
            if ip != '' and ip is not None:
                is_valid = True
        except:
            pass
        return is_valid

    def req(self, req_method, url, params=dict()):
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/40.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'https://dnsdumpster.com'
        }
        try:
            if req_method == 'GET':
                resp = self.session.get(url, headers=headers, timeout=self.timeout)
            else:
                resp = self.session.post(url, data=params, headers=headers, timeout=self.timeout)
        except Exception as e:
            print e
            raise
        return resp.text

    def get_csrftoken(self, resp):
        csrf_regex = re.compile("<input type='hidden' name='csrfmiddlewaretoken' value='(.*?)' />", re.S)
        token = csrf_regex.findall(resp)[0]
        return token.strip()

    def enumerate(self):
        resp = self.req('GET', self.base_url)
        token = self.get_csrftoken(resp)
        params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
        post_resp = self.req('POST', self.base_url, params)
        self.extract_domains(post_resp)
        return self.subdomains

    def extract_domains(self, resp):
        tbl_regex = re.compile('<a name="hostanchor"><\/a>Host Records.*?<table.*?>(.*?)</table>', re.S)
        link_regex = re.compile('<td class="col-md-4">(.*?)<br>', re.S)
        links = []
        results_tbl = tbl_regex.findall(resp)[0]
        links_list = link_regex.findall(results_tbl)
        links = list(set(links_list))
        for link in links:
            subdomain = link.strip()
            if not subdomain.endswith(self.domain):
                continue
            if self.check_host(subdomain) and subdomain not in self.subdomains and subdomain != self.domain and subdomain != '':
                if verbose:
                    print "%s%s: %s%s" % (R, self.engine_name, W, subdomain)
                self.subdomains.append(subdomain)
        return links


def main():
    args = parse_args()
    domain = args.domain
    threads = args.threads
    savefile = args.output
    google_list = []
    bing_list = []
    baidu_list = []
    bruteforce_list = set()
    subdomains_queue = multiprocessing.Queue()

    # Check Verbosity
    global verbose
    verbose = args.verbose
    if verbose or verbose is None:
        verbose = True

    # Check Bruteforce Status
    enable_bruteforce = args.bruteforce
    if enable_bruteforce or enable_bruteforce is None:
        enable_bruteforce = True

    # Validate domain
    domain_check = re.compile("^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,5}$")
    if not domain_check.match(domain):
        print R+"Error: Please enter a valid domain"+W
        sys.exit()

    if not domain.startswith('http://') and not domain.startswith('https://'):
        domain = 'http://'+domain

    # Print the Banner
    banner()
    parsed_domain = urlparse.urlparse(domain)
    print B+"[-] Enumerating subdomains now for %s" % parsed_domain.netloc+W

    if verbose:
        print Y+"[-] verbosity is enabled, will show the subdomains results in realtime"+W

    # Start the engines enumeration
    enum_baidu = BaiduEnum(domain, verbose, q=subdomains_queue)
    enum_yahoo = YahooEnum(domain, verbose, q=subdomains_queue)
    enum_google = GoogleEnum(domain, verbose, q=subdomains_queue)
    enum_bing = BingEnum(domain, verbose, q=subdomains_queue)
    enum_ask = AskEnum(domain, verbose, q=subdomains_queue)
    enum_netcraft = NetcraftEnum(domain, verbose, q=subdomains_queue)
    enum_dnsdumpester = DNSdumpster(domain, verbose, q=subdomains_queue)

    enum_baidu.start()
    enum_yahoo.start()
    enum_google.start()
    enum_bing.start()
    enum_ask.start()
    enum_netcraft.start()
    enum_dnsdumpester.start()

    enum_baidu.join()
    enum_yahoo.join()
    enum_google.join()
    enum_bing.join()
    enum_ask.join()
    enum_netcraft.join()
    enum_dnsdumpester.join()

    search_list = set()
    while not subdomains_queue.empty():
        search_list = search_list.union(subdomains_queue.get())

    if enable_bruteforce:
        print G+"[-] Starting bruteforce module now using subbrute.."+W
        record_type = False
        subs = os.path.join(os.path.dirname(os.path.realpath(__file__)), "subbrute/names.txt")
        resolvers = os.path.join(os.path.dirname(os.path.realpath(__file__)), "subbrute/resolvers.txt")
        process_count = threads
        output = False
        json_output = False
        bruteforce_list = subbrute.print_target(parsed_domain.netloc, record_type, subs, resolvers, process_count, output, json_output, search_list, verbose)

    subdomains = search_list.union(bruteforce_list)

    if len(subdomains) > 0:
        if savefile is not None:
            write_file(savefile, subdomains)
        print Y+"[-] Total Unique Subdomains Found: %s" % len(subdomains)+W
        for subdomain in subdomains:
            print G+subdomain+W


if __name__ == "__main__":
    main()