From 1ea535a23c1c4be225042a25b60ebdfb811c8fad Mon Sep 17 00:00:00 2001 From: Jesse Osiecki Date: Thu, 14 Jan 2021 13:19:29 -0500 Subject: [PATCH 01/15] - Modify code to work with Python 3 - remove requesocks hard dependency - Ran code through Black linter/formater - Modify to use Geckodriver by default, PhantomJS seemed defunct --- Dockerfile | 6 +- httpscreenshot.py | 1235 +++++++++++++++++++++++---------------- install-dependencies.sh | 29 +- requirements.txt | 2 +- 4 files changed, 731 insertions(+), 541 deletions(-) diff --git a/Dockerfile b/Dockerfile index cbd71ed..f4c2655 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ # docker pull andmyhacks/httpscreenshot -FROM ubuntu:latest +FROM ubuntu:20.04 -MAINTAINER Keith Hoodlet +MAINTAINER Jesse Osiecki RUN mkdir -p /etc/httpscreenshot WORKDIR /etc/httpscreenshot @@ -10,7 +10,7 @@ WORKDIR /etc/httpscreenshot COPY . /etc/httpscreenshot/ RUN apt-get update -RUN apt-get install -y wget libfontconfig vim +RUN apt-get install -y wget libfontconfig RUN ./install-dependencies.sh diff --git a/httpscreenshot.py b/httpscreenshot.py index 539c5a4..b3afbef 100644 --- a/httpscreenshot.py +++ b/httpscreenshot.py @@ -1,21 +1,20 @@ -#!/usr/bin/python - -''' +#!/usr/bin/python3 +""" Installation on Ubuntu: apt-get install python-requests python-m2crypto phantomjs If you run into: 'module' object has no attribute 'PhantomJS' then pip install selenium (or pip install --upgrade selenium) -''' +""" from selenium import webdriver -from urlparse import urlparse -from random import shuffle -from PIL import Image -from PIL import ImageDraw -from PIL import ImageFont +from urllib.parse import urlparse +from random import shuffle +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont from libnmap.parser import NmapParser import multiprocessing -import Queue +import queue import argparse import sys import traceback @@ -27,24 +26,22 @@ import signal import shutil import hashlib +from importlib import reload from pyvirtualdisplay import Display from selenium.webdriver.common.desired_capabilities import DesiredCapabilities - - try: from urllib.parse import quote -except: - from urllib import quote +except Exception: + from urllib.parse import quote try: - import requesocks as requests -except: - print "requesocks library not found - proxy support will not be available" - import requests + import requesocks as requests +except Exception: + print("requesocks library not found - proxy support will not be available") + import requests reload(sys) -sys.setdefaultencoding("utf8") def timeoutFn(func, args=(), kwargs={}, timeout_duration=1, default=None): @@ -70,515 +67,731 @@ def handler(signum, frame): def addUrlsForService(host, urlList, servicesList, scheme): - if(servicesList == None or servicesList == []): - return - for service in servicesList: - state = service.findPreviousSibling("state") - if(state != None and state != [] and state['state'] == 'open'): - urlList.append(scheme+host+':'+str(service.parent['portid'])) + if servicesList == None or servicesList == []: + return + for service in servicesList: + state = service.findPreviousSibling("state") + if state != None and state != [] and state["state"] == "open": + urlList.append(scheme + host + ":" + str(service.parent["portid"])) def detectFileType(inFile): - #Check to see if file is of type gnmap - firstLine = inFile.readline() - secondLine = inFile.readline() - thirdLine = inFile.readline() - - #Be polite and reset the file pointer - inFile.seek(0) - - if ((firstLine.find('nmap') != 
-1 or firstLine.find('Masscan') != -1) and thirdLine.find('Host:') != -1): - #Looks like a gnmap file - this wont be true for other nmap output types - #Check to see if -sV flag was used, if not, warn - if(firstLine.find('-sV') != -1 or firstLine.find('-A') != -1): - return 'gnmap' - else: - print("Nmap version detection not used! Discovery module may miss some hosts!") - return 'gnmap' - elif ((firstLine.find('xml version') != -1) and secondLine.find('DOCTYPE nmaprun') != -1): - return 'xml' - else: - return None + # Check to see if file is of type gnmap + firstLine = inFile.readline() + secondLine = inFile.readline() + thirdLine = inFile.readline() + + # Be polite and reset the file pointer + inFile.seek(0) + + if (firstLine.find("nmap") != -1 or + firstLine.find("Masscan") != -1) and thirdLine.find("Host:") != -1: + # Looks like a gnmap file - this wont be true for other nmap output types + # Check to see if -sV flag was used, if not, warn + if firstLine.find("-sV") != -1 or firstLine.find("-A") != -1: + return "gnmap" + else: + print( + "Nmap version detection not used! Discovery module may miss some hosts!" + ) + return "gnmap" + elif (firstLine.find("xml version") != + -1) and secondLine.find("DOCTYPE nmaprun") != -1: + return "xml" + else: + return None + def parsexml(inFile): - targets = {} - infile = NmapParser.parse_fromfile(args.input) - for host in infile.hosts: - if host.services: - currentTarget = [] - for s in host.services: - if s.state != 'closed' and 'http' in s.service: - ip = host.address - port = str(s.port) - https = False - if 'https' in s.service or 'ssl' in s.service: - https = True - - currentTarget.append([port,https]) - - if(len(currentTarget) > 0): - targets[ip] = currentTarget - - return targets - print "Parsing is complete, continue on..." + targets = {} + infile = NmapParser.parse_fromfile(args.input) + for host in infile.hosts: + if host.services: + currentTarget = [] + for s in host.services: + if s.state != "closed" and "http" in s.service: + ip = host.address + port = str(s.port) + https = False + if "https" in s.service or "ssl" in s.service: + https = True + + currentTarget.append([port, https]) + + if len(currentTarget) > 0: + targets[ip] = currentTarget + + return targets + print("Parsing is complete, continue on...") + def parseGnmap(inFile, autodetect): - ''' - Parse a gnmap file into a dictionary. The dictionary key is the ip address or hostname. - Each key item is a list of ports and whether or not that port is https/ssl. For example: - >>> targets - {'127.0.0.1': [[443, True], [8080, False]]} - ''' - targets = {} - for hostLine in inFile: - if hostLine.strip() == '': - break - currentTarget = [] - #Pull out the IP address (or hostnames) and HTTP service ports - fields = hostLine.split(' ') - ip = fields[1] #not going to regex match this with ip address b/c could be a hostname - for item in fields: - #Make sure we have an open port with an http type service on it - if (item.find('http') != -1 or autodetect) and re.findall('\d+/open',item): - port = None - https = False - ''' - nmap has a bunch of ways to list HTTP like services, for example: - 8089/open/tcp//ssl|http - 8000/closed/tcp//http-alt/// - 8008/closed/tcp//http/// - 8080/closed/tcp//http-proxy// - 443/open/tcp//ssl|https?/// - 8089/open/tcp//ssl|http - Since we want to detect them all, let's just match on the word http - and make special cases for things containing https and ssl when we - construct the URLs. 
- ''' - port = item.split('/')[0] - - if item.find('https') != -1 or item.find('ssl') != -1: - https = True - #Add the current service item to the currentTarget list for this host - currentTarget.append([port,https]) - - if(len(currentTarget) > 0): - targets[ip] = currentTarget - return targets - - -def setupBrowserProfile(headless,proxy): - browser = None - if(proxy is not None): - service_args=['--ignore-ssl-errors=true','--ssl-protocol=any','--proxy='+proxy,'--proxy-type=socks5'] - else: - service_args=['--ignore-ssl-errors=true','--ssl-protocol=any'] - - while(browser is None): - try: - if(not headless): - capabilities = DesiredCapabilities.FIREFOX - capabilities['acceptSslCerts'] = True - fp = webdriver.FirefoxProfile() - fp.set_preference("webdriver.accept.untrusted.certs",True) - fp.set_preference("security.enable_java", False) - fp.set_preference("webdriver.load.strategy", "fast"); - if(proxy is not None): - proxyItems = proxy.split(":") - fp.set_preference("network.proxy.socks",proxyItems[0]) - fp.set_preference("network.proxy.socks_port",int(proxyItems[1])) - fp.set_preference("network.proxy.type",1) - browser = webdriver.Firefox(firefox_profile=fp,capabilities=capabilities) - else: - browser = webdriver.PhantomJS(service_args=service_args, executable_path="phantomjs") - browser.set_window_size(1024, 768) - - except Exception as e: - print e - time.sleep(1) - continue - return browser + """ + Parse a gnmap file into a dictionary. The dictionary key is the ip address or hostname. + Each key item is a list of ports and whether or not that port is https/ssl. For example: + >>> targets + {'127.0.0.1': [[443, True], [8080, False]]} + """ + targets = {} + for hostLine in inFile: + if hostLine.strip() == "": + break + currentTarget = [] + # Pull out the IP address (or hostnames) and HTTP service ports + fields = hostLine.split(" ") + ip = fields[ + 1] # not going to regex match this with ip address b/c could be a hostname + for item in fields: + # Make sure we have an open port with an http type service on it + if (item.find("http") != -1 or autodetect) and re.findall( + "\d+/open", item): + port = None + https = False + """ + nmap has a bunch of ways to list HTTP like services, for example: + 8089/open/tcp//ssl|http + 8000/closed/tcp//http-alt/// + 8008/closed/tcp//http/// + 8080/closed/tcp//http-proxy// + 443/open/tcp//ssl|https?/// + 8089/open/tcp//ssl|http + Since we want to detect them all, let's just match on the word http + and make special cases for things containing https and ssl when we + construct the URLs. 
+ """ + port = item.split("/")[0] + + if item.find("https") != -1 or item.find("ssl") != -1: + https = True + # Add the current service item to the currentTarget list for this host + currentTarget.append([port, https]) + + if len(currentTarget) > 0: + targets[ip] = currentTarget + return targets + + +def setupBrowserProfile(headless, proxy): + browser = None + if proxy is not None: + service_args = [ + "--ignore-ssl-errors=true", + "--ssl-protocol=any", + "--proxy=" + proxy, + "--proxy-type=socks5", + ] + else: + service_args = ["--ignore-ssl-errors=true", "--ssl-protocol=any"] + + while browser is None: + try: + if not headless: + capabilities = DesiredCapabilities.FIREFOX + capabilities["acceptSslCerts"] = True + fp = webdriver.FirefoxProfile() + fp.set_preference("webdriver.accept.untrusted.certs", True) + fp.set_preference("security.enable_java", False) + fp.set_preference("webdriver.load.strategy", "fast") + if proxy is not None: + proxyItems = proxy.split(":") + fp.set_preference("network.proxy.socks", proxyItems[0]) + fp.set_preference("network.proxy.socks_port", + int(proxyItems[1])) + fp.set_preference("network.proxy.type", 1) + browser = webdriver.Firefox(firefox_profile=fp, + capabilities=capabilities) + else: + capabilities = DesiredCapabilities.FIREFOX + capabilities["acceptSslCerts"] = True + fireFoxOptions = webdriver.FirefoxOptions() + fireFoxOptions.set_headless() + browser = webdriver.Firefox(firefox_options=fireFoxOptions, capabilities=capabilities) + browser.set_window_size(1024, 768) + + except Exception as e: + print(e) + time.sleep(1) + continue + return browser def writeImage(text, filename, fontsize=40, width=1024, height=200): - image = Image.new("RGBA", (width,height), (255,255,255)) - draw = ImageDraw.Draw(image) - if (os.path.exists("/usr/share/httpscreenshot/LiberationSerif-BoldItalic.ttf")): - font_path = "/usr/share/httpscreenshot/LiberationSerif-BoldItalic.ttf" + image = Image.new("RGBA", (width, height), (255, 255, 255)) + draw = ImageDraw.Draw(image) + if os.path.exists( + "/usr/share/httpscreenshot/LiberationSerif-BoldItalic.ttf"): + font_path = "/usr/share/httpscreenshot/LiberationSerif-BoldItalic.ttf" + else: + font_path = (os.path.dirname(os.path.realpath(__file__)) + + "/LiberationSerif-BoldItalic.ttf") + font = ImageFont.truetype(font_path, fontsize) + draw.text((10, 0), text, (0, 0, 0), font=font) + image.save(filename) + + +def worker( + urlQueue, + tout, + debug, + headless, + doProfile, + vhosts, + subs, + extraHosts, + tryGUIOnFail, + smartFetch, + proxy, +): + if debug: + print("[*] Starting worker") + + browser = None + display = None + try: + if tryGUIOnFail or not headless: + display = Display(visible=0, size=(800, 600)) + display.start() + + browser = setupBrowserProfile(headless, proxy) + + except: + print("[-] Oh no! Couldn't create the browser, Selenium blew up") + exc_type, exc_value, exc_traceback = sys.exc_info() + lines = traceback.format_exception(exc_type, exc_value, exc_traceback) + print("".join("!! 
" + line for line in lines)) + browser.quit() + display.stop() + return + + while True: + # Try to get a URL from the Queue + if urlQueue.qsize() > 0: + try: + curUrl = urlQueue.get(timeout=tout) + except queue.Empty: + continue + print("[+] " + str(urlQueue.qsize()) + " URLs remaining") + screenshotName = quote(curUrl[0], safe="") + if debug: + print("[+] Got URL: " + curUrl[0]) + print("[+] screenshotName: " + screenshotName) + if os.path.exists(screenshotName + ".png"): + if debug: + print("[-] Screenshot already exists, skipping") + continue else: - font_path = os.path.dirname(os.path.realpath(__file__))+"/LiberationSerif-BoldItalic.ttf" - font = ImageFont.truetype(font_path, fontsize) - draw.text((10, 0), text, (0,0,0), font=font) - image.save(filename) - - -def worker(urlQueue, tout, debug, headless, doProfile, vhosts, subs, extraHosts, tryGUIOnFail, smartFetch,proxy): - if(debug): - print '[*] Starting worker' - - browser = None - display = None - try: - if(tryGUIOnFail or not headless): - display = Display(visible=0, size=(800, 600)) - display.start() - - browser = setupBrowserProfile(headless,proxy) - - except: - print "[-] Oh no! Couldn't create the browser, Selenium blew up" - exc_type, exc_value, exc_traceback = sys.exc_info() - lines = traceback.format_exception(exc_type, exc_value, exc_traceback) - print ''.join('!! ' + line for line in lines) - browser.quit() - display.stop() - return - - while True: - #Try to get a URL from the Queue - if urlQueue.qsize() > 0: - try: - curUrl = urlQueue.get(timeout=tout) - except Queue.Empty: - continue - print '[+] '+str(urlQueue.qsize())+' URLs remaining' - screenshotName = quote(curUrl[0], safe='') - if(debug): - print '[+] Got URL: '+curUrl[0] - print '[+] screenshotName: '+screenshotName - if(os.path.exists(screenshotName+".png")): - if(debug): - print "[-] Screenshot already exists, skipping" - continue - else: - if(debug): - print'[-] URL queue is empty, quitting.' - browser.quit() - return - - try: - if(doProfile): - [resp,curUrl] = autodetectRequest(curUrl, timeout=tout, vhosts=vhosts, urlQueue=urlQueue, subs=subs, extraHosts=extraHosts,proxy=proxy) - else: - resp = doGet(curUrl, verify=False, timeout=tout, vhosts=vhosts, urlQueue=urlQueue, subs=subs, extraHosts=extraHosts,proxy=proxy) - if(resp is not None and resp.status_code == 401): - print curUrl[0]+" Requires HTTP Basic Auth" - f = open(screenshotName+".html",'w') - f.write(resp.headers.get('www-authenticate','NONE')) - f.write('Basic Auth') - f.close() - writeImage(resp.headers.get('www-authenticate','NO WWW-AUTHENTICATE HEADER'),screenshotName+".png") - continue - - elif(resp is not None): - if(resp.text is not None): - resp_hash = hashlib.md5(resp.text).hexdigest() - else: - resp_hash = None - - if smartFetch and resp_hash is not None and resp_hash in hash_basket: - #We have this exact same page already, copy it instead of grabbing it again - print "[+] Pre-fetch matches previously imaged service, no need to do it again!" 
- shutil.copy2(hash_basket[resp_hash]+".html",screenshotName+".html") - shutil.copy2(hash_basket[resp_hash]+".png",screenshotName+".png") - else: - if smartFetch: - hash_basket[resp_hash] = screenshotName - - - #browser.set_window_size(1024, 768) - browser.set_page_load_timeout((tout)) - old_url = browser.current_url - browser.get(curUrl[0].strip()) - if(browser.current_url == old_url): - print "[-] Error fetching in browser but successfully fetched with Requests: "+curUrl[0] - if(headless): - browser2 = None - if(debug): - print "[+] Trying with sslv3 instead of TLS - known phantomjs bug: "+curUrl[0] - if(proxy is not None): - browser2 = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true','--proxy='+proxy,'--proxy-type=socks5'], executable_path="phantomjs") - else: - browser2 = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true'], executable_path="phantomjs") - #print "Launched browser2: "+str(browser2.service.process.pid) - - old_url = browser2.current_url - try: - browser2.get(curUrl[0].strip()) - if(browser2.current_url == old_url): - if(debug): - print "[-] Didn't work with SSLv3 either..."+curUrl[0] - browser2.quit() - else: - print '[+] Saving: '+screenshotName - html_source = browser2.page_source - f = open(screenshotName+".html",'w') - f.write(html_source) - f.close() - browser2.save_screenshot(screenshotName+".png") - browser2.quit() - continue - except: - browser2.quit() - print "[-] Didn't work with SSLv3 either - exception..."+curUrl[0] - - if(tryGUIOnFail and headless): - display = Display(visible=0, size=(1024, 768)) - display.start() - print "[+] Attempting to fetch with FireFox: "+curUrl[0] - browser2 = setupBrowserProfile(False,proxy) - old_url = browser2.current_url - try: - browser2.get(curUrl[0].strip()) - if(browser2.current_url == old_url): - print "[-] Error fetching in GUI browser as well..."+curUrl[0] - browser2.quit() - continue - else: - print '[+] Saving: '+screenshotName - html_source = browser2.page_source - f = open(screenshotName+".html",'w') - f.write(html_source) - f.close() - browser2.save_screenshot(screenshotName+".png") - browser2.quit() - continue - except: - browser2.quit() - display.stop() - print "[-] Error fetching in GUI browser as well..."+curUrl[0] - - else: - continue - - print '[+] Saving: '+screenshotName - html_source = browser.page_source - f = open(screenshotName+".html",'w') - f.write(html_source) - f.close() - browser.save_screenshot(screenshotName+".png") - - except Exception as e: - print e - print '[-] Something bad happened with URL: '+curUrl[0] - if(curUrl[2] > 0): - curUrl[2] = curUrl[2] - 1; - urlQueue.put(curUrl) - if(debug): - exc_type, exc_value, exc_traceback = sys.exc_info() - lines = traceback.format_exception(exc_type, exc_value, exc_traceback) - print ''.join('!! 
' + line for line in lines) - browser.quit() - browser = setupBrowserProfile(headless,proxy) - continue - browser.quit() - display.stop() + if debug: + print("[-] URL queue is empty, quitting.") + browser.quit() + return + + try: + if doProfile: + [resp, curUrl] = autodetectRequest( + curUrl, + timeout=tout, + vhosts=vhosts, + urlQueue=urlQueue, + subs=subs, + extraHosts=extraHosts, + proxy=proxy, + ) + else: + resp = doGet( + curUrl, + verify=False, + timeout=tout, + vhosts=vhosts, + urlQueue=urlQueue, + subs=subs, + extraHosts=extraHosts, + proxy=proxy, + ) + if resp is not None and resp.status_code == 401: + print(curUrl[0] + " Requires HTTP Basic Auth") + f = open(screenshotName + ".html", "w") + f.write(resp.headers.get("www-authenticate", "NONE")) + f.write("Basic Auth") + f.close() + writeImage( + resp.headers.get("www-authenticate", + "NO WWW-AUTHENTICATE HEADER"), + screenshotName + ".png", + ) + continue + + elif resp is not None: + if resp.text is not None: + resp_hash = hashlib.md5(resp.text.encode('utf-8')).hexdigest() + else: + resp_hash = None + + if smartFetch and resp_hash is not None and resp_hash in hash_basket: + # We have this exact same page already, copy it instead of grabbing it again + print( + "[+] Pre-fetch matches previously imaged service, no need to do it again!" + ) + shutil.copy2(hash_basket[resp_hash] + ".html", + screenshotName + ".html") + shutil.copy2(hash_basket[resp_hash] + ".png", + screenshotName + ".png") + else: + if smartFetch: + hash_basket[resp_hash] = screenshotName + + # browser.set_window_size(1024, 768) + browser.set_page_load_timeout((tout)) + old_url = browser.current_url + browser.get(curUrl[0].strip()) + if browser.current_url == old_url: + print( + "[-] Error fetching in browser but successfully fetched with Requests: " + + curUrl[0]) + if headless: + browser2 = None + if debug: + print( + "[+] Trying with sslv3 instead of TLS - known phantomjs bug: " + + curUrl[0]) + if proxy is not None: + browser2 = webdriver.PhantomJS( + service_args=[ + "--ignore-ssl-errors=true", + "--proxy=" + proxy, + "--proxy-type=socks5", + ], + executable_path="phantomjs", + ) + else: + browser2 = webdriver.PhantomJS( + service_args=["--ignore-ssl-errors=true"], + executable_path="phantomjs", + ) + # print "Launched browser2: "+str(browser2.service.process.pid) + + old_url = browser2.current_url + try: + browser2.get(curUrl[0].strip()) + if browser2.current_url == old_url: + if debug: + print( + "[-] Didn't work with SSLv3 either..." + + curUrl[0]) + browser2.quit() + else: + print("[+] Saving: " + screenshotName) + html_source = browser2.page_source + f = open(screenshotName + ".html", "w") + f.write(html_source) + f.close() + browser2.save_screenshot(screenshotName + + ".png") + browser2.quit() + continue + except: + browser2.quit() + print( + "[-] Didn't work with SSLv3 either - exception..." + + curUrl[0]) + + if tryGUIOnFail and headless: + display = Display(visible=0, size=(1024, 768)) + display.start() + print("[+] Attempting to fetch with FireFox: " + + curUrl[0]) + browser2 = setupBrowserProfile(False, proxy) + old_url = browser2.current_url + try: + browser2.get(curUrl[0].strip()) + if browser2.current_url == old_url: + print( + "[-] Error fetching in GUI browser as well..." 
+ + curUrl[0]) + browser2.quit() + continue + else: + print("[+] Saving: " + screenshotName) + html_source = browser2.page_source + f = open(screenshotName + ".html", "w") + f.write(html_source) + f.close() + browser2.save_screenshot(screenshotName + + ".png") + browser2.quit() + continue + except: + browser2.quit() + display.stop() + print( + "[-] Error fetching in GUI browser as well..." + + curUrl[0]) + + else: + continue + + print("[+] Saving: " + screenshotName) + html_source = browser.page_source + f = open(screenshotName + ".html", "w") + f.write(html_source) + f.close() + browser.save_screenshot(screenshotName + ".png") + + except Exception as e: + print(e) + print("[-] Something bad happened with URL: " + curUrl[0]) + if curUrl[2] > 0: + curUrl[2] = curUrl[2] - 1 + urlQueue.put(curUrl) + if debug: + exc_type, exc_value, exc_traceback = sys.exc_info() + lines = traceback.format_exception(exc_type, exc_value, + exc_traceback) + print("".join("!! " + line for line in lines)) + browser.quit() + browser = setupBrowserProfile(headless, proxy) + continue + browser.quit() + display.stop() + def doGet(*args, **kwargs): - url = args[0] - doVhosts = kwargs.pop('vhosts' ,None) - urlQueue = kwargs.pop('urlQueue' ,None) - subs = kwargs.pop('subs' ,None) - extraHosts = kwargs.pop('extraHosts',None) - proxy = kwargs.pop('proxy',None) - - kwargs['allow_redirects'] = False - session = requests.session() - if(proxy is not None): - session.proxies={'http':'socks5://'+proxy,'https':'socks5://'+proxy} - resp = session.get(url[0],**kwargs) - - #If we have an https URL and we are configured to scrape hosts from the cert... - if(url[0].find('https') != -1 and url[1] == True): - #Pull hostnames from cert, add as additional URLs and flag as not to pull certs - host = urlparse(url[0]).hostname - port = urlparse(url[0]).port - if(port is None): - port = 443 - names = [] - try: - cert = ssl.get_server_certificate((host,port),ssl_version=ssl.PROTOCOL_SSLv23) - x509 = M2Crypto.X509.load_cert_string(cert.decode('string_escape')) - subjText = x509.get_subject().as_text() - names = re.findall("CN=([^\s]+)",subjText) - altNames = x509.get_ext('subjectAltName').get_value() - names.extend(re.findall("DNS:([^,]*)",altNames)) - except: - pass - - for name in names: - if(name.find('*.') != -1): - for sub in subs: - try: - sub = sub.strip() - hostname = name.replace('*.',sub+'.') - if(hostname not in extraHosts): - extraHosts[hostname] = 1 - address = socket.gethostbyname(hostname) - urlQueue.put(['https://'+hostname+':'+str(port),False,url[2]]) - print '[+] Discovered subdomain '+address - except: - pass - name = name.replace('*.','') - if(name not in extraHosts): - extraHosts[name] = 1 - urlQueue.put(['https://'+name+':'+str(port),False,url[2]]) - print '[+] Added host '+name - else: - if (name not in extraHosts): - extraHosts[name] = 1 - urlQueue.put(['https://'+name+':'+str(port),False,url[2]]) - print '[+] Added host '+name - return resp - else: - return resp - - -def autodetectRequest(url, timeout, vhosts=False, urlQueue=None, subs=None, extraHosts=None,proxy=None): - '''Takes a URL, ignores the scheme. 
Detect if the host/port is actually an HTTP or HTTPS - server''' - resp = None - host = urlparse(url[0]).hostname - port = urlparse(url[0]).port - - if(port is None): - if('https' in url[0]): - port = 443 - else: - port = 80 - - try: - #cert = ssl.get_server_certificate((host,port)) - - cert = timeoutFn(ssl.get_server_certificate,kwargs={'addr':(host,port),'ssl_version':ssl.PROTOCOL_SSLv23},timeout_duration=3) - - if(cert is not None): - if('https' not in url[0]): - url[0] = url[0].replace('http','https') - #print 'Got cert, changing to HTTPS '+url[0] - - else: - url[0] = url[0].replace('https','http') - #print 'Changing to HTTP '+url[0] - - - except Exception as e: - url[0] = url[0].replace('https','http') - #print 'Changing to HTTP '+url[0] - try: - resp = doGet(url,verify=False, timeout=timeout, vhosts=vhosts, urlQueue=urlQueue, subs=subs, extraHosts=extraHosts, proxy=proxy) - except Exception as e: - print 'HTTP GET Error: '+str(e) - print url[0] - - return [resp,url] + url = args[0] + doVhosts = kwargs.pop("vhosts", None) + urlQueue = kwargs.pop("urlQueue", None) + subs = kwargs.pop("subs", None) + extraHosts = kwargs.pop("extraHosts", None) + proxy = kwargs.pop("proxy", None) + + kwargs["allow_redirects"] = False + session = requests.session() + if proxy is not None: + session.proxies = { + "http": "socks5://" + proxy, + "https": "socks5://" + proxy + } + resp = session.get(url[0], **kwargs) + + # If we have an https URL and we are configured to scrape hosts from the cert... + if url[0].find("https") != -1 and url[1] == True: + # Pull hostnames from cert, add as additional URLs and flag as not to pull certs + host = urlparse(url[0]).hostname + port = urlparse(url[0]).port + if port is None: + port = 443 + names = [] + try: + cert = ssl.get_server_certificate((host, port), + ssl_version=ssl.PROTOCOL_SSLv23) + x509 = M2Crypto.X509.load_cert_string(cert.decode("string_escape")) + subjText = x509.get_subject().as_text() + names = re.findall("CN=([^\s]+)", subjText) + altNames = x509.get_ext("subjectAltName").get_value() + names.extend(re.findall("DNS:([^,]*)", altNames)) + except: + pass + + for name in names: + if name.find("*.") != -1: + for sub in subs: + try: + sub = sub.strip() + hostname = name.replace("*.", sub + ".") + if hostname not in extraHosts: + extraHosts[hostname] = 1 + address = socket.gethostbyname(hostname) + urlQueue.put([ + "https://" + hostname + ":" + str(port), False, + url[2] + ]) + print("[+] Discovered subdomain " + address) + except: + pass + name = name.replace("*.", "") + if name not in extraHosts: + extraHosts[name] = 1 + urlQueue.put( + ["https://" + name + ":" + str(port), False, url[2]]) + print("[+] Added host " + name) + else: + if name not in extraHosts: + extraHosts[name] = 1 + urlQueue.put( + ["https://" + name + ":" + str(port), False, url[2]]) + print("[+] Added host " + name) + return resp + else: + return resp + + +def autodetectRequest(url, + timeout, + vhosts=False, + urlQueue=None, + subs=None, + extraHosts=None, + proxy=None): + """Takes a URL, ignores the scheme. 
Detect if the host/port is actually an HTTP or HTTPS + server""" + resp = None + host = urlparse(url[0]).hostname + port = urlparse(url[0]).port + + if port is None: + if "https" in url[0]: + port = 443 + else: + port = 80 + + try: + # cert = ssl.get_server_certificate((host,port)) + + cert = timeoutFn( + ssl.get_server_certificate, + kwargs={ + "addr": (host, port), + "ssl_version": ssl.PROTOCOL_SSLv23 + }, + timeout_duration=3, + ) + + if cert is not None: + if "https" not in url[0]: + url[0] = url[0].replace("http", "https") + # print 'Got cert, changing to HTTPS '+url[0] + + else: + url[0] = url[0].replace("https", "http") + # print 'Changing to HTTP '+url[0] + + except Exception as e: + url[0] = url[0].replace("https", "http") + # print 'Changing to HTTP '+url[0] + try: + resp = doGet( + url, + verify=False, + timeout=timeout, + vhosts=vhosts, + urlQueue=urlQueue, + subs=subs, + extraHosts=extraHosts, + proxy=proxy, + ) + except Exception as e: + print("HTTP GET Error: " + str(e)) + print(url[0]) + + return [resp, url] def sslError(e): - if('the handshake operation timed out' in str(e) or 'unknown protocol' in str(e) or 'Connection reset by peer' in str(e) or 'EOF occurred in violation of protocol' in str(e)): - return True - else: - return False + if ("the handshake operation timed out" in str(e) + or "unknown protocol" in str(e) + or "Connection reset by peer" in str(e) + or "EOF occurred in violation of protocol" in str(e)): + return True + else: + return False + def signal_handler(signal, frame): - print "[-] Ctrl-C received! Killing Thread(s)..." - os._exit(0) + print("[-] Ctrl-C received! Killing Thread(s)...") + os._exit(0) + + signal.signal(signal.SIGINT, signal_handler) -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - parser.add_argument("-l","--list",help='List of input URLs') - parser.add_argument("-i","--input",help='nmap gnmap/xml output file') - parser.add_argument("-p","--headless",action='store_true',default=False,help='Run in headless mode (using phantomjs)') - parser.add_argument("-w","--workers",default=1,type=int,help='number of threads') - parser.add_argument("-t","--timeout",type=int,default=10,help='time to wait for pageload before killing the browser') - parser.add_argument("-v","--verbose",action='store_true',default=False,help='turn on verbose debugging') - parser.add_argument("-a","--autodetect",action='store_true',default=False,help='Automatically detect if listening services are HTTP or HTTPS. 
Ignores NMAP service detction and URL schemes.') - parser.add_argument("-vH","--vhosts",action='store_true',default=False,help='Attempt to scrape hostnames from SSL certificates and add these to the URL queue') - parser.add_argument("-dB","--dns_brute",help='Specify a DNS subdomain wordlist for bruteforcing on wildcard SSL certs') - parser.add_argument("-uL","--uri_list",help='Specify a list of URIs to fetch in addition to the root') - parser.add_argument("-r","--retries",type=int,default=0,help='Number of retries if a URL fails or timesout') - parser.add_argument("-tG","--trygui",action='store_true',default=False,help='Try to fetch the page with FireFox when headless fails') - parser.add_argument("-sF","--smartfetch",action='store_true',default=False,help='Enables smart fetching to reduce network traffic, also increases speed if certain conditions are met.') - parser.add_argument("-pX","--proxy",default=None,help='SOCKS5 Proxy in host:port format') - - - args = parser.parse_args() - - if(len(sys.argv) < 2): - parser.print_help() - sys.exit(0) - - - #read in the URI list if specificed - uris = [''] - if(args.uri_list != None): - uris = open(args.uri_list,'r').readlines() - uris.append('') - - if(args.input is not None): - inFile = open(args.input,'rU') - if(detectFileType(inFile) == 'gnmap'): - hosts = parseGnmap(inFile,args.autodetect) - urls = [] - for host,ports in hosts.items(): - for port in ports: - for uri in uris: - url = '' - if port[1] == True: - url = ['https://'+host+':'+port[0]+uri.strip(),args.vhosts,args.retries] - else: - url = ['http://'+host+':'+port[0]+uri.strip(),args.vhosts,args.retries] - urls.append(url) - elif(detectFileType(inFile) == 'xml'): - hosts = parsexml(inFile) - urls = [] - for host,ports in hosts.items(): - for port in ports: - for uri in uris: - url = '' - if port[1] == True: - url = ['https://'+host+':'+port[0]+uri.strip(),args.vhosts,args.retries] - else: - url = ['http://'+host+':'+port[0]+uri.strip(),args.vhosts,args.retries] - urls.append(url) - else: - print 'Invalid input file - must be Nmap GNMAP or Nmap XML' - - elif (args.list is not None): - f = open(args.list,'r') - lst = f.readlines() - urls = [] - for url in lst: - urls.append([url.strip(),args.vhosts,args.retries]) - else: - print "No input specified" - sys.exit(0) - - - #shuffle the url list - shuffle(urls) - - #read in the subdomain bruteforce list if specificed - subs = [] - if(args.dns_brute != None): - subs = open(args.dns_brute,'r').readlines() - - #Fire up the workers - urlQueue = multiprocessing.Queue() - manager = multiprocessing.Manager() - hostsDict = manager.dict() - workers = [] - hash_basket = {} - - for i in range(args.workers): - p = multiprocessing.Process(target=worker, args=(urlQueue, args.timeout, args.verbose, args.headless, args.autodetect, args.vhosts, subs, hostsDict, args.trygui, args.smartfetch,args.proxy)) - workers.append(p) - p.start() - - for url in urls: - urlQueue.put(url) - - for p in workers: - p.join() - +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument("-l", "--list", help="List of input URLs") + parser.add_argument("-i", "--input", help="nmap gnmap/xml output file") + parser.add_argument( + "-p", + "--headless", + action="store_true", + default=False, + help="Run in headless mode (using phantomjs)", + ) + parser.add_argument("-w", + "--workers", + default=1, + type=int, + help="number of threads") + parser.add_argument( + "-t", + "--timeout", + type=int, + default=10, + help="time to wait for pageload before 
killing the browser", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + default=False, + help="turn on verbose debugging", + ) + parser.add_argument( + "-a", + "--autodetect", + action="store_true", + default=False, + help= + "Automatically detect if listening services are HTTP or HTTPS. Ignores NMAP service detction and URL schemes.", + ) + parser.add_argument( + "-vH", + "--vhosts", + action="store_true", + default=False, + help= + "Attempt to scrape hostnames from SSL certificates and add these to the URL queue", + ) + parser.add_argument( + "-dB", + "--dns_brute", + help= + "Specify a DNS subdomain wordlist for bruteforcing on wildcard SSL certs", + ) + parser.add_argument( + "-uL", + "--uri_list", + help="Specify a list of URIs to fetch in addition to the root", + ) + parser.add_argument( + "-r", + "--retries", + type=int, + default=0, + help="Number of retries if a URL fails or timesout", + ) + parser.add_argument( + "-tG", + "--trygui", + action="store_true", + default=False, + help="Try to fetch the page with FireFox when headless fails", + ) + parser.add_argument( + "-sF", + "--smartfetch", + action="store_true", + default=False, + help= + "Enables smart fetching to reduce network traffic, also increases speed if certain conditions are met.", + ) + parser.add_argument("-pX", + "--proxy", + default=None, + help="SOCKS5 Proxy in host:port format") + + args = parser.parse_args() + + if len(sys.argv) < 2: + parser.print_help() + sys.exit(0) + + # read in the URI list if specificed + uris = [""] + if args.uri_list != None: + uris = open(args.uri_list, "r").readlines() + uris.append("") + + if args.input is not None: + inFile = open(args.input, "rU") + if detectFileType(inFile) == "gnmap": + hosts = parseGnmap(inFile, args.autodetect) + urls = [] + for host, ports in list(hosts.items()): + for port in ports: + for uri in uris: + url = "" + if port[1] == True: + url = [ + "https://" + host + ":" + port[0] + + uri.strip(), + args.vhosts, + args.retries, + ] + else: + url = [ + "http://" + host + ":" + port[0] + uri.strip(), + args.vhosts, + args.retries, + ] + urls.append(url) + elif detectFileType(inFile) == "xml": + hosts = parsexml(inFile) + urls = [] + for host, ports in list(hosts.items()): + for port in ports: + for uri in uris: + url = "" + if port[1] == True: + url = [ + "https://" + host + ":" + port[0] + + uri.strip(), + args.vhosts, + args.retries, + ] + else: + url = [ + "http://" + host + ":" + port[0] + uri.strip(), + args.vhosts, + args.retries, + ] + urls.append(url) + else: + print("Invalid input file - must be Nmap GNMAP or Nmap XML") + + elif args.list is not None: + f = open(args.list, "r") + lst = f.readlines() + urls = [] + for url in lst: + urls.append([url.strip(), args.vhosts, args.retries]) + else: + print("No input specified") + sys.exit(0) + + # shuffle the url list + shuffle(urls) + + # read in the subdomain bruteforce list if specificed + subs = [] + if args.dns_brute != None: + subs = open(args.dns_brute, "r").readlines() + + # Fire up the workers + urlQueue = multiprocessing.Queue() + manager = multiprocessing.Manager() + hostsDict = manager.dict() + workers = [] + hash_basket = {} + + for i in range(args.workers): + p = multiprocessing.Process( + target=worker, + args=( + urlQueue, + args.timeout, + args.verbose, + args.headless, + args.autodetect, + args.vhosts, + subs, + hostsDict, + args.trygui, + args.smartfetch, + args.proxy, + ), + ) + workers.append(p) + p.start() + + for url in urls: + urlQueue.put(url) + + for p in 
workers: + p.join() diff --git a/install-dependencies.sh b/install-dependencies.sh index c2aa19c..7b44c36 100755 --- a/install-dependencies.sh +++ b/install-dependencies.sh @@ -6,7 +6,7 @@ # Error out if one fails set -e -apt-get install -y swig swig3.0 libssl-dev python-dev libjpeg-dev xvfb +apt-get install -y swig swig3.0 libssl-dev python3-dev libjpeg-dev xvfb firefox firefox-geckodriver # Newer version in PyPI #apt-get install -y python-requests @@ -18,28 +18,5 @@ apt-get install -y swig swig3.0 libssl-dev python-dev libjpeg-dev xvfb #apt-get install -y python-pil # Install pip and install pytnon requirements through it -apt-get install -y python-pip -pip install -r requirements.txt - -# This binary is distributed with the code base, version is -# more recent then the one in the ubuntu repo (1.9.1 vs 1.9.0) -#apt-get install -y phantomjs - -# Grab the latest of phantomjs it directly from the source -wget https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-linux-x86_64.tar.bz2 - -phantom_md5sum=`md5sum phantomjs-2.1.1-linux-x86_64.tar.bz2 | cut -d' ' -f1` -checksum="1c947d57fce2f21ce0b43fe2ed7cd361" - -if [ "$phantom_md5sum" != "$checksum" ] -then - echo "phantomjs checksum mismatch" - exit 254 -fi - -tar xvf phantomjs-2.1.1-linux-x86_64.tar.bz2 -mv phantomjs-2.1.1-linux-x86_64/bin/phantomjs /usr/bin/phantomjs - -wget https://github.com/mozilla/geckodriver/releases/download/v0.11.1/geckodriver-v0.11.1-linux64.tar.gz -tar xzvf geckodriver-v0.11.1-linux64.tar.gz -mv geckodriver /usr/bin/geckodriver +apt-get install -y python3-pip +pip3 install -r requirements.txt diff --git a/requirements.txt b/requirements.txt index f3f17e1..f894278 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ requests selenium beautifulsoup4 pillow -requesocks +PySocks python-libnmap pyvirtualdisplay From 6439a9fc9057819c5bf7c49ce699025ab553089b Mon Sep 17 00:00:00 2001 From: Jesse Osiecki Date: Thu, 14 Jan 2021 16:13:57 -0500 Subject: [PATCH 02/15] Remove PhantomJS --- httpscreenshot.py | 96 +++++++++++------------------------------------ 1 file changed, 21 insertions(+), 75 deletions(-) diff --git a/httpscreenshot.py b/httpscreenshot.py index b3afbef..ad970cb 100644 --- a/httpscreenshot.py +++ b/httpscreenshot.py @@ -1,10 +1,4 @@ #!/usr/bin/python3 -""" -Installation on Ubuntu: -apt-get install python-requests python-m2crypto phantomjs -If you run into: 'module' object has no attribute 'PhantomJS' -then pip install selenium (or pip install --upgrade selenium) -""" from selenium import webdriver from urllib.parse import urlparse @@ -185,28 +179,28 @@ def setupBrowserProfile(headless, proxy): while browser is None: try: - if not headless: - capabilities = DesiredCapabilities.FIREFOX - capabilities["acceptSslCerts"] = True - fp = webdriver.FirefoxProfile() - fp.set_preference("webdriver.accept.untrusted.certs", True) - fp.set_preference("security.enable_java", False) - fp.set_preference("webdriver.load.strategy", "fast") - if proxy is not None: - proxyItems = proxy.split(":") - fp.set_preference("network.proxy.socks", proxyItems[0]) - fp.set_preference("network.proxy.socks_port", - int(proxyItems[1])) - fp.set_preference("network.proxy.type", 1) - browser = webdriver.Firefox(firefox_profile=fp, - capabilities=capabilities) - else: - capabilities = DesiredCapabilities.FIREFOX - capabilities["acceptSslCerts"] = True - fireFoxOptions = webdriver.FirefoxOptions() + capabilities = DesiredCapabilities.FIREFOX + capabilities["acceptSslCerts"] = True + fp = webdriver.FirefoxProfile() + 
fp.set_preference("webdriver.accept.untrusted.certs", True) + fp.set_preference("security.enable_java", False) + fp.set_preference("webdriver.load.strategy", "fast") + if proxy is not None: + proxyItems = proxy.split(":") + fp.set_preference("network.proxy.socks", proxyItems[0]) + fp.set_preference("network.proxy.socks_port", + int(proxyItems[1])) + fp.set_preference("network.proxy.type", 1) + + fireFoxOptions = webdriver.FirefoxOptions() + + if headless: fireFoxOptions.set_headless() - browser = webdriver.Firefox(firefox_options=fireFoxOptions, capabilities=capabilities) - browser.set_window_size(1024, 768) + + browser = webdriver.Firefox(firefox_profile=fp, + capabilities=capabilities, + options=fireFoxOptions) + browser.set_window_size(1024, 768) except Exception as e: print(e) @@ -339,7 +333,6 @@ def worker( if smartFetch: hash_basket[resp_hash] = screenshotName - # browser.set_window_size(1024, 768) browser.set_page_load_timeout((tout)) old_url = browser.current_url browser.get(curUrl[0].strip()) @@ -347,53 +340,6 @@ def worker( print( "[-] Error fetching in browser but successfully fetched with Requests: " + curUrl[0]) - if headless: - browser2 = None - if debug: - print( - "[+] Trying with sslv3 instead of TLS - known phantomjs bug: " - + curUrl[0]) - if proxy is not None: - browser2 = webdriver.PhantomJS( - service_args=[ - "--ignore-ssl-errors=true", - "--proxy=" + proxy, - "--proxy-type=socks5", - ], - executable_path="phantomjs", - ) - else: - browser2 = webdriver.PhantomJS( - service_args=["--ignore-ssl-errors=true"], - executable_path="phantomjs", - ) - # print "Launched browser2: "+str(browser2.service.process.pid) - - old_url = browser2.current_url - try: - browser2.get(curUrl[0].strip()) - if browser2.current_url == old_url: - if debug: - print( - "[-] Didn't work with SSLv3 either..." - + curUrl[0]) - browser2.quit() - else: - print("[+] Saving: " + screenshotName) - html_source = browser2.page_source - f = open(screenshotName + ".html", "w") - f.write(html_source) - f.close() - browser2.save_screenshot(screenshotName + - ".png") - browser2.quit() - continue - except: - browser2.quit() - print( - "[-] Didn't work with SSLv3 either - exception..." 
- + curUrl[0]) - if tryGUIOnFail and headless: display = Display(visible=0, size=(1024, 768)) display.start() From dbc98fe8563f996bc58cd6635323e2d02acf9d82 Mon Sep 17 00:00:00 2001 From: Jesse Osiecki Date: Fri, 15 Jan 2021 13:46:12 -0500 Subject: [PATCH 03/15] Added XML match for masscan xml - yapf reordered imports --- httpscreenshot.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/httpscreenshot.py b/httpscreenshot.py index ad970cb..ab686c3 100644 --- a/httpscreenshot.py +++ b/httpscreenshot.py @@ -1,27 +1,26 @@ #!/usr/bin/python3 -from selenium import webdriver -from urllib.parse import urlparse -from random import shuffle -from PIL import Image -from PIL import ImageDraw -from PIL import ImageFont -from libnmap.parser import NmapParser -import multiprocessing -import queue import argparse -import sys -import traceback +import hashlib +import multiprocessing import os.path -import ssl -import M2Crypto +import queue import re -import time -import signal import shutil -import hashlib +import signal +import ssl +import sys +import time +import traceback from importlib import reload +from random import shuffle +from urllib.parse import urlparse + +import M2Crypto +from libnmap.parser import NmapParser +from PIL import Image, ImageDraw, ImageFont from pyvirtualdisplay import Display +from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities try: @@ -90,7 +89,8 @@ def detectFileType(inFile): ) return "gnmap" elif (firstLine.find("xml version") != - -1) and secondLine.find("DOCTYPE nmaprun") != -1: + -1) and (secondLine.find("DOCTYPE nmaprun") != -1 + or secondLine.find("masscan") != -1): return "xml" else: return None @@ -198,8 +198,8 @@ def setupBrowserProfile(headless, proxy): fireFoxOptions.set_headless() browser = webdriver.Firefox(firefox_profile=fp, - capabilities=capabilities, - options=fireFoxOptions) + capabilities=capabilities, + options=fireFoxOptions) browser.set_window_size(1024, 768) except Exception as e: @@ -316,7 +316,8 @@ def worker( elif resp is not None: if resp.text is not None: - resp_hash = hashlib.md5(resp.text.encode('utf-8')).hexdigest() + resp_hash = hashlib.md5( + resp.text.encode('utf-8')).hexdigest() else: resp_hash = None From 56deb0c4ad792e60156f277fe51924d119e9d45f Mon Sep 17 00:00:00 2001 From: Jesse Osiecki Date: Fri, 15 Jan 2021 14:43:00 -0500 Subject: [PATCH 04/15] PEP8 cleanup Change Firefox headless option to use non-deprecated method add import socket --- httpscreenshot.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/httpscreenshot.py b/httpscreenshot.py index ab686c3..8ef3c3c 100644 --- a/httpscreenshot.py +++ b/httpscreenshot.py @@ -8,6 +8,7 @@ import re import shutil import signal +import socket import ssl import sys import time @@ -38,8 +39,6 @@ def timeoutFn(func, args=(), kwargs={}, timeout_duration=1, default=None): - import signal - class TimeoutError(Exception): pass @@ -60,11 +59,11 @@ def handler(signum, frame): def addUrlsForService(host, urlList, servicesList, scheme): - if servicesList == None or servicesList == []: + if servicesList is None or servicesList == []: return for service in servicesList: state = service.findPreviousSibling("state") - if state != None and state != [] and state["state"] == "open": + if state is not None and state != [] and state["state"] == "open": urlList.append(scheme + host + ":" + str(service.parent["portid"])) @@ -195,7 +194,7 @@ def 
setupBrowserProfile(headless, proxy): fireFoxOptions = webdriver.FirefoxOptions() if headless: - fireFoxOptions.set_headless() + fireFoxOptions.headless = True browser = webdriver.Firefox(firefox_profile=fp, capabilities=capabilities, @@ -248,7 +247,7 @@ def worker( browser = setupBrowserProfile(headless, proxy) - except: + except Exception: print("[-] Oh no! Couldn't create the browser, Selenium blew up") exc_type, exc_value, exc_traceback = sys.exc_info() lines = traceback.format_exception(exc_type, exc_value, exc_traceback) @@ -366,7 +365,7 @@ def worker( ".png") browser2.quit() continue - except: + except Exception: browser2.quit() display.stop() print( @@ -419,7 +418,7 @@ def doGet(*args, **kwargs): resp = session.get(url[0], **kwargs) # If we have an https URL and we are configured to scrape hosts from the cert... - if url[0].find("https") != -1 and url[1] == True: + if url[0].find("https") != -1 and url[1] is True: # Pull hostnames from cert, add as additional URLs and flag as not to pull certs host = urlparse(url[0]).hostname port = urlparse(url[0]).port @@ -434,8 +433,8 @@ def doGet(*args, **kwargs): names = re.findall("CN=([^\s]+)", subjText) altNames = x509.get_ext("subjectAltName").get_value() names.extend(re.findall("DNS:([^,]*)", altNames)) - except: - pass + except Exception as e: + print(e) for name in names: if name.find("*.") != -1: @@ -451,7 +450,7 @@ def doGet(*args, **kwargs): url[2] ]) print("[+] Discovered subdomain " + address) - except: + except Exception: pass name = name.replace("*.", "") if name not in extraHosts: @@ -510,7 +509,7 @@ def autodetectRequest(url, url[0] = url[0].replace("https", "http") # print 'Changing to HTTP '+url[0] - except Exception as e: + except Exception: url[0] = url[0].replace("https", "http") # print 'Changing to HTTP '+url[0] try: @@ -641,7 +640,7 @@ def signal_handler(signal, frame): # read in the URI list if specificed uris = [""] - if args.uri_list != None: + if args.uri_list is not None: uris = open(args.uri_list, "r").readlines() uris.append("") @@ -654,7 +653,7 @@ def signal_handler(signal, frame): for port in ports: for uri in uris: url = "" - if port[1] == True: + if port[1]: url = [ "https://" + host + ":" + port[0] + uri.strip(), @@ -675,7 +674,7 @@ def signal_handler(signal, frame): for port in ports: for uri in uris: url = "" - if port[1] == True: + if port[1]: url = [ "https://" + host + ":" + port[0] + uri.strip(), @@ -707,7 +706,7 @@ def signal_handler(signal, frame): # read in the subdomain bruteforce list if specificed subs = [] - if args.dns_brute != None: + if args.dns_brute is not None: subs = open(args.dns_brute, "r").readlines() # Fire up the workers From fdb7ed0e6f18cb31b0037ae7959b01b7515f3254 Mon Sep 17 00:00:00 2001 From: Jesse Osiecki Date: Fri, 15 Jan 2021 14:45:37 -0500 Subject: [PATCH 05/15] Setup.cfg for linter --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2bcd70e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 88 From 510b31865fe3781b869c921e3f86021d776e5af1 Mon Sep 17 00:00:00 2001 From: b Date: Fri, 26 Mar 2021 14:40:04 -0400 Subject: [PATCH 06/15] Fixed gnmap parsing to support masscan format properly --- httpscreenshot.py | 23 ++++++++++++++++++----- requirements.txt | 1 + 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/httpscreenshot.py b/httpscreenshot.py index 8ef3c3c..6b6ed02 100644 --- a/httpscreenshot.py +++ b/httpscreenshot.py @@ -125,16 +125,26 
@@ def parseGnmap(inFile, autodetect): >>> targets {'127.0.0.1': [[443, True], [8080, False]]} """ + hostRe=re.compile('Host:\s*[^\s]+') + servicesRe=re.compile('Ports:\s*.*') targets = {} for hostLine in inFile: if hostLine.strip() == "": break currentTarget = [] # Pull out the IP address (or hostnames) and HTTP service ports - fields = hostLine.split(" ") - ip = fields[ - 1] # not going to regex match this with ip address b/c could be a hostname - for item in fields: + + ipHostRes = hostRe.search(hostLine) + + if ipHostRes is None: + continue + + ipHost = ipHostRes.group() + ip = ipHost.split(':')[1].strip() + + services = servicesRe.search(hostLine).group().split() + + for item in services: # Make sure we have an open port with an http type service on it if (item.find("http") != -1 or autodetect) and re.findall( "\d+/open", item): @@ -160,7 +170,10 @@ def parseGnmap(inFile, autodetect): currentTarget.append([port, https]) if len(currentTarget) > 0: - targets[ip] = currentTarget + if ip in targets: + targets[ip].extend(currentTarget) + else: + targets[ip] = currentTarget return targets diff --git a/requirements.txt b/requirements.txt index f894278..e84896d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ pillow PySocks python-libnmap pyvirtualdisplay +reload From 1f0349ad41726d480f20d7f556b1268ae5521d89 Mon Sep 17 00:00:00 2001 From: b Date: Tue, 3 Aug 2021 13:08:39 -0400 Subject: [PATCH 07/15] Updated screenshot clustering output --- screenshotClustering/cluster.py | 36 ++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/screenshotClustering/cluster.py b/screenshotClustering/cluster.py index 41e9b95..13a651f 100755 --- a/screenshotClustering/cluster.py +++ b/screenshotClustering/cluster.py @@ -16,11 +16,11 @@ def addAttrToBag(attrName,url,link,wordBags,soup): for tag in soup.findAll('',{attrName:True}): if(isinstance(tag[attrName],str) or isinstance(tag[attrName],unicode)): - tagStr = tag[attrName].encode('utf-8').strip() + tagStr = tag[attrName].encode('ISO-8859-1').strip() elif(isinstance(tag[attrName],list)): - tagStr = tag[attrName][0].encode('utf-8').strip() + tagStr = tag[attrName][0].encode('ISO-8859-1').strip() else: - print '[-] Strange tag type detected - '+str(type(tag[attrName])) + print('[-] Strange tag type detected - '+str(type(tag[attrName]))) tagStr = 'XXXXXXXXX' if(tagStr != ''): @@ -51,7 +51,7 @@ def createWordBags(htmlList): wordBags={} for f in htmlList: - htmlContent = open(f,'r').read() + htmlContent = open(f,'r', encoding='ISO-8859-1').read() wordBags[f]={} soup = BeautifulSoup(htmlContent, 'html.parser') addAttrToBag('name',f,False,wordBags,soup) @@ -77,11 +77,11 @@ def computeScore(wordBag1,wordBag2,debug=0): if(len(wordBag1) == 0 and len(wordBag2) == 0): if debug: - print 'Both have no words - return true' + print('Both have no words - return true') return 1 elif (len(wordBag1) == 0 or len(wordBag2) == 0): if debug: - print 'One has no words - return false' + print('One has no words - return false') return 0 for word in wordBag1.keys(): @@ -90,17 +90,17 @@ def computeScore(wordBag1,wordBag2,debug=0): score = (float(commonWords)/float(wordBag1Length)*(float(commonWords)/float(wordBag2Length))) if debug: - print "Common Words: "+str(commonWords) - print "WordBag1 Length: "+str(wordBag1Length) - print "WordBag2 Length: "+str(wordBag2Length) - print score + print("Common Words: "+str(commonWords)) + print("WordBag1 Length: "+str(wordBag1Length)) + print("WordBag2 Length: "+str(wordBag2Length)) + 
print(score) return score def createClusters(wordBags,threshold): clusterData = {} i = 0 - siteList = wordBags.keys() + siteList = list(wordBags.keys()) for i in range(0,len(siteList)): clusterData[siteList[i]] = [threshold, i] @@ -126,7 +126,7 @@ def getScopeHtml(scopeFile): def getPageTitle(htmlFile): """Simple function to yank page title from html""" - with open(htmlFile, 'r') as f: + with open(htmlFile, 'r', encoding='ISO-8859-1') as f: soup = BeautifulSoup(f, "lxml") try: return soup.title.string.encode('ascii', 'ignore') @@ -151,12 +151,16 @@ def renderClusterHtml(clust,width,height,scopeFile=None): ''' for cluster, siteList in clust.items(): + try: + title = getPageTitle(siteList[0]).decode("ISO-8859-1") + except (UnicodeDecodeError, AttributeError): + title = getPageTitle(siteList[0]) html = html + """ + """ + title + """ @@ -166,9 +170,9 @@ def renderClusterHtml(clust,width,height,scopeFile=None): for site in siteList: screenshotName = quote(site[0:-5], safe='./') if site != siteList[-1]: - html = html + '' + html = html + f"" else: - html = html + '
- """ + getPageTitle(siteList[0]) + """
' + html = html + f" " footer = '' @@ -362,7 +366,7 @@ def doCluster(htmlList): clusterData = createClusters(siteWordBags,0.6) clusterDict = {} - for site,data in clusterData.iteritems(): + for site,data in clusterData.items(): if data[1] in clusterDict: clusterDict[data[1]].append(site) else: From a30c1400032e86290699c54912b3e826c9ffe956 Mon Sep 17 00:00:00 2001 From: Justin Kennedy Date: Mon, 10 Jan 2022 14:04:30 -0500 Subject: [PATCH 08/15] various fixes updated headless browser to use chrome driver manager updated the URL queue to a defaultdict from collections updated file parsing to now parse gnmap and xml from nmap and masscan updated requirements.txt to include webdriver_manager (updated headless) and lxml (used by cluster script) updated install-dependencies.sh based on a fresh install of Ubuntu 20.04.3 LTS --- httpscreenshot.py | 305 ++++++++++++++-------------------------- install-dependencies.sh | 21 ++- requirements.txt | 2 + 3 files changed, 117 insertions(+), 211 deletions(-) diff --git a/httpscreenshot.py b/httpscreenshot.py index 6b6ed02..ceb1ff3 100644 --- a/httpscreenshot.py +++ b/httpscreenshot.py @@ -8,7 +8,6 @@ import re import shutil import signal -import socket import ssl import sys import time @@ -16,13 +15,20 @@ from importlib import reload from random import shuffle from urllib.parse import urlparse +from collections import defaultdict + +import warnings + +warnings.filterwarnings("ignore") import M2Crypto -from libnmap.parser import NmapParser from PIL import Image, ImageDraw, ImageFont from pyvirtualdisplay import Display from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.desired_capabilities import DesiredCapabilities +from webdriver_manager.chrome import ChromeDriverManager try: from urllib.parse import quote @@ -32,7 +38,6 @@ try: import requesocks as requests except Exception: - print("requesocks library not found - proxy support will not be available") import requests reload(sys) @@ -76,8 +81,7 @@ def detectFileType(inFile): # Be polite and reset the file pointer inFile.seek(0) - if (firstLine.find("nmap") != -1 or - firstLine.find("Masscan") != -1) and thirdLine.find("Host:") != -1: + if (firstLine.find("nmap") != -1 or firstLine.find("Masscan") != -1) and thirdLine.find("Host:") != -1: # Looks like a gnmap file - this wont be true for other nmap output types # Check to see if -sV flag was used, if not, warn if firstLine.find("-sV") != -1 or firstLine.find("-A") != -1: @@ -96,44 +100,34 @@ def detectFileType(inFile): def parsexml(inFile): - targets = {} - infile = NmapParser.parse_fromfile(args.input) - for host in infile.hosts: - if host.services: - currentTarget = [] - for s in host.services: - if s.state != "closed" and "http" in s.service: - ip = host.address - port = str(s.port) - https = False - if "https" in s.service or "ssl" in s.service: - https = True - - currentTarget.append([port, https]) - - if len(currentTarget) > 0: - targets[ip] = currentTarget + import xml.etree.ElementTree as ET + + tree = ET.parse(inFile) + root = tree.getroot() + + targets = defaultdict(list) + + for host in root.findall("host"): + ip = host.find('address').get('addr') + + for port in host.find('ports').findall("port"): + if port.find("state").get("state") == "open": + targets[ip].append(port.get("portid")) return targets - print("Parsing is complete, continue on...") def parseGnmap(inFile, autodetect): - """ - Parse a gnmap file into a dictionary. 
The dictionary key is the ip address or hostname.
-    Each key item is a list of ports and whether or not that port is https/ssl. For example:
-    >>> targets
-    {'127.0.0.1': [[443, True], [8080, False]]}
-    """
-    hostRe=re.compile('Host:\s*[^\s]+')
-    servicesRe=re.compile('Ports:\s*.*')
-    targets = {}
+    hostRe = re.compile(r'Host:\s*[^\s]+')
+    servicesRe = re.compile(r'Ports:\s*.*')
+
+    targets = defaultdict(list)
+
     for hostLine in inFile:
         if hostLine.strip() == "":
             break
-        currentTarget = []
 
         # Pull out the IP address (or hostnames) and HTTP service ports
-
+
         ipHostRes = hostRe.search(hostLine)
 
         if ipHostRes is None:
@@ -142,12 +136,14 @@ def parseGnmap(inFile, autodetect):
         ipHost = ipHostRes.group()
         ip = ipHost.split(':')[1].strip()
 
-        services = servicesRe.search(hostLine).group().split()
+        try:
+            services = servicesRe.search(hostLine).group().split()
+        except AttributeError:  # no "Ports:" field on this host line
+            continue
 
         for item in services:
             # Make sure we have an open port with an http type service on it
-            if (item.find("http") != -1 or autodetect) and re.findall(
-                    "\d+/open", item):
+            if re.findall(r"\d+/open", item):
                 port = None
                 https = False
                 """
@@ -163,56 +159,43 @@ def parseGnmap(inFile, autodetect):
                 construct the URLs.
                 """
                 port = item.split("/")[0]
+                targets[ip].append(port)
 
-                if item.find("https") != -1 or item.find("ssl") != -1:
-                    https = True
-                # Add the current service item to the currentTarget list for this host
-                currentTarget.append([port, https])
-
-        if len(currentTarget) > 0:
-            if ip in targets:
-                targets[ip].extend(currentTarget)
-            else:
-                targets[ip] = currentTarget
     return targets
 
 
 def setupBrowserProfile(headless, proxy):
     browser = None
-    if proxy is not None:
-        service_args = [
-            "--ignore-ssl-errors=true",
-            "--ssl-protocol=any",
-            "--proxy=" + proxy,
-            "--proxy-type=socks5",
-        ]
+    if (proxy is not None):
+        service_args = ['--ignore-ssl-errors=true', '--ssl-protocol=any', '--proxy=' + proxy, '--proxy-type=socks5']
     else:
-        service_args = ["--ignore-ssl-errors=true", "--ssl-protocol=any"]
+        service_args = ['--ignore-ssl-errors=true', '--ssl-protocol=any']
 
-    while browser is None:
+    while (browser is None):
         try:
-            capabilities = DesiredCapabilities.FIREFOX
-            capabilities["acceptSslCerts"] = True
-            fp = webdriver.FirefoxProfile()
-            fp.set_preference("webdriver.accept.untrusted.certs", True)
-            fp.set_preference("security.enable_java", False)
-            fp.set_preference("webdriver.load.strategy", "fast")
-            if proxy is not None:
-                proxyItems = proxy.split(":")
-                fp.set_preference("network.proxy.socks", proxyItems[0])
-                fp.set_preference("network.proxy.socks_port",
-                                  int(proxyItems[1]))
-                fp.set_preference("network.proxy.type", 1)
-
-            fireFoxOptions = webdriver.FirefoxOptions()
-
-            if headless:
-                fireFoxOptions.headless = True
-
-            browser = webdriver.Firefox(firefox_profile=fp,
-                                        capabilities=capabilities,
-                                        options=fireFoxOptions)
-            browser.set_window_size(1024, 768)
+            if (not headless):
+                capabilities = DesiredCapabilities.FIREFOX
+                capabilities['acceptSslCerts'] = True
+                fp = webdriver.FirefoxProfile()
+                fp.set_preference("webdriver.accept.untrusted.certs", True)
+                fp.set_preference("security.enable_java", False)
+                fp.set_preference("webdriver.load.strategy", "fast")
+                if (proxy is not None):
+                    proxyItems = proxy.split(":")
+                    fp.set_preference("network.proxy.socks", proxyItems[0])
+                    fp.set_preference("network.proxy.socks_port", int(proxyItems[1]))
+                    fp.set_preference("network.proxy.type", 1)
+                browser = webdriver.Firefox(firefox_profile=fp, capabilities=capabilities)
+            else:
+                service = 
Service(ChromeDriverManager(log_level=0).install()) + coptions = Options() + coptions.add_argument("--headless") + coptions.add_argument("--no-sandbox") + coptions.add_argument("--window-size=1024x768") + coptions.add_argument("--ignore-certificate-errors") + coptions.add_argument("--ssl-version-min=tls1") + + browser = webdriver.Chrome(service=service, options=coptions) except Exception as e: print(e) @@ -236,17 +219,17 @@ def writeImage(text, filename, fontsize=40, width=1024, height=200): def worker( - urlQueue, - tout, - debug, - headless, - doProfile, - vhosts, - subs, - extraHosts, - tryGUIOnFail, - smartFetch, - proxy, + urlQueue, + tout, + debug, + headless, + doProfile, + vhosts, + subs, + extraHosts, + tryGUIOnFail, + smartFetch, + proxy, ): if debug: print("[*] Starting worker") @@ -277,9 +260,9 @@ def worker( except queue.Empty: continue print("[+] " + str(urlQueue.qsize()) + " URLs remaining") - screenshotName = quote(curUrl[0], safe="") + screenshotName = quote(curUrl, safe="") if debug: - print("[+] Got URL: " + curUrl[0]) + print("[+] Got URL: " + curUrl) print("[+] screenshotName: " + screenshotName) if os.path.exists(screenshotName + ".png"): if debug: @@ -314,7 +297,7 @@ def worker( proxy=proxy, ) if resp is not None and resp.status_code == 401: - print(curUrl[0] + " Requires HTTP Basic Auth") + print(curUrl + " Requires HTTP Basic Auth") f = open(screenshotName + ".html", "w") f.write(resp.headers.get("www-authenticate", "NONE")) f.write("Basic Auth") @@ -348,24 +331,24 @@ def worker( browser.set_page_load_timeout((tout)) old_url = browser.current_url - browser.get(curUrl[0].strip()) + browser.get(curUrl.strip()) if browser.current_url == old_url: print( "[-] Error fetching in browser but successfully fetched with Requests: " - + curUrl[0]) + + curUrl) if tryGUIOnFail and headless: display = Display(visible=0, size=(1024, 768)) display.start() print("[+] Attempting to fetch with FireFox: " + - curUrl[0]) + curUrl) browser2 = setupBrowserProfile(False, proxy) old_url = browser2.current_url try: - browser2.get(curUrl[0].strip()) + browser2.get(curUrl.strip()) if browser2.current_url == old_url: print( "[-] Error fetching in GUI browser as well..." - + curUrl[0]) + + curUrl) browser2.quit() continue else: @@ -383,7 +366,7 @@ def worker( display.stop() print( "[-] Error fetching in GUI browser as well..." - + curUrl[0]) + + curUrl) else: continue @@ -396,11 +379,6 @@ def worker( browser.save_screenshot(screenshotName + ".png") except Exception as e: - print(e) - print("[-] Something bad happened with URL: " + curUrl[0]) - if curUrl[2] > 0: - curUrl[2] = curUrl[2] - 1 - urlQueue.put(curUrl) if debug: exc_type, exc_value, exc_traceback = sys.exc_info() lines = traceback.format_exception(exc_type, exc_value, @@ -428,20 +406,19 @@ def doGet(*args, **kwargs): "http": "socks5://" + proxy, "https": "socks5://" + proxy } - resp = session.get(url[0], **kwargs) + resp = session.get(url, **kwargs) # If we have an https URL and we are configured to scrape hosts from the cert... 
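+        # Illustrative sketch of the scrape below (host and port are examples
+        # only; get_ext() raises when the cert carries no subjectAltName,
+        # which the surrounding try/except already absorbs):
+        #   pem = ssl.get_server_certificate(("example.com", 443))
+        #   x509 = M2Crypto.X509.load_cert_string(pem)
+        #   x509.get_subject().as_text()                # '..., CN=example.com'
+        #   x509.get_ext("subjectAltName").get_value()  # 'DNS:example.com, DNS:www.example.com'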
- if url[0].find("https") != -1 and url[1] is True: + if url.find("https") != -1 and doVhosts is True: # Pull hostnames from cert, add as additional URLs and flag as not to pull certs - host = urlparse(url[0]).hostname - port = urlparse(url[0]).port + host = urlparse(url).hostname + port = urlparse(url).port if port is None: port = 443 names = [] try: - cert = ssl.get_server_certificate((host, port), - ssl_version=ssl.PROTOCOL_SSLv23) - x509 = M2Crypto.X509.load_cert_string(cert.decode("string_escape")) + cert = ssl.get_server_certificate((host, port)) + x509 = M2Crypto.X509.load_cert_string(cert) subjText = x509.get_subject().as_text() names = re.findall("CN=([^\s]+)", subjText) altNames = x509.get_ext("subjectAltName").get_value() @@ -450,33 +427,10 @@ def doGet(*args, **kwargs): print(e) for name in names: - if name.find("*.") != -1: - for sub in subs: - try: - sub = sub.strip() - hostname = name.replace("*.", sub + ".") - if hostname not in extraHosts: - extraHosts[hostname] = 1 - address = socket.gethostbyname(hostname) - urlQueue.put([ - "https://" + hostname + ":" + str(port), False, - url[2] - ]) - print("[+] Discovered subdomain " + address) - except Exception: - pass - name = name.replace("*.", "") - if name not in extraHosts: - extraHosts[name] = 1 - urlQueue.put( - ["https://" + name + ":" + str(port), False, url[2]]) - print("[+] Added host " + name) - else: - if name not in extraHosts: - extraHosts[name] = 1 - urlQueue.put( - ["https://" + name + ":" + str(port), False, url[2]]) - print("[+] Added host " + name) + if name not in extraHosts: + extraHosts[name] = 1 + urlQueue.put(f"https://{name}:{port}") + print(f"[+] Added host https://{name}:{port}") return resp else: return resp @@ -492,39 +446,26 @@ def autodetectRequest(url, """Takes a URL, ignores the scheme. 
Detect if the host/port is actually an HTTP or HTTPS server"""
     resp = None
-    host = urlparse(url[0]).hostname
-    port = urlparse(url[0]).port
-
-    if port is None:
-        if "https" in url[0]:
-            port = 443
-        else:
-            port = 80
+    host = urlparse(url).hostname
+    port = urlparse(url).port
+    if port is None:  # bare URLs: fall back to the scheme's default port
+        port = 443 if "https" in url else 80
 
     try:
         # cert = ssl.get_server_certificate((host,port))
         cert = timeoutFn(
             ssl.get_server_certificate,
-            kwargs={
-                "addr": (host, port),
-                "ssl_version": ssl.PROTOCOL_SSLv23
-            },
+            kwargs={"addr": (host, port)},
             timeout_duration=3,
         )
 
         if cert is not None:
-            if "https" not in url[0]:
-                url[0] = url[0].replace("http", "https")
-                # print 'Got cert, changing to HTTPS '+url[0]
-
+            if "https" not in url:
+                url = url.replace("http://", "https://")
         else:
-            url[0] = url[0].replace("https", "http")
-            # print 'Changing to HTTP '+url[0]
+            url = url.replace("https://", "http://")
 
     except Exception:
-        url[0] = url[0].replace("https", "http")
-        # print 'Changing to HTTP '+url[0]
+        url = url.replace("https://", "http://")
 
     try:
         resp = doGet(
             url,
@@ -538,7 +479,7 @@ def autodetectRequest(url,
         )
     except Exception as e:
         print("HTTP GET Error: " + str(e))
-        print(url[0])
+        print(url)
 
     return [resp, url]
 
@@ -658,58 +599,26 @@ def signal_handler(signal, frame):
             uris.append("")
 
     if args.input is not None:
-        inFile = open(args.input, "rU")
+        inFile = open(args.input, "r")
         if detectFileType(inFile) == "gnmap":
             hosts = parseGnmap(inFile, args.autodetect)
-            urls = []
-            for host, ports in list(hosts.items()):
-                for port in ports:
-                    for uri in uris:
-                        url = ""
-                        if port[1]:
-                            url = [
-                                "https://" + host + ":" + port[0] +
-                                uri.strip(),
-                                args.vhosts,
-                                args.retries,
-                            ]
-                        else:
-                            url = [
-                                "http://" + host + ":" + port[0] + uri.strip(),
-                                args.vhosts,
-                                args.retries,
-                            ]
-                        urls.append(url)
         elif detectFileType(inFile) == "xml":
             hosts = parsexml(inFile)
-            urls = []
-            for host, ports in list(hosts.items()):
-                for port in ports:
-                    for uri in uris:
-                        url = ""
-                        if port[1]:
-                            url = [
-                                "https://" + host + ":" + port[0] +
-                                uri.strip(),
-                                args.vhosts,
-                                args.retries,
-                            ]
-                        else:
-                            url = [
-                                "http://" + host + ":" + port[0] + uri.strip(),
-                                args.vhosts,
-                                args.retries,
-                            ]
-                        urls.append(url)
         else:
             print("Invalid input file - must be Nmap GNMAP or Nmap XML")
+            sys.exit(1)  # bail out before touching the undefined hosts dict
 
+        urls = []
+
+        for host in hosts:
+            for port in hosts[host]:
+                urls.append(f"http://{host}:{port}")
+
     elif args.list is not None:
         f = open(args.list, "r")
         lst = f.readlines()
         urls = []
         for url in lst:
-            urls.append([url.strip(), args.vhosts, args.retries])
+            urls.append(url.strip())
     else:
         print("No input specified")
         sys.exit(0)
diff --git a/install-dependencies.sh b/install-dependencies.sh
index 7b44c36..9b0ae20 100755
--- a/install-dependencies.sh
+++ b/install-dependencies.sh
@@ -1,4 +1,4 @@
-# Installation Script - tested on an ubuntu/trusty64 vagrant box
+# Installation Script - tested on a fresh install of Ubuntu 20.04.3 LTS
 
 # Show all commands being run
 #set -x
@@ -6,17 +6,12 @@
 # Error out if one fails
 set -e
 
-apt-get install -y swig swig3.0 libssl-dev python3-dev libjpeg-dev xvfb firefox firefox-geckodriver
+# Pull packages from apt
+sudo apt install -y python3-pip build-essential libssl-dev swig python3-dev
 
-# Newer version in PyPI
-#apt-get install -y python-requests
+# Install Google Chrome
+wget -O /tmp/google-chrome-stable_current_amd64.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
+sudo apt install -y /tmp/google-chrome-stable_current_amd64.deb
 
-# Newer version in PyPI
-#apt-get install -y python-m2crypto
-
-# Installing pillow from PIP for the latest
-#apt-get install -y python-pil - -# Install pip and install pytnon requirements through it -apt-get install -y python3-pip -pip3 install -r requirements.txt +# Install required python packages +pip3 install -r requirements.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e84896d..d805fbe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,5 @@ PySocks python-libnmap pyvirtualdisplay reload +webdriver_manager +lxml \ No newline at end of file From 341de08866fabceef3fab316fc0b3bb5d02e8337 Mon Sep 17 00:00:00 2001 From: Jesse Osiecki <532841+jesse-osiecki@users.noreply.github.com> Date: Wed, 26 Jan 2022 18:07:41 -0500 Subject: [PATCH 09/15] Update Dockerfile Changing dockerfile comment pointing to https://hub.docker.com/r/andmyhacks/httpscreenshot as it's 5 years out of date --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index f4c2655..77e1b16 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# docker pull andmyhacks/httpscreenshot +# docker pull jesse-osiecki/httpscreenshot FROM ubuntu:20.04 From 27859be89c0de97a4070138a13f2b4dfe43dfaa2 Mon Sep 17 00:00:00 2001 From: Jesse Osiecki <532841+jesse-osiecki@users.noreply.github.com> Date: Wed, 26 Jan 2022 18:09:05 -0500 Subject: [PATCH 10/15] Update Dockerfile typo --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 77e1b16..18cefde 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# docker pull jesse-osiecki/httpscreenshot +# docker pull jesseosiecki/httpscreenshot FROM ubuntu:20.04 From 5e73d39dd26e89ff65b9aa6f44db7bf2bed9a540 Mon Sep 17 00:00:00 2001 From: Jesse Osiecki <532841+jesse-osiecki@users.noreply.github.com> Date: Wed, 26 Jan 2022 18:23:11 -0500 Subject: [PATCH 11/15] Update README.md --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index eaae0dd..56ab68f 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,17 @@ # httpscreenshot +### Installation via Docker + +`docker pull jesseosiecki/httpscreenshot` +`docker run -v + ### Installation on Ubuntu #### Via Script Run `install-dependencies.sh` script as root. -This script has been tested on Ubuntu 14.04. +This script has been tested on Ubuntu 20.04 as *root* (sudo). 
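+
+With everything installed, a quick smoke test (the target URL and list file
+name here are only examples) is:
+
+`echo "http://example.com" > urls.txt && python3 httpscreenshot.py -l urls.txt`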
### Manually
 
From 4397b57b51714e8d8739fce0aaed8728f6842a55 Mon Sep 17 00:00:00 2001
From: Jesse Osiecki
Date: Wed, 26 Jan 2022 18:27:52 -0500
Subject: [PATCH 12/15] adding Dockerfile run instructions

---
 Dockerfile | 2 ++
 README.md  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 18cefde..43bc347 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,3 +19,5 @@ RUN ln -s /etc/httpscreenshot/httpscreenshot.py /usr/bin/httpscreenshot
 RUN mkdir -p /etc/httpscreenshot/images
 
 WORKDIR /etc/httpscreenshot/images
+
+ENTRYPOINT ["httpscreenshot"]
diff --git a/README.md b/README.md
index 56ab68f..2d87285 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 ### Installation via Docker
 
 `docker pull jesseosiecki/httpscreenshot`
-`docker run -v
+`docker run jesseosiecki/httpscreenshot`
 
 ### Installation on Ubuntu
 
From cba2861830f0452ee9d297d9185da4e1a7db8ec7 Mon Sep 17 00:00:00 2001
From: Jesse Osiecki
Date: Wed, 26 Jan 2022 17:49:28 -0500
Subject: [PATCH 13/15] - Add a CLI switch to allow choice between
 Chrome/Firefox, keeping the -p headless flag as an option for both. Default
 is Firefox, as the previous change defaulting to Chrome broke existing
 functionality.

---
 httpscreenshot.py | 47 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/httpscreenshot.py b/httpscreenshot.py
index ceb1ff3..6794d8d 100644
--- a/httpscreenshot.py
+++ b/httpscreenshot.py
@@ -164,7 +164,7 @@ def parseGnmap(inFile, autodetect):
     return targets
 
 
-def setupBrowserProfile(headless, proxy):
+def setupBrowserProfile(headless, proxy, browserType):
     browser = None
     if (proxy is not None):
         service_args = ['--ignore-ssl-errors=true', '--ssl-protocol=any', '--proxy=' + proxy, '--proxy-type=socks5']
@@ -173,7 +173,18 @@ def setupBrowserProfile(headless, proxy):
 
     while (browser is None):
         try:
-            if (not headless):
+            if (browserType == 'Chrome' or browserType == 'Chromium'):
+                service = Service(ChromeDriverManager(log_level=0).install())
+                coptions = Options()
+                if headless:
+                    coptions.add_argument("--headless")
+                coptions.add_argument("--no-sandbox")
+                coptions.add_argument("--window-size=1024,768")
+                coptions.add_argument("--ignore-certificate-errors")
+                coptions.add_argument("--ssl-version-min=tls1")
+
+                browser = webdriver.Chrome(service=service, options=coptions)
+            else:
                 capabilities = DesiredCapabilities.FIREFOX
                 capabilities['acceptSslCerts'] = True
                 fp = webdriver.FirefoxProfile()
@@ -185,17 +196,15 @@ def setupBrowserProfile(headless, proxy):
             fp.set_preference("network.proxy.socks", proxyItems[0])
             fp.set_preference("network.proxy.socks_port", int(proxyItems[1]))
             fp.set_preference("network.proxy.type", 1)
-            browser = webdriver.Firefox(firefox_profile=fp, capabilities=capabilities)
-        else:
-            service = Service(ChromeDriverManager(log_level=0).install())
-            coptions = Options()
-            coptions.add_argument("--headless")
-            coptions.add_argument("--no-sandbox")
-            coptions.add_argument("--window-size=1024x768")
-            coptions.add_argument("--ignore-certificate-errors")
-            coptions.add_argument("--ssl-version-min=tls1")
-            browser = webdriver.Chrome(service=service, options=coptions)
+            fireFoxOptions = webdriver.FirefoxOptions()
+            if headless:
+                fireFoxOptions.headless = True
+
+            browser = webdriver.Firefox(firefox_profile=fp,
+                                        capabilities=capabilities,
+                                        options=fireFoxOptions)
+            browser.set_window_size(1024, 768)
         except Exception as e:
             print(e)
@@ -230,6 +239,7 @@ def worker(
     tryGUIOnFail,
     smartFetch,
     proxy,
+    browserType
 ):
     if debug:
         print("[*] 
Starting worker") @@ -241,7 +251,7 @@ def worker( display = Display(visible=0, size=(800, 600)) display.start() - browser = setupBrowserProfile(headless, proxy) + browser = setupBrowserProfile(headless, proxy, browserType) except Exception: print("[-] Oh no! Couldn't create the browser, Selenium blew up") @@ -341,7 +351,7 @@ def worker( display.start() print("[+] Attempting to fetch with FireFox: " + curUrl) - browser2 = setupBrowserProfile(False, proxy) + browser2 = setupBrowserProfile(False, proxy, "Firefox") old_url = browser2.current_url try: browser2.get(curUrl.strip()) @@ -385,7 +395,7 @@ def worker( exc_traceback) print("".join("!! " + line for line in lines)) browser.quit() - browser = setupBrowserProfile(headless, proxy) + browser = setupBrowserProfile(headless, proxy, "Firefox") continue browser.quit() display.stop() @@ -513,6 +523,12 @@ def signal_handler(signal, frame): default=False, help="Run in headless mode (using phantomjs)", ) + parser.add_argument( + "-b", + "--browsertype", + default="Firefox", + help="Choose webdriver {Firefox, Chrome}" + ) parser.add_argument("-w", "--workers", default=1, @@ -653,6 +669,7 @@ def signal_handler(signal, frame): args.trygui, args.smartfetch, args.proxy, + args.browsertype ), ) workers.append(p) From 8acef3a78d8dc2e7bfcc0aacf9655d8c23bdeb81 Mon Sep 17 00:00:00 2001 From: Jesse Osiecki Date: Wed, 26 Jan 2022 18:30:16 -0500 Subject: [PATCH 14/15] remove sudo --- install-dependencies.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/install-dependencies.sh b/install-dependencies.sh index 9b0ae20..1d504cd 100755 --- a/install-dependencies.sh +++ b/install-dependencies.sh @@ -1,4 +1,4 @@ -# Installation Script - tested on a fresh install of Ubuntu 20.04.3 LTS +# Installation Script - tested on a fresh install of Ubuntu 20.04.3 LTS as root (sudo) # Show all commands being run #set -x @@ -7,11 +7,11 @@ set -e # Pull packages from apt -sudo apt install -y python3-pip build-essential libssl-dev swig python3-dev +apt install -y python3-pip build-essential libssl-dev swig python3-dev # Install Google Chrome wget -O /tmp/google-chrome-stable_current_amd64.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb -sudo apt install -y /tmp/google-chrome-stable_current_amd64.deb +apt install -y /tmp/google-chrome-stable_current_amd64.deb # Install required python packages -pip3 install -r requirements.txt \ No newline at end of file +pip3 install -r requirements.txt From f081c69ad6cb9f066270bbb86f14db83a6745b18 Mon Sep 17 00:00:00 2001 From: Justin Kennedy Date: Tue, 1 Oct 2024 12:04:23 -0400 Subject: [PATCH 15/15] Update requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d805fbe..4a8609e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ python-libnmap pyvirtualdisplay reload webdriver_manager -lxml \ No newline at end of file +lxml +urllib3<2.0.0