Index: run.py
===================================================================
--- a/run.py
+++ b/run.py
@@ -41,17 +41,20 @@ class CrawlerApp:
             request_body_size = int(environ.get('CONTENT_LENGTH', 0))
         except (ValueError):
             start_response('400 Bad Request', [])
             return ''
         data = json.loads(environ['wsgi.input'].read(request_body_size))
         self.urls.remove(data['url'])
-        parsedurl = urlparse.urlparse(data['url'])
+        fullurl = data['url']
+        if not urlparse.urlparse(fullurl).scheme:
+            fullurl = 'http://' + fullurl
+        parsedurl = urlparse.urlparse(fullurl)
         urlhash = hashlib.new('md5', data['url']).hexdigest()
         timestamp = datetime.datetime.fromtimestamp(data['startTime'] / 1000.0).strftime('%Y-%m-%dT%H%M%S.%f')
         basename = "%s-%s-%s" % (parsedurl.hostname, timestamp, urlhash)
         datapath = os.path.join(self.parameters.outdir, basename + ".json")
         screenshotpath = os.path.join(self.parameters.outdir, basename + ".jpg")
         sourcepath = os.path.join(self.parameters.outdir, basename + ".xml")
         try: