Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: sitescripts/stats/bin/logprocessor.py

Issue 6231865702744064: Issue 2239 - Make logprocessor close files and wait for subprocesses (Closed)
Patch Set: Created March 31, 2015, 10:06 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sitescripts/stats/bin/logprocessor.py
===================================================================
--- a/sitescripts/stats/bin/logprocessor.py
+++ b/sitescripts/stats/bin/logprocessor.py
@@ -42,29 +42,48 @@
log_regexp = None
gecko_apps = None
-def open_stats_file(path):
- parseresult = urlparse.urlparse(path)
- if parseresult.scheme == "ssh" and parseresult.username and parseresult.hostname and parseresult.path:
- command = [
- "ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k",
- "-l", parseresult.username,
- parseresult.hostname,
- parseresult.path.lstrip("/")
- ]
- if parseresult.port:
- command[1:1] = ["-P", str(parseresult.port)]
- result = subprocess.Popen(command, stdout=subprocess.PIPE).stdout
- elif parseresult.scheme in ("http", "https"):
- result = urllib.urlopen(path)
- elif os.path.exists(path):
- result = open(path, "rb")
- else:
- raise IOError("Path '%s' not recognized" % path)
+class StatsFile:
+ def __init__(self, path):
+ self._inner_file = None
+ self._processes = []
- if path.endswith(".gz"):
- # Built-in gzip module doesn't support streaming (fixed in Python 3.2)
- result = subprocess.Popen(["gzip", "-cd"], stdin=result, stdout=subprocess.PIPE).stdout
- return result
+ parseresult = urlparse.urlparse(path)
+ if parseresult.scheme == "ssh" and parseresult.username and parseresult.hostname and parseresult.path:
+ command = [
+ "ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k",
+ "-l", parseresult.username,
+ parseresult.hostname,
+ parseresult.path.lstrip("/")
+ ]
+ if parseresult.port:
+ command[1:1] = ["-P", str(parseresult.port)]
+ ssh_process = subprocess.Popen(command, stdout=subprocess.PIPE)
+ self._processes.push(ssh_process)
+ self._file = ssh_process.stdout
+ elif parseresult.scheme in ("http", "https"):
+ self._file = urllib.urlopen(path)
+ elif os.path.exists(path):
+ self._file = open(path, "rb")
+ else:
+ raise IOError("Path '%s' not recognized" % path)
+
+ self._files.append(result)
+
+ if path.endswith(".gz"):
+ # Built-in gzip module doesn't support streaming (fixed in Python 3.2)
+ gzip_process = subprocess.Popen(["gzip", "-cd"], stdin=self._file, stdout=subprocess.PIPE)
+ self._processes.append(gzip_process)
+ self._file, self._inner_file = gzip_process.stdout, self._file
+
+ def __getattr__(self, name):
+ return getattr(self._file, name)
+
+ def close(self):
+ self._file.close()
+ if self._inner_file:
+ self._inner_file.close()
+ for process in self._processes:
+ process.wait()
def get_stats_files():
config = get_config()
@@ -508,7 +527,7 @@
geov6 = pygeoip.GeoIP(get_config().get("stats", "geoipv6_db"), pygeoip.MEMORY_CACHE)
ignored = set()
- fileobj = open_stats_file(log_file)
+ fileobj = StatsFile(log_file)
try:
data = parse_fileobj(mirror_name, fileobj, geo, geov6, ignored)
finally:
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld