Index: sitescripts/stats/bin/logprocessor.py |
=================================================================== |
--- a/sitescripts/stats/bin/logprocessor.py |
+++ b/sitescripts/stats/bin/logprocessor.py |
@@ -42,29 +42,48 @@ |
log_regexp = None |
gecko_apps = None |
-def open_stats_file(path): |
- parseresult = urlparse.urlparse(path) |
- if parseresult.scheme == "ssh" and parseresult.username and parseresult.hostname and parseresult.path: |
- command = [ |
- "ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k", |
- "-l", parseresult.username, |
- parseresult.hostname, |
- parseresult.path.lstrip("/") |
- ] |
- if parseresult.port: |
- command[1:1] = ["-P", str(parseresult.port)] |
- result = subprocess.Popen(command, stdout=subprocess.PIPE).stdout |
- elif parseresult.scheme in ("http", "https"): |
- result = urllib.urlopen(path) |
- elif os.path.exists(path): |
- result = open(path, "rb") |
- else: |
- raise IOError("Path '%s' not recognized" % path) |
+class StatsFile: |
+ def __init__(self, path): |
+ self._inner_file = None |
+ self._processes = [] |
- if path.endswith(".gz"): |
- # Built-in gzip module doesn't support streaming (fixed in Python 3.2) |
- result = subprocess.Popen(["gzip", "-cd"], stdin=result, stdout=subprocess.PIPE).stdout |
- return result |
+ parseresult = urlparse.urlparse(path) |
+ if parseresult.scheme == "ssh" and parseresult.username and parseresult.hostname and parseresult.path: |
+ command = [ |
+ "ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k", |
+ "-l", parseresult.username, |
+ parseresult.hostname, |
+ parseresult.path.lstrip("/") |
+ ] |
+ if parseresult.port: |
+ command[1:1] = ["-P", str(parseresult.port)] |
+ ssh_process = subprocess.Popen(command, stdout=subprocess.PIPE) |
+ self._processes.push(ssh_process) |
+ self._file = ssh_process.stdout |
+ elif parseresult.scheme in ("http", "https"): |
+ self._file = urllib.urlopen(path) |
+ elif os.path.exists(path): |
+ self._file = open(path, "rb") |
+ else: |
+ raise IOError("Path '%s' not recognized" % path) |
+ |
+ self._files.append(result) |
+ |
+ if path.endswith(".gz"): |
+ # Built-in gzip module doesn't support streaming (fixed in Python 3.2) |
+ gzip_process = subprocess.Popen(["gzip", "-cd"], stdin=self._file, stdout=subprocess.PIPE) |
+ self._processes.append(gzip_process) |
+ self._file, self._inner_file = gzip_process.stdout, self._file |
+ |
+ def __getattr__(self, name): |
+ return getattr(self._file, name) |
+ |
+ def close(self): |
+ self._file.close() |
+ if self._inner_file: |
+ self._inner_file.close() |
+ for process in self._processes: |
+ process.wait() |
def get_stats_files(): |
config = get_config() |
@@ -508,7 +527,7 @@ |
geov6 = pygeoip.GeoIP(get_config().get("stats", "geoipv6_db"), pygeoip.MEMORY_CACHE) |
ignored = set() |
- fileobj = open_stats_file(log_file) |
+ fileobj = StatsFile(log_file) |
try: |
data = parse_fileobj(mirror_name, fileobj, geo, geov6, ignored) |
finally: |