Index: sitescripts/extensions/bin/updateExternalFilterlists.py |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/sitescripts/extensions/bin/updateExternalFilterlists.py |
@@ -0,0 +1,100 @@ |
+# coding: utf-8 |
Wladimir Palant
2013/06/24 12:35:10
Probably better to have that script under manageme
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ |
+# This file is part of the Adblock Plus web scripts, |
+# Copyright (C) 2006-2013 Eyeo GmbH |
+# |
+# Adblock Plus is free software: you can redistribute it and/or modify |
+# it under the terms of the GNU General Public License version 3 as |
+# published by the Free Software Foundation. |
+# |
+# Adblock Plus is distributed in the hope that it will be useful, |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+# GNU General Public License for more details. |
+# |
+# You should have received a copy of the GNU General Public License |
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ |
+import os, subprocess, codecs, urllib, re |
+from sitescripts.utils import get_config, setupStderr |
+from tempfile import mkdtemp |
+from shutil import rmtree |
+ |
+# Split a "repoName" or "repoName:target" path into the repository location |
+# (looked up in the repositories mapping) and the target directory expressed |
+# relative to that location. Returns the tuple (repoPath, relTargetDir); |
+# relTargetDir stays '' when the path has no ":target" part. |
+# Raises Exception when the repository name is not a key of repositories. |
+# NOTE(review): indentation was lost in this listing, so statement nesting is |
+# inferred from the code rather than shown - confirm against the real file. |
+def resolveRepositoryPath(repositories, path): |
+ repoName = path |
+ if path.find(':') >= 0: |
+ # Only the first colon separates the repository name from the target. |
+ repoName, target = path.split(':', 1) |
+ # Presumably at function level, i.e. checked for every path - TODO confirm. |
+ if not repoName in repositories: |
+ raise Exception('Cannot resolve path to repository "%s", this repository is unknown' % repoName) |
+ |
+ repoPath = repositories[repoName] |
+ relTargetDir = '' |
+ # repoName differs from path only when a ":target" part was split off above, |
+ # which is also the only case in which target is defined. |
+ if not repoName == path: |
+ targetDir = os.path.join(repoPath, target) |
+ relTargetDir = os.path.relpath(targetDir, repoPath) |
+ |
+ return repoPath, relTargetDir |
+ |
+ |
+# For every entry returned by readSettings(): clone the entry's target |
+# repository into a temporary directory with Mercurial, download the entry's |
+# 'source' URL into "<name>.txt" below the target directory, then commit as |
+# user "hgbot" and push. The temporary directory is removed in the finally |
+# clause. |
+# repositories: mapping of repository name to repository location, as |
+# expected by resolveRepositoryPath(). |
+# NOTE(review): indentation was lost in this listing, so statement nesting is |
+# inferred from the code rather than shown - confirm against the real file. |
+def updateExternalFilterlists(repositories): |
+ settings = readSettings() |
+ for name, setting in settings.iteritems(): |
+ tempdir = mkdtemp(prefix='adblockplus') |
Wladimir Palant
2013/06/24 12:35:10
A more generic specific prefix might be better her
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ # NOTE(review): this runs after mkdtemp() but before the try statement, so |
+ # an exception raised here would leave tempdir on disk. |
+ repoPath, targetDir = resolveRepositoryPath(repositories, setting['target']) |
+ try: |
Wladimir Palant
2013/06/24 12:35:10
This try statement should immediately follow mkdte
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ # Clone without a working copy (-U), then update to revision "default". |
+ # stdout of every hg call is piped and the result of communicate() is |
+ # dropped; exit codes are not checked. |
+ subprocess.Popen(['hg', 'clone', '-U', repoPath, tempdir], stdout=subprocess.PIPE).communicate() |
+ subprocess.Popen(['hg', 'up', '-R', tempdir, '-r', 'default'], stdout=subprocess.PIPE).communicate() |
+ |
+ path = os.path.join(tempdir, targetDir) |
+ if not os.path.exists(path): |
+ os.makedirs(path) |
+ |
+ filename = name + '.txt' |
Wladimir Palant
2013/06/24 12:35:10
The settings should really specify the full file n
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ path = os.path.join(path, filename) |
+ # Remember whether the file was already present in the clone; this selects |
+ # the commit message (and the hg add call) further down. |
+ exists = os.path.exists(path) |
+ # NOTE(review): the handle is only closed on the success path below; an |
+ # exception from urlopen() or the loop would skip file.close(). |
+ file = codecs.open(path, 'wb', encoding='utf-8') |
+ data = urllib.urlopen(setting['source']).read() |
+ # Empty lines are skipped; every other line is stripped, decoded as |
+ # ISO-8859-1 and written through the UTF-8 codecs writer using the |
+ # Python 2 "print >>file" form. |
+ for line in str(data).splitlines(): |
Wladimir Palant
2013/06/24 12:35:10
Why split lines and write/decode each line separat
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ if not line: |
+ continue |
+ print >>file, line.strip().decode('iso-8859-1') |
Wladimir Palant
2013/06/24 12:35:10
The source file encoding should always be UTF-8, d
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ file.close(); |
Wladimir Palant
2013/06/24 12:35:10
No semicolon please.
Thomas Greiner
2013/06/24 14:24:45
Done. :)
|
+ |
+ message = 'Updated copy of external filterlist %s' |
+ if not exists: |
+ # -R points hg at the temporary clone; no file name is passed to hg add. |
+ subprocess.Popen(['hg', 'add', '-R', tempdir], stdout=subprocess.PIPE).communicate() |
Wladimir Palant
2013/06/24 12:35:10
No need to add the file explicitly, use hg commit
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ message = 'Added copy of external filterlist %s' |
+ # Presumably commit and push run for both new and existing files (i.e. are |
+ # not nested under "if not exists") - TODO confirm. |
+ subprocess.Popen(['hg', 'commit', '-R', tempdir, '-u', 'hgbot', '-m', message % filename], stdout=subprocess.PIPE).communicate() |
+ subprocess.Popen(['hg', 'push', '-R', tempdir], stdout=subprocess.PIPE).communicate() |
Wladimir Palant
2013/06/24 12:35:10
Please use -q command line parameter for all Mercu
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ finally: |
+ rmtree(tempdir) |
+ |
+# Build a dict that maps each filter list name to a dict with the keys |
+# 'source' and 'target', filled from options named "<name>_source" and |
+# "<name>_target" in the 'externalFilterlists' config section. Options without |
+# an underscore, or with any other suffix, are skipped. A key for which no |
+# option was found keeps the value None. |
+# NOTE(review): indentation was lost in this listing, so statement nesting is |
+# inferred from the code rather than shown - confirm against the real file. |
+def readSettings(): |
+ result = {} |
+ for option in get_config().options('externalFilterlists'): |
Wladimir Palant
2013/06/24 12:35:10
Better:
for option, value in get_config().items('
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ if option.find('_') < 0: |
+ continue |
+ # NOTE(review): with maxsplit 2, rsplit() returns three parts for an option |
+ # name containing two or more underscores, and the two-name unpacking would |
+ # then raise ValueError; maxsplit 1 looks intended - confirm. |
+ name, setting = option.rsplit('_', 2) |
+ if not setting in ('source', 'target'): |
+ continue |
+ |
+ if not name in result: |
+ result[name] = { |
+ 'source': None, |
+ 'target': None |
+ } |
+ # NOTE(review): the entry starts out as None and is only assigned below, so |
+ # this list check appears never to be true for a first assignment - the |
+ # split(' ') branch looks unreachable; confirm. |
+ if isinstance(result[name][setting], list): |
Wladimir Palant
2013/06/24 12:35:10
What is that check for? I think we want exactly on
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ result[name][setting] = get_config().get('externalFilterlists', option).split(' ') |
+ else: |
+ result[name][setting] = get_config().get('externalFilterlists', option) |
+ return result |
+ |
+# Script entry point: collect the "<name>_repository" options of the |
+# 'subscriptionDownloads' config section into a name -> value mapping and pass |
+# that mapping to updateExternalFilterlists(). |
+if __name__ == '__main__': |
+ setupStderr() |
+ |
+ repositories = {} |
+ for option, value in get_config().items('subscriptionDownloads'): |
+ if option.endswith('_repository'): |
+ # The key is the option name with its "_repository" suffix removed. |
+ repositories[re.sub(r'_repository$', '', option)] = value |
Wladimir Palant
2013/06/24 12:35:10
This script shouldn't really combine settings from
Thomas Greiner
2013/06/24 14:24:45
Done.
|
+ |
+ updateExternalFilterlists(repositories) |