Index: abp/filters/parser.py |
=================================================================== |
--- a/abp/filters/parser.py |
+++ b/abp/filters/parser.py |
@@ -140,7 +140,7 @@ |
Include = _line_type('Include', 'target', '%include {0.target}%') |
-METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)') |
+METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)') |
INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
@@ -149,13 +149,6 @@ |
) |
-def _parse_comment(text): |
- match = METADATA_REGEXP.match(text) |
- if match: |
- return Metadata(match.group(1), match.group(2)) |
- return Comment(text[1:].strip()) |
- |
- |
def _parse_header(text): |
match = HEADER_REGEXP.match(text) |
if not match: |
@@ -284,7 +277,7 @@ |
if content == '': |
line = EmptyLine() |
elif content.startswith('!'): |
- line = _parse_comment(content) |
+ line = Comment(content[1:].lstrip()) |
elif content.startswith('%') and content.endswith('%'): |
line = _parse_instruction(content) |
elif content.startswith('[') and content.endswith(']'): |
@@ -317,5 +310,32 @@ |
If `lines` is not iterable. |
""" |
+ metadata_closed = False |
+ |
for line in lines: |
- yield parse_line(line) |
+ result = parse_line(line) |
+ |
+ if isinstance(result, Comment): |
+ match = METADATA_REGEXP.match(result.text) |
+ if match: |
+ # The regular expression also matches when the key is |
+ # malformed (e.g. "Last modified"); `key` is then None. In that |
+ # case we keep looking for more metadata, but yield a Comment |
+ # instead of a Metadata object. |
+ # |
+ # Historically, checksums can occur at the bottom of the |
+ # filter list. Checksums are no longer used by Adblock Plus, |
+ # but in order to strip them (in abp.filters.renderer), |
+ # we have to make sure to still parse them regardless of |
+ # their position in the filter list. |
+ key, value = match.groups() |
+ if key and (not metadata_closed or key.lower() == 'checksum'): |
+ result = Metadata(key, value) |
+ |
+ yield result |
+ continue |
+ |
+ if not isinstance(result, Header): |
+ metadata_closed = True |
+ |
+ yield result |