Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: abp/filters/parser.py

Issue 29873561: Issue 6920 - Only parse metadata from the top of the file (Closed)
Patch Set: Fixed typo and moved logic to parse_filterlist() Created Sept. 4, 2018, 3:43 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: abp/filters/parser.py
===================================================================
--- a/abp/filters/parser.py
+++ b/abp/filters/parser.py
@@ -140,7 +140,7 @@
Include = _line_type('Include', 'target', '%include {0.target}%')
-METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)')
+METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)')
INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')
HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)
HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')
@@ -149,13 +149,6 @@
)
-def _parse_comment(text):
- match = METADATA_REGEXP.match(text)
- if match:
- return Metadata(match.group(1), match.group(2))
- return Comment(text[1:].strip())
-
-
def _parse_header(text):
match = HEADER_REGEXP.match(text)
if not match:
@@ -284,7 +277,7 @@
if content == '':
line = EmptyLine()
elif content.startswith('!'):
- line = _parse_comment(content)
+ line = Comment(content[1:].lstrip())
elif content.startswith('%') and content.endswith('%'):
line = _parse_instruction(content)
elif content.startswith('[') and content.endswith(']'):
@@ -317,5 +310,32 @@
If `lines` is not iterable.
"""
+ metadata_closed = False
+
for line in lines:
- yield parse_line(line)
+ result = parse_line(line)
+
+ if isinstance(result, Comment):
+ match = METADATA_REGEXP.match(result.text)
+ if match:
+ # The regular expression also matches when the key is
+ # malformed (e.g. "Last modified"). In that case we want to
+ # keep looking for more metadata, but yield a Comment
+ # instead of a Metadata object.
+ #
+ # Historically, checksums can occur at the bottom of the
+ # filter list. Checksums are no longer used by Adblock Plus,
+ # but in order to strip them (in abp.filters.renderer),
+ # we have to make sure to still parse them regardless of
+ # their position in the filter list.
+ key, value = match.groups()
+ if key and (not metadata_closed or key.lower() == 'checksum'):
+ result = Metadata(key, value)
+
+ yield result
+ continue
+
+ if not isinstance(result, Header):
+ metadata_closed = True
+
+ yield result
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld