OLD | NEW |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 | 133 |
134 | 134 |
135 Header = _line_type('Header', 'version', '[{.version}]') | 135 Header = _line_type('Header', 'version', '[{.version}]') |
136 EmptyLine = _line_type('EmptyLine', '', '') | 136 EmptyLine = _line_type('EmptyLine', '', '') |
137 Comment = _line_type('Comment', 'text', '! {.text}') | 137 Comment = _line_type('Comment', 'text', '! {.text}') |
138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') |
139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') |
140 Include = _line_type('Include', 'target', '%include {0.target}%') | 140 Include = _line_type('Include', 'target', '%include {0.target}%') |
141 | 141 |
142 | 142 |
143 METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)') | 143 METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)') |
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
147 FILTER_OPTIONS_REGEXP = re.compile( | 147 FILTER_OPTIONS_REGEXP = re.compile( |
148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' |
149 ) | 149 ) |
150 | 150 |
151 | 151 |
152 def _parse_comment(text): | |
153 match = METADATA_REGEXP.match(text) | |
154 if match: | |
155 return Metadata(match.group(1), match.group(2)) | |
156 return Comment(text[1:].strip()) | |
157 | |
158 | |
159 def _parse_header(text): | 152 def _parse_header(text): |
160 match = HEADER_REGEXP.match(text) | 153 match = HEADER_REGEXP.match(text) |
161 if not match: | 154 if not match: |
162 raise ParseError('Malformed header', text) | 155 raise ParseError('Malformed header', text) |
163 return Header(match.group(1)) | 156 return Header(match.group(1)) |
164 | 157 |
165 | 158 |
166 def _parse_instruction(text): | 159 def _parse_instruction(text): |
167 match = INCLUDE_REGEXP.match(text) | 160 match = INCLUDE_REGEXP.match(text) |
168 if not match: | 161 if not match: |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
277 ParseError: If the line can't be parsed. | 270 ParseError: If the line can't be parsed. |
278 """ | 271 """ |
279 if isinstance(line_text, type(b'')): | 272 if isinstance(line_text, type(b'')): |
280 line_text = line_text.decode('utf-8') | 273 line_text = line_text.decode('utf-8') |
281 | 274 |
282 content = line_text.strip() | 275 content = line_text.strip() |
283 | 276 |
284 if content == '': | 277 if content == '': |
285 line = EmptyLine() | 278 line = EmptyLine() |
286 elif content.startswith('!'): | 279 elif content.startswith('!'): |
287 line = _parse_comment(content) | 280 line = Comment(content[1:].lstrip()) |
288 elif content.startswith('%') and content.endswith('%'): | 281 elif content.startswith('%') and content.endswith('%'): |
289 line = _parse_instruction(content) | 282 line = _parse_instruction(content) |
290 elif content.startswith('[') and content.endswith(']'): | 283 elif content.startswith('[') and content.endswith(']'): |
291 line = _parse_header(content) | 284 line = _parse_header(content) |
292 else: | 285 else: |
293 line = parse_filter(content) | 286 line = parse_filter(content) |
294 | 287 |
295 assert line.to_string().replace(' ', '') == content.replace(' ', '') | 288 assert line.to_string().replace(' ', '') == content.replace(' ', '') |
296 return line | 289 return line |
297 | 290 |
(...skipping 12 matching lines...) Expand all Loading... |
310 Parsed lines of the filter list. | 303 Parsed lines of the filter list. |
311 | 304 |
312 Raises | 305 Raises |
313 ------ | 306 ------ |
314 ParseError | 307 ParseError |
315 Thrown during iteration for invalid filter list lines. | 308 Thrown during iteration for invalid filter list lines. |
316 TypeError | 309 TypeError |
317 If `lines` is not iterable. | 310 If `lines` is not iterable. |
318 | 311 |
319 """ | 312 """ |
| 313 metadata_closed = False |
| 314 |
320 for line in lines: | 315 for line in lines: |
321 yield parse_line(line) | 316 result = parse_line(line) |
| 317 |
| 318 if isinstance(result, Comment): |
| 319 match = METADATA_REGEXP.match(result.text) |
| 320 if match: |
| 321 # The regular expression matches as well if we see a |
| 322 # malformed key (e.g. "Last modified"). In that case we |
| 323 # want to keep looking for more metadata, but yield a |
| 324 # Comment instead of a Metadata object. |
| 325 # |
| 326 # Historically, checksums can occur at the bottom of the |
| 327 # filter list. Checksums are no longer used by Adblock Plus, |
| 328 # but in order to strip them (in abp.filters.renderer), |
| 329 # we have to make sure to still parse them regardless of |
| 330 # their position in the filter list. |
| 331 key, value = match.groups() |
| 332 if key and (not metadata_closed or key.lower() == 'checksum'): |
| 333 result = Metadata(key, value) |
| 334 |
| 335 yield result |
| 336 continue |
| 337 |
| 338 if not isinstance(result, Header): |
| 339 metadata_closed = True |
| 340 |
| 341 yield result |
OLD | NEW |