LEFT | RIGHT |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 | 133 |
134 | 134 |
135 Header = _line_type('Header', 'version', '[{.version}]') | 135 Header = _line_type('Header', 'version', '[{.version}]') |
136 EmptyLine = _line_type('EmptyLine', '', '') | 136 EmptyLine = _line_type('EmptyLine', '', '') |
137 Comment = _line_type('Comment', 'text', '! {.text}') | 137 Comment = _line_type('Comment', 'text', '! {.text}') |
138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') |
139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') |
140 Include = _line_type('Include', 'target', '%include {0.target}%') | 140 Include = _line_type('Include', 'target', '%include {0.target}%') |
141 | 141 |
142 | 142 |
143 METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)') | 143 METADATA_REGEXP = re.compile(r'([\w-]+)\s*:\s*(.*)') |
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
147 FILTER_OPTIONS_REGEXP = re.compile( | 147 FILTER_OPTIONS_REGEXP = re.compile( |
148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' |
149 ) | 149 ) |
150 | 150 |
151 | 151 |
152 def _parse_header(text): | 152 def _parse_header(text): |
153 match = HEADER_REGEXP.match(text) | 153 match = HEADER_REGEXP.match(text) |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
246 """ | 246 """ |
247 if '#' in text: | 247 if '#' in text: |
248 match = HIDING_FILTER_REGEXP.search(text) | 248 match = HIDING_FILTER_REGEXP.search(text) |
249 if match: | 249 if match: |
250 return _parse_hiding_filter(text, *match.groups()) | 250 return _parse_hiding_filter(text, *match.groups()) |
251 return _parse_blocking_filter(text) | 251 return _parse_blocking_filter(text) |
252 | 252 |
253 | 253 |
254 def parse_line(line_text): | 254 def parse_line(line_text): |
255 """Parse one line of a filter list. | 255 """Parse one line of a filter list. |
| 256 |
| 257 Note that parse_line() doesn't handle special comments and hence never |
| 258 returns a Metadata() object: Adblock Plus only considers metadata when |
| 259 parsing the whole filter list, and only if it appears at the top of it. |
256 | 260 |
257 Parameters | 261 Parameters |
258 ---------- | 262 ---------- |
259 line_text : str | 263 line_text : str |
260 Line of a filter list. | 264 Line of a filter list. |
261 | 265 |
262 Returns | 266 Returns |
263 ------- | 267 ------- |
264 namedtuple | 268 namedtuple |
265 Parsed line (see `_line_type`). | 269 Parsed line (see `_line_type`). |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
311 | 315 |
312 """ | 316 """ |
313 metadata_closed = False | 317 metadata_closed = False |
314 | 318 |
315 for line in lines: | 319 for line in lines: |
316 result = parse_line(line) | 320 result = parse_line(line) |
317 | 321 |
318 if isinstance(result, Comment): | 322 if isinstance(result, Comment): |
319 match = METADATA_REGEXP.match(result.text) | 323 match = METADATA_REGEXP.match(result.text) |
320 if match: | 324 if match: |
321 # The regular expression matches as well if we see a | 325 key, value = match.groups() |
322 # malformed key (e.g. "Last modified"). In that case we | 326 |
323 # want to keep looking for more metadata, but yield a | |
324 # Comment instead of a Metadata object. | |
325 # | |
326 # Historically, checksums can occur at the bottom of the | 327 # Historically, checksums can occur at the bottom of the |
327 # filter list. Checksums are no longer used by Adblock Plus, | 328 # filter list. Checksums are no longer used by Adblock Plus, |
328 # but in order to strip them (in abp.filters.renderer), | 329 # but in order to strip them (in abp.filters.renderer), |
329 # we have to make sure to still parse them regardless of | 330 # we have to make sure to still parse them regardless of |
330 # their position in the filter list. | 331 # their position in the filter list. |
331 key, value = match.groups() | 332 if not metadata_closed or key.lower() == 'checksum': |
332 if key and (not metadata_closed or key.lower() == 'checksum'): | 333 yield Metadata(key, value) |
333 result = Metadata(key, value) | 334 continue |
334 | 335 |
335 yield result | 336 if not result.text: |
336 continue | 337 metadata_closed = True |
337 | 338 elif not isinstance(result, Header): |
338 if not isinstance(result, Header): | |
339 metadata_closed = True | 339 metadata_closed = True |
340 | 340 |
341 yield result | 341 yield result |
LEFT | RIGHT |