Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: abp/filters/parser.py

Issue 29979570: Issue 7205 - Change classes that use ALL_CAPS naming to be CamelCase (Closed) Base URL: https://hg.adblockplus.org/python-abp/
Patch Set: Created Jan. 12, 2019, 1:21 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 """Parser for ABP filterlist format.""" 16 """Parser for ABP filterlist format."""
17 17
18 from __future__ import unicode_literals 18 from __future__ import unicode_literals
19 19
20 import re 20 import re
21 from collections import namedtuple 21 from collections import namedtuple
22 22
23 __all__ = [ 23 __all__ = [
24 'FILTER_ACTION', 24 'FilterAction',
25 'FILTER_OPTION', 25 'FilterOption',
26 'SELECTOR_TYPE', 26 'SelectorType',
27 'ParseError', 27 'ParseError',
28 'parse_filterlist', 28 'parse_filterlist',
29 'parse_line', 29 'parse_line',
30 ] 30 ]
31 31
32 32
33 class ParseError(Exception): 33 class ParseError(Exception):
34 """Exception thrown by the parser when it encounters invalid input. 34 """Exception thrown by the parser when it encounters invalid input.
35 35
36 Parameters 36 Parameters
37 ---------- 37 ----------
38 error : str 38 error : str
39 Description of the error. 39 Description of the error.
40 text : str 40 text : str
41 The source text that caused an error. 41 The source text that caused an error.
42 42
43 """ 43 """
44 44
45 def __init__(self, error, text): 45 def __init__(self, error, text):
46 Exception.__init__(self, '{} in "{}"'.format(error, text)) 46 Exception.__init__(self, '{} in "{}"'.format(error, text))
47 self.text = text 47 self.text = text
48 self.error = error 48 self.error = error
49 49
50 50
51 # Constants related to filters (see https://adblockplus.org/filters). 51 # Constants related to filters (see https://adblockplus.org/filters).
52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). 52 class SelectorType: # flake8: noqa (this is a namespace of constants).
Vasily Kuznetsov 2019/01/12 19:23:38 You should be able to delete the comment that disa[...] (comment truncated in this view)
rhowell 2019/01/12 21:18:34 Done.
53 """Selector type constants.""" 53 """Selector type constants."""
54 54
55 URL_PATTERN = 'url-pattern' # Normal URL patterns. 55 URL_PATTERN = 'url-pattern' # Normal URL patterns.
56 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. 56 URL_REGEXP = 'url-regexp' # Regular expressions for URLs.
57 CSS = 'css' # CSS selectors for hiding filters. 57 CSS = 'css' # CSS selectors for hiding filters.
58 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). 58 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4).
59 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. 59 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax.
60 60
61 61
62 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). 62 class FilterAction: # flake8: noqa (this is a namespace of constants).
63 """Filter action constants.""" 63 """Filter action constants."""
64 64
65 BLOCK = 'block' # Block the request. 65 BLOCK = 'block' # Block the request.
66 ALLOW = 'allow' # Allow the request (whitelist). 66 ALLOW = 'allow' # Allow the request (whitelist).
67 HIDE = 'hide' # Hide selected element(s). 67 HIDE = 'hide' # Hide selected element(s).
68 SHOW = 'show' # Show selected element(s) (whitelist). 68 SHOW = 'show' # Show selected element(s) (whitelist).
69 69
70 70
71 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). 71 class FilterOption: # flake8: noqa (this is a namespace of constants).
72 """Filter option constants.""" 72 """Filter option constants."""
73 73
74 # Resource types. 74 # Resource types.
75 OTHER = 'other' 75 OTHER = 'other'
76 SCRIPT = 'script' 76 SCRIPT = 'script'
77 IMAGE = 'image' 77 IMAGE = 'image'
78 STYLESHEET = 'stylesheet' 78 STYLESHEET = 'stylesheet'
79 OBJECT = 'object' 79 OBJECT = 'object'
80 SUBDOCUMENT = 'subdocument' 80 SUBDOCUMENT = 'subdocument'
81 DOCUMENT = 'document' 81 DOCUMENT = 'document'
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 return option.split('=', 1) 164 return option.split('=', 1)
165 if option.startswith('~'): 165 if option.startswith('~'):
166 return option[1:], False 166 return option[1:], False
167 return option, True 167 return option, True
168 168
169 169
170 def _parse_filter_option(option): 170 def _parse_filter_option(option):
171 name, value = _parse_option(option) 171 name, value = _parse_option(option)
172 172
173 # Handle special cases of multivalued options. 173 # Handle special cases of multivalued options.
174 if name == FILTER_OPTION.DOMAIN: 174 if name == FilterOption.DOMAIN:
175 value = [_parse_option(o) for o in value.split('|')] 175 value = [_parse_option(o) for o in value.split('|')]
176 elif name == FILTER_OPTION.SITEKEY: 176 elif name == FilterOption.SITEKEY:
177 value = value.split('|') 177 value = value.split('|')
178 178
179 return name, value 179 return name, value
180 180
181 181
182 def _parse_filter_options(options): 182 def _parse_filter_options(options):
183 return [_parse_filter_option(o) for o in options.split(',')] 183 return [_parse_filter_option(o) for o in options.split(',')]
184 184
185 185
186 def _parse_blocking_filter(text): 186 def _parse_blocking_filter(text):
187 # Based on RegExpFilter.fromText in lib/filterClasses.js 187 # Based on RegExpFilter.fromText in lib/filterClasses.js
188 # in https://hg.adblockplus.org/adblockpluscore. 188 # in https://hg.adblockplus.org/adblockpluscore.
189 action = FILTER_ACTION.BLOCK 189 action = FilterAction.BLOCK
190 options = [] 190 options = []
191 selector = text 191 selector = text
192 192
193 if selector.startswith('@@'): 193 if selector.startswith('@@'):
194 action = FILTER_ACTION.ALLOW 194 action = FilterAction.ALLOW
195 selector = selector[2:] 195 selector = selector[2:]
196 196
197 if '$' in selector: 197 if '$' in selector:
198 opt_match = FILTER_OPTIONS_REGEXP.search(selector) 198 opt_match = FILTER_OPTIONS_REGEXP.search(selector)
199 if opt_match: 199 if opt_match:
200 selector = selector[:opt_match.start(0)] 200 selector = selector[:opt_match.start(0)]
201 options = _parse_filter_options(opt_match.group(1)) 201 options = _parse_filter_options(opt_match.group(1))
202 202
203 if (len(selector) > 1 203 if (len(selector) > 1
204 and selector.startswith('/') and selector.endswith('/')): 204 and selector.startswith('/') and selector.endswith('/')):
205 selector = {'type': SELECTOR_TYPE.URL_REGEXP, 'value': selector[1:-1]} 205 selector = {'type': SelectorType.URL_REGEXP, 'value': selector[1:-1]}
206 else: 206 else:
207 selector = {'type': SELECTOR_TYPE.URL_PATTERN, 'value': selector} 207 selector = {'type': SelectorType.URL_PATTERN, 'value': selector}
208 208
209 return Filter(text, selector, action, options) 209 return Filter(text, selector, action, options)
210 210
211 211
212 def _parse_hiding_filter(text, domain, type_flag, selector_value): 212 def _parse_hiding_filter(text, domain, type_flag, selector_value):
213 selector = {'type': SELECTOR_TYPE.CSS, 'value': selector_value} 213 selector = {'type': SelectorType.CSS, 'value': selector_value}
214 action = FILTER_ACTION.HIDE 214 action = FilterAction.HIDE
215 options = [] 215 options = []
216 216
217 if type_flag == '@': 217 if type_flag == '@':
218 action = FILTER_ACTION.SHOW 218 action = FilterAction.SHOW
219 elif type_flag == '?': 219 elif type_flag == '?':
220 selector['type'] = SELECTOR_TYPE.XCSS 220 selector['type'] = SelectorType.XCSS
221 221
222 if domain: 222 if domain:
223 domains = [_parse_option(d) for d in domain.split(',')] 223 domains = [_parse_option(d) for d in domain.split(',')]
224 options.append((FILTER_OPTION.DOMAIN, domains)) 224 options.append((FilterOption.DOMAIN, domains))
225 225
226 return Filter(text, selector, action, options) 226 return Filter(text, selector, action, options)
227 227
228 228
229 def parse_filter(text): 229 def parse_filter(text):
230 """Parse one filter. 230 """Parse one filter.
231 231
232 Parameters 232 Parameters
233 ---------- 233 ----------
234 text : str 234 text : str
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
338 for line in lines: 338 for line in lines:
339 parsed_line = parse_line(line, position) 339 parsed_line = parse_line(line, position)
340 yield parsed_line 340 yield parsed_line
341 341
342 if position != 'body' and parsed_line.type in {'header', 'metadata'}: 342 if position != 'body' and parsed_line.type in {'header', 'metadata'}:
343 # Continue parsing metadata until it's over... 343 # Continue parsing metadata until it's over...
344 position = 'metadata' 344 position = 'metadata'
345 else: 345 else:
346 # ...then switch to parsing the body. 346 # ...then switch to parsing the body.
347 position = 'body' 347 position = 'body'
OLD | NEW
« no previous file with comments | « abp/filters/__init__.py ('k') | tests/test_parser.py » ('j') | tests/test_parser.py » ('J')

Powered by Google App Engine
This is Rietveld