flake8-abp/flake8_abp.py - Issue 29342824: Issue 4044 - Added handling for __future__ unicode_literals import to check_quotes()

Delta Between Two Patch Sets: flake8-abp/flake8_abp.py

Issue 29342824: Issue 4044 - Added handling for __future__ unicode_literals import to check_quotes() (Closed)

Left Patch Set: handling for new warning A112 added Created May 24, 2016, 9:58 p.m.

Right Patch Set: removed redundant comment Created June 2, 2016, 5:45 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 # This file is part of Adblock Plus <https://adblockplus.org/>,	1 # This file is part of Adblock Plus <https://adblockplus.org/>,

2 # Copyright (C) 2006-2016 Eyeo GmbH	2 # Copyright (C) 2006-2016 Eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

(...skipping 26 matching lines...) Expand all Loading...
37 're.match': 're.search',	37 're.match': 're.search',

38 'codecs.open': 'io.open',	38 'codecs.open': 'io.open',

39 }	39 }

40	40

41 ESSENTIAL_BUILTINS = set(dir(builtins)) - {'apply', 'buffer', 'coerce',	41 ESSENTIAL_BUILTINS = set(dir(builtins)) - {'apply', 'buffer', 'coerce',

42 'intern', 'file'}	42 'intern', 'file'}

43	43

44 LEAVE_BLOCK = (ast.Return, ast.Raise, ast.Continue, ast.Break)	44 LEAVE_BLOCK = (ast.Return, ast.Raise, ast.Continue, ast.Break)

45 VOLATILE = object()	45 VOLATILE = object()

46	46

47 IS_UNICODE_LITERALS = False
Sebastian Noack 2016/05/25 08:31:36 Uppercase notation is only for variables that are considered constant (i.e. never change).
48

49	47

50 def evaluate(node):	48 def evaluate(node):

51 try:	49 try:

52 return eval(compile(ast.Expression(node), '', 'eval'), {})	50 return eval(compile(ast.Expression(node), '', 'eval'), {})

53 except Exception:	51 except Exception:

54 return VOLATILE	52 return VOLATILE

55	53

56	54

57 def is_const(node):	55 def is_const(node):

58 return evaluate(node) is not VOLATILE	56 return evaluate(node) is not VOLATILE

(...skipping 220 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
279 substitute = DISCOURAGED_APIS.get(name)	277 substitute = DISCOURAGED_APIS.get(name)

280 if substitute:	278 if substitute:

281 self.errors.append((node, 'A301 use {}() instead of '	279 self.errors.append((node, 'A301 use {}() instead of '

282 '{}()'.format(substitute, name)))	280 '{}()'.format(substitute, name)))

283	281

284 def visit_Call(self, node):	282 def visit_Call(self, node):

285 func = get_identifier(node.func)	283 func = get_identifier(node.func)

286 arg = next(iter(node.args), None)	284 arg = next(iter(node.args), None)

287 redundant_literal = False	285 redundant_literal = False

288	286

289 if isinstance(arg, ast.Lambda) and func in {'map', 'filter',	287 if isinstance(arg, ast.Lambda):

290 'imap', 'ifilter',	288 if len(node.args) == 2 and func in {'map', 'filter',

291 'itertools.imap',	289 'imap', 'ifilter',

292 'itertools.ifilter'}:	290 'itertools.imap',

293 self.errors.append((node, 'A104 use a comprehension '	291 'itertools.ifilter'}:

294 'instead of calling {}() with '	292 self.errors.append((node, 'A104 use a comprehension '

295 'lambda function'.format(func)))	293 'instead of calling {}() with '

	294 'lambda function'.format(func)))

296 elif isinstance(arg, (ast.List, ast.Tuple)):	295 elif isinstance(arg, (ast.List, ast.Tuple)):

297 if func == 'dict':	296 if func == 'dict':

298 redundant_literal = all(isinstance(elt, (ast.Tuple, ast.List))	297 redundant_literal = all(isinstance(elt, (ast.Tuple, ast.List))

299 for elt in arg.elts)	298 for elt in arg.elts)

300 else:	299 else:

301 redundant_literal = func in {'list', 'set', 'tuple'}	300 redundant_literal = func in {'list', 'set', 'tuple'}

302 elif isinstance(arg, (ast.ListComp, ast.GeneratorExp)):	301 elif isinstance(arg, (ast.ListComp, ast.GeneratorExp)):

303 if func == 'dict':	302 if func == 'dict':

304 redundant_literal = isinstance(arg.elt, (ast.Tuple, ast.List))	303 redundant_literal = isinstance(arg.elt, (ast.Tuple, ast.List))

305 else:	304 else:

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
370	369

371	370

372 def check_non_default_encoding(physical_line, line_number):	371 def check_non_default_encoding(physical_line, line_number):

373 if line_number <= 2 and re.search(r'^\s#.coding[:=]', physical_line):	372 if line_number <= 2 and re.search(r'^\s#.coding[:=]', physical_line):

374 return (0, 'A303 non-default file encoding')	373 return (0, 'A303 non-default file encoding')

375	374

376 check_non_default_encoding.name = 'abp-non-default-encoding'	375 check_non_default_encoding.name = 'abp-non-default-encoding'

377 check_non_default_encoding.version = __version__	376 check_non_default_encoding.version = __version__

378	377

379	378

380 def check_quotes(logical_line, tokens, previous_logical):	379 def check_quotes(logical_line, tokens, previous_logical, checker_state):

381 first_token = True	380 first_token = True

382 global IS_UNICODE_LITERALS	381

383

384 # --- check if this is beginning of file
Sebastian Noack 2016/05/25 08:31:36 We generally don't use --- in comments. So please remove it for consistency.
Vasily Kuznetsov 2016/05/25 13:55:31 Also pep8 recommends capitalising the first letter in a sentence (see https://www.python.org/dev/peps/pep-0008/#comments).
385 if tokens[0][3][0] == 1:

386 IS_UNICODE_LITERALS = False

387

388 # --- check if in unicode_literals mode

389 token_strings = [t[1] for t in tokens]	382 token_strings = [t[1] for t in tokens]
Sebastian Noack 2016/05/25 08:31:35 I wonder whether we should also check for the token type to be NAME.
Vasily Kuznetsov 2016/05/25 13:55:32 Do we really need to? Can you imagine a line that starts with ['from', '__future__', 'import'] and has 'unicode_literals' in the tail and they are not names?
Sebastian Noack 2016/05/25 14:45:29 I feel that checking for the token type is more correct (fewer code assumptions). But I guess I don't insist, if it makes the check so much simpler.
Vasily Kuznetsov 2016/05/25 16:25:56 I see, it does feel a bit sloppy to just check the content of the tokens and not the type. But after more thinking about it, it seems reasonable to me in this case. What we're assuming is some facts about the syntax of Python, and it seems like a pretty safe assumption. So I'd probably prefer keeping the check as it is, since it's shorter and easier to read this way.
390 if token_strings[:3] == ['from', '__future__', 'import']:	383 future_import = token_strings[:3] == ['from', '__future__', 'import']

391 IS_UNICODE_LITERALS = 'unicode_literals' in token_strings	384
Vasily Kuznetsov 2016/05/25 13:55:32 Won't this break if it gets a piece of code like this?
    from __future__ import unicode_literals
    from __future__ import print_function
We probably only need to turn on the unicode literals flag when we see that 'unicode_literals' is imported and we never want to turn it off (save for the first line of the file).
	385 if future_import and 'unicode_literals' in token_strings:

	386 checker_state['has_unicode_literals'] = True

392	387

393 for kind, token, start, end, _ in tokens:	388 for kind, token, start, end, _ in tokens:

394 if kind == tokenize.INDENT or kind == tokenize.DEDENT:	389 if kind == tokenize.INDENT or kind == tokenize.DEDENT:

395 continue	390 continue

396	391

397 if kind == tokenize.STRING:	392 if kind == tokenize.STRING:

398 match = re.search(r'^(u)?(b)?(r)?((""")?.*)$',	393 match = re.search(r'^([rub])([\'"]{1,3})(.)\2$',

399 token, re.IGNORECASE \| re.DOTALL)	394 token, re.IGNORECASE \| re.DOTALL)

400 (is_unicode, is_bytes, is_raw,	395 prefixes, quote, text = match.groups()

401 literal, has_doc_quotes) = match.groups()	396 prefixes = prefixes.lower()

	397

	398 if 'u' in prefixes:

	399 yield (start, 'A112 use "from __future__ import '

	400 'unicode_literals" instead of '

	401 'prefixing literals with "u"')

402	402

403 if first_token and re.search(r'^(?:(?:def\|class)\s\|$)',	403 if first_token and re.search(r'^(?:(?:def\|class)\s\|$)',

404 previous_logical):	404 previous_logical):

405 if not has_doc_quotes:	405 if quote != '"""':

406 yield (start, 'A109 use triple double '	406 yield (start, 'A109 use triple double '

407 'quotes for docstrings')	407 'quotes for docstrings')

408 elif is_unicode or is_bytes or is_raw:	408 elif start[0] != end[0]:

409 yield (start, "A109 don't use u'', b'' "	409 pass

410 "or r'' for doc strings")	410 elif 'r' in prefixes:

411 elif start[0] == end[0]:	411 if quote != "'" and not (quote == '"' and "'" in text):

412 if is_raw:	412 yield (start, 'A110 use single quotes for raw string')

413 literal = re.sub(r'\\(?!{})'.format(literal[0]),	413 else:

414 '\\\\\\\\', literal)	414 prefix = ''

415 if sys.version_info[0] >= 3:	415 if sys.version_info[0] >= 3:

416 if is_bytes:	416 if 'b' in prefixes:

417 literal = 'b' + literal	417 prefix = 'b'

418 elif is_unicode and not IS_UNICODE_LITERALS:	418 else:
Sebastian Noack 2016/05/25 08:31:35 It seems the check for IS_UNICODE_LITERALS is incorrect. Also the check for is_unicode should probably go even above the check for Python 3. After all, the message indicates, and we agreed on, to never use strings like u''.
Vasily Kuznetsov 2016/05/25 13:55:32 I second that it should come before the Python 3 check. Also, even if we yield A112, we should still yield the other errors, so if we see something like u"foo", we get both A110 and A112.
Sebastian Noack 2016/05/25 14:45:29 Just an idea, how about moving it even above the check for docstrings and merging A112 with A109? Docstrings using r or b are probably a case we can give up on. I never saw them in real code, and since Python 3, strings prefixed with b are not even recognized as docstrings.
Vasily Kuznetsov 2016/05/25 16:25:56 Moving the check to above the check for docstrings seems like a good idea. As for merging, I suppose you mean replacing lines 408-410 with this new check and raising A112 in that case. This sounds good too.
419 yield(start, 'A112 use "from __future__ import"'	419 u_literals = checker_state.get('has_unicode_literals')
Sebastian Noack 2016/05/25 08:31:35 There should be a space after "yield". Otherwise it looks like yield is a function that we call. However, it's a statement and a tuple.
Sebastian Noack 2016/05/25 08:31:35 Please document A112 in the README.
420 'unicode_literals instead of prefixing'	420 if 'u' in prefixes or u_literals and 'b' not in prefixes:
Sebastian Noack 2016/05/25 08:31:35 Please indent long error messages like it's done in the rest of this script, i.e. align the starting quotes. Also note that you forgot to add spaces at the end. So this message will be printed as:
    A112 use "from __future__ import"unicode_literals instead of prefixingliterals with "u"
421 'literals with "u"')	421 prefix = 'u'

422 elif not is_bytes:	422

423 literal = 'u' + literal	423 literal = '{0}{1}{2}{1}'.format(prefix, quote, text)

424

425 if ascii(eval(literal)) != literal:	424 if ascii(eval(literal)) != literal:

426 yield (start, "A110 string literal doesn't match "	425 yield (start, "A110 string literal doesn't match "

427 '{}()'.format(ascii.__name__))	426 '{}()'.format(ascii.__name__))

428	427

429 first_token = False	428 first_token = False

430	429

431 check_quotes.name = 'abp-quotes'	430 check_quotes.name = 'abp-quotes'

432 check_quotes.version = __version__	431 check_quotes.version = __version__

433	432

434	433

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
477 if tokens[i + 1][:2] != (tokenize.OP, ':'):	476 if tokens[i + 1][:2] != (tokenize.OP, ':'):

478 break	477 break

479	478

480 return [(pos, 'A111 redundant parenthesis for {} '	479 return [(pos, 'A111 redundant parenthesis for {} '

481 'statement'.format(statement))]	480 'statement'.format(statement))]

482	481

483 return []	482 return []

484	483

485 check_redundant_parenthesis.name = 'abp-redundant-parenthesis'	484 check_redundant_parenthesis.name = 'abp-redundant-parenthesis'

486 check_redundant_parenthesis.version = __version__	485 check_redundant_parenthesis.version = __version__

LEFT	RIGHT