gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / pylint / checkers / strings.py @ 745
History | View | Annotate | Download (26.5 KB)
1 |
# Copyright (c) 2009-2010 Arista Networks, Inc. - James Lingard
|
---|---|
2 |
# Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
|
3 |
# Copyright 2012 Google Inc.
|
4 |
#
|
5 |
# http://www.logilab.fr/ -- mailto:contact@logilab.fr
|
6 |
# This program is free software; you can redistribute it and/or modify it under
|
7 |
# the terms of the GNU General Public License as published by the Free Software
|
8 |
# Foundation; either version 2 of the License, or (at your option) any later
|
9 |
# version.
|
10 |
#
|
11 |
# This program is distributed in the hope that it will be useful, but WITHOUT
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
14 |
#
|
15 |
# You should have received a copy of the GNU General Public License along with
|
16 |
# this program; if not, write to the Free Software Foundation, Inc.,
|
17 |
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
18 |
"""Checker for string formatting operations.
|
19 |
"""
|
20 |
|
21 |
import sys |
22 |
import tokenize |
23 |
import string |
24 |
import numbers |
25 |
|
26 |
import six |
27 |
|
28 |
import astroid |
29 |
from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker |
30 |
from pylint.checkers import BaseChecker, BaseTokenChecker |
31 |
from pylint.checkers import utils |
32 |
from pylint.checkers.utils import check_messages |
33 |
|
34 |
|
35 |
# Interpreter feature switches used throughout this module.
_PY3K = sys.version_info[:2] >= (3, 0)
_PY27 = sys.version_info[:2] == (2, 7)

# Message table for the string format checker.  Each entry maps a message id
# to (message template, symbolic name, description[, options]).
# Descriptions use implicit string concatenation rather than backslash
# continuations so that source indentation never leaks into the help text.
MSGS = {
    'E1300': ("Unsupported format character %r (%#02x) at index %d",
              "bad-format-character",
              "Used when an unsupported format character is used in a format "
              "string."),
    'E1301': ("Format string ends in middle of conversion specifier",
              "truncated-format-string",
              "Used when a format string terminates before the end of a "
              "conversion specifier."),
    'E1302': ("Mixing named and unnamed conversion specifiers in format string",
              "mixed-format-string",
              "Used when a format string contains both named (e.g. '%(foo)d') "
              "and unnamed (e.g. '%d') conversion specifiers. This is also "
              "used when a named conversion specifier contains * for the "
              "minimum field width and/or precision."),
    'E1303': ("Expected mapping for format string, not %s",
              "format-needs-mapping",
              "Used when a format string that uses named conversion specifiers "
              "is used with an argument that is not a mapping."),
    'W1300': ("Format string dictionary key should be a string, not %s",
              "bad-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary whose keys are not all strings."),
    'W1301': ("Unused key %r in format string dictionary",
              "unused-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary that contains keys not required by the "
              "format string."),
    'E1304': ("Missing key %r in format string dictionary",
              "missing-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary that doesn't contain all the keys "
              "required by the format string."),
    'E1305': ("Too many arguments for format string",
              "too-many-format-args",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too many arguments."),
    'E1306': ("Not enough arguments for format string",
              "too-few-format-args",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too few arguments"),
    'E1310': ("Suspicious argument in %s.%s call",
              "bad-str-strip-call",
              "The argument to a str.{l,r,}strip call contains a"
              " duplicate character."),
    'W1302': ("Invalid format string",
              "bad-format-string",
              "Used when a PEP 3101 format string is invalid.",
              {'minversion': (2, 7)}),
    'W1303': ("Missing keyword argument %r for format string",
              "missing-format-argument-key",
              "Used when a PEP 3101 format string that uses named fields "
              "doesn't receive one or more required keywords.",
              {'minversion': (2, 7)}),
    'W1304': ("Unused format argument %r",
              "unused-format-string-argument",
              "Used when a PEP 3101 format string that uses named "
              "fields is used with an argument that "
              "is not required by the format string.",
              {'minversion': (2, 7)}),
    'W1305': ("Format string contains both automatic field numbering "
              "and manual field specification",
              "format-combined-specification",
              "Used when a PEP 3101 format string contains both automatic "
              "field numbering (e.g. '{}') and manual field "
              "specification (e.g. '{0}').",
              {'minversion': (2, 7)}),
    'W1306': ("Missing format attribute %r in format specifier %r",
              "missing-format-attribute",
              "Used when a PEP 3101 format string uses an "
              "attribute specifier ({0.length}), but the argument "
              "passed for formatting doesn't have that attribute.",
              {'minversion': (2, 7)}),
    'W1307': ("Using invalid lookup key %r in format specifier %r",
              "invalid-format-index",
              "Used when a PEP 3101 format string uses a lookup specifier "
              "({a[1]}), but the argument passed for formatting "
              "doesn't contain or doesn't have that key as an attribute.",
              {'minversion': (2, 7)})
    }
118 |
|
119 |
# Node types grouped for the isinstance() checks on the right-hand side of a
# ``%`` formatting operation (combined there with Tuple or Dict/DictComp).
OTHER_NODES = (
    astroid.Const,
    astroid.List,
    astroid.Repr,
    astroid.Lambda,
    astroid.FunctionDef,
    astroid.ListComp,
    astroid.SetComp,
    astroid.GeneratorExp,
)
122 |
|
123 |
if _PY3K:
    import _string # pylint: disable=wrong-import-position, wrong-import-order

    def split_format_field_names(format_string):
        """Split a field name into its first key and an iterator over the
        remaining ``(is_attribute, key)`` accessors (Python 3 variant).
        """
        return _string.formatter_field_name_split(format_string)
else:
    def _field_iterator_convertor(iterator):
        """Re-yield ``(is_attr, key)`` pairs, turning numeric keys into int."""
        for is_attr, key in iterator:
            yield is_attr, (int(key) if isinstance(key, numbers.Number) else key)

    def split_format_field_names(format_string):
        """Split a field name into its first key and an iterator over the
        remaining ``(is_attribute, key)`` accessors (Python 2 variant).
        """
        first_key, accessors = format_string._formatter_field_name_split()
        # Numeric keys come back as longs here; normalise them to ints so
        # the reported output is the same on both major versions.
        return first_key, _field_iterator_convertor(accessors)
|
141 |
|
142 |
|
143 |
def collect_string_fields(format_string):
    """Yield every replacement field name found in *format_string*.

    Fields nested inside a format spec (``{a:{b}}``) are yielded right
    after the field that contains them.  An unparsable string raises
    ``utils.IncompleteFormatString``.
    """
    try:
        for entry in string.Formatter().parse(format_string):
            field_name, format_spec, conversion = entry[1:]
            if (field_name is None and format_spec is None
                    and conversion is None):
                # Pure literal text, not a replacement field.
                continue
            yield field_name
            if format_spec:
                for inner_field in collect_string_fields(format_spec):
                    yield inner_field
    except ValueError as exc:
        # Probably the format string is invalid.
        if not exc.args[0].startswith("cannot switch from manual"):
            raise utils.IncompleteFormatString(format_string)
        # On Jython, parsing a string with both manual and automatic
        # positions fails with a ValueError, while on CPython the parser
        # simply returns the fields.  Emit one automatic and one manual
        # field so that the format-combined-specification check triggers.
        yield ""
        yield "1"
|
175 |
|
176 |
def parse_format_method_string(format_string):
    """Parse a PEP 3101 format string.

    Returns a ``(keys, num_args, manual_pos_arg)`` tuple where *keys* is a
    list of ``(key, accessors)`` pairs for fields that are not plain
    numbers, *num_args* is the count of automatically numbered fields
    (``{}``) and *manual_pos_arg* is the number of distinct explicit
    positions referenced.
    """
    keys = []
    auto_fields = 0
    explicit_positions = set()
    for name in collect_string_fields(format_string):
        if not name:
            auto_fields += 1
            continue
        if str(name).isdigit():
            explicit_positions.add(str(name))
            continue
        keyname, fielditerator = split_format_field_names(name)
        if isinstance(keyname, numbers.Number):
            # Python 2 hands back a long here, which would make the output
            # differ between Python 2 and 3.
            explicit_positions.add(str(keyname))
            keyname = int(keyname)
        keys.append((keyname, list(fielditerator)))
    return keys, auto_fields, len(explicit_positions)
201 |
|
202 |
def get_args(callfunc):
    """Summarise the arguments of the given call node.

    Returns ``(positional, named)``: the number of positional arguments and
    a dict mapping each keyword name to the result of ``utils.safe_infer``
    on its value.
    """
    named = {}
    for keyword in callfunc.keywords or ():
        named[keyword.arg] = utils.safe_infer(keyword.value)
    return len(callfunc.args), named
|
216 |
|
217 |
def get_access_path(key, parts):
    """Render *key* followed by its accessors as a readable path.

    *parts* is a sequence of ``(is_attribute, specifier)`` pairs; attribute
    accesses render as ``.name`` and item accesses as ``[repr]``, giving
    e.g. ``a.b.c[0][1]``.
    """
    rendered = [
        ".{}".format(specifier) if is_attribute else "[{!r}]".format(specifier)
        for is_attribute, specifier in parts
    ]
    return str(key) + "".join(rendered)
228 |
|
229 |
|
230 |
class StringFormatChecker(BaseChecker):
    """Checks string formatting operations to ensure that the format string
    is valid and the arguments match the format string.
    """

    __implements__ = (IAstroidChecker,)
    name = 'string'
    msgs = MSGS

    @check_messages(*(MSGS.keys()))
    def visit_binop(self, node):
        """Check ``%`` formatting whose left operand is a string literal."""
        if node.op != '%':
            return
        left = node.left
        args = node.right

        if not (isinstance(left, astroid.Const)
                and isinstance(left.value, six.string_types)):
            return
        format_string = left.value
        try:
            required_keys, required_num_args = \
                utils.parse_format_string(format_string)
        except utils.UnsupportedFormatCharacter as e:
            c = format_string[e.index]
            self.add_message('bad-format-character',
                             node=node, args=(c, ord(c), e.index))
            return
        except utils.IncompleteFormatString:
            self.add_message('truncated-format-string', node=node)
            return
        if required_keys and required_num_args:
            # The format string uses both named and unnamed format
            # specifiers.
            self.add_message('mixed-format-string', node=node)
        elif required_keys:
            # The format string uses only named format specifiers.
            # Check that the RHS of the % operator is a mapping object
            # that contains precisely the set of keys required by the
            # format string.
            if isinstance(args, astroid.Dict):
                keys = set()
                unknown_keys = False
                for k, _ in args.items:
                    if isinstance(k, astroid.Const):
                        key = k.value
                        if isinstance(key, six.string_types):
                            keys.add(key)
                        else:
                            self.add_message('bad-format-string-key',
                                             node=node, args=key)
                    else:
                        # One of the keys was something other than a
                        # constant.  Since we can't tell what it is,
                        # suppress checks for missing keys in the
                        # dictionary.
                        unknown_keys = True
                if not unknown_keys:
                    for key in required_keys:
                        if key not in keys:
                            self.add_message('missing-format-string-key',
                                             node=node, args=key)
                for key in keys:
                    if key not in required_keys:
                        self.add_message('unused-format-string-key',
                                         node=node, args=key)
            elif isinstance(args, OTHER_NODES + (astroid.Tuple,)):
                type_name = type(args).__name__
                self.add_message('format-needs-mapping',
                                 node=node, args=type_name)
            # else:
                # The RHS of the format specifier is a name or
                # expression.  It may be a mapping object, so
                # there's nothing we can check.
        else:
            # The format string uses only unnamed format specifiers.
            # Check that the number of arguments passed to the RHS of
            # the % operator matches the number required by the format
            # string.
            if isinstance(args, astroid.Tuple):
                num_args = len(args.elts)
            elif isinstance(args, OTHER_NODES + (astroid.Dict, astroid.DictComp)):
                num_args = 1
            else:
                # The RHS of the format specifier is a name or
                # expression.  It could be a tuple of unknown size, so
                # there's nothing we can check.
                num_args = None
            if num_args is not None:
                if num_args > required_num_args:
                    self.add_message('too-many-format-args', node=node)
                elif num_args < required_num_args:
                    self.add_message('too-few-format-args', node=node)

    @check_messages(*(MSGS.keys()))
    def visit_call(self, node):
        """Check ``strip``/``lstrip``/``rstrip`` and ``format`` calls made on
        str, unicode or bytes instances.
        """
        func = utils.safe_infer(node.func)
        if (isinstance(func, astroid.BoundMethod)
                and isinstance(func.bound, astroid.Instance)
                and func.bound.name in ('str', 'unicode', 'bytes')):
            if func.name in ('strip', 'lstrip', 'rstrip') and node.args:
                arg = utils.safe_infer(node.args[0])
                if not isinstance(arg, astroid.Const):
                    return
                # The argument may be a non-string constant, e.g. None in
                # ``s.strip(None)``; calling len() on it would raise
                # TypeError inside the checker, so only inspect text/bytes.
                if not isinstance(arg.value,
                                  six.string_types + (six.binary_type,)):
                    return
                if len(arg.value) != len(set(arg.value)):
                    self.add_message('bad-str-strip-call', node=node,
                                     args=(func.bound.name, func.name))
            elif func.name == 'format':
                if _PY27 or _PY3K:
                    self._check_new_format(node, func)

    def _check_new_format(self, node, func):
        """ Check the new string formatting. """
        # TODO: skip (for now) format nodes which don't have
        #       an explicit string on the left side of the format operation.
        #       We do this because our inference engine can't properly handle
        #       redefinitions of the original string.
        #       For more details, see issue 287.
        #
        # Note that there may not be any left side at all, if the format method
        # has been assigned to another variable. See issue 351. For example:
        #
        #    fmt = 'some string {}'.format
        #    fmt('arg')
        if (isinstance(node.func, astroid.Attribute)
                and not isinstance(node.func.expr, astroid.Const)):
            return
        try:
            strnode = next(func.bound.infer())
        except astroid.InferenceError:
            return
        if not isinstance(strnode, astroid.Const):
            return
        if not isinstance(strnode.value, six.string_types):
            return

        if node.starargs or node.kwargs:
            return
        try:
            positional, named = get_args(node)
        except astroid.InferenceError:
            return
        try:
            fields, num_args, manual_pos = parse_format_method_string(strnode.value)
        except utils.IncompleteFormatString:
            self.add_message('bad-format-string', node=node)
            return

        named_fields = set(field[0] for field in fields
                           if isinstance(field[0], six.string_types))
        if num_args and manual_pos:
            self.add_message('format-combined-specification',
                             node=node)
            return

        check_args = False
        # Consider "{[0]} {[1]}" as num_args.
        num_args += sum(1 for field in named_fields
                        if field == '')
        if named_fields:
            for field in named_fields:
                if field not in named and field:
                    self.add_message('missing-format-argument-key',
                                     node=node,
                                     args=(field, ))
            for field in named:
                if field not in named_fields:
                    self.add_message('unused-format-string-argument',
                                     node=node,
                                     args=(field, ))
            # num_args can be 0 if manual_pos is not.
            num_args = num_args or manual_pos
            if positional or num_args:
                empty = any(True for field in named_fields
                            if field == '')
                if named or empty:
                    # Verify the required number of positional arguments
                    # only if the .format got at least one keyword argument.
                    # This means that the format strings accepts both
                    # positional and named fields and we should warn
                    # when one of the them is missing or is extra.
                    check_args = True
        else:
            check_args = True
        if check_args:
            # num_args can be 0 if manual_pos is not.
            num_args = num_args or manual_pos
            if positional > num_args:
                self.add_message('too-many-format-args', node=node)
            elif positional < num_args:
                self.add_message('too-few-format-args', node=node)

        self._check_new_format_specifiers(node, fields, named)

    def _check_new_format_specifiers(self, node, fields, named):
        """
        Check attribute and index access in the format
        string ("{0.a}" and "{0[a]}").
        """
        for key, specifiers in fields:
            # Obtain the argument. If it can't be obtained
            # or inferred, skip this check.
            if key == '':
                # {[0]} will have an unnamed argument, defaulting
                # to 0. It will not be present in `named`, so use the value
                # 0 for it.
                key = 0
            if isinstance(key, numbers.Number):
                try:
                    argname = utils.get_argument_from_call(node, key)
                except utils.NoSuchArgumentError:
                    continue
            else:
                if key not in named:
                    continue
                argname = named[key]
            if argname in (astroid.YES, None):
                continue
            try:
                argument = next(argname.infer())
            except astroid.InferenceError:
                continue
            if not specifiers or argument is astroid.YES:
                # No need to check this key if it doesn't
                # use attribute / item access
                continue
            if argument.parent and isinstance(argument.parent, astroid.Arguments):
                # Ignore any object coming from an argument,
                # because we can't infer its value properly.
                continue
            previous = argument
            parsed = []
            for is_attribute, specifier in specifiers:
                if previous is astroid.YES:
                    break
                parsed.append((is_attribute, specifier))
                if is_attribute:
                    try:
                        previous = previous.getattr(specifier)[0]
                    except astroid.NotFoundError:
                        if (hasattr(previous, 'has_dynamic_getattr') and
                                previous.has_dynamic_getattr()):
                            # Don't warn if the object has a custom __getattr__
                            break
                        path = get_access_path(key, parsed)
                        self.add_message('missing-format-attribute',
                                         args=(specifier, path),
                                         node=node)
                        break
                else:
                    warn_error = False
                    if hasattr(previous, 'getitem'):
                        try:
                            previous = previous.getitem(specifier)
                        except (IndexError, TypeError):
                            warn_error = True
                        except astroid.InferenceError:
                            break
                    else:
                        try:
                            # Lookup __getitem__ in the current node,
                            # but skip further checks, because we can't
                            # retrieve the looked object
                            previous.getattr('__getitem__')
                            break
                        except astroid.NotFoundError:
                            warn_error = True
                    if warn_error:
                        path = get_access_path(key, parsed)
                        self.add_message('invalid-format-index',
                                         args=(specifier, path),
                                         node=node)
                        break

                try:
                    previous = next(previous.infer())
                except astroid.InferenceError:
                    # can't check further if we can't infer it
                    break
|
510 |
|
511 |
|
512 |
|
513 |
class StringConstantChecker(BaseTokenChecker):
    """Check string literals"""
    __implements__ = (ITokenChecker, IRawChecker)
    name = 'string_constant'
    msgs = {
        'W1401': ('Anomalous backslash in string: \'%s\'. '
                  'String constant might be missing an r prefix.',
                  'anomalous-backslash-in-string',
                  'Used when a backslash is in a literal string but not as an '
                  'escape.'),
        'W1402': ('Anomalous Unicode escape in byte string: \'%s\'. '
                  'String constant might be missing an r or u prefix.',
                  'anomalous-unicode-escape-in-string',
                  'Used when an escape like \\u is encountered in a byte '
                  'string where it has no effect.'),
        }

    # Characters that have a special meaning after a backslash in either
    # Unicode or byte strings.
    ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567'

    # TODO(mbp): Octal characters are quite an edge case today; people may
    # prefer a separate warning where they occur.  \0 should be allowed.

    # Characters that have a special meaning after a backslash but only in
    # Unicode strings.
    UNICODE_ESCAPE_CHARACTERS = 'uUN'

    def process_module(self, module):
        """Record whether the module imports ``unicode_literals``."""
        self._unicode_literals = 'unicode_literals' in module.future_imports

    def process_tokens(self, tokens):
        """Run the string checks on every STRING token of the stream."""
        for (kind, text, (row, _), _, _) in tokens:
            if kind != tokenize.STRING:
                continue
            # 'text' is the whole un-parsed token; its start tells whether
            # it is a raw or unicode string etc.
            self.process_string_token(text, row)

    def process_string_token(self, token, start_row):
        """Split a string token into prefix and body, then check the body
        unless the literal is raw.
        """
        for quote_index, char in enumerate(token):
            if char in '\'\"':
                quote_char = char
                break
        # pylint: disable=undefined-loop-variable
        prefix = token[:quote_index].lower()  # markers like u, b, r.
        quoted = token[quote_index:]
        triple = 3 * quote_char
        if quoted[:3] == triple and quoted[-3:] == triple:
            string_body = quoted[3:-3]
        else:
            string_body = quoted[1:-1]  # Chop off quotes
        # No special checks on raw strings at the moment.
        if 'r' in prefix:
            return
        self.process_non_raw_string_token(prefix, string_body, start_row)

    def process_non_raw_string_token(self, prefix, string_body, start_row):
        """check for bad escapes in a non-raw string.

        prefix: lowercase string of eg 'ur' string prefix markers.
        string_body: the un-parsed body of the string, not including the quote
        marks.
        start_row: integer line number in the source.
        """
        # Jump from backslash to backslash.  A backslash is accepted when it
        # escapes a backslash, a quote, an end-of-line, or one of the letters
        # that introduce a special escape sequence
        # <http://docs.python.org/reference/lexical_analysis.html>;
        # anything else is reported.
        #
        # TODO(mbp): Maybe give a separate warning about the rarely-used
        # \a \b \v \f?
        #
        # TODO(mbp): We could give the column of the problem character, but
        # add_message doesn't seem to have a way to pass it through at present.
        position = string_body.find('\\')
        while position != -1:
            # There must be a next character; having a backslash at the end
            # of the string would be a SyntaxError.
            next_char = string_body[position + 1]
            escape = string_body[position:position + 2]
            if next_char in self.UNICODE_ESCAPE_CHARACTERS:
                # Fine in explicit unicode literals, and in unprefixed
                # literals when strings are unicode by default.
                unicode_ok = 'u' in prefix or (
                    (_PY3K or self._unicode_literals) and 'b' not in prefix)
                if not unicode_ok:
                    self.add_message('anomalous-unicode-escape-in-string',
                                     line=start_row, args=(escape, ))
            elif next_char not in self.ESCAPE_CHARACTERS:
                self.add_message('anomalous-backslash-in-string',
                                 line=start_row, args=(escape, ))
            # Whether it was a valid escape or not, backslash followed by
            # another character can always be consumed whole: the second
            # character can never be the start of a new backslash escape.
            position = string_body.find('\\', position + 2)
|
612 |
|
613 |
|
614 |
|
615 |
def register(linter):
    """required method to auto register this checker """
    # Instantiate and register the checkers one at a time, format checker
    # first.
    for checker_class in (StringFormatChecker, StringConstantChecker):
        linter.register_checker(checker_class(linter))
linter.register_checker(StringConstantChecker(linter)) |