gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / pylint / checkers / format.py @ 745
History | View | Annotate | Download (40.1 KB)
1 |
# Copyright (c) 2003-2013 LOGILAB S.A. (Paris, FRANCE).
|
---|---|
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify it under
|
4 |
# the terms of the GNU General Public License as published by the Free Software
|
5 |
# Foundation; either version 2 of the License, or (at your option) any later
|
6 |
# version.
|
7 |
#
|
8 |
# This program is distributed in the hope that it will be useful, but WITHOUT
|
9 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
10 |
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
11 |
#
|
12 |
# You should have received a copy of the GNU General Public License along with
|
13 |
# this program; if not, write to the Free Software Foundation, Inc.,
|
14 |
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
15 |
"""Python code format's checker.
|
16 |
|
17 |
By default try to follow Guido's style guide :
|
18 |
|
19 |
http://www.python.org/doc/essays/styleguide.html
|
20 |
|
21 |
Some parts of the process_token method is based from The Tab Nanny std module.
|
22 |
"""
|
23 |
|
24 |
import keyword |
25 |
import sys |
26 |
import tokenize |
27 |
from functools import reduce # pylint: disable=redefined-builtin |
28 |
|
29 |
import six |
30 |
from six.moves import zip, map, filter # pylint: disable=redefined-builtin |
31 |
|
32 |
from astroid import nodes |
33 |
|
34 |
from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker |
35 |
from pylint.checkers import BaseTokenChecker |
36 |
from pylint.checkers.utils import check_messages |
37 |
from pylint.utils import WarningScope, OPTION_RGX |
38 |
|
39 |
# Keywords that open a block and therefore start a "block continuation"
# context when their header spills over several lines.
_CONTINUATION_BLOCK_OPENERS = ['elif', 'except', 'for', 'if', 'while', 'def', 'class']
# Keywords that may be followed by superfluous parentheses.
_KEYWORD_TOKENS = ['assert', 'del', 'elif', 'except', 'for', 'if', 'in', 'not',
                   'raise', 'return', 'while', 'yield']
if sys.version_info < (3, 0):
    # 'print' is a statement keyword only on Python 2.
    _KEYWORD_TOKENS.append('print')

# Binary/augmented operators that must be surrounded by exactly one space.
_SPACED_OPERATORS = ['==', '<', '>', '!=', '<>', '<=', '>=',
                     '+=', '-=', '*=', '**=', '/=', '//=', '&=', '|=', '^=',
                     '%=', '>>=', '<<=']
_OPENING_BRACKETS = ['(', '[', '{']
_CLOSING_BRACKETS = [')', ']', '}']
# Number of columns a tab counts for when measuring indentation width.
_TAB_LENGTH = 8

# Token types that terminate a physical or logical line.
_EOL = frozenset([tokenize.NEWLINE, tokenize.NL, tokenize.COMMENT])
# Token types that carry no program content.
_JUNK_TOKENS = (tokenize.COMMENT, tokenize.NL)

# Whitespace checking policy constants
_MUST = 0
_MUST_NOT = 1
_IGNORE = 2

# Whitespace checking config constants
_DICT_SEPARATOR = 'dict-separator'
_TRAILING_COMMA = 'trailing-comma'
_EMPTY_LINE = 'empty-line'
_NO_SPACE_CHECK_CHOICES = [_TRAILING_COMMA, _DICT_SEPARATOR, _EMPTY_LINE]
_DEFAULT_NO_SPACE_CHECK_CHOICES = [_TRAILING_COMMA, _DICT_SEPARATOR]
66 |
|
67 |
# Message definitions: msg-id -> (template, symbol, description[, options]).
MSGS = {
    'C0301': ('Line too long (%s/%s)',
              'line-too-long',
              'Used when a line is longer than a given number of characters.'),
    'C0302': ('Too many lines in module (%s/%s)', # was W0302
              'too-many-lines',
              'Used when a module has too much lines, reducing its readability.'
             ),
    'C0303': ('Trailing whitespace',
              'trailing-whitespace',
              'Used when there is whitespace between the end of a line and the '
              'newline.'),
    'C0304': ('Final newline missing',
              'missing-final-newline',
              'Used when the last line in a file is missing a newline.'),
    'W0311': ('Bad indentation. Found %s %s, expected %s',
              'bad-indentation',
              'Used when an unexpected number of indentation\'s tabulations or '
              'spaces has been found.'),
    'C0330': ('Wrong %s indentation%s%s.\n%s%s',
              'bad-continuation',
              'TODO'),
    'W0312': ('Found indentation with %ss instead of %ss',
              'mixed-indentation',
              'Used when there are some mixed tabs and spaces in a module.'),
    'W0301': ('Unnecessary semicolon', # was W0106
              'unnecessary-semicolon',
              'Used when a statement is ended by a semi-colon (";"), which \
isn\'t necessary (that\'s python, not C ;).'),
    'C0321': ('More than one statement on a single line',
              'multiple-statements',
              'Used when more than on statement are found on the same line.',
              {'scope': WarningScope.NODE}),
    'C0325' : ('Unnecessary parens after %r keyword',
               'superfluous-parens',
               'Used when a single item in parentheses follows an if, for, or '
               'other keyword.'),
    'C0326': ('%s space %s %s %s\n%s',
              'bad-whitespace',
              ('Used when a wrong number of spaces is used around an operator, '
               'bracket or block opener.'),
              {'old_names': [('C0323', 'no-space-after-operator'),
                             ('C0324', 'no-space-after-comma'),
                             ('C0322', 'no-space-before-operator')]}),
    'W0332': ('Use of "l" as long integer identifier',
              'lowercase-l-suffix',
              'Used when a lower case "l" is used to mark a long integer. You '
              'should use a upper case "L" since the letter "l" looks too much '
              'like the digit "1"',
              {'maxversion': (3, 0)}),
    'C0327': ('Mixed line endings LF and CRLF',
              'mixed-line-endings',
              'Used when there are mixed (LF and CRLF) newline signs in a file.'),
    'C0328': ('Unexpected line ending format. There is \'%s\' while it should be \'%s\'.',
              'unexpected-line-ending-format',
              'Used when there is different newline than expected.'),
    }
124 |
|
125 |
|
126 |
def _underline_token(token): |
127 |
length = token[3][1] - token[2][1] |
128 |
offset = token[2][1] |
129 |
referenced_line = token[4]
|
130 |
# If the referenced line does not end with a newline char, fix it
|
131 |
if referenced_line[-1] != '\n': |
132 |
referenced_line += '\n'
|
133 |
return referenced_line + (' ' * offset) + ('^' * length) |
134 |
|
135 |
def _column_distance(token1, token2): |
136 |
if token1 == token2:
|
137 |
return 0 |
138 |
if token2[3] < token1[3]: |
139 |
token1, token2 = token2, token1 |
140 |
if token1[3][0] != token2[2][0]: |
141 |
return None |
142 |
return token2[2][1] - token1[3][1] |
143 |
|
144 |
|
145 |
def _last_token_on_line_is(tokens, line_end, token):
    """Return True if `token` is the last significant token on its line.

    A trailing comment is ignored: the token still counts as last when only
    a COMMENT token separates it from the end of the line.

    :param tokens: a TokenWrapper over the full token stream.
    :param line_end: index one past the last token of the line.
    :param token: the token string to look for (e.g. ';' or ':').
    """
    return (line_end > 0 and tokens.token(line_end-1) == token or
            line_end > 1 and tokens.token(line_end-2) == token
            and tokens.type(line_end-1) == tokenize.COMMENT)
149 |
|
150 |
|
151 |
def _token_followed_by_eol(tokens, position):
    """Return True when nothing but (an optional comment and) an NL follows.

    Used to decide whether an opening bracket starts a hanging indent.
    """
    return (tokens.type(position+1) == tokenize.NL or
            tokens.type(position+1) == tokenize.COMMENT and
            tokens.type(position+2) == tokenize.NL)
|
155 |
|
156 |
|
157 |
def _get_indent_length(line): |
158 |
"""Return the length of the indentation on the given token's line."""
|
159 |
result = 0
|
160 |
for char in line: |
161 |
if char == ' ': |
162 |
result += 1
|
163 |
elif char == '\t': |
164 |
result += _TAB_LENGTH |
165 |
else:
|
166 |
break
|
167 |
return result
|
168 |
|
169 |
|
170 |
def _get_indent_hint_line(bar_positions, bad_position): |
171 |
"""Return a line with |s for each of the positions in the given lists."""
|
172 |
if not bar_positions: |
173 |
return ('', '') |
174 |
delta_message = ''
|
175 |
markers = [(pos, '|') for pos in bar_positions] |
176 |
if len(markers) == 1: |
177 |
# if we have only one marker we'll provide an extra hint on how to fix
|
178 |
expected_position = markers[0][0] |
179 |
delta = abs(expected_position - bad_position)
|
180 |
direction = 'add' if expected_position > bad_position else 'remove' |
181 |
delta_message = _CONTINUATION_HINT_MESSAGE % ( |
182 |
direction, delta, 's' if delta > 1 else '') |
183 |
markers.append((bad_position, '^'))
|
184 |
markers.sort() |
185 |
line = [' '] * (markers[-1][0] + 1) |
186 |
for position, marker in markers: |
187 |
line[position] = marker |
188 |
return (''.join(line), delta_message) |
189 |
|
190 |
|
191 |
class _ContinuedIndent(object):
    """Record of one level of continued indentation.

    An instance describes where a bracket (or ':' / 'lambda') opened a
    continuation and which columns are acceptable for the lines that follow.
    """
    __slots__ = ('valid_outdent_offsets',
                 'valid_continuation_offsets',
                 'context_type',
                 'token',
                 'position')

    def __init__(self,
                 context_type,
                 token,
                 position,
                 valid_outdent_offsets,
                 valid_continuation_offsets):
        # Columns valid for a closing bracket that ends this continuation.
        self.valid_outdent_offsets = valid_outdent_offsets
        # Columns valid for any other continuation line.
        self.valid_continuation_offsets = valid_continuation_offsets
        # One of the HANGING*/CONTINUED* context constants defined below.
        self.context_type = context_type
        # Index in the token stream of the token that opened the context.
        self.position = position
        # The opening token string itself (bracket, ':' or 'lambda').
        self.token = token
|
209 |
|
210 |
|
211 |
# The contexts for hanging indents.
# A hanging indented dictionary value after :
HANGING_DICT_VALUE = 'dict-value'
# Hanging indentation in an expression.
HANGING = 'hanging'
# Hanging indentation in a block header.
HANGING_BLOCK = 'hanging-block'
# Continued indentation inside an expression.
CONTINUED = 'continued'
# Continued indentation in a block header.
CONTINUED_BLOCK = 'continued-block'

# Block-statement flavors used as values by _BeforeBlockOffsets.
SINGLE_LINE = 'single'
WITH_BODY = 'multi'

# Maps a continuation context to the (indent kind, location) parts of the
# bad-continuation message.
_CONTINUATION_MSG_PARTS = {
    HANGING_DICT_VALUE: ('hanging', ' in dict value'),
    HANGING: ('hanging', ''),
    HANGING_BLOCK: ('hanging', ' before block'),
    CONTINUED: ('continued', ''),
    CONTINUED_BLOCK: ('continued', ' before block'),
    }

_CONTINUATION_HINT_MESSAGE = ' (%s %d space%s)'  # Ex: (remove 2 spaces)
235 |
|
236 |
def _Offsets(*args): |
237 |
"""Valid indentation offsets for a continued line."""
|
238 |
return dict((a, None) for a in args) |
239 |
|
240 |
|
241 |
def _BeforeBlockOffsets(single, with_body):
    """Valid alternative indent offsets for continued lines before blocks.

    :param single: Valid offset for statements on a single logical line.
    :param with_body: Valid offset for statements on several lines.
    """
    offsets = {single: SINGLE_LINE}
    offsets[with_body] = WITH_BODY
    return offsets
|
248 |
|
249 |
|
250 |
class TokenWrapper(object):
    """A wrapper for readable access to token information."""

    # Indices into a tokenize 5-tuple.
    _TYPE, _STRING, _START, _END, _LINE = range(5)

    def __init__(self, tokens):
        self._tokens = tokens

    def token(self, idx):
        """The source string of the token at *idx*."""
        return self._tokens[idx][self._STRING]

    def type(self, idx):
        """The tokenize type of the token at *idx*."""
        return self._tokens[idx][self._TYPE]

    def start_line(self, idx):
        """The row on which the token at *idx* starts."""
        return self._tokens[idx][self._START][0]

    def start_col(self, idx):
        """The column at which the token at *idx* starts."""
        return self._tokens[idx][self._START][1]

    def line(self, idx):
        """The physical source line holding the token at *idx*."""
        return self._tokens[idx][self._LINE]
270 |
|
271 |
|
272 |
class ContinuedLineState(object):
    """Tracker for continued indentation inside a logical line."""

    def __init__(self, tokens, config):
        # Index of the first non-junk token of the current physical line,
        # or -1 while none has been seen yet.
        self._line_start = -1
        # Stack of _ContinuedIndent contexts currently open.
        self._cont_stack = []
        # Whether the current line starts with a block-opening keyword.
        self._is_block_opener = False
        # Warnings deferred until the logical line ends (block headers).
        self.retained_warnings = []
        self._config = config
        self._tokens = TokenWrapper(tokens)

    @property
    def has_content(self):
        # True while at least one continuation context is open.
        return bool(self._cont_stack)

    @property
    def _block_indent_size(self):
        # Width of one block indent, with tabs expanded to _TAB_LENGTH.
        return len(self._config.indent_string.replace('\t', ' ' * _TAB_LENGTH))

    @property
    def _continuation_size(self):
        return self._config.indent_after_paren

    def handle_line_start(self, pos):
        """Record the first non-junk token at the start of a line."""
        if self._line_start > -1:
            return
        self._is_block_opener = self._tokens.token(pos) in _CONTINUATION_BLOCK_OPENERS
        self._line_start = pos

    def next_physical_line(self):
        """Prepares the tracker for a new physical line (NL)."""
        self._line_start = -1
        self._is_block_opener = False

    def next_logical_line(self):
        """Prepares the tracker for a new logical line (NEWLINE).

        A new logical line only starts with block indentation.
        """
        self.next_physical_line()
        self.retained_warnings = []
        self._cont_stack = []

    def add_block_warning(self, token_position, state, valid_offsets):
        # Defer the warning; _process_retained_warnings decides at the end
        # of the logical line whether it applies.
        self.retained_warnings.append((token_position, state, valid_offsets))

    def get_valid_offsets(self, idx):
        """Returns the valid offsets for the token at the given position."""
        # The closing brace on a dict or the 'for' in a dict comprehension may
        # reset two indent levels because the dict value is ended implicitly
        stack_top = -1
        if self._tokens.token(idx) in ('}', 'for') and self._cont_stack[-1].token == ':':
            stack_top = -2
        indent = self._cont_stack[stack_top]
        if self._tokens.token(idx) in _CLOSING_BRACKETS:
            valid_offsets = indent.valid_outdent_offsets
        else:
            valid_offsets = indent.valid_continuation_offsets
        return indent, valid_offsets.copy()

    def _hanging_indent_after_bracket(self, bracket, position):
        """Extracts indentation information for a hanging indent."""
        indentation = _get_indent_length(self._tokens.line(position))
        if self._is_block_opener and self._continuation_size == self._block_indent_size:
            return _ContinuedIndent(
                HANGING_BLOCK,
                bracket,
                position,
                _Offsets(indentation + self._continuation_size, indentation),
                _BeforeBlockOffsets(indentation + self._continuation_size,
                                    indentation + self._continuation_size * 2))
        elif bracket == ':':
            # If the dict key was on the same line as the open brace, the new
            # correct indent should be relative to the key instead of the
            # current indent level
            paren_align = self._cont_stack[-1].valid_outdent_offsets
            next_align = self._cont_stack[-1].valid_continuation_offsets.copy()
            next_align_keys = list(next_align.keys())
            next_align[next_align_keys[0] + self._continuation_size] = True
            # Note that the continuation of
            # d = {
            #     'a': 'b'
            #     'c'
            # }
            # is handled by the special-casing for hanging continued string indents.
            return _ContinuedIndent(HANGING_DICT_VALUE, bracket, position, paren_align, next_align)
        else:
            return _ContinuedIndent(
                HANGING,
                bracket,
                position,
                _Offsets(indentation, indentation + self._continuation_size),
                _Offsets(indentation + self._continuation_size))

    def _continuation_inside_bracket(self, bracket, pos):
        """Extracts indentation information for a continued indent."""
        indentation = _get_indent_length(self._tokens.line(pos))
        token_start = self._tokens.start_col(pos)
        next_token_start = self._tokens.start_col(pos + 1)
        if self._is_block_opener and next_token_start - indentation == self._block_indent_size:
            return _ContinuedIndent(
                CONTINUED_BLOCK,
                bracket,
                pos,
                _Offsets(token_start),
                _BeforeBlockOffsets(next_token_start, next_token_start + self._continuation_size))
        else:
            return _ContinuedIndent(
                CONTINUED,
                bracket,
                pos,
                _Offsets(token_start),
                _Offsets(next_token_start))

    def pop_token(self):
        self._cont_stack.pop()

    def push_token(self, token, position):
        """Pushes a new token for continued indentation on the stack.

        Tokens that can modify continued indentation offsets are:
          * opening brackets
          * 'lambda'
          * : inside dictionaries

        push_token relies on the caller to filter out those
        interesting tokens.

        :param token: The concrete token
        :param position: The position of the token in the stream.
        """
        # A bracket followed directly by end-of-line opens a hanging indent;
        # otherwise the continuation aligns with the token after the bracket.
        if _token_followed_by_eol(self._tokens, position):
            self._cont_stack.append(
                self._hanging_indent_after_bracket(token, position))
        else:
            self._cont_stack.append(
                self._continuation_inside_bracket(token, position))
|
410 |
|
411 |
|
412 |
class FormatChecker(BaseTokenChecker):
    """checks for :
    * unauthorized constructions
    * strict indentation
    * line length
    """

    __implements__ = (ITokenChecker, IAstroidChecker, IRawChecker)

    # configuration section name
    name = 'format'
    # messages
    msgs = MSGS
    # configuration options
    # for available dict keys/values see the optik parser 'add_option' method
    options = (('max-line-length',
                {'default' : 100, 'type' : "int", 'metavar' : '<int>',
                 'help' : 'Maximum number of characters on a single line.'}),
               ('ignore-long-lines',
                {'type': 'regexp', 'metavar': '<regexp>',
                 'default': r'^\s*(# )?<?https?://\S+>?$',
                 'help': ('Regexp for a line that is allowed to be longer than '
                          'the limit.')}),
               ('single-line-if-stmt',
                {'default': False, 'type' : 'yn', 'metavar' : '<y_or_n>',
                 'help' : ('Allow the body of an if to be on the same '
                           'line as the test if there is no else.')}),
               ('no-space-check',
                {'default': ','.join(_DEFAULT_NO_SPACE_CHECK_CHOICES),
                 'metavar': ','.join(_NO_SPACE_CHECK_CHOICES),
                 'type': 'multiple_choice',
                 'choices': _NO_SPACE_CHECK_CHOICES,
                 'help': ('List of optional constructs for which whitespace '
                          'checking is disabled. '
                          '`'+ _DICT_SEPARATOR + '` is used to allow tabulation '
                          'in dicts, etc.: {1  : 1,\\n222: 2}. '
                          '`'+ _TRAILING_COMMA + '` allows a space between comma '
                          'and closing bracket: (a, ). '
                          '`'+ _EMPTY_LINE + '` allows space-only lines.')}),
               ('max-module-lines',
                {'default' : 1000, 'type' : 'int', 'metavar' : '<int>',
                 'help': 'Maximum number of lines in a module'}
               ),
               ('indent-string',
                {'default' : '    ', 'type' : "string", 'metavar' : '<string>',
                 'help' : 'String used as indentation unit. This is usually '
                          '"    " (4 spaces) or "\\t" (1 tab).'}),
               ('indent-after-paren',
                {'type': 'int', 'metavar': '<int>', 'default': 4,
                 'help': 'Number of spaces of indent required inside a hanging '
                         ' or continued line.'}),
               ('expected-line-ending-format',
                {'type': 'choice', 'metavar': '<empty or LF or CRLF>', 'default': '',
                 'choices': ['', 'LF', 'CRLF'],
                 'help': ('Expected format of line ending, '
                          'e.g. empty (any line ending), LF or CRLF.')}),
              )
469 |
|
470 |
    def __init__(self, linter=None):
        BaseTokenChecker.__init__(self, linter)
        # Per-module bookkeeping; (re)initialised in process_tokens.
        self._lines = None
        self._visited_lines = None
        # Stack of open brackets (or 'lambda'/':'); None is the bottom
        # sentinel so _inside_brackets never indexes an empty list.
        self._bracket_stack = [None]
475 |
|
476 |
    def _pop_token(self):
        # Keep the bracket stack and the continuation-indent tracker in sync.
        self._bracket_stack.pop()
        self._current_line.pop_token()
|
479 |
|
480 |
    def _push_token(self, token, idx):
        # Keep the bracket stack and the continuation-indent tracker in sync.
        self._bracket_stack.append(token)
        self._current_line.push_token(token, idx)
|
483 |
|
484 |
    def new_line(self, tokens, line_end, line_start):
        """a new line has been encountered, process it if necessary"""
        if _last_token_on_line_is(tokens, line_end, ';'):
            self.add_message('unnecessary-semicolon', line=tokens.start_line(line_end))

        line_num = tokens.start_line(line_start)
        line = tokens.line(line_start)
        if tokens.type(line_start) not in _JUNK_TOKENS:
            # Record only the first physical line of the token's line.
            self._lines[line_num] = line.split('\n')[0]
        self.check_lines(line, line_num)
|
494 |
|
495 |
    def process_module(self, module):
        # Keywords that become plain functions under a __future__ import and
        # may therefore legitimately be followed by parentheses; currently
        # only 'print' under `from __future__ import print_function`.
        self._keywords_with_parens = set()
        if 'print_function' in module.future_imports:
            self._keywords_with_parens.add('print')
|
500 |
    def _check_keyword_parentheses(self, tokens, start):
        """Check that there are not unnecessary parens after a keyword.

        Parens are unnecessary if there is exactly one balanced outer pair on a
        line, and it is followed by a colon, and contains no commas (i.e. is not a
        tuple).

        Args:
        tokens: list of Tokens; the entire list of Tokens.
        start: int; the position of the keyword in the token list.
        """
        # If the next token is not a paren, we're fine.
        if self._inside_brackets(':') and tokens[start][1] == 'for':
            # 'for' inside a dict comprehension implicitly closes the ':'
            # context pushed for the dict value.
            self._pop_token()
        if tokens[start+1][1] != '(':
            return

        found_and_or = False
        depth = 0
        keyword_token = tokens[start][1]
        line_num = tokens[start][2][0]

        for i in range(start, len(tokens) - 1):
            token = tokens[i]

            # If we hit a newline, then assume any parens were for continuation.
            if token[0] == tokenize.NL:
                return

            if token[1] == '(':
                depth += 1
            elif token[1] == ')':
                depth -= 1
                if depth:
                    continue
                # ')' can't happen after if (foo), since it would be a syntax error.
                if (tokens[i+1][1] in (':', ')', ']', '}', 'in') or
                        tokens[i+1][0] in (tokenize.NEWLINE,
                                           tokenize.ENDMARKER,
                                           tokenize.COMMENT)):
                    # The empty tuple () is always accepted.
                    if i == start + 2:
                        return
                    if keyword_token == 'not':
                        if not found_and_or:
                            self.add_message('superfluous-parens', line=line_num,
                                             args=keyword_token)
                    elif keyword_token in ('return', 'yield'):
                        self.add_message('superfluous-parens', line=line_num,
                                         args=keyword_token)
                    elif keyword_token not in self._keywords_with_parens:
                        if not (tokens[i+1][1] == 'in' and found_and_or):
                            self.add_message('superfluous-parens', line=line_num,
                                             args=keyword_token)
                return
            elif depth == 1:
                # This is a tuple, which is always acceptable.
                if token[1] == ',':
                    return
                # 'and' and 'or' are the only boolean operators with lower precedence
                # than 'not', so parens are only required when they are found.
                elif token[1] in ('and', 'or'):
                    found_and_or = True
                # A yield inside an expression must always be in parentheses,
                # quit early without error.
                elif token[1] == 'yield':
                    return
                # A generator expression always has a 'for' token in it, and
                # the 'for' token is only legal inside parens when it is in a
                # generator expression.  The parens are necessary here, so bail
                # without an error.
                elif token[1] == 'for':
                    return
|
573 |
|
574 |
    def _opening_bracket(self, tokens, i):
        self._push_token(tokens[i][1], i)
        # Special case: ignore slices
        if tokens[i][1] == '[' and tokens[i+1][1] == ':':
            return

        if (i > 0 and (tokens[i-1][0] == tokenize.NAME and
                       not (keyword.iskeyword(tokens[i-1][1]))
                       or tokens[i-1][1] in _CLOSING_BRACKETS)):
            # Preceded by a name or closing bracket: this is a call or
            # subscript, so no space is allowed on either side.
            self._check_space(tokens, i, (_MUST_NOT, _MUST_NOT))
        else:
            self._check_space(tokens, i, (_IGNORE, _MUST_NOT))
|
586 |
|
587 |
    def _closing_bracket(self, tokens, i):
        if self._inside_brackets(':'):
            # A '}' also closes an open dict-value ':' context.
            self._pop_token()
        self._pop_token()
        # Special case: ignore slices
        if tokens[i-1][1] == ':' and tokens[i][1] == ']':
            return
        policy_before = _MUST_NOT
        if tokens[i][1] in _CLOSING_BRACKETS and tokens[i-1][1] == ',':
            # Respect the trailing-comma exemption: `(a, )` is allowed.
            if _TRAILING_COMMA in self.config.no_space_check:
                policy_before = _IGNORE

        self._check_space(tokens, i, (policy_before, _IGNORE))
|
600 |
|
601 |
def _check_equals_spacing(self, tokens, i): |
602 |
"""Check the spacing of a single equals sign."""
|
603 |
if self._inside_brackets('(') or self._inside_brackets('lambda'): |
604 |
self._check_space(tokens, i, (_MUST_NOT, _MUST_NOT))
|
605 |
else:
|
606 |
self._check_space(tokens, i, (_MUST, _MUST))
|
607 |
|
608 |
    def _open_lambda(self, tokens, i): # pylint:disable=unused-argument
        # 'lambda' opens a context ended by the matching ':' in _handle_colon.
        self._push_token('lambda', i)
610 |
|
611 |
    def _handle_colon(self, tokens, i):
        # Special case: ignore slices
        if self._inside_brackets('['):
            return
        if (self._inside_brackets('{') and
                _DICT_SEPARATOR in self.config.no_space_check):
            # dict-separator exemption: any spacing around ':' is allowed.
            policy = (_IGNORE, _IGNORE)
        else:
            policy = (_MUST_NOT, _MUST)
        self._check_space(tokens, i, policy)

        if self._inside_brackets('lambda'):
            # This ':' ends the lambda signature.
            self._pop_token()
        elif self._inside_brackets('{'):
            # This ':' starts a dict value; track it for indentation checks.
            self._push_token(':', i)
626 |
|
627 |
    def _handle_comma(self, tokens, i):
        # Only require a following whitespace if this is
        # not a hanging comma before a closing bracket.
        if tokens[i+1][1] in _CLOSING_BRACKETS:
            self._check_space(tokens, i, (_MUST_NOT, _IGNORE))
        else:
            self._check_space(tokens, i, (_MUST_NOT, _MUST))
        if self._inside_brackets(':'):
            # A comma ends the current dict-value ':' context.
            self._pop_token()
|
636 |
|
637 |
    def _check_surrounded_by_space(self, tokens, i):
        """Check that a binary operator is surrounded by exactly one space."""
        self._check_space(tokens, i, (_MUST, _MUST))
|
640 |
|
641 |
    def _check_space(self, tokens, i, policies):
        """Check spacing around token *i* against a (before, after) policy pair.

        Each policy is one of _MUST, _MUST_NOT or _IGNORE; a 'bad-whitespace'
        message is emitted for every violated side.
        """
        def _policy_string(policy):
            # Message fragments for the given policy.
            if policy == _MUST:
                return 'Exactly one', 'required'
            else:
                return 'No', 'allowed'

        def _name_construct(token):
            # Human-readable name of the syntactic construct being checked.
            if token[1] == ',':
                return 'comma'
            elif token[1] == ':':
                return ':'
            elif token[1] in '()[]{}':
                return 'bracket'
            elif token[1] in ('<', '>', '<=', '>=', '!=', '=='):
                return 'comparison'
            else:
                if self._inside_brackets('('):
                    return 'keyword argument assignment'
                else:
                    return 'assignment'

        good_space = [True, True]
        token = tokens[i]
        pairs = [(tokens[i-1], token), (token, tokens[i+1])]

        for other_idx, (policy, token_pair) in enumerate(zip(policies, pairs)):
            # Skip sides that touch an end-of-line token or are unchecked.
            if token_pair[other_idx][0] in _EOL or policy == _IGNORE:
                continue

            distance = _column_distance(*token_pair)
            if distance is None:
                # Tokens on different physical lines: nothing to check.
                continue
            good_space[other_idx] = (
                (policy == _MUST and distance == 1) or
                (policy == _MUST_NOT and distance == 0))

        warnings = []
        if not any(good_space) and policies[0] == policies[1]:
            # Both sides wrong with the same policy: one 'around' message.
            warnings.append((policies[0], 'around'))
        else:
            for ok, policy, position in zip(good_space, policies, ('before', 'after')):
                if not ok:
                    warnings.append((policy, position))
        for policy, position in warnings:
            construct = _name_construct(token)
            count, state = _policy_string(policy)
            self.add_message('bad-whitespace', line=token[2][0],
                             args=(count, state, position, construct,
                                   _underline_token(token)))
691 |
|
692 |
    def _inside_brackets(self, left):
        # True when the innermost open context is the given token.
        return self._bracket_stack[-1] == left
694 |
|
695 |
def _prepare_token_dispatcher(self): |
696 |
raw = [ |
697 |
(_KEYWORD_TOKENS, |
698 |
self._check_keyword_parentheses),
|
699 |
|
700 |
(_OPENING_BRACKETS, self._opening_bracket),
|
701 |
|
702 |
(_CLOSING_BRACKETS, self._closing_bracket),
|
703 |
|
704 |
(['='], self._check_equals_spacing), |
705 |
|
706 |
(_SPACED_OPERATORS, self._check_surrounded_by_space),
|
707 |
|
708 |
([','], self._handle_comma), |
709 |
|
710 |
([':'], self._handle_colon), |
711 |
|
712 |
(['lambda'], self._open_lambda), |
713 |
|
714 |
] |
715 |
|
716 |
dispatch = {} |
717 |
for tokens, handler in raw: |
718 |
for token in tokens: |
719 |
dispatch[token] = handler |
720 |
return dispatch
|
721 |
|
722 |
    def process_tokens(self, tokens):
        """process tokens and search for :

         _ non strict indentation (i.e. not always using the <indent> parameter as
           indent unit)
         _ too long lines (i.e. longer than <max_chars>)
         _ optionally bad construct (if given, bad_construct must be a compiled
           regular expression).
        """
        self._bracket_stack = [None]
        indents = [0]
        check_equal = False
        line_num = 0
        self._lines = {}
        self._visited_lines = {}
        token_handlers = self._prepare_token_dispatcher()
        self._last_line_ending = None

        self._current_line = ContinuedLineState(tokens, self.config)
        for idx, (tok_type, token, start, _, line) in enumerate(tokens):
            if start[0] != line_num:
                line_num = start[0]
                # A tokenizer oddity: if an indented line contains a multi-line
                # docstring, the line member of the INDENT token does not contain
                # the full line; therefore we check the next token on the line.
                if tok_type == tokenize.INDENT:
                    self.new_line(TokenWrapper(tokens), idx-1, idx+1)
                else:
                    self.new_line(TokenWrapper(tokens), idx-1, idx)

            if tok_type == tokenize.NEWLINE:
                # a program statement, or ENDMARKER, will eventually follow,
                # after some (possibly empty) run of tokens of the form
                #     (NL | COMMENT)* (INDENT | DEDENT+)?
                # If an INDENT appears, setting check_equal is wrong, and will
                # be undone when we see the INDENT.
                check_equal = True
                self._process_retained_warnings(TokenWrapper(tokens), idx)
                self._current_line.next_logical_line()
                self._check_line_ending(token, line_num)
            elif tok_type == tokenize.INDENT:
                check_equal = False
                self.check_indent_level(token, indents[-1]+1, line_num)
                indents.append(indents[-1]+1)
            elif tok_type == tokenize.DEDENT:
                # there's nothing we need to check here!  what's important is
                # that when the run of DEDENTs ends, the indentation of the
                # program statement (or ENDMARKER) that triggered the run is
                # equal to what's left at the top of the indents stack
                check_equal = True
                if len(indents) > 1:
                    del indents[-1]
            elif tok_type == tokenize.NL:
                self._check_continued_indentation(TokenWrapper(tokens), idx+1)
                self._current_line.next_physical_line()
            elif tok_type != tokenize.COMMENT:
                self._current_line.handle_line_start(idx)
                # This is the first concrete token following a NEWLINE, so it
                # must be the first token of the next program statement, or an
                # ENDMARKER; the "line" argument exposes the leading whitespace
                # for this statement; in the case of ENDMARKER, line is an empty
                # string, so will properly match the empty string with which the
                # "indents" stack was seeded
                if check_equal:
                    check_equal = False
                    self.check_indent_level(line, indents[-1], line_num)

            if tok_type == tokenize.NUMBER and token.endswith('l'):
                self.add_message('lowercase-l-suffix', line=line_num)

            try:
                handler = token_handlers[token]
            except KeyError:
                pass
            else:
                handler(tokens, idx)

        line_num -= 1 # to be ok with "wc -l"
        if line_num > self.config.max_module_lines:
            # Get the line where the too-many-lines (or its message id)
            # was disabled or default to 1.
            symbol = self.linter.msgs_store.check_message_id('too-many-lines')
            names = (symbol.msgid, 'too-many-lines')
            line = next(filter(None,
                               map(self.linter._pragma_lineno.get, names)), 1)
            self.add_message('too-many-lines',
                             args=(line_num, self.config.max_module_lines),
                             line=line)
810 |
|
811 |
def _check_line_ending(self, line_ending, line_num): |
812 |
# check if line endings are mixed
|
813 |
if self._last_line_ending is not None: |
814 |
if line_ending != self._last_line_ending: |
815 |
self.add_message('mixed-line-endings', line=line_num) |
816 |
|
817 |
self._last_line_ending = line_ending
|
818 |
|
819 |
# check if line ending is as expected
|
820 |
expected = self.config.expected_line_ending_format
|
821 |
if expected:
|
822 |
# reduce multiple \n\n\n\n to one \n
|
823 |
line_ending = reduce(lambda x, y: x + y if x != y else x, line_ending, "") |
824 |
line_ending = 'LF' if line_ending == '\n' else 'CRLF' |
825 |
if line_ending != expected:
|
826 |
self.add_message('unexpected-line-ending-format', args=(line_ending, expected), |
827 |
line=line_num) |
828 |
|
829 |
|
830 |
def _process_retained_warnings(self, tokens, current_pos):
    """Emit the continuation warnings that were delayed until the block opener."""
    # When the line does not end with ':' the block statement keeps its body
    # on the same line.
    is_single_line = not _last_token_on_line_is(tokens, current_pos, ':')
    # Only one of the two retained block types fires for a given layout.
    offending_type = WITH_BODY if is_single_line else SINGLE_LINE

    for pos, state, offsets in self._current_line.retained_warnings:
        indent_type = offsets[tokens.start_col(pos)]
        if indent_type != offending_type:
            continue
        other_hints = dict((key, val) for key, val in six.iteritems(offsets)
                           if val != indent_type)
        self._add_continuation_message(state, other_hints, tokens, pos)
def _check_continued_indentation(self, tokens, next_idx):
    """Validate the indentation of the continuation line starting at next_idx."""
    def straddles_nl(tok_type):
        # True when the tokens just before and just after the NL share a type.
        return (tokens.type(next_idx) == tok_type
                and tokens.type(next_idx - 2) == tok_type)

    # An empty continuation line never triggers a warning.
    if not self._current_line.has_content:
        return
    if tokens.type(next_idx) == tokenize.NL:
        return

    state, valid_offsets = self._current_line.get_valid_offsets(next_idx)
    # Hanging comments and strings: if the previous line ended with a comment
    # (string) and this line is only a comment (string), aligning with the
    # start of that previous token is also acceptable.
    if straddles_nl(tokenize.COMMENT) or straddles_nl(tokenize.STRING):
        valid_offsets[tokens.start_col(next_idx - 2)] = True

    indent_col = tokens.start_col(next_idx)
    # Whether a continued line before a block opener is valid depends on where
    # the block's body lands; since token processing is single-pass, that
    # decision is delayed until the opener itself is processed.
    if (state.context_type in (HANGING_BLOCK, CONTINUED_BLOCK)
            and indent_col in valid_offsets):
        self._current_line.add_block_warning(next_idx, state, valid_offsets)
    elif indent_col not in valid_offsets:
        self._add_continuation_message(state, valid_offsets, tokens, next_idx)
def _add_continuation_message(self, state, offsets, tokens, position):
    """Report a bad-continuation warning for the token at *position*."""
    kind, where = _CONTINUATION_MSG_PARTS[state.context_type]
    marker_line, delta_msg = _get_indent_hint_line(offsets, tokens.start_col(position))
    message_args = (kind, where, delta_msg, tokens.line(position), marker_line)
    self.add_message('bad-continuation',
                     line=tokens.start_line(position),
                     args=message_args)
@check_messages('multiple-statements')
def visit_default(self, node):
    """Check the node's line number and check the line if not yet done.

    Statements that share a line with their predecessor are routed to
    :meth:`_check_multi_statement_line`; otherwise every line the statement
    spans is marked as visited so it is only processed once.
    """
    if not node.is_statement:
        return
    if not node.root().pure_python:
        return # XXX block visit of child nodes
    prev_sibl = node.previous_sibling()
    if prev_sibl is not None:
        prev_line = prev_sibl.fromlineno
    else:
        # The line on which a finally: occurs in a try/finally
        # is not directly represented in the AST. We infer it
        # by taking the last line of the body and adding 1, which
        # should be the line of finally:
        if (isinstance(node.parent, nodes.TryFinally)
                and node in node.parent.finalbody):
            prev_line = node.parent.body[0].tolineno + 1
        else:
            prev_line = node.parent.statement().fromlineno
    line = node.fromlineno
    assert line, node
    if prev_line == line and self._visited_lines.get(line) != 2:
        self._check_multi_statement_line(node, line)
        return
    if line in self._visited_lines:
        return
    try:
        tolineno = node.blockstart_tolineno
    except AttributeError:
        tolineno = node.tolineno
    assert tolineno, node
    # Mark every line of the statement as visited.  The previous version also
    # collected the source text of these lines into a local list that was
    # never used; that dead accumulation has been removed.
    for line in range(line, tolineno + 1):
        self._visited_lines[line] = 1
def _check_multi_statement_line(self, node, line):
    """Warn when *line* holds more than one statement, unless exempted."""
    parent = node.parent
    # Nested context managers in one `with` statement are deliberate.
    if isinstance(node, nodes.With):
        return
    # try/except nested in try/finally shares a line only because of how
    # the AST models the combined statement.
    if isinstance(node, nodes.TryExcept) and isinstance(parent, nodes.TryFinally):
        return
    # `if cond: stmt` with no else clause may be allowed by configuration.
    if (isinstance(parent, nodes.If) and not parent.orelse
            and self.config.single_line_if_stmt):
        return
    self.add_message('multiple-statements', node=node)
    self._visited_lines[line] = 2
def check_lines(self, lines, i):
    """Check that physical lines stay under the configured maximum length.

    Also reports a missing final newline and trailing whitespace, starting
    from line number *i*.
    """
    max_chars = self.config.max_line_length
    ignore_long_line = self.config.ignore_long_lines
    allow_empty = _EMPTY_LINE in self.config.no_space_check

    for offset, line in enumerate(lines.splitlines(True)):
        lineno = i + offset
        if not line.endswith('\n'):
            self.add_message('missing-final-newline', line=lineno)
        else:
            stripped = line.rstrip()
            if not stripped and allow_empty:
                # Blank lines may keep their whitespace.
                pass
            elif line[len(stripped):] not in ('\n', '\r\n'):
                self.add_message('trailing-whitespace', line=lineno)
            # Don't count excess whitespace in the line length.
            line = stripped
        match = OPTION_RGX.search(line)
        if match and match.group(1).split('=', 1)[0].strip() == 'disable':
            # Long `# pylint: disable=...` pragmas: measure only the code part.
            line = line.split('#')[0].rstrip()

        if len(line) > max_chars and not ignore_long_line.search(line):
            self.add_message('line-too-long', line=lineno, args=(len(line), max_chars))
def check_indent_level(self, string, expected, line_num):
    """Check that *string* is indented *expected* levels; report deviations.

    Returns the measured level only when mixed indentation cut the scan
    short; otherwise returns None after possibly emitting 'bad-indentation'.
    """
    unit = self.config.indent_string
    # '\t' arrives escaped from the configuration file.
    if unit == '\\t':
        unit = '\t'
    unit_size = len(unit)

    # Count whole indentation units at the start of the line.
    level = 0
    remainder = string
    while remainder.startswith(unit):
        remainder = remainder[unit_size:]
        level += 1

    # Gather leftover whitespace of the same kind; bail out on a mix.
    extra = ''
    while remainder and remainder[0] in ' \t':
        char = remainder[0]
        if char != unit[0]:
            found = ('tab', 'space') if char == '\t' else ('space', 'tab')
            self.add_message('mixed-indentation', args=found, line=line_num)
            return level
        extra += char
        remainder = remainder[1:]

    if level != expected or extra:
        unit_name = 'tabs' if unit[0] == '\t' else 'spaces'
        self.add_message('bad-indentation', line=line_num,
                         args=(level * unit_size + len(extra), unit_name,
                               expected * unit_size))
def register(linter):
    """Required entry point: auto-register FormatChecker with *linter*."""
    checker = FormatChecker(linter)
    linter.register_checker(checker)