Revision 798

View differences:

org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/docutils/statemachine.py
1
 # $Id: statemachine.py 7464 2012-06-25 13:16:03Z milde $
2
# Author: David Goodger <goodger@python.org>
3
# Copyright: This module has been placed in the public domain.
4

  
5
"""
6
A finite state machine specialized for regular-expression-based text filters;
7
this module defines the following classes:
8

  
9
- `StateMachine`, a state machine
10
- `State`, a state superclass
11
- `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12
- `StateWS`, a state superclass for use with `StateMachineWS`
13
- `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14
- `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15
- `ViewList`, extends standard Python lists.
16
- `StringList`, string-specific ViewList.
17

  
18
Exception classes:
19

  
20
- `StateMachineError`
21
- `UnknownStateError`
22
- `DuplicateStateError`
23
- `UnknownTransitionError`
24
- `DuplicateTransitionError`
25
- `TransitionPatternNotFound`
26
- `TransitionMethodNotFound`
27
- `UnexpectedIndentationError`
28
- `TransitionCorrection`: Raised to switch to another transition.
29
- `StateCorrection`: Raised to switch to another state & transition.
30

  
31
Functions:
32

  
33
- `string2lines()`: split a multi-line string into a list of one-line strings
34

  
35

  
36
How To Use This Module
37
======================
38
(See the individual classes, methods, and attributes for details.)
39

  
40
1. Import it: ``import statemachine`` or ``from statemachine import ...``.
41
   You will also need to ``import re``.
42

  
43
2. Derive a subclass of `State` (or `StateWS`) for each state in your state
44
   machine::
45

  
46
       class MyState(statemachine.State):
47

  
48
   Within the state's class definition:
49

  
50
   a) Include a pattern for each transition, in `State.patterns`::
51

  
52
          patterns = {'atransition': r'pattern', ...}
53

  
54
   b) Include a list of initial transitions to be set up automatically, in
55
      `State.initial_transitions`::
56

  
57
          initial_transitions = ['atransition', ...]
58

  
59
   c) Define a method for each transition, with the same name as the
60
      transition pattern::
61

  
62
          def atransition(self, match, context, next_state):
63
              # do something
64
              result = [...]  # a list
65
              return context, next_state, result
66
              # context, next_state may be altered
67

  
68
      Transition methods may raise an `EOFError` to cut processing short.
69

  
70
   d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71
      transition methods, which handle the beginning- and end-of-file.
72

  
73
   e) In order to handle nested processing, you may wish to override the
74
      attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
75

  
76
      If you are using `StateWS` as a base class, in order to handle nested
77
      indented blocks, you may wish to:
78

  
79
      - override the attributes `StateWS.indent_sm`,
80
        `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81
        `StateWS.known_indent_sm_kwargs`;
82
      - override the `StateWS.blank()` method; and/or
83
      - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84
        and/or `StateWS.firstknown_indent()` methods.
85

  
86
3. Create a state machine object::
87

  
88
       sm = StateMachine(state_classes=[MyState, ...],
89
                         initial_state='MyState')
90

  
91
4. Obtain the input text, which needs to be converted into a tab-free list of
92
   one-line strings. For example, to read text from a file called
93
   'inputfile'::
94

  
95
       input_string = open('inputfile').read()
96
       input_lines = statemachine.string2lines(input_string)
97

  
98
5. Run the state machine on the input text and collect the results, a list::
99

  
100
       results = sm.run(input_lines)
101

  
102
6. Remove any lingering circular references::
103

  
104
       sm.unlink()
105
"""
106

  
107
__docformat__ = 'restructuredtext'
108

  
109
import sys
110
import re
111
import types
112
import unicodedata
113
from docutils import utils
114
from docutils.utils.error_reporting import ErrorOutput
115

  
116

  
117
class StateMachine:

    """
    A finite state machine for text filters using regular expressions.

    The input is provided in the form of a list of one-line strings (no
    newlines). States are subclasses of the `State` class. Transitions consist
    of regular expression patterns and transition methods, and are defined in
    each state.

    The state machine is started with the `run()` method, which returns the
    results of processing in a list.
    """

    def __init__(self, state_classes, initial_state, debug=False):
        """
        Initialize a `StateMachine` object; add state objects.

        Parameters:

        - `state_classes`: a list of `State` (sub)classes.
        - `initial_state`: a string, the class name of the initial state.
        - `debug`: a boolean; produce verbose output if true (nonzero).
        """

        self.input_lines = None
        """`StringList` of input lines (without newlines).
        Filled by `self.run()`."""

        self.input_offset = 0
        """Offset of `self.input_lines` from the beginning of the file."""

        self.line = None
        """Current input line."""

        self.line_offset = -1
        """Current input line offset from beginning of `self.input_lines`."""

        self.debug = debug
        """Debugging mode on/off."""

        self.initial_state = initial_state
        """The name of the initial state (key to `self.states`)."""

        self.current_state = initial_state
        """The name of the current state (key to `self.states`)."""

        self.states = {}
        """Mapping of {state_name: State_object}."""

        self.add_states(state_classes)

        self.observers = []
        """List of bound methods or functions to call whenever the current
        line changes.  Observers are called with two arguments, the source
        and offset of the current line (see `notify_observers()`).
        Cleared at the end of `run()`."""

        self._stderr = ErrorOutput()
        """Wrapper around sys.stderr catching en-/decoding errors"""

    def unlink(self):
        """
        Remove circular references to objects no longer required.

        Calling this method more than once is harmless.
        """
        if self.states is None:
            # Already unlinked; nothing left to release.
            return
        for state in self.states.values():
            state.unlink()
        self.states = None

    def run(self, input_lines, input_offset=0, context=None,
            input_source=None, initial_state=None):
        """
        Run the state machine on `input_lines`. Return results (a list).

        Reset `self.line_offset` and `self.current_state`. Run the
        beginning-of-file transition. Input one line at a time and check for a
        matching transition. If a match is found, call the transition method
        and possibly change the state. Store the context returned by the
        transition method to be passed on to the next transition matched.
        Accumulate the results returned by the transition methods in a list.
        Run the end-of-file transition. Finally, return the accumulated
        results.

        Parameters:

        - `input_lines`: a list of strings without newlines, or `StringList`.
        - `input_offset`: the line offset of `input_lines` from the beginning
          of the file.
        - `context`: application-specific storage.
        - `input_source`: name or path of source of `input_lines`.
        - `initial_state`: name of initial state.

        Any exception raised while processing is re-raised; in debug mode
        its details are reported through `error()` first.
        """
        self.runtime_init()
        if isinstance(input_lines, StringList):
            self.input_lines = input_lines
        else:
            self.input_lines = StringList(input_lines, source=input_source)
        self.input_offset = input_offset
        self.line_offset = -1
        self.current_state = initial_state or self.initial_state
        if self.debug:
            print >>self._stderr, (
                u'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
                % (self.line_offset, u'\n| '.join(self.input_lines)))
        transitions = None
        results = []
        state = self.get_state()
        try:
            if self.debug:
                print >>self._stderr, '\nStateMachine.run: bof transition'
            context, result = state.bof(context)
            results.extend(result)
            while True:
                try:
                    try:
                        self.next_line()
                        if self.debug:
                            source, offset = self.input_lines.info(
                                self.line_offset)
                            print >>self._stderr, (
                                u'\nStateMachine.run: line (source=%r, '
                                u'offset=%r):\n| %s'
                                % (source, offset, self.line))
                        context, next_state, result = self.check_line(
                            context, state, transitions)
                    except EOFError:
                        # Raised by `next_line()` at the end of the input
                        # or by a transition method to cut processing short.
                        if self.debug:
                            print >>self._stderr, (
                                '\nStateMachine.run: %s.eof transition'
                                % state.__class__.__name__)
                        result = state.eof(context)
                        results.extend(result)
                        break
                    else:
                        results.extend(result)
                except TransitionCorrection as exception:
                    self.previous_line() # back up for another try
                    transitions = (exception.args[0],)
                    if self.debug:
                        print >>self._stderr, (
                              '\nStateMachine.run: TransitionCorrection to '
                              'state "%s", transition %s.'
                              % (state.__class__.__name__, transitions[0]))
                    continue
                except StateCorrection as exception:
                    self.previous_line() # back up for another try
                    next_state = exception.args[0]
                    if len(exception.args) == 1:
                        transitions = None
                    else:
                        transitions = (exception.args[1],)
                    if self.debug:
                        # `transitions` is None when the correction names
                        # only a state, so it must not be indexed blindly.
                        if transitions is None:
                            transition_name = None
                        else:
                            transition_name = transitions[0]
                        print >>self._stderr, (
                              '\nStateMachine.run: StateCorrection to state '
                              '"%s", transition %s.'
                              % (next_state, transition_name))
                else:
                    transitions = None
                state = self.get_state(next_state)
        except:
            # Deliberately broad: report (debug mode only), then re-raise.
            if self.debug:
                self.error()
            raise
        self.observers = []
        return results

    def get_state(self, next_state=None):
        """
        Return current state object; set it first if `next_state` given.

        Parameter `next_state`: a string, the name of the next state.

        Exception: `UnknownStateError` raised if `next_state` unknown.
        """
        if next_state:
            if self.debug and next_state != self.current_state:
                print >>self._stderr, (
                    '\nStateMachine.get_state: Changing state from '
                    '"%s" to "%s" (input line %s).'
                    % (self.current_state, next_state,
                       self.abs_line_number()))
            self.current_state = next_state
        try:
            return self.states[self.current_state]
        except KeyError:
            raise UnknownStateError(self.current_state)

    def next_line(self, n=1):
        """
        Load `self.line` with the `n`'th next line and return it.

        Raise `EOFError` (after setting `self.line` to None) if that line
        does not exist.  Observers are notified in either case.
        """
        try:
            try:
                self.line_offset += n
                self.line = self.input_lines[self.line_offset]
            except IndexError:
                self.line = None
                raise EOFError
            return self.line
        finally:
            self.notify_observers()

    def is_next_line_blank(self):
        """Return true if the next line is blank or non-existent."""
        try:
            return not self.input_lines[self.line_offset + 1].strip()
        except IndexError:
            return True

    def at_eof(self):
        """Return true if the input is at or past end-of-file."""
        return self.line_offset >= len(self.input_lines) - 1

    def at_bof(self):
        """Return true if the input is at or before beginning-of-file."""
        return self.line_offset <= 0

    def previous_line(self, n=1):
        """
        Load `self.line` with the `n`'th previous line and return it.

        `self.line` is set to None if this moves before the first line.
        """
        self.line_offset -= n
        if self.line_offset < 0:
            self.line = None
        else:
            self.line = self.input_lines[self.line_offset]
        self.notify_observers()
        return self.line

    def goto_line(self, line_offset):
        """
        Jump to absolute line offset `line_offset`, load and return it.

        Raise `EOFError` (after setting `self.line` to None) if that line
        does not exist.  Observers are notified in either case.
        """
        try:
            try:
                self.line_offset = line_offset - self.input_offset
                self.line = self.input_lines[self.line_offset]
            except IndexError:
                self.line = None
                raise EOFError
            return self.line
        finally:
            self.notify_observers()

    def get_source(self, line_offset):
        """Return source of line at absolute line offset `line_offset`."""
        return self.input_lines.source(line_offset - self.input_offset)

    def abs_line_offset(self):
        """Return line offset of current line, from beginning of file."""
        return self.line_offset + self.input_offset

    def abs_line_number(self):
        """Return line number of current line (counting from 1)."""
        return self.line_offset + self.input_offset + 1

    def get_source_and_line(self, lineno=None):
        """Return (source, line) tuple for current or given line number.

        Looks up the source and line number in the `self.input_lines`
        StringList instance to count for included source files.

        If the optional argument `lineno` is given, convert it from an
        absolute line number to the corresponding (source, line) pair.

        Return ``(None, None)`` if the line cannot be found.
        """
        if lineno is None:
            offset = self.line_offset
        else:
            offset = lineno - self.input_offset - 1
        try:
            src, srcoffset = self.input_lines.info(offset)
            srcline = srcoffset + 1
        except (TypeError):
            # line is None if index is "Just past the end":
            # look up the preceding line instead and add one.
            src, srcline = self.get_source_and_line(offset + self.input_offset)
            return src, srcline + 1
        except (IndexError): # `offset` is off the list
            src, srcline = None, None
        return (src, srcline)

    def insert_input(self, input_lines, source):
        """
        Insert `input_lines` directly after the current line.

        The lines are wrapped in a `StringList` tagged with `source` and
        are enclosed by one empty "internal padding" line before and one
        after.
        """
        # Both padding lines go in at the same index, so the "after" line
        # (inserted first) ends up behind the "before" line.
        self.input_lines.insert(self.line_offset + 1, '',
                                source='internal padding after '+source,
                                offset=len(input_lines))
        self.input_lines.insert(self.line_offset + 1, '',
                                source='internal padding before '+source,
                                offset=-1)
        # The new lines are placed between the two padding lines.
        self.input_lines.insert(self.line_offset + 2,
                                StringList(input_lines, source))

    def get_text_block(self, flush_left=False):
        """
        Return a contiguous block of text.

        If `flush_left` is true, raise `UnexpectedIndentationError` if an
        indented line is encountered before the text block ends (with a blank
        line).  The current line is advanced to the last line of the block
        in both cases.
        """
        try:
            block = self.input_lines.get_text_block(self.line_offset,
                                                    flush_left)
            self.next_line(len(block) - 1)
            return block
        except UnexpectedIndentationError as err:
            block = err.args[0]
            self.next_line(len(block) - 1) # advance to last line of block
            raise

    def check_line(self, context, state, transitions=None):
        """
        Examine one line of input for a transition match & execute its method.

        Parameters:

        - `context`: application-dependent storage.
        - `state`: a `State` object, the current state.
        - `transitions`: an optional ordered list of transition names to try,
          instead of ``state.transition_order``.

        Return the values returned by the transition method:

        - context: possibly modified from the parameter `context`;
        - next state name (`State` subclass name);
        - the result output of the transition, a list.

        When there is no match, ``state.no_match()`` is called and its return
        value is returned.
        """
        if transitions is None:
            transitions = state.transition_order
        if self.debug:
            print >>self._stderr, (
                  '\nStateMachine.check_line: state="%s", transitions=%r.'
                  % (state.__class__.__name__, transitions))
        for name in transitions:
            pattern, method, next_state = state.transitions[name]
            match = pattern.match(self.line)
            if match:
                if self.debug:
                    print >>self._stderr, (
                          '\nStateMachine.check_line: Matched transition '
                          '"%s" in state "%s".'
                          % (name, state.__class__.__name__))
                return method(match, context, next_state)
        # Only reached when none of the transitions matched.
        if self.debug:
            print >>self._stderr, (
                  '\nStateMachine.check_line: No match in state "%s".'
                  % state.__class__.__name__)
        return state.no_match(context, transitions)

    def add_state(self, state_class):
        """
        Initialize & add a `state_class` (`State` subclass) object.

        Exception: `DuplicateStateError` raised if `state_class` was already
        added.
        """
        statename = state_class.__name__
        if statename in self.states:
            raise DuplicateStateError(statename)
        self.states[statename] = state_class(self, self.debug)

    def add_states(self, state_classes):
        """
        Add `state_classes` (a list of `State` subclasses).
        """
        for state_class in state_classes:
            self.add_state(state_class)

    def runtime_init(self):
        """
        Initialize `self.states`.
        """
        for state in self.states.values():
            state.runtime_init()

    def error(self):
        """Report error details."""
        exc_type, value, module, line, function = _exception_data()
        print >>self._stderr, u'%s: %s' % (exc_type, value)
        print >>self._stderr, 'input line %s' % (self.abs_line_number())
        print >>self._stderr, (u'module %s, line %s, function %s' %
                               (module, line, function))

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes two
        arguments, the source and offset of the current line.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer` from the list of observers."""
        self.observers.remove(observer)

    def notify_observers(self):
        """
        Call every observer with the source and offset of the current line.

        Observers receive ``(None, None)`` if the lookup of the current
        line offset raises `IndexError`.
        """
        for observer in self.observers:
            try:
                info = self.input_lines.info(self.line_offset)
            except IndexError:
                info = (None, None)
            observer(*info)
519

  
520

  
521
class State:

    """
    State superclass. Contains a list of transitions, and transition methods.

    Transition methods all have the same signature. They take 3 parameters:

    - An `re` match object. ``match.string`` contains the matched input line,
      ``match.start()`` gives the start index of the match, and
      ``match.end()`` gives the end index.
    - A context object, whose meaning is application-defined (initial value
      ``None``). It can be used to store any information required by the state
      machine, and the returned context is passed on to the next transition
      method unchanged.
    - The name of the next state, a string, taken from the transitions list;
      normally it is returned unchanged, but it may be altered by the
      transition method if necessary.

    Transition methods all return a 3-tuple:

    - A context object, as (potentially) modified by the transition method.
    - The next state name (a return value of ``None`` means no state change).
    - The processing result, a list, which is accumulated by the state
      machine.

    Transition methods may raise an `EOFError` to cut processing short.

    There are two implicit transitions, and corresponding transition methods
    are defined: `bof()` handles the beginning-of-file, and `eof()` handles
    the end-of-file. These methods have non-standard signatures and return
    values. `bof()` returns the initial context and results, and may be used
    to return a header string, or do any other processing needed. `eof()`
    should handle any remaining context and wrap things up; it returns the
    final processing result.

    Typical applications need only subclass `State` (or a subclass), set the
    `patterns` and `initial_transitions` class attributes, and provide
    corresponding transition methods. The default object initialization will
    take care of constructing the list of transitions.
    """

    patterns = None
    """
    {Name: pattern} mapping, used by `make_transition()`. Each pattern may
    be a string or a compiled `re` pattern. Override in subclasses.
    """

    initial_transitions = None
    """
    A list of transitions to initialize when a `State` is instantiated.
    Each entry is either a transition name string, or a (transition name, next
    state name) pair. See `make_transitions()`. Override in subclasses.
    """

    nested_sm = None
    """
    The `StateMachine` class for handling nested processing.

    If left as ``None``, `nested_sm` defaults to the class of the state's
    controlling state machine. Override it in subclasses to avoid the default.
    """

    nested_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `nested_sm` constructor.

    Two keys must have entries in the dictionary:

    - Key 'state_classes' must be set to a list of `State` classes.
    - Key 'initial_state' must be set to the name of the initial state class.

    If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
    class of the current state, and 'initial_state' defaults to the name of
    the class of the current state. Override in subclasses to avoid the
    defaults.
    """

    def __init__(self, state_machine, debug=False):
        """
        Initialize a `State` object; make & add initial transitions.

        Parameters:

        - `state_machine`: the controlling `StateMachine` object.
        - `debug`: a boolean; produce verbose output if true.
        """

        self.transition_order = []
        """A list of transition names in search order."""

        self.transitions = {}
        """
        A mapping of transition names to 3-tuples containing
        (compiled_pattern, transition_method, next_state_name). Initialized as
        an instance attribute dynamically (instead of as a class attribute)
        because it may make forward references to patterns and methods in this
        or other classes.
        """

        self.add_initial_transitions()

        self.state_machine = state_machine
        """A reference to the controlling `StateMachine` object."""

        self.debug = debug
        """Debugging mode on/off."""

        if self.nested_sm is None:
            self.nested_sm = self.state_machine.__class__
        if self.nested_sm_kwargs is None:
            self.nested_sm_kwargs = {'state_classes': [self.__class__],
                                     'initial_state': self.__class__.__name__}

    def runtime_init(self):
        """
        Initialize this `State` before running the state machine; called from
        `self.state_machine.run()`.
        """
        pass

    def unlink(self):
        """Remove circular references to objects no longer required."""
        self.state_machine = None

    def add_initial_transitions(self):
        """Make and add transitions listed in `self.initial_transitions`."""
        if self.initial_transitions:
            names, transitions = self.make_transitions(
                  self.initial_transitions)
            self.add_transitions(names, transitions)

    def add_transitions(self, names, transitions):
        """
        Add a list of transitions to the start of the transition list.

        Parameters:

        - `names`: a list of transition names.
        - `transitions`: a mapping of names to transition tuples.

        Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
        """
        # Validate every name first so that nothing is added on failure.
        for name in names:
            if name in self.transitions:
                raise DuplicateTransitionError(name)
            if name not in transitions:
                raise UnknownTransitionError(name)
        self.transition_order[:0] = names
        self.transitions.update(transitions)

    def add_transition(self, name, transition):
        """
        Add a transition to the start of the transition list.

        Parameter `transition`: a ready-made transition 3-tuple.

        Exception: `DuplicateTransitionError`.
        """
        if name in self.transitions:
            raise DuplicateTransitionError(name)
        self.transition_order[:0] = [name]
        self.transitions[name] = transition

    def remove_transition(self, name):
        """
        Remove a transition by `name`.

        Exception: `UnknownTransitionError`.
        """
        try:
            del self.transitions[name]
            self.transition_order.remove(name)
        except (KeyError, ValueError):
            # KeyError: not in the mapping; ValueError: not in the order list.
            raise UnknownTransitionError(name)

    def make_transition(self, name, next_state=None):
        """
        Make & return a transition tuple based on `name`.

        This is a convenience function to simplify transition creation.

        Parameters:

        - `name`: a string, the name of the transition pattern & method. This
          `State` object must have a method called '`name`', and a dictionary
          `self.patterns` containing a key '`name`'.
        - `next_state`: a string, the name of the next `State` object for this
          transition. A value of ``None`` (or absent) implies no state change
          (i.e., continue with the same state).

        Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
        """
        if next_state is None:
            next_state = self.__class__.__name__
        try:
            pattern = self.patterns[name]
            # Anything without a `match` attribute is taken to be an
            # uncompiled pattern.
            if not hasattr(pattern, 'match'):
                pattern = re.compile(pattern)
        except KeyError:
            raise TransitionPatternNotFound(
                  '%s.patterns[%r]' % (self.__class__.__name__, name))
        try:
            method = getattr(self, name)
        except AttributeError:
            raise TransitionMethodNotFound(
                  '%s.%s' % (self.__class__.__name__, name))
        return (pattern, method, next_state)

    def make_transitions(self, name_list):
        """
        Return a list of transition names and a transition mapping.

        Parameter `name_list`: a list, where each entry is either a transition
        name string, or a 1- or 2-tuple (transition name, optional next state
        name).
        """
        # Byte strings, unicode strings and their subclasses are all names;
        # everything else is unpacked as (name[, next state]).
        string_types = (str, type(u''))
        names = []
        transitions = {}
        for namestate in name_list:
            if isinstance(namestate, string_types):
                transitions[namestate] = self.make_transition(namestate)
                names.append(namestate)
            else:
                transitions[namestate[0]] = self.make_transition(*namestate)
                names.append(namestate[0])
        return names, transitions

    def no_match(self, context, transitions):
        """
        Called when there is no match from `StateMachine.check_line()`.

        Return the same values returned by transition methods:

        - context: unchanged;
        - next state name: ``None``;
        - empty result list.

        Override in subclasses to catch this event.
        """
        return context, None, []

    def bof(self, context):
        """
        Handle beginning-of-file. Return unchanged `context`, empty result.

        Override in subclasses.

        Parameter `context`: application-defined storage.
        """
        return context, []

    def eof(self, context):
        """
        Handle end-of-file. Return empty result.

        Override in subclasses.

        Parameter `context`: application-defined storage.
        """
        return []

    def nop(self, match, context, next_state):
        """
        A "do nothing" transition method.

        Return unchanged `context` & `next_state`, empty result. Useful for
        simple state changes (actionless transitions).
        """
        return context, next_state, []
791

  
792

  
793
class StateMachineWS(StateMachine):

    """
    `StateMachine` subclass specialized for whitespace recognition.

    There are three methods provided for extracting indented text blocks:

    - `get_indented()`: use when the indent is unknown.
    - `get_known_indented()`: use when the indent is known for all lines.
    - `get_first_known_indented()`: use when only the first line's indent is
      known.

    All three delegate the actual extraction to
    ``self.input_lines.get_indented()``, move the machine to the last line
    of the extracted block, and (optionally, for the last one) drop leading
    blank lines from the block while adjusting the reported offset.
    """

    def get_indented(self, until_blank=False, strip_indent=True):
        """
        Return a block of indented lines of text, and info.

        Extract an indented block where the indent is unknown for all lines.

        :Parameters:
            - `until_blank`: Stop collecting at the first blank line if true.
            - `strip_indent`: Strip common leading indent if true (default).

        :Return:
            - the indented block (a list of lines of text),
            - its indent,
            - its first line offset from BOF, and
            - whether or not it finished with a blank line.
        """
        offset = self.abs_line_offset()
        indented, indent, blank_finish = self.input_lines.get_indented(
              self.line_offset, until_blank, strip_indent)
        if indented:
            self.next_line(len(indented) - 1) # advance to last indented line
        # Leading blank lines are not part of the block proper; each one
        # dropped moves the block's first line one further from BOF.
        while indented and not indented[0].strip():
            indented.trim_start()
            offset += 1
        return indented, indent, offset, blank_finish

    def get_known_indented(self, indent, until_blank=False, strip_indent=True):
        """
        Return an indented block and info.

        Extract an indented block where the indent is known for all lines.
        Starting with the current line, extract the entire text block with at
        least `indent` indentation (which must be whitespace, except for the
        first line).

        :Parameters:
            - `indent`: The number of indent columns/characters.
            - `until_blank`: Stop collecting at the first blank line if true.
            - `strip_indent`: Strip `indent` characters of indentation if true
              (default).

        :Return:
            - the indented block,
            - its first line offset from BOF, and
            - whether or not it finished with a blank line.
        """
        offset = self.abs_line_offset()
        # The indent reported back is not part of this method's return value.
        indented, unused_indent, blank_finish = self.input_lines.get_indented(
              self.line_offset, until_blank, strip_indent,
              block_indent=indent)
        # Same guard as in `get_indented()`: with an empty block,
        # ``len(indented) - 1`` is -1 and must not be passed to `next_line()`.
        if indented:
            self.next_line(len(indented) - 1) # advance to last indented line
        while indented and not indented[0].strip():
            indented.trim_start()
            offset += 1
        return indented, offset, blank_finish

    def get_first_known_indented(self, indent, until_blank=False,
                                 strip_indent=True, strip_top=True):
        """
        Return an indented block and info.

        Extract an indented block where the indent is known for the first line
        and unknown for all other lines.

        :Parameters:
            - `indent`: The first line's indent (# of columns/characters).
            - `until_blank`: Stop collecting at the first blank line if true.
            - `strip_indent`: Strip `indent` characters of indentation if true
              (default).
            - `strip_top`: Strip blank lines from the beginning of the block
              if true (default).

        :Return:
            - the indented block,
            - its indent (as reported by ``input_lines.get_indented()``),
            - its first line offset from BOF, and
            - whether or not it finished with a blank line.
        """
        offset = self.abs_line_offset()
        indented, indent, blank_finish = self.input_lines.get_indented(
              self.line_offset, until_blank, strip_indent,
              first_indent=indent)
        # Same guard as in `get_indented()`: with an empty block,
        # ``len(indented) - 1`` is -1 and must not be passed to `next_line()`.
        if indented:
            self.next_line(len(indented) - 1) # advance to last indented line
        if strip_top:
            while indented and not indented[0].strip():
                indented.trim_start()
                offset += 1
        return indented, indent, offset, blank_finish
894

  
895

  
896
class StateWS(State):

    """
    State superclass specialized for whitespace (blank lines & indents).

    Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
    lines) and 'indent' (for indented text blocks) are added automatically,
    before any other transitions.  The transition method `blank()` handles
    blank lines and `indent()` handles nested indented blocks.  Indented
    blocks trigger a new state machine to be created by `indent()` and run.
    The class of the state machine to be created is in `indent_sm`, and the
    constructor keyword arguments are in the dictionary `indent_sm_kwargs`.

    The methods `known_indent()` and `first_known_indent()` are provided for
    indented blocks where the indent (all lines' and first line's only,
    respectively) is known to the transition method, along with the attributes
    `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
    is triggered automatically.
    """

    indent_sm = None
    """
    The `StateMachine` class handling indented text blocks.

    If left as ``None``, `indent_sm` defaults to the value of
    `State.nested_sm`.  Override it in subclasses to avoid the default.
    """

    indent_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `indent_sm` constructor.

    If left as ``None``, `indent_sm_kwargs` defaults to the value of
    `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
    """

    known_indent_sm = None
    """
    The `StateMachine` class handling known-indented text blocks.

    If left as ``None``, `known_indent_sm` defaults to the value of
    `indent_sm`.  Override it in subclasses to avoid the default.
    """

    known_indent_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `known_indent_sm` constructor.

    If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
    `indent_sm_kwargs`. Override it in subclasses to avoid the default.
    """

    ws_patterns = {'blank': ' *$',
                   'indent': ' +'}
    """Patterns for default whitespace transitions.  May be overridden in
    subclasses."""

    ws_initial_transitions = ('blank', 'indent')
    """Default initial whitespace transitions, added before those listed in
    `State.initial_transitions`.  May be overridden in subclasses."""

    def __init__(self, state_machine, debug=False):
        """
        Initialize a `StateWS` object; extends `State.__init__()`.

        Check for indent state machine attributes, set defaults if not set.
        The defaults cascade: `indent_sm*` falls back to `nested_sm*`, and
        `known_indent_sm*` falls back to the (possibly just defaulted)
        `indent_sm*`, so the order of the checks below matters.
        """
        State.__init__(self, state_machine, debug)
        if self.indent_sm is None:
            self.indent_sm = self.nested_sm
        if self.indent_sm_kwargs is None:
            self.indent_sm_kwargs = self.nested_sm_kwargs
        if self.known_indent_sm is None:
            self.known_indent_sm = self.indent_sm
        if self.known_indent_sm_kwargs is None:
            self.known_indent_sm_kwargs = self.indent_sm_kwargs

    def add_initial_transitions(self):
        """
        Add whitespace-specific transitions before those defined in subclass.

        Extends `State.add_initial_transitions()`.
        """
        State.add_initial_transitions(self)
        if self.patterns is None:
            self.patterns = {}
        # Make the whitespace patterns available under their transition
        # names so that `make_transitions()` can be given the names below.
        self.patterns.update(self.ws_patterns)
        names, transitions = self.make_transitions(
            self.ws_initial_transitions)
        self.add_transitions(names, transitions)

    def blank(self, match, context, next_state):
        """Handle blank lines. Does nothing. Override in subclasses."""
        return self.nop(match, context, next_state)

    def indent(self, match, context, next_state):
        """
        Handle an indented text block. Extend or override in subclasses.

        Recursively run the registered state machine for indented blocks
        (`self.indent_sm`).

        Return `context` and `next_state` unchanged, plus the results of the
        nested state machine run.
        """
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_indented()
        sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
        results = sm.run(indented, input_offset=line_offset)
        return context, next_state, results

    def known_indent(self, match, context, next_state):
        """
        Handle a known-indent text block. Extend or override in subclasses.

        Recursively run the registered state machine for known-indent indented
        blocks (`self.known_indent_sm`). The indent is the length of the
        match, ``match.end()``.

        Return `context` and `next_state` unchanged, plus the results of the
        nested state machine run.
        """
        indented, line_offset, blank_finish = \
              self.state_machine.get_known_indented(match.end())
        sm = self.known_indent_sm(debug=self.debug,
                                 **self.known_indent_sm_kwargs)
        results = sm.run(indented, input_offset=line_offset)
        return context, next_state, results

    def first_known_indent(self, match, context, next_state):
        """
        Handle an indented text block (first line's indent known).

        Extend or override in subclasses.

        Recursively run the registered state machine for known-indent indented
        blocks (`self.known_indent_sm`). The indent is the length of the
        match, ``match.end()``.

        Return `context` and `next_state` unchanged, plus the results of the
        nested state machine run.
        """
        # `StateMachineWS.get_first_known_indented()` returns FOUR values
        # (block, indent, offset, blank_finish); unpacking only three would
        # raise ValueError on every call.
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        sm = self.known_indent_sm(debug=self.debug,
                                 **self.known_indent_sm_kwargs)
        results = sm.run(indented, input_offset=line_offset)
        return context, next_state, results
1035

  
1036

  
1037
class _SearchOverride:
1038

  
1039
    """
1040
    Mix-in class to override `StateMachine` regular expression behavior.
1041

  
1042
    Changes regular expression matching, from the default `re.match()`
1043
    (succeeds only if the pattern matches at the start of `self.line`) to
1044
    `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1045
    When subclassing a `StateMachine`, list this class **first** in the
1046
    inheritance list of the class definition.
1047
    """
1048

  
1049
    def match(self, pattern):
1050
        """
1051
        Return the result of a regular expression search.
1052

  
1053
        Overrides `StateMachine.match()`.
1054

  
1055
        Parameter `pattern`: `re` compiled regular expression.
1056
        """
1057
        return pattern.search(self.line)
1058

  
1059

  
1060
class SearchStateMachine(_SearchOverride, StateMachine):

    """
    `StateMachine` which uses `re.search()` instead of `re.match()`.

    Adds nothing of its own: the search behavior comes from listing
    `_SearchOverride` first among the base classes.
    """
1063

  
1064

  
1065
class SearchStateMachineWS(_SearchOverride, StateMachineWS):

    """
    `StateMachineWS` which uses `re.search()` instead of `re.match()`.

    Adds nothing of its own: the search behavior comes from listing
    `_SearchOverride` first among the base classes.
    """
1068

  
1069

  
1070
class ViewList:

    """
    List with extended functionality: slices of ViewList objects are child
    lists, linked to their parents. Changes made to a child list also affect
    the parent list.  A child list is effectively a "view" (in the SQL sense)
    of the parent list.  Changes to parent lists, however, do *not* affect
    active child lists.  If a parent list is changed, any active child lists
    should be recreated.

    The start and end of the slice can be trimmed using the `trim_start()` and
    `trim_end()` methods, without affecting the parent list.  The link between
    child and parent lists can be broken by calling `disconnect()` on the
    child list.

    Also, ViewList objects keep track of the source & offset of each item.
    This information is accessible via the `source()`, `offset()`, and
    `info()` methods.

    Invariant: `self.data` and `self.items` always have the same length.
    """

    def __init__(self, initlist=None, source=None, items=None,
                 parent=None, parent_offset=None):
        """
        Build a list from `initlist`.

        :Parameters:
            - `initlist`: another `ViewList` (its data and items are copied)
              or any iterable of values.
            - `source`: source recorded for every value when `items` is not
              given; offsets are then numbered from 0.
            - `items`: ready-made list of (source, offset) pairs, one per
              value.
            - `parent`, `parent_offset`: link to the list this one is a
              slice of (set by slicing).
        """
        self.data = []
        """The actual list of data, flattened from various sources."""

        self.items = []
        """A list of (source, offset) pairs, same length as `self.data`: the
        source of each line and the offset of each line from the beginning of
        its source."""

        self.parent = parent
        """The parent list."""

        self.parent_offset = parent_offset
        """Offset of this list from the beginning of the parent list."""

        if isinstance(initlist, ViewList):
            self.data = initlist.data[:]
            self.items = initlist.items[:]
        elif initlist is not None:
            self.data = list(initlist)
            if items:
                self.items = items
            else:
                # Count `self.data`, not `initlist`: the latter may be an
                # iterable without a length.
                self.items = [(source, i) for i in range(len(self.data))]
        assert len(self.data) == len(self.items), 'data mismatch'

    def __str__(self):
        return str(self.data)

    def __repr__(self):
        return '%s(%s, items=%s)' % (self.__class__.__name__,
                                     self.data, self.items)

    # Comparisons look at the data only; source/offset info is ignored.
    def __lt__(self, other): return self.data <  self.__cast(other)
    def __le__(self, other): return self.data <= self.__cast(other)
    def __eq__(self, other): return self.data == self.__cast(other)
    def __ne__(self, other): return self.data != self.__cast(other)
    def __gt__(self, other): return self.data >  self.__cast(other)
    def __ge__(self, other): return self.data >= self.__cast(other)
    def __cmp__(self, other): return cmp(self.data, self.__cast(other))

    def __cast(self, other):
        """Return the plain data of `other` if it is a ViewList."""
        if isinstance(other, ViewList):
            return other.data
        else:
            return other

    def __contains__(self, item): return item in self.data
    def __len__(self): return len(self.data)

    def _slice_bounds(self, i):
        """
        Return the (start, stop) pair of slice `i`, resolved against the
        current length: ``None`` and negative or out-of-range bounds are
        turned into plain indices in ``0..len(self)``.
        """
        start, stop, _ = i.indices(len(self.data))
        return start, stop

    # The __getitem__()/__setitem__()/__delitem__() methods check whether
    # the index is a slice first.  Slice bounds are resolved *before* any
    # mutation so that the same range can be applied to the parent list.

    def __getitem__(self, i):
        if isinstance(i, slice):
            assert i.step in (None, 1),  'cannot handle slice with stride'
            start, stop = self._slice_bounds(i)
            return self.__class__(self.data[start:stop],
                                  items=self.items[start:stop],
                                  parent=self, parent_offset=start)
        else:
            return self.data[i]

    def __setitem__(self, i, item):
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            if not isinstance(item, ViewList):
                raise TypeError('assigning non-ViewList to ViewList slice')
            start, stop = self._slice_bounds(i)
            self.data[start:stop] = item.data
            self.items[start:stop] = item.items
            assert len(self.data) == len(self.items), 'data mismatch'
            if self.parent:
                self.parent[start + self.parent_offset
                            : stop + self.parent_offset] = item
        else:
            self.data[i] = item         # raises for an invalid index
            if self.parent:
                if i < 0:               # parent index must count from start
                    i += len(self.data)
                self.parent[i + self.parent_offset] = item

    def __delitem__(self, i):
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            start, stop = self._slice_bounds(i)
            del self.data[start:stop]
            del self.items[start:stop]
            if self.parent:
                del self.parent[start + self.parent_offset
                                : stop + self.parent_offset]
        else:
            # Resolve a negative index while the length is still intact.
            index = i
            if index < 0:
                index += len(self.data)
            del self.data[i]            # raises for an invalid index
            del self.items[i]
            if self.parent:
                del self.parent[index + self.parent_offset]

    def __add__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(self.data + other.data,
                                  items=(self.items + other.items))
        else:
            raise TypeError('adding non-ViewList to a ViewList')

    def __radd__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(other.data + self.data,
                                  items=(other.items + self.items))
        else:
            raise TypeError('adding ViewList to a non-ViewList')

    def __iadd__(self, other):
        """
        In-place concatenation.  Unlike `extend()`, the parent list (if any)
        is not touched.
        """
        if isinstance(other, ViewList):
            self.data += other.data
            # Keep the (source, offset) info in step with the data.
            self.items += other.items
        else:
            raise TypeError('argument to += must be a ViewList')
        return self

    def __mul__(self, n):
        return self.__class__(self.data * n, items=(self.items * n))

    __rmul__ = __mul__

    def __imul__(self, n):
        self.data *= n
        self.items *= n
        return self

    def extend(self, other):
        """Append the contents of ViewList `other`, also to the parent."""
        if not isinstance(other, ViewList):
            raise TypeError('extending a ViewList with a non-ViewList')
        if self.parent:
            self.parent.insert(len(self.data) + self.parent_offset, other)
        self.data.extend(other.data)
        self.items.extend(other.items)

    def append(self, item, source=None, offset=0):
        """
        Append `item`, also to the parent.

        Without `source`, `item` must be a ViewList and is handled by
        `extend()`; otherwise `item` is a single value recorded with the
        given `source` and `offset`.
        """
        if source is None:
            self.extend(item)
        else:
            if self.parent:
                self.parent.insert(len(self.data) + self.parent_offset, item,
                                   source, offset)
            self.data.append(item)
            self.items.append((source, offset))

    def insert(self, i, item, source=None, offset=0):
        """
        Insert `item` before index `i`, also in the parent.

        Without `source`, `item` must be a ViewList whose contents are
        inserted; otherwise `item` is a single value recorded with the given
        `source` and `offset`.
        """
        # Position at which the built-in list will actually insert, worked
        # out before the list grows (negative and out-of-range indices are
        # clamped the way ``list.insert()`` clamps them).
        size = len(self.data)
        index = i
        if index < 0:
            index = max(index + size, 0)
        index = min(index, size)
        if source is None:
            if not isinstance(item, ViewList):
                raise TypeError('inserting non-ViewList with no source given')
            self.data[i:i] = item.data
            self.items[i:i] = item.items
            if self.parent:
                self.parent.insert(index + self.parent_offset, item)
        else:
            self.data.insert(i, item)
            self.items.insert(i, (source, offset))
            if self.parent:
                self.parent.insert(index + self.parent_offset, item,
                                   source, offset)

    def pop(self, i=-1):
        """Remove and return the item at index `i`, also from the parent."""
        index = i
        if index < 0:
            index += len(self.data)
        # Pop locally first: an invalid index raises IndexError here, before
        # the parent has been modified.
        self.items.pop(i)
        value = self.data.pop(i)
        if self.parent:
            self.parent.pop(index + self.parent_offset)
        return value

    def trim_start(self, n=1):
        """
        Remove items from the start of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        del self.data[:n]
        del self.items[:n]
        if self.parent:
            self.parent_offset += n

    def trim_end(self, n=1):
        """
        Remove items from the end of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        if n:
            # Guarded because ``-0`` is ``0``: ``del x[-0:]`` would empty
            # the list instead of removing nothing.
            del self.data[-n:]
            del self.items[-n:]

    def remove(self, item):
        """Remove the first occurrence of `item`."""
        index = self.index(item)
        del self[index]

    def count(self, item): return self.data.count(item)
    def index(self, item): return self.data.index(item)

    def reverse(self):
        """Reverse in place; this breaks the link to the parent list."""
        self.data.reverse()
        self.items.reverse()
        self.parent = None

    def sort(self, *args):
        """
        Sort in place, keeping each value paired with its (source, offset)
        info; this breaks the link to the parent list.  `args` are passed
        on to ``list.sort()``.
        """
        # list(): `zip` does not return a list on every Python version.
        tmp = list(zip(self.data, self.items))
        tmp.sort(*args)
        self.data = [entry[0] for entry in tmp]
        self.items = [entry[1] for entry in tmp]
        self.parent = None

    def info(self, i):
        """Return source & offset for index `i`."""
        try:
            return self.items[i]
        except IndexError:
            if i == len(self.data):     # Just past the end
                return self.items[i - 1][0], None
            else:
                raise

    def source(self, i):
        """Return source for index `i`."""
        return self.info(i)[0]

    def offset(self, i):
        """Return offset for index `i`."""
        return self.info(i)[1]

    def disconnect(self):
        """Break link between this list and parent list."""
        self.parent = None

    def xitems(self):
        """Return iterator yielding (source, offset, value) tuples."""
        for (value, (source, offset)) in zip(self.data, self.items):
            yield (source, offset, value)

    def pprint(self):
        """Print the list in `grep` format (`source:offset:value` lines)"""
        for line in self.xitems():
            # Parenthesized form: valid as statement and as function call.
            print("%s:%d:%s" % line)
1333

  
1334

  
1335
class StringList(ViewList):

    """A `ViewList` with string-specific methods."""

    def trim_left(self, length, start=0, end=sys.maxint):
        """
        Trim `length` characters off the beginning of each item, in-place,
        from index `start` to `end`.  No whitespace-checking is done on the
        trimmed text.  Does not affect slice parent.
        """
        self.data[start:end] = [line[length:]
                                for line in self.data[start:end]]

    def get_text_block(self, start, flush_left=False):
        """
        Return a contiguous block of text.

        The block runs from index `start` up to (not including) the first
        blank line, or to the end of the list.

        If `flush_left` is true, raise `UnexpectedIndentationError` if an
        indented line is encountered before the text block ends (with a blank
        line).  The exception carries the block collected so far, the source
        of the offending line, and its offset plus one.
        """
        end = start
        last = len(self.data)
        while end < last:
            line = self.data[end]
            if not line.strip():
                break
            if flush_left and (line[0] == ' '):
                source, offset = self.info(end)
                raise UnexpectedIndentationError(self[start:end], source,
                                                 offset + 1)
            end += 1
        return self[start:end]

    def get_indented(self, start=0, until_blank=False, strip_indent=True,
                     block_indent=None, first_indent=None):
        """
        Extract and return a StringList of indented lines of text.

        Collect all lines with indentation, determine the minimum indentation,
        remove the minimum indentation from all indented lines (unless
        `strip_indent` is false), and return them. All lines up to but not
        including the first unindented line will be returned.

        :Parameters:
          - `start`: The index of the first line to examine.
          - `until_blank`: Stop collecting at the first blank line if true.
          - `strip_indent`: Strip common leading indent if true (default).
          - `block_indent`: The indent of the entire block, if known.
          - `first_indent`: The indent of the first line, if known.

        :Return:
          - a StringList of indented lines with minimum indent removed;
          - the amount of the indent;
          - a boolean: did the indented block finish with a blank line or EOF?
        """
        indent = block_indent           # start with None if unknown
        end = start
        if block_indent is not None and first_indent is None:
            first_indent = block_indent
        if first_indent is not None:
            # The first line is taken as part of the block without being
            # examined; its indent is given by the caller.
            end += 1
        last = len(self.data)
        while end < last:
            line = self.data[end]
            if line and (line[0] != ' '
                         or (block_indent is not None
                             and line[:block_indent].strip())):
                # Line not indented or insufficiently indented.
                # Block finished properly iff the last indented line blank:
                blank_finish = ((end > start)
                                and not self.data[end - 1].strip())
                break
            stripped = line.lstrip()
            if not stripped:            # blank line
                if until_blank:
                    blank_finish = 1
                    break
            elif block_indent is None:
                # Track the smallest indent seen among non-blank lines.
                line_indent = len(line) - len(stripped)
                if indent is None:
                    indent = line_indent
                else:
                    indent = min(indent, line_indent)
            end += 1
        else:
            blank_finish = 1            # block ends at end of lines
        block = self[start:end]
        if first_indent is not None and block:
            block.data[0] = block.data[0][first_indent:]
        if indent and strip_indent:
            # When the first line was already trimmed above, start at 1
            # (``True``) so it is not trimmed a second time.
            block.trim_left(indent, start=(first_indent is not None))
        return block, indent or 0, blank_finish

    def get_2D_block(self, top, left, bottom, right, strip_indent=True):
        """
        Return the rectangular block of text between rows `top` and
        `bottom` (exclusive) and columns `left` and `right` (exclusive),
        with trailing whitespace removed from every line.

        If `strip_indent` is true (default), indentation common to all
        non-empty lines of the block is removed as well.
        """
        block = self[top:bottom]
        indent = right
        for i in range(len(block.data)):
            text = block.data[i]
            # get slice from line, care for combining characters
            # NOTE(review): `utils.column_indices()` presumably maps column
            # positions to string indices -- confirm against `utils`.
            ci = utils.column_indices(text)
            # The corrected bounds are valid for this line only.  They are
            # kept in per-line variables so that one line's correction does
            # not leak into the following lines (or into the final
            # `indent < right` test).
            excess = len(text) - len(ci)
            try:
                line_left = ci[left]
            except IndexError:
                line_left = left + excess
            try:
                line_right = ci[right]
            except IndexError:
                line_right = right + excess
            block.data[i] = line = text[line_left:line_right].rstrip()
            if line:
                indent = min(indent, len(line) - len(line.lstrip()))
        if strip_indent and 0 < indent < right:
            block.data = [line[indent:] for line in block.data]
        return block

    def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.

        Only `unicode` lines are processed; other lines are left alone.
        """
        if hasattr(unicodedata, 'east_asian_width'):
            east_asian_width = unicodedata.east_asian_width
        else:
            return                      # new in Python 2.4
        for i in range(len(self.data)):
            line = self.data[i]
            if isinstance(line, unicode):
                new = []
                for char in line:
                    new.append(char)
                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
                        new.append(pad_char)
                self.data[i] = ''.join(new)

    def replace(self, old, new):
        """Replace all occurrences of substring `old` with `new`."""
        for i in range(len(self.data)):
            self.data[i] = self.data[i].replace(old, new)
1473

  
1474

  
1475
class StateMachineError(Exception):
    """Common base class of the state machine exceptions defined below."""


class UnknownStateError(StateMachineError):
    pass


class DuplicateStateError(StateMachineError):
    pass


class UnknownTransitionError(StateMachineError):
    pass


class DuplicateTransitionError(StateMachineError):
    pass


class TransitionPatternNotFound(StateMachineError):
    pass


class TransitionMethodNotFound(StateMachineError):
    pass


class UnexpectedIndentationError(StateMachineError):
    pass
1483

  
1484

  
1485
class TransitionCorrection(Exception):

    """
    Exception for switching to another transition.

    Raise it from within a transition method, with a single argument:
    the name of the new transition.
    """
1492

  
1493

  
1494
class StateCorrection(Exception):
1495

  
1496
    """
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff