Statistics
| Revision:

gvsig-3d / 1.10 / trunk / binaries / mac / raster / gdal / GDAL.framework / Versions / 1.7 / Python / site-packages / numpy / core / _mx_datetime_parser.py @ 27

History | View | Annotate | Download (32.5 KB)

1
#-*- coding: latin-1 -*-
2
""" 
3
Date/Time string parsing module.
4

5
This code is a slightly modified version of Parser.py found in mx.DateTime
6
version 3.0.0
7

8
As such, it is subject to the terms of the eGenix public license version 1.1.0.
9

10
FIXME: Add license.txt to NumPy
11
"""
12

    
13
__all__ = ['date_from_string', 'datetime_from_string']
14

    
15
import types
16
import re
17
import datetime as dt
18

    
19
class RangeError(Exception):
    """ Raised by the public parsers when the parsed date/time parts
        cannot be turned into a valid datetime value.
    """


# Debugging switch: set to a true value to produce debugging output
_debug = 0
23

    
24
# REs for matching date and time parts in a string; these REs
# parse a superset of ARPA, ISO, American and European style dates.
# Time zone strings found by these REs are converted to UTC offsets
# by utc_offset() in this module.
#
# All pattern pieces containing backslashes are raw strings so that
# the regex escapes (\d, \., \(, ...) are never interpreted as Python
# string escapes.

# --- Year parts ---
_year = r'(?P<year>-?\d+\d(?!:))'
_fullyear = r'(?P<year>-?\d+\d\d(?!:))'
_year_epoch = '(?:' + _year + r'(?P<epoch> *[ABCDE\.]+)?)'
_fullyear_epoch = '(?:' + _fullyear + r'(?P<epoch> *[ABCDE\.]+)?)'
_relyear = r'(?:\((?P<relyear>[-+]?\d+)\))'

# --- Month parts ---
_month = r'(?P<month>\d?\d(?!:))'
_fullmonth = r'(?P<month>\d\d(?!:))'
# Non-ASCII month abbreviations are spelled with \x escapes (latin-1
# code points) so the source stays pure ASCII: \xe4 = a-umlaut,
# \xfb = u-circumflex, \xe9 = e-acute.
_litmonth = ('(?P<litmonth>'
             'jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|'
             'm\xe4r|mae|mrz|mai|okt|dez|'
             'fev|avr|juin|juil|aou|ao\xfb|d\xe9c|'
             'ene|abr|ago|dic|'
             'out'
             r')[a-z,\.;]*')
# Maps the (lower-cased) month abbreviation matched by _litmonth to
# the month number 1-12.
litmonthtable = {
    # English
    'jan':1, 'feb':2, 'mar':3, 'apr':4, 'may':5, 'jun':6,
    'jul':7, 'aug':8, 'sep':9, 'oct':10, 'nov':11, 'dec':12,
    # German
    'm\xe4r':3, 'mae':3, 'mrz':3, 'mai':5, 'okt':10, 'dez':12,
    # French
    'fev':2, 'avr':4, 'juin':6, 'juil':7, 'aou':8, 'ao\xfb':8,
    'd\xe9c':12,
    # Spanish
    'ene':1, 'abr':4, 'ago':8, 'dic':12,
    # Portuguese
    'out':10,
    }
_relmonth = r'(?:\((?P<relmonth>[-+]?\d+)\))'

# --- Day parts ---
_day = r'(?P<day>\d?\d(?!:))'
_usday = r'(?P<day>\d?\d(?!:))(?:st|nd|rd|th|[,\.;])?'
_fullday = r'(?P<day>\d\d(?!:))'
_litday = ('(?P<litday>'
           'mon|tue|wed|thu|fri|sat|sun|'
           'die|mit|don|fre|sam|son|'
           'lun|mar|mer|jeu|ven|sam|dim|'
           'mie|jue|vie|sab|dom|'
           'pri|seg|ter|cua|qui'
           ')[a-z]*')
# Maps the weekday abbreviation matched by _litday to 0 (Monday)
# through 6 (Sunday).
litdaytable = {
    # English
    'mon':0, 'tue':1, 'wed':2, 'thu':3, 'fri':4, 'sat':5, 'sun':6,
    # German
    'die':1, 'mit':2, 'don':3, 'fre':4, 'sam':5, 'son':6,
    # French
    'lun':0, 'mar':1, 'mer':2, 'jeu':3, 'ven':4, 'sam':5, 'dim':6,
    # Spanish
    'mie':2, 'jue':3, 'vie':4, 'sab':5, 'dom':6,
    # Portuguese
    'pri':0, 'seg':1, 'ter':2, 'cua':3, 'qui':4,
    }
_relday = r'(?:\((?P<relday>[-+]?\d+)\))'

# --- Time-of-day parts ---
_hour = r'(?P<hour>[012]?\d)'
_minute = r'(?P<minute>[0-6]\d)'
_second = r'(?P<second>[0-6]\d(?:[.,]\d+)?)'

# --- Time delta parts ---
_days = r'(?P<days>\d*\d(?:[.,]\d+)?)'
_hours = r'(?P<hours>\d*\d(?:[.,]\d+)?)'
_minutes = r'(?P<minutes>\d*\d(?:[.,]\d+)?)'
_seconds = r'(?P<seconds>\d*\d(?:[.,]\d+)?)'

_reldays = r'(?:\((?P<reldays>[-+]?\d+(?:[.,]\d+)?)\))'
_relhours = r'(?:\((?P<relhours>[-+]?\d+(?:[.,]\d+)?)\))'
_relminutes = r'(?:\((?P<relminutes>[-+]?\d+(?:[.,]\d+)?)\))'
_relseconds = r'(?:\((?P<relseconds>[-+]?\d+(?:[.,]\d+)?)\))'

# --- Miscellaneous parts ---
_sign = '(?:(?P<sign>[-+]) *)'
_week = r'W(?P<week>\d?\d)'
_zone = r'(?P<zone>[A-Z]+|[+-]\d\d?:?(?:\d\d)?)'
_ampm = '(?P<ampm>[ap][m.]+)'

# --- Complete time patterns ---
_time = (_hour + ':' + _minute + '(?::' + _second + '|[^:]|$) *'
         + _ampm + '? *' + _zone + '?')
_isotime = _hour + ':?' + _minute + ':?' + _second + '? *' + _zone + '?'

# --- Complete date patterns ---
_yeardate = _year
_weekdate = _year + '-?(?:' + _week + '-?' + _day + '?)?'
_eurodate = _day + r'\.' + _month + r'\.' + _year_epoch + '?'
_usdate = _month + '/' + _day + '(?:/' + _year_epoch + '|[^/]|$)'
_altusdate = _month + '-' + _day + '-' + _fullyear_epoch
_isodate = _year + '-' + _month + '-?' + _day + '?(?!:)'
_altisodate = _year + _fullmonth + _fullday + '(?!:)'
_usisodate = _fullyear + '/' + _fullmonth + '/' + _fullday
_litdate = ('(?:'+ _litday + ',? )? *' +
            _usday + ' *' +
            '[- ] *(?:' + _litmonth + '|'+ _month +') *[- ] *' +
            _year_epoch + '?')
_altlitdate = ('(?:'+ _litday + ',? )? *' +
               _litmonth + '[ ,.a-z]+' +
               _usday +
               '(?:[ a-z]+' + _year_epoch + ')?')
_eurlitdate = ('(?:'+ _litday + ',?[ a-z]+)? *' +
               '(?:'+ _usday + '[ a-z]+)? *' +
               _litmonth +
               '(?:[ ,.a-z]+' + _year_epoch + ')?')

# --- Relative date/time patterns ---
_relany = '[*%?a-zA-Z]+'

_relisodate = ('(?:(?:' + _relany + '|' + _year + '|' + _relyear + ')-' +
               '(?:' + _relany + '|' + _month + '|' + _relmonth + ')-' +
               '(?:' + _relany + '|' + _day + '|' + _relday + '))')

_asctime = ('(?:'+ _litday + ',? )? *' +
                _usday + ' *' +
                '[- ] *(?:' + _litmonth + '|'+ _month +') *[- ]' +
                '(?:[0-9: ]+)' +
                _year_epoch + '?')

_relisotime = ('(?:(?:' + _relany + '|' + _hour + '|' + _relhours + '):' +
               '(?:' + _relany + '|' + _minute + '|' + _relminutes + ')' +
               '(?::(?:' + _relany + '|' + _second + '|' + _relseconds + '))?)')

# --- Time delta patterns ---
_isodelta1 = (_sign + '?' +
              _days + ':' + _hours + ':' + _minutes + ':' + _seconds)
_isodelta2 = (_sign + '?' +
              _hours + ':' + _minutes + ':' + _seconds)
_isodelta3 = (_sign + '?' +
              _hours + ':' + _minutes)
_litdelta = (_sign + '?' +
             '(?:' + _days + ' *d[a-z]*[,; ]*)?' +
             '(?:' + _hours + ' *h[a-z]*[,; ]*)?' +
             '(?:' + _minutes + ' *m[a-z]*[,; ]*)?' +
             '(?:' + _seconds + ' *s[a-z]*[,; ]*)?')
_litdelta2 = (_sign + '?' +
             '(?:' + _days + ' *d[a-z]*[,; ]*)?' +
              _hours + ':' + _minutes + '(?::' + _seconds + ')?')

# --- Compiled REs (date/time patterns are case-insensitive, the
# delta patterns are not) ---
_timeRE = re.compile(_time, re.I)
_isotimeRE = re.compile(_isotime, re.I)
_isodateRE = re.compile(_isodate, re.I)
_altisodateRE = re.compile(_altisodate, re.I)
_usisodateRE = re.compile(_usisodate, re.I)
_yeardateRE = re.compile(_yeardate, re.I)
_eurodateRE = re.compile(_eurodate, re.I)
_usdateRE = re.compile(_usdate, re.I)
_altusdateRE = re.compile(_altusdate, re.I)
_litdateRE = re.compile(_litdate, re.I)
_altlitdateRE = re.compile(_altlitdate, re.I)
_eurlitdateRE = re.compile(_eurlitdate, re.I)
_relisodateRE = re.compile(_relisodate, re.I)
_asctimeRE = re.compile(_asctime, re.I)
_isodelta1RE = re.compile(_isodelta1)
_isodelta2RE = re.compile(_isodelta2)
_isodelta3RE = re.compile(_isodelta3)
_litdeltaRE = re.compile(_litdelta)
_litdelta2RE = re.compile(_litdelta2)
_relisotimeRE = re.compile(_relisotime, re.I)
178

    
179
# Names of the available date parsers, in the order in which
# _parse_date() tries them
_date_formats = (
    'euro',
    'usiso',
    'us',
    'altus',
    'iso',
    'altiso',
    'lit',
    'altlit',
    'eurlit',
    'year',
    'unknown',
)

# Names of the available time parsers, in the order in which
# _parse_time() tries them
_time_formats = (
    'standard',
    'iso',
    'unknown',
)
190

    
191
# Pattern for numeric UTC offsets: optional sign, one or two hour
# digits, an optional ':' and two optional minute digits.  Any digits
# beyond that are captured as 'extra' so that utc_offset() can reject
# them.
_zoneoffset = (
    r'(?:'
    r'(?P<zonesign>[+-])?'
    r'(?P<hours>\d\d?)'
    r':?'
    r'(?P<minutes>\d\d)?'
    r'(?P<extra>\d+)?'
    r')'
)

_zoneoffsetRE = re.compile(_zoneoffset)

# Time zone abbreviation -> UTC offset in hours (utc_offset() converts
# the value to minutes).  Summer time variants are listed next to
# their standard time zone.
_zonetable = {
    # Standards
    'UT': 0,
    'UTC': 0,
    'GMT': 0,

    # A few common timezone abbreviations
    # Central European
    'CET': 1, 'CEST': 2, 'CETDST': 2,
    # Mean European
    'MET': 1, 'MEST': 2, 'METDST': 2,
    # Mitteleuropaeische Zeit
    'MEZ': 1, 'MESZ': 2,
    # Eastern Europe
    'EET': 2, 'EEST': 3, 'EETDST': 3,
    # Western Europe
    'WET': 0, 'WEST': 1, 'WETDST': 1,
    # Moscow
    'MSK': 3, 'MSD': 4,
    # India
    'IST': 5.5,
    # Japan
    'JST': 9,
    # Korea
    'KST': 9,
    # Hong Kong
    'HKT': 8,

    # US time zones
    # Atlantic
    'AST': -4, 'ADT': -3,
    # Eastern
    'EST': -5, 'EDT': -4,
    # Central
    'CST': -6, 'CDT': -5,
    # Mountain
    'MST': -7, 'MDT': -6,
    # Pacific
    'PST': -8, 'PDT': -7,

    # Australian time zones
    # Central
    'CAST': 9.5, 'CADT': 10.5,
    # Eastern
    'EAST': 10, 'EADT': 11,
    # Western
    'WAST': 8, 'WADT': 9,
    # Southern
    'SAST': 9.5, 'SADT': 10.5,

    # US military time zone for UTC; the other letters are added below
    'Z': 0,
}

# Remaining US military time zones: 'A'..'M' (the letter 'J' is not
# used) map to +1..+12 and 'N'..'Y' map to -1..-12.
_zonetable.update(zip('ABCDEFGHIKLM', range(1, 13)))
_zonetable.update(zip('NOPQRSTUVWXY', range(-1, -13, -1)))
263

    
264

    
265
def utc_offset(zone):
    """ utc_offset(zonestring)

        Return the UTC time zone offset in minutes as an integer.

        zone must be string and can either be given as +-HH:MM,
        +-HHMM, +-HH numeric offset or as time zone
        abbreviation. Daylight saving time must be encoded into the
        zone offset.

        Timezone abbreviations are treated case-insensitive.

        An empty (or otherwise false) zone gives an offset of 0.

        Raises ValueError if zone is neither a known abbreviation nor
        a numeric offset, or if the numeric offset has surplus digits.

    """
    if not zone:
        return 0

    # Abbreviations: the table stores hours, some of them fractional
    # (e.g. 5.5), so normalize the result to whole minutes.
    uzone = zone.upper()
    if uzone in _zonetable:
        return int(_zonetable[uzone] * 60)

    # Numeric offsets
    parsed = _zoneoffsetRE.match(zone)
    if not parsed:
        raise ValueError('wrong format or unkown time zone: "%s"' % zone)
    zonesign, hours, minutes, extra = parsed.groups()
    if extra:
        # More digits than +-HHMM allows
        raise ValueError('illegal time zone offset: "%s"' % zone)
    offset = int(hours or 0) * 60 + int(minutes or 0)
    if zonesign == '-':
        offset = -offset
    return offset
293

    
294
def add_century(year):

    """ Sliding window approach to the Y2K problem: adds a suitable
        century to the given year and returns it as integer.

        Years greater than 99 are returned unchanged.

        The window used depends on the current year. If adding the current
        century to the given year gives a year within the range
        current_year-70...current_year+30 [both inclusive], then the
        current century is added. Otherwise the century (current + 1 or
        - 1) producing the least difference is chosen.

    """
    if year > 99:
        # Take it as-is
        return year

    # Read the clock once so that year and century cannot disagree,
    # and use floor division so the result stays an integer even
    # where '/' is true division.
    current_year = dt.datetime.now().year
    current_century = (current_year // 100) * 100

    year = year + current_century
    diff = year - current_year
    if -70 <= diff <= 30:
        return year
    if diff < -70:
        # Too far in the past: use the next century
        return year + 100
    # Too far in the future: use the previous century
    return year - 100
321

    
322

    
323
def _year_from_match(year):
    """ Converts a matched year string to an integer; two-character
        years are expanded with add_century() (Y2K sliding window).
    """
    if len(year) == 2:
        return add_century(int(year))
    return int(year)


def _parse_date(text):
    """ Parses the date part given in text and returns a tuple
        (text,day,month,year,style) with the following meanings:

        * text gives the original text without the date part

        * day,month,year give the parsed date as integers

        * style gives information about which parser was successful:
          'euro' - the European date parser
          'us' - the US date parser
          'altus' - the alternative US date parser (with '-' instead of '/')
          'iso' - the ISO date parser
          'altiso' - the alternative ISO date parser (without '-')
          'usiso' - US style ISO date parser (yyyy/mm/dd)
          'lit' - the US literal date parser
          'altlit' - the alternative US literal date parser
          'eurlit' - the European literal date parser
          'year' - only a year was found (at the start of text)
          'unknown' - no date part was found, the current date was used

        The parsers are tried in the order given by the module level
        tuple _date_formats; the first one that matches wins.

        Date parts missing from the text default to the current year
        and to January 1.

        Raises ValueError if a literal month name is not known, or if
        no parser matches and 'unknown' is not in _date_formats.

    """
    match = None
    style = ''
    fmt = None

    us_formats = ('us', 'altus')
    iso_formats = ('iso', 'altiso', 'usiso')

    now = dt.datetime.now

    # Apply the parsers in the order given in _date_formats
    for fmt in _date_formats:

        if fmt == 'euro':
            # European style date
            match = _eurodateRE.search(text)
            if match is None:
                continue
            day, month, year, epoch = match.groups()
            if year:
                year = _year_from_match(year)
            else:
                year = now().year
            # NOTE: this test is case-sensitive although the REs are
            # compiled case-insensitively
            if epoch and 'B' in epoch:
                year = -year + 1
            month = int(month)
            day = int(day)
            # Could have mistaken euro format for us style date
            # which uses month, day order
            if month > 12 or month == 0:
                match = None
                continue
            break

        elif fmt == 'year':
            # Just a year specified; must be at the start of text
            match = _yeardateRE.match(text)
            if match is None:
                continue
            year = match.groups()[0]
            if year:
                year = _year_from_match(year)
            else:
                year = now().year
            day = 1
            month = 1
            break

        elif fmt in iso_formats:
            # ISO style date
            if fmt == 'iso':
                match = _isodateRE.search(text)
            elif fmt == 'altiso':
                match = _altisodateRE.search(text)
                # Avoid mistaking ISO time parts ('Thhmmss') for dates
                if match is not None:
                    left, right = match.span()
                    if left > 0 and text[left - 1:left] == 'T':
                        match = None
                        continue
            else:
                match = _usisodateRE.search(text)
            if match is None:
                continue
            year, month, day = match.groups()
            year = _year_from_match(year)
            # Default to January 1st
            if month:
                month = int(month)
            else:
                month = 1
            if day:
                day = int(day)
            else:
                day = 1
            break

        elif fmt in us_formats:
            # US style date
            if fmt == 'us':
                match = _usdateRE.search(text)
            else:
                match = _altusdateRE.search(text)
            if match is None:
                continue
            month, day, year, epoch = match.groups()
            if year:
                year = _year_from_match(year)
            else:
                year = now().year
            if epoch and 'B' in epoch:
                year = -year + 1
            # Default to 1 if no day is given
            if day:
                day = int(day)
            else:
                day = 1
            month = int(month)
            # Could have mistaken us format for euro style date
            # which uses day, month order
            if month > 12 or month == 0:
                match = None
                continue
            break

        # The three literal parsers only collect the matched strings;
        # the conversion is shared and done after the loop.
        elif fmt == 'lit':
            # US style literal date
            match = _litdateRE.search(text)
            if match is not None:
                litday, day, litmonth, month, year, epoch = match.groups()
                break

        elif fmt == 'altlit':
            # Alternative US style literal date
            match = _altlitdateRE.search(text)
            if match is not None:
                litday, litmonth, day, year, epoch = match.groups()
                month = '<missing>'
                break

        elif fmt == 'eurlit':
            # European style literal date
            match = _eurlitdateRE.search(text)
            if match is not None:
                litday, day, litmonth, year, epoch = match.groups()
                month = '<missing>'
                break

        elif fmt == 'unknown':
            # No date part: use the current date
            today = now()
            year = today.year
            month = today.month
            day = today.day
            style = fmt
            break

    # Check success
    if match is not None:
        # Remove date from text
        left, right = match.span()
        text = text[:left] + text[right:]
        style = fmt

    elif not style:
        # Not recognized: raise an error
        raise ValueError('unknown date format: "%s"' % text)

    # Literal date post-processing
    if style in ('lit', 'altlit', 'eurlit'):
        # Default to current year, January 1st
        if year:
            year = _year_from_match(year)
        else:
            year = now().year
        if epoch and 'B' in epoch:
            year = -year + 1
        if litmonth:
            litmonth = litmonth.lower()
            try:
                month = litmonthtable[litmonth]
            except KeyError:
                raise ValueError('wrong month name: "%s"' % litmonth)
        elif month:
            month = int(month)
        else:
            month = 1
        if day:
            day = int(day)
        else:
            day = 1

    return text, day, month, year, style
556

    
557
def _parse_time(text):

    """ Parses a time part given in text and returns a tuple
        (text,hour,minute,second,offset,style) with the following
        meanings:

        * text gives the original text without the time part
        * hour,minute give the parsed time as integers, second as
          float
        * offset gives the time zone UTC offset as returned by
          utc_offset() (0 if no zone was found)
        * style gives information about which parser was successful:
          'standard' - the standard parser (':' delimiter)
          'iso' - the ISO time format parser (':' optional)
          'unknown' - no time part was found; the time defaults
                      to 0:00:00 with 0 zone offset

        The parsers are tried in the order given by the module level
        tuple _time_formats.

        Raises ValueError if the zone part is not understood by
        utc_offset(), or if no parser matches and 'unknown' is not in
        _time_formats.

    """
    match = None
    style = ''

    # Apply the parsers in the order given in _time_formats
    for fmt in _time_formats:

        if fmt == 'standard':
            # Standard format
            match = _timeRE.search(text)
            if match is not None:
                hour, minute, second, ampm, zone = match.groups()
                style = 'standard'
                break

        elif fmt == 'iso':
            # ISO format; has no am/pm part
            match = _isotimeRE.search(text)
            if match is not None:
                hour, minute, second, zone = match.groups()
                ampm = None
                style = 'iso'
                break

        elif fmt == 'unknown':
            # Default handling
            hour, minute, second, offset = 0, 0, 0.0, 0
            style = 'unknown'
            break

    if not style:
        # If no default handling should be applied, raise an error
        raise ValueError('unknown time format: "%s"' % text)

    # Post-processing
    if match is not None:

        # Convert the zone to a UTC offset
        if zone:
            offset = utc_offset(zone)
        else:
            offset = 0

        hour = int(hour)
        if ampm:
            if ampm[0] in ('p', 'P'):
                # 12pm = midday
                if hour < 12:
                    hour = hour + 12
            elif hour >= 12:
                # 12am = midnight
                hour = hour - 12

        if minute:
            minute = int(minute)
        else:
            minute = 0

        if second:
            # Accept a decimal comma as well as a decimal point
            second = float(second.replace(',', '.'))
        else:
            second = 0.0

        # Remove time from text
        left, right = match.span()
        text = text[:left] + text[right:]

    return text, hour, minute, second, offset, style
654

    
655
###
656

    
657
def datetime_from_string(text):

    """ datetime_from_string(text)

        Returns a datetime instance reflecting the date and time given
        in text. In case a timezone is given, the returned instance
        will point to the corresponding UTC time value. Otherwise, the
        value is set as given in the string.

        The date part is parsed by _parse_date() and the time part by
        _parse_time(); see these functions for the supported formats
        and the order in which they are tried. Date parts that are
        not included in text default to the current year, January 1;
        time parts default to 00:00:00.00.

        Raises ValueError if a part of text is recognized but cannot
        be interpreted (e.g. an unknown time zone or month name) and
        RangeError if the parsed values do not form a valid datetime.

    """
    origtext = text

    # First try: take out the time part, then parse the date from
    # what is left
    text, hour, minute, second, offset, timestyle = _parse_time(origtext)
    text, day, month, year, datestyle = _parse_date(text)

    # If the time was only found by the ISO parser (which does not
    # require ':' delimiters) or not at all, try the ISO order
    # instead (date, then time)
    if timestyle in ('iso', 'unknown'):
        text, day, month, year, datestyle = _parse_date(origtext)
        text, hour, minute, second, offset, timestyle = _parse_time(text)

    try:
        microsecond = int(round(1000000 * (second % 1)))
        whole_second = int(second)
        # A fraction of .9999995 or more rounds up to a full second,
        # which datetime() does not accept as microsecond value;
        # carry it over instead.
        carry = dt.timedelta(0)
        if microsecond >= 1000000:
            microsecond = microsecond - 1000000
            carry = dt.timedelta(seconds=1)
        value = dt.datetime(year, month, day, hour, minute,
                            whole_second, microsecond)
        # Subtracting the zone offset may leave the supported date
        # range, which is reported as OverflowError
        return value + carry - dt.timedelta(minutes=offset)
    except (ValueError, OverflowError) as why:
        raise RangeError('Failed to parse "%s": %s' % (origtext, why))
731

    
732
def date_from_string(text):

    """ date_from_string(text)

        Returns a datetime instance reflecting the date given in
        text, with the time set to midnight. A possibly included time
        part is not parsed.

        The date is parsed by _parse_date(); see there for the
        supported formats.

        Raises ValueError if _parse_date() cannot interpret text and
        RangeError if the parsed values do not form a valid date.

    """
    _text, day, month, year, datestyle = _parse_date(text)

    try:
        return dt.datetime(year, month, day)
    except ValueError as why:
        raise RangeError('Failed to parse "%s": %s' % (text, why))
750

    
751
def validateDateTimeString(text):

    """ validateDateTimeString(text)

        Validates the given text and returns 1/0 depending on whether
        text includes parseable date and time values or not.

        The text is parsed with datetime_from_string(); both parse
        errors (ValueError) and out-of-range values (RangeError)
        count as not parseable.

        XXX Undocumented !

    """
    try:
        datetime_from_string(text)
    except (ValueError, RangeError):
        # RangeError is not a ValueError subclass in this module, so
        # it has to be listed explicitly
        return 0
    return 1
770

    
771

    
772
def validateDateString(text):

    """ validateDateString(text)

        Validates the given text and returns 1/0 depending on whether
        text includes a parseable date value or not.

        The text is parsed with date_from_string(); both parse errors
        (ValueError) and out-of-range values (RangeError) count as
        not parseable.

        XXX Undocumented !

    """
    try:
        date_from_string(text)
    except (ValueError, RangeError):
        # RangeError is not a ValueError subclass in this module, so
        # it has to be listed explicitly
        return 0
    return 1
791

    
792
### Tests
793

    
794
def _test():

    """ _test()

        Runs datetime_from_string() over a table of sample inputs and
        prints every case whose string representation differs from
        the expected one. Cases with the reference 'ignore' are
        parsed but never reported as failures.

        With _debug enabled, successful parses and the results of
        validateDateTimeString() / validateDateString() are printed
        as well.

        The function only prints; it returns None.

    """
    started = dt.datetime.now()
    today = started.strftime('%Y-%m-%d')

    print('Testing DateTime Parser...')

    # (input text, expected str() of the parsed value). A trailing
    # '.00' on the expected value is stripped before comparing.
    cases = [

        # Literal formats
        ('Sun Nov  6 08:49:37 1994', '1994-11-06 08:49:37.00'),
        ('sun nov  6 08:49:37 1994', '1994-11-06 08:49:37.00'),
        ('sUN NOV  6 08:49:37 1994', '1994-11-06 08:49:37.00'),
        ('Sunday, 06-Nov-94 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('Sun, 06 Nov 1994 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('06-Nov-94 08:49:37', '1994-11-06 08:49:37.00'),
        ('06-Nov-94', '1994-11-06 00:00:00.00'),
        ('06-NOV-94', '1994-11-06 00:00:00.00'),
        ('November 19 08:49:37', '%s-11-19 08:49:37.00' % started.year),
        ('Nov. 9', '%s-11-09 00:00:00.00' % started.year),
        ('Sonntag, der 6. November 1994, 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('6. November 2001, 08:49:37', '2001-11-06 08:49:37.00'),
        ('sep 6', '%s-09-06 00:00:00.00' % started.year),
        ('sep 6 2000', '2000-09-06 00:00:00.00'),
        ('September 29', '%s-09-29 00:00:00.00' % started.year),
        ('Sep. 29', '%s-09-29 00:00:00.00' % started.year),
        ('6 sep', '%s-09-06 00:00:00.00' % started.year),
        ('29 September', '%s-09-29 00:00:00.00' % started.year),
        ('29 Sep.', '%s-09-29 00:00:00.00' % started.year),
        ('sep 6 2001', '2001-09-06 00:00:00.00'),
        ('Sep 6, 2001', '2001-09-06 00:00:00.00'),
        ('September 6, 2001', '2001-09-06 00:00:00.00'),
        ('sep 6 01', '2001-09-06 00:00:00.00'),
        ('Sep 6, 01', '2001-09-06 00:00:00.00'),
        ('September 6, 01', '2001-09-06 00:00:00.00'),
        ('30 Apr 2006 20:19:00', '2006-04-30 20:19:00.00'),

        # ISO formats
        ('1994-11-06 08:49:37', '1994-11-06 08:49:37.00'),
        ('010203', '2001-02-03 00:00:00.00'),
        ('2001-02-03 00:00:00.00', '2001-02-03 00:00:00.00'),
        ('2001-02 00:00:00.00', '2001-02-01 00:00:00.00'),
        ('2001-02-03', '2001-02-03 00:00:00.00'),
        ('2001-02', '2001-02-01 00:00:00.00'),
        ('20000824/2300', '2000-08-24 23:00:00.00'),
        ('20000824/0102', '2000-08-24 01:02:00.00'),
        ('20000824', '2000-08-24 00:00:00.00'),
        ('20000824/020301', '2000-08-24 02:03:01.00'),
        ('20000824 020301', '2000-08-24 02:03:01.00'),
        ('20000824T020301', '2000-08-24 02:03:01.00'),
        ('20000824 020301', '2000-08-24 02:03:01.00'),
        ('2000-08-24 02:03:01.00', '2000-08-24 02:03:01.00'),
        ('T020311', '%s 02:03:11.00' % today),
        ('2003-12-9', '2003-12-09 00:00:00.00'),
        ('03-12-9', '2003-12-09 00:00:00.00'),
        ('003-12-9', '0003-12-09 00:00:00.00'),
        ('0003-12-9', '0003-12-09 00:00:00.00'),
        ('2003-1-9', '2003-01-09 00:00:00.00'),
        ('03-1-9', '2003-01-09 00:00:00.00'),
        ('003-1-9', '0003-01-09 00:00:00.00'),
        ('0003-1-9', '0003-01-09 00:00:00.00'),

        # US formats
        ('06/11/94 08:49:37', '1994-06-11 08:49:37.00'),
        ('11/06/94 08:49:37', '1994-11-06 08:49:37.00'),
        ('9/23/2001', '2001-09-23 00:00:00.00'),
        ('9-23-2001', '2001-09-23 00:00:00.00'),
        ('9/6', '%s-09-06 00:00:00.00' % started.year),
        ('09/6', '%s-09-06 00:00:00.00' % started.year),
        ('9/06', '%s-09-06 00:00:00.00' % started.year),
        ('09/06', '%s-09-06 00:00:00.00' % started.year),
        ('9/6/2001', '2001-09-06 00:00:00.00'),
        ('09/6/2001', '2001-09-06 00:00:00.00'),
        ('9/06/2001', '2001-09-06 00:00:00.00'),
        ('09/06/2001', '2001-09-06 00:00:00.00'),
        ('9-6-2001', '2001-09-06 00:00:00.00'),
        ('09-6-2001', '2001-09-06 00:00:00.00'),
        ('9-06-2001', '2001-09-06 00:00:00.00'),
        ('09-06-2001', '2001-09-06 00:00:00.00'),
        ('2002/05/28 13:10:56.114700 GMT+2', '2002-05-28 13:10:56.114700'),
        ('1970/01/01', '1970-01-01 00:00:00.00'),
        ('20021025 12:00 PM', '2002-10-25 12:00:00.00'),
        ('20021025 12:30 PM', '2002-10-25 12:30:00.00'),
        ('20021025 12:00 AM', '2002-10-25 00:00:00.00'),
        ('20021025 12:30 AM', '2002-10-25 00:30:00.00'),
        ('20021025 1:00 PM', '2002-10-25 13:00:00.00'),
        ('20021025 2:00 AM', '2002-10-25 02:00:00.00'),
        ('Thursday, February 06, 2003 12:40 PM', '2003-02-06 12:40:00.00'),
        ('Mon, 18 Sep 2006 23:03:00', '2006-09-18 23:03:00.00'),

        # European formats
        ('6.11.2001, 08:49:37', '2001-11-06 08:49:37.00'),
        ('06.11.2001, 08:49:37', '2001-11-06 08:49:37.00'),
        ('06.11. 08:49:37', '%s-11-06 08:49:37.00' % started.year),
        # NOTE(review): the day-first cases below were already disabled;
        # presumably they clash with the US month-first parsing -- confirm.
        #('21/12/2002', '2002-12-21 00:00:00.00'),
        #('21/08/2002', '2002-08-21 00:00:00.00'),
        #('21-08-2002', '2002-08-21 00:00:00.00'),
        #('13/01/03', '2003-01-13 00:00:00.00'),
        #('13/1/03', '2003-01-13 00:00:00.00'),
        #('13/1/3', '2003-01-13 00:00:00.00'),
        #('13/01/3', '2003-01-13 00:00:00.00'),

        # Time only formats
        ('01:03', '%s 01:03:00.00' % today),
        ('01:03:11', '%s 01:03:11.00' % today),
        ('01:03:11.50', '%s 01:03:11.500000' % today),
        ('01:03:11.50 AM', '%s 01:03:11.500000' % today),
        ('01:03:11.50 PM', '%s 13:03:11.500000' % today),
        ('01:03:11.50 a.m.', '%s 01:03:11.500000' % today),
        ('01:03:11.50 p.m.', '%s 13:03:11.500000' % today),

        # Invalid formats
        ('6..2001, 08:49:37', '%s 08:49:37.00' % today),
        ('9//2001', 'ignore'),
        ('06--94 08:49:37', 'ignore'),
        ('20-03 00:00:00.00', 'ignore'),
        ('9/2001', 'ignore'),
        ('9-6', 'ignore'),
        ('09-6', 'ignore'),
        ('9-06', 'ignore'),
        ('09-06', 'ignore'),
        ('20000824/23', 'ignore'),
        ('November 1994 08:49:37', 'ignore'),
        ]

    # Add Unicode versions (only where a separate unicode type exists)
    try:
        unicode
    except NameError:
        pass
    else:
        cases.extend([(unicode(text), result) for text, result in cases])

    for text, reference in cases:
        try:
            value = datetime_from_string(text)
        except Exception as exc:
            # Catch Exception rather than everything, so that Ctrl-C and
            # SystemExit still abort the test run.
            if reference is None:
                # A reference of None marks a case expected to fail.
                continue
            # Compare the error message against the reference instead.
            value = str(exc)
        valid_datetime = validateDateTimeString(text)
        valid_date = validateDateString(text)

        # str() of a datetime without microseconds has no fractional
        # part, so drop the '.00' suffix used in the table.
        if reference.endswith('.00'):
            reference = reference[:-3]

        if reference != 'ignore' and str(value) != reference:
            print('Failed to parse "%s"' % text)
            print('  expected: %s' % (reference or '<exception>'))
            print('  parsed:   %s' % value)
        elif _debug:
            print('Parsed "%s" successfully' % text)
        if _debug:
            if not valid_datetime:
                print('  "%s" failed date/time validation' % text)
            if not valid_date:
                print('  "%s" failed date validation' % text)

    elapsed = dt.datetime.now() - started
    # timedelta.seconds alone is an integer that drops the days and
    # microseconds fields; combine all three for the real duration.
    elapsed_seconds = (elapsed.days * 86400
                       + elapsed.seconds
                       + elapsed.microseconds / 1e6)
    print('done. (after %f seconds)' % elapsed_seconds)


if __name__ == '__main__':
    # Executed as a script: run the parser self-test.
    _test()