gvsig-3d / 1.10 / trunk / binaries / mac / raster / gdal / GDAL.framework / Versions / 1.7 / Python / site-packages / numpy / core / _mx_datetime_parser.py @ 27
History | View | Annotate | Download (32.5 KB)
1 |
#-*- coding: latin-1 -*-
|
---|---|
2 |
"""
|
3 |
Date/Time string parsing module.
|
4 |
|
5 |
This code is a slightly modified version of Parser.py found in mx.DateTime
|
6 |
version 3.0.0
|
7 |
|
8 |
As such, it is subject to the terms of the eGenix public license version 1.1.0.
|
9 |
|
10 |
FIXME: Add license.txt to NumPy
|
11 |
"""
|
12 |
|
13 |
__all__ = ['date_from_string', 'datetime_from_string'] |
14 |
|
15 |
import types |
16 |
import re |
17 |
import datetime as dt |
18 |
|
19 |
class RangeError(ValueError):
    """Raised when parsed date/time parts do not form a valid datetime.

    Derives from ValueError (as the corresponding mx.DateTime exception
    does) so that callers which catch ValueError around the parsing
    functions also catch range failures.
    """


# Enable to produce debugging output
_debug = 0
|
23 |
|
24 |
# REs for matching date and time parts in a string; these REs
# parse a superset of ARPA, ISO, American and European style dates.
#
# Pattern fragments that contain backslash escapes are raw strings so
# that the regex escapes are never interpreted as string escapes.
# Non-ASCII month abbreviations are spelled with \x escapes so that the
# module does not depend on the encoding of the source file.

_year = r'(?P<year>-?\d+\d(?!:))'
_fullyear = r'(?P<year>-?\d+\d\d(?!:))'
_year_epoch = '(?:' + _year + r'(?P<epoch> *[ABCDE\.]+)?)'
_fullyear_epoch = '(?:' + _fullyear + r'(?P<epoch> *[ABCDE\.]+)?)'
_relyear = r'(?:\((?P<relyear>[-+]?\d+)\))'

_month = r'(?P<month>\d?\d(?!:))'
_fullmonth = r'(?P<month>\d\d(?!:))'
_litmonth = ('(?P<litmonth>'
             'jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|'
             'm\xe4r|mae|mrz|mai|okt|dez|'
             'fev|avr|juin|juil|aou|ao\xfb|d\xe9c|'
             'ene|abr|ago|dic|'
             'out'
             r')[a-z,\.;]*')

# Maps the (lower-cased) month abbreviation matched by _litmonth to the
# month number.
litmonthtable = {
    # English
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    # German
    'm\xe4r': 3, 'mae': 3, 'mrz': 3, 'mai': 5, 'okt': 10, 'dez': 12,
    # French
    'fev': 2, 'avr': 4, 'juin': 6, 'juil': 7, 'aou': 8, 'ao\xfb': 8,
    'd\xe9c': 12,
    # Spanish
    'ene': 1, 'abr': 4, 'ago': 8, 'dic': 12,
    # Portuguese
    'out': 10,
    }
_relmonth = r'(?:\((?P<relmonth>[-+]?\d+)\))'

_day = r'(?P<day>\d?\d(?!:))'
_usday = r'(?P<day>\d?\d(?!:))(?:st|nd|rd|th|[,\.;])?'
_fullday = r'(?P<day>\d\d(?!:))'
_litday = ('(?P<litday>'
           'mon|tue|wed|thu|fri|sat|sun|'
           'die|mit|don|fre|sam|son|'
           'lun|mar|mer|jeu|ven|sam|dim|'
           'mie|jue|vie|sab|dom|'
           'pri|seg|ter|cua|qui'
           ')[a-z]*')

# Maps the (lower-cased) weekday abbreviation matched by _litday to the
# weekday number (Monday == 0).
litdaytable = {
    # English
    'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, 'sun': 6,
    # German
    'die': 1, 'mit': 2, 'don': 3, 'fre': 4, 'sam': 5, 'son': 6,
    # French
    'lun': 0, 'mar': 1, 'mer': 2, 'jeu': 3, 'ven': 4, 'sam': 5, 'dim': 6,
    # Spanish
    'mie': 2, 'jue': 3, 'vie': 4, 'sab': 5, 'dom': 6,
    # Portuguese
    'pri': 0, 'seg': 1, 'ter': 2, 'cua': 3, 'qui': 4,
    }
_relday = r'(?:\((?P<relday>[-+]?\d+)\))'

_hour = r'(?P<hour>[012]?\d)'
_minute = r'(?P<minute>[0-6]\d)'
_second = r'(?P<second>[0-6]\d(?:[.,]\d+)?)'

_days = r'(?P<days>\d*\d(?:[.,]\d+)?)'
_hours = r'(?P<hours>\d*\d(?:[.,]\d+)?)'
_minutes = r'(?P<minutes>\d*\d(?:[.,]\d+)?)'
_seconds = r'(?P<seconds>\d*\d(?:[.,]\d+)?)'

_reldays = r'(?:\((?P<reldays>[-+]?\d+(?:[.,]\d+)?)\))'
_relhours = r'(?:\((?P<relhours>[-+]?\d+(?:[.,]\d+)?)\))'
_relminutes = r'(?:\((?P<relminutes>[-+]?\d+(?:[.,]\d+)?)\))'
_relseconds = r'(?:\((?P<relseconds>[-+]?\d+(?:[.,]\d+)?)\))'

_sign = '(?:(?P<sign>[-+]) *)'
_week = r'W(?P<week>\d?\d)'
_zone = r'(?P<zone>[A-Z]+|[+-]\d\d?:?(?:\d\d)?)'
_ampm = '(?P<ampm>[ap][m.]+)'

_time = (_hour + ':' + _minute + '(?::' + _second + '|[^:]|$) *'
         + _ampm + '? *' + _zone + '?')
_isotime = _hour + ':?' + _minute + ':?' + _second + '? *' + _zone + '?'

_yeardate = _year
_weekdate = _year + '-?(?:' + _week + '-?' + _day + '?)?'
_eurodate = _day + r'\.' + _month + r'\.' + _year_epoch + '?'
_usdate = _month + '/' + _day + '(?:/' + _year_epoch + '|[^/]|$)'
_altusdate = _month + '-' + _day + '-' + _fullyear_epoch
_isodate = _year + '-' + _month + '-?' + _day + '?(?!:)'
_altisodate = _year + _fullmonth + _fullday + '(?!:)'
_usisodate = _fullyear + '/' + _fullmonth + '/' + _fullday
_litdate = ('(?:' + _litday + ',? )? *' +
            _usday + ' *' +
            '[- ] *(?:' + _litmonth + '|' + _month + ') *[- ] *' +
            _year_epoch + '?')
_altlitdate = ('(?:' + _litday + ',? )? *' +
               _litmonth + '[ ,.a-z]+' +
               _usday +
               '(?:[ a-z]+' + _year_epoch + ')?')
_eurlitdate = ('(?:' + _litday + ',?[ a-z]+)? *' +
               '(?:' + _usday + '[ a-z]+)? *' +
               _litmonth +
               '(?:[ ,.a-z]+' + _year_epoch + ')?')

_relany = '[*%?a-zA-Z]+'

_relisodate = ('(?:(?:' + _relany + '|' + _year + '|' + _relyear + ')-' +
               '(?:' + _relany + '|' + _month + '|' + _relmonth + ')-' +
               '(?:' + _relany + '|' + _day + '|' + _relday + '))')

_asctime = ('(?:' + _litday + ',? )? *' +
            _usday + ' *' +
            '[- ] *(?:' + _litmonth + '|' + _month + ') *[- ]' +
            '(?:[0-9: ]+)' +
            _year_epoch + '?')

_relisotime = ('(?:(?:' + _relany + '|' + _hour + '|' + _relhours + '):' +
               '(?:' + _relany + '|' + _minute + '|' + _relminutes + ')' +
               '(?::(?:' + _relany + '|' + _second + '|' + _relseconds + '))?)')

_isodelta1 = (_sign + '?' +
              _days + ':' + _hours + ':' + _minutes + ':' + _seconds)
_isodelta2 = (_sign + '?' +
              _hours + ':' + _minutes + ':' + _seconds)
_isodelta3 = (_sign + '?' +
              _hours + ':' + _minutes)
_litdelta = (_sign + '?' +
             '(?:' + _days + ' *d[a-z]*[,; ]*)?' +
             '(?:' + _hours + ' *h[a-z]*[,; ]*)?' +
             '(?:' + _minutes + ' *m[a-z]*[,; ]*)?' +
             '(?:' + _seconds + ' *s[a-z]*[,; ]*)?')
_litdelta2 = (_sign + '?' +
              '(?:' + _days + ' *d[a-z]*[,; ]*)?' +
              _hours + ':' + _minutes + '(?::' + _seconds + ')?')

# Compiled patterns.  The date and time patterns are case-insensitive;
# the delta patterns are compiled without flags.
_timeRE = re.compile(_time, re.I)
_isotimeRE = re.compile(_isotime, re.I)
_isodateRE = re.compile(_isodate, re.I)
_altisodateRE = re.compile(_altisodate, re.I)
_usisodateRE = re.compile(_usisodate, re.I)
_yeardateRE = re.compile(_yeardate, re.I)
_eurodateRE = re.compile(_eurodate, re.I)
_usdateRE = re.compile(_usdate, re.I)
_altusdateRE = re.compile(_altusdate, re.I)
_litdateRE = re.compile(_litdate, re.I)
_altlitdateRE = re.compile(_altlitdate, re.I)
_eurlitdateRE = re.compile(_eurlitdate, re.I)
_relisodateRE = re.compile(_relisodate, re.I)
_asctimeRE = re.compile(_asctime, re.I)
_isodelta1RE = re.compile(_isodelta1)
_isodelta2RE = re.compile(_isodelta2)
_isodelta3RE = re.compile(_isodelta3)
_litdeltaRE = re.compile(_litdelta)
_litdelta2RE = re.compile(_litdelta2)
_relisotimeRE = re.compile(_relisotime, re.I)

# Available date parsers, in the order in which they are tried
_date_formats = ('euro',
                 'usiso', 'us', 'altus',
                 'iso', 'altiso',
                 'lit', 'altlit', 'eurlit',
                 'year', 'unknown')

# Available time parsers, in the order in which they are tried
_time_formats = ('standard',
                 'iso',
                 'unknown')
|
190 |
|
191 |
# Numeric zone offset: optional sign, one or two hour digits, optional
# ':' and two minute digits.  Any further digits land in 'extra' and are
# rejected by utc_offset().
_zoneoffset = ('(?:'
               '(?P<zonesign>[+-])?'
               r'(?P<hours>\d\d?)'
               ':?'
               r'(?P<minutes>\d\d)?'
               r'(?P<extra>\d+)?'
               ')'
               )

_zoneoffsetRE = re.compile(_zoneoffset)

# Maps upper-case time zone abbreviations to their UTC offset in hours.
_zonetable = {
    # Timezone abbreviations
    #  Std     Summer

    # Standards
    'UT': 0,
    'UTC': 0,
    'GMT': 0,

    # A few common timezone abbreviations
    'CET': 1, 'CEST': 2, 'CETDST': 2,   # Central European
    'MET': 1, 'MEST': 2, 'METDST': 2,   # Mean European
    'MEZ': 1, 'MESZ': 2,                # Mitteleuropaeische Zeit
    'EET': 2, 'EEST': 3, 'EETDST': 3,   # Eastern Europe
    'WET': 0, 'WEST': 1, 'WETDST': 1,   # Western Europe
    'MSK': 3, 'MSD': 4,                 # Moscow
    'IST': 5.5,                         # India
    'JST': 9,                           # Japan
    'KST': 9,                           # Korea
    'HKT': 8,                           # Hong Kong

    # US time zones
    'AST': -4, 'ADT': -3,               # Atlantic
    'EST': -5, 'EDT': -4,               # Eastern
    'CST': -6, 'CDT': -5,               # Central
    'MST': -7, 'MDT': -6,               # Mountain
    'PST': -8, 'PDT': -7,               # Pacific

    # Australian time zones
    'CAST': 9.5, 'CADT': 10.5,          # Central
    'EAST': 10, 'EADT': 11,             # Eastern
    'WAST': 8, 'WADT': 9,               # Western
    'SAST': 9.5, 'SADT': 10.5,          # Southern

    # US military time zones
    'Z': 0,
    'A': 1,
    'B': 2,
    'C': 3,
    'D': 4,
    'E': 5,
    'F': 6,
    'G': 7,
    'H': 8,
    'I': 9,
    'K': 10,
    'L': 11,
    'M': 12,
    'N': -1,
    'O': -2,
    'P': -3,
    'Q': -4,
    'R': -5,
    'S': -6,
    'T': -7,
    'U': -8,
    'V': -9,
    'W': -10,
    'X': -11,
    'Y': -12
    }


def utc_offset(zone):
    """ utc_offset(zonestring)

        Return the UTC time zone offset in minutes.

        zone must be a string and can either be given as +-HH:MM,
        +-HHMM, +-HH numeric offset or as a time zone abbreviation
        listed in _zonetable.  Daylight saving time must be encoded
        into the zone offset.  An empty or None zone yields 0.

        Time zone abbreviations are treated case-insensitively.  For
        abbreviations with a fractional hour offset the result is a
        float.

        Raises ValueError if zone is neither a known abbreviation nor
        a well-formed numeric offset.

    """
    if not zone:
        return 0
    uzone = zone.upper()
    if uzone in _zonetable:
        return _zonetable[uzone] * 60
    offset = _zoneoffsetRE.match(zone)
    if not offset:
        raise ValueError('wrong format or unknown time zone: "%s"' % zone)
    zonesign, hours, minutes, extra = offset.groups()
    if extra:
        # More digits than HHMM: refuse rather than silently truncate
        raise ValueError('illegal time zone offset: "%s"' % zone)
    offset = int(hours or 0) * 60 + int(minutes or 0)
    if zonesign == '-':
        offset = -offset
    return offset
|
293 |
|
294 |
def add_century(year):

    """ Sliding window approach to the Y2K problem: adds a suitable
        century to the given year and returns it as integer.

        Years greater than 99 are returned unchanged.

        The window used depends on the current year. If adding the
        current century to the given year gives a year within the range
        current_year-70...current_year+30 [both inclusive], then the
        current century is added. Otherwise the century (current + 1 or
        - 1) producing the least difference is chosen.

    """

    # Read the clock once so both values refer to the same year
    current_year = dt.datetime.now().year
    # Floor division keeps the century an integer under true division
    current_century = (current_year // 100) * 100

    if year > 99:
        # Take it as-is
        return year
    year = year + current_century
    diff = year - current_year
    if -70 <= diff <= 30:
        return year
    elif diff < -70:
        return year + 100
    else:
        return year - 100
321 |
|
322 |
|
323 |
def _normalize_year(year, epoch=None):
    """ Convert a matched year string into an integer year.

        Two-character years are expanded with add_century(); a missing
        year defaults to the current year.  If epoch contains 'B', the
        year is mapped to -year + 1.

    """
    if year:
        if len(year) == 2:
            # Y2K problem:
            year = add_century(int(year))
        else:
            year = int(year)
    else:
        year = dt.datetime.now().year
    if epoch and 'B' in epoch:
        year = -year + 1
    return year


def _parse_date(text):
    """
    Parses the date part given in text and returns a tuple
    (text,day,month,year,style) with the following meanings:

    * text gives the original text without the date part

    * day,month,year give the parsed date as integers

    * style gives information about which parser was successful:
      'euro' - the European date parser
      'usiso' - US style ISO date parser (yyyy/mm/dd)
      'us' - the US date parser
      'altus' - the alternative US date parser (with '-' instead of '/')
      'iso' - the ISO date parser
      'altiso' - the alternative ISO date parser (without '-')
      'lit' - the US literal date parser
      'altlit' - the alternative US literal date parser
      'eurlit' - the European literal date parser
      'year' - only a year was given (anchored at the start of text)
      'unknown' - no date part was found, the current date was used

    The parsers are tried in the order given by the module-level
    ``_date_formats`` tuple.  Parsers default missing parts to the
    current year, month 1 and day 1.

    If ``'unknown'`` is not in ``_date_formats`` and the date cannot be
    parsed, a :exc:`ValueError` is raised.  A literal month name missing
    from ``litmonthtable`` also raises :exc:`ValueError`.

    """
    match = None
    style = ''

    # Apply parsers in the order given in _date_formats
    for fmt in _date_formats:

        if fmt == 'euro':
            # European style date
            match = _eurodateRE.search(text)
            if match is not None:
                day, month, year, epoch = match.groups()
                year = _normalize_year(year, epoch)
                month = int(month)
                day = int(day)
                # Could have mistaken euro format for us style date
                # which uses month, day order
                if month > 12 or month == 0:
                    match = None
                    continue
                break

        elif fmt == 'year':
            # just a year specified
            match = _yeardateRE.match(text)
            if match is not None:
                year = _normalize_year(match.groups()[0])
                day = 1
                month = 1
                break

        elif fmt in ('iso', 'altiso', 'usiso'):
            # ISO style date
            if fmt == 'iso':
                match = _isodateRE.search(text)
            elif fmt == 'altiso':
                match = _altisodateRE.search(text)
                # Avoid mistaking ISO time parts ('Thhmmss') for dates
                if match is not None:
                    left = match.start()
                    if left > 0 and text[left - 1:left] == 'T':
                        match = None
                        continue
            else:
                match = _usisodateRE.search(text)
            if match is not None:
                year, month, day = match.groups()
                year = _normalize_year(year)
                # Default to January 1st
                if month:
                    month = int(month)
                else:
                    month = 1
                if day:
                    day = int(day)
                else:
                    day = 1
                break

        elif fmt in ('us', 'altus'):
            # US style date
            if fmt == 'us':
                match = _usdateRE.search(text)
            else:
                match = _altusdateRE.search(text)
            if match is not None:
                month, day, year, epoch = match.groups()
                year = _normalize_year(year, epoch)
                # Default to 1 if no day is given
                if day:
                    day = int(day)
                else:
                    day = 1
                month = int(month)
                # Could have mistaken us format for euro style date
                # which uses day, month order
                if month > 12 or month == 0:
                    match = None
                    continue
                break

        elif fmt == 'lit':
            # US style literal date
            match = _litdateRE.search(text)
            if match is not None:
                litday, day, litmonth, month, year, epoch = match.groups()
                break

        elif fmt == 'altlit':
            # Alternative US style literal date
            match = _altlitdateRE.search(text)
            if match is not None:
                litday, litmonth, day, year, epoch = match.groups()
                month = '<missing>'
                break

        elif fmt == 'eurlit':
            # European style literal date
            match = _eurlitdateRE.search(text)
            if match is not None:
                litday, day, litmonth, year, epoch = match.groups()
                month = '<missing>'
                break

        elif fmt == 'unknown':
            # No date part: use the current date
            today = dt.datetime.now()
            year = today.year
            month = today.month
            day = today.day
            style = fmt
            break

    # Check success
    if match is not None:
        # Remove date from text
        left, right = match.span()
        text = text[:left] + text[right:]
        style = fmt

    elif not style:
        # Not recognized: raise an error
        raise ValueError('unknown date format: "%s"' % text)

    # Literal date post-processing: the literal parsers leave the matched
    # strings untouched, so convert them to integers here
    if style in ('lit', 'altlit', 'eurlit'):
        # Default to current year, January 1st
        year = _normalize_year(year, epoch)
        if litmonth:
            litmonth = litmonth.lower()
            try:
                month = litmonthtable[litmonth]
            except KeyError:
                raise ValueError('wrong month name: "%s"' % litmonth)
        elif month:
            month = int(month)
        else:
            month = 1
        if day:
            day = int(day)
        else:
            day = 1

    return text, day, month, year, style
|
556 |
|
557 |
def _parse_time(text):

    """ Parses a time part given in text and returns a tuple
        (text,hour,minute,second,offset,style) with the following
        meanings:

        * text gives the original text without the time part
        * hour,minute give the parsed time as integers, second as float
        * offset gives the time zone UTC offset in minutes, as returned
          by utc_offset() (0 if no zone was given)
        * style gives information about which parser was successful:
          'standard' - the standard parser (':' delimited)
          'iso' - the ISO time format parser (superset of 'standard')
          'unknown' - no time part was found; 0:00:00 with a zero
                      zone offset is returned

        The parsers are tried in the order given by the module-level
        _time_formats tuple.

        If 'unknown' is not in _time_formats and the time cannot be
        parsed, a ValueError is raised.  An unrecognized time zone also
        raises ValueError (from utc_offset()).

    """
    match = None
    style = ''

    # Apply parsers in the order given in _time_formats
    for fmt in _time_formats:

        # Standard format
        if fmt == 'standard':
            match = _timeRE.search(text)
            if match is not None:
                hour, minute, second, ampm, zone = match.groups()
                style = 'standard'
                break

        # ISO format
        elif fmt == 'iso':
            match = _isotimeRE.search(text)
            if match is not None:
                hour, minute, second, zone = match.groups()
                ampm = None
                style = 'iso'
                break

        # Default handling
        elif fmt == 'unknown':
            hour, minute, second, offset = 0, 0, 0.0, 0
            style = 'unknown'
            break

    if not style:
        # If no default handling should be applied, raise an error
        raise ValueError('unknown time format: "%s"' % text)

    # Post-processing
    if match is not None:

        if zone:
            # Convert to UTC offset
            offset = utc_offset(zone)
        else:
            offset = 0

        hour = int(hour)
        if ampm:
            if ampm[0] in ('p', 'P'):
                # 12pm = midday
                if hour < 12:
                    hour = hour + 12
            else:
                # 12am = midnight
                if hour >= 12:
                    hour = hour - 12
        if minute:
            minute = int(minute)
        else:
            minute = 0
        if not second:
            second = 0.0
        else:
            # Accept ',' as decimal separator
            if ',' in second:
                second = second.replace(',', '.')
            second = float(second)

        # Remove time from text
        left, right = match.span()
        text = text[:left] + text[right:]

    return text, hour, minute, second, offset, style
|
654 |
|
655 |
###
|
656 |
|
657 |
def datetime_from_string(text):

    """ datetime_from_string(text)

        Returns a datetime instance reflecting the date and time given
        in text. In case a timezone is given, the returned instance
        will point to the corresponding UTC time value. Otherwise, the
        value is set as given in the string.

        The date part is parsed by _parse_date() and the time part by
        _parse_time(); see those functions for the supported formats
        and for the defaults applied to missing parts.  The time is
        looked for first; if it is not found in the standard ':'
        delimited form, the text is parsed again in ISO order (date
        first, then time).

        Raises ValueError if the text cannot be parsed and RangeError
        if the parsed values do not form a valid datetime.

    """
    origtext = text

    text, hour, minute, second, offset, timestyle = _parse_time(origtext)
    text, day, month, year, datestyle = _parse_date(text)

    # If this fails, try the ISO order (date, then time)
    if timestyle in ('iso', 'unknown'):
        text, day, month, year, datestyle = _parse_date(origtext)
        text, hour, minute, second, offset, timestyle = _parse_time(text)

    try:
        whole_seconds = int(second)
        microseconds = int(round(1000000 * (second - whole_seconds)))
        stamp = dt.datetime(year, month, day, hour, minute, whole_seconds)
        # Add the fraction as a timedelta: rounding may yield exactly
        # 1000000 microseconds, which datetime() itself would reject
        return (stamp
                + dt.timedelta(microseconds=microseconds)
                - dt.timedelta(minutes=offset))
    except (ValueError, OverflowError) as why:
        # OverflowError: the zone adjustment left the supported range
        raise RangeError('Failed to parse "%s": %s' % (origtext, why))
|
731 |
|
732 |
def date_from_string(text):

    """ date_from_string(text)

        Returns a datetime instance reflecting the date given in
        text. A possibly included time part is ignored; the time of
        the result is 00:00:00.

        The date is parsed by _parse_date(); see that function for
        the supported formats.

        Raises ValueError if the text cannot be parsed and RangeError
        if the parsed values do not form a valid date.

    """
    _text, day, month, year, datestyle = _parse_date(text)

    try:
        return dt.datetime(year, month, day)
    except ValueError as why:
        raise RangeError('Failed to parse "%s": %s' % (text, why))
|
750 |
|
751 |
def validateDateTimeString(text):

    """ validateDateTimeString(text)

        Validates the given text and returns 1/0 depending on whether
        datetime_from_string() can parse it or not.

        Both unparseable text (ValueError) and out-of-range values
        (RangeError) count as invalid.

        XXX Undocumented !

    """
    try:
        datetime_from_string(text)
    except (ValueError, RangeError):
        return 0
    return 1
770 |
|
771 |
|
772 |
def validateDateString(text):

    """ validateDateString(text)

        Validates the given text and returns 1/0 depending on whether
        date_from_string() can parse it or not.

        Both unparseable text (ValueError) and out-of-range values
        (RangeError) count as invalid.

        XXX Undocumented !

    """
    try:
        date_from_string(text)
    except (ValueError, RangeError):
        return 0
    return 1
791 |
|
792 |
### Tests
|
793 |
|
794 |
def _test():

    """ Self-test: parse a table of sample strings with
        datetime_from_string() and print every case whose result
        differs from the expected string.

        A reference of 'ignore' marks a case whose outcome is not
        checked.  References containing the current year or date are
        built from the time at which the test starts.

    """
    t = dt.datetime.now()
    _date = t.strftime('%Y-%m-%d')

    print('Testing DateTime Parser...')

    l = [

        # Literal formats
        ('Sun Nov 6 08:49:37 1994', '1994-11-06 08:49:37.00'),
        ('sun nov 6 08:49:37 1994', '1994-11-06 08:49:37.00'),
        ('sUN NOV 6 08:49:37 1994', '1994-11-06 08:49:37.00'),
        ('Sunday, 06-Nov-94 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('Sun, 06 Nov 1994 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('06-Nov-94 08:49:37', '1994-11-06 08:49:37.00'),
        ('06-Nov-94', '1994-11-06 00:00:00.00'),
        ('06-NOV-94', '1994-11-06 00:00:00.00'),
        ('November 19 08:49:37', '%s-11-19 08:49:37.00' % t.year),
        ('Nov. 9', '%s-11-09 00:00:00.00' % t.year),
        ('Sonntag, der 6. November 1994, 08:49:37 GMT', '1994-11-06 08:49:37.00'),
        ('6. November 2001, 08:49:37', '2001-11-06 08:49:37.00'),
        ('sep 6', '%s-09-06 00:00:00.00' % t.year),
        ('sep 6 2000', '2000-09-06 00:00:00.00'),
        ('September 29', '%s-09-29 00:00:00.00' % t.year),
        ('Sep. 29', '%s-09-29 00:00:00.00' % t.year),
        ('6 sep', '%s-09-06 00:00:00.00' % t.year),
        ('29 September', '%s-09-29 00:00:00.00' % t.year),
        ('29 Sep.', '%s-09-29 00:00:00.00' % t.year),
        ('sep 6 2001', '2001-09-06 00:00:00.00'),
        ('Sep 6, 2001', '2001-09-06 00:00:00.00'),
        ('September 6, 2001', '2001-09-06 00:00:00.00'),
        ('sep 6 01', '2001-09-06 00:00:00.00'),
        ('Sep 6, 01', '2001-09-06 00:00:00.00'),
        ('September 6, 01', '2001-09-06 00:00:00.00'),
        ('30 Apr 2006 20:19:00', '2006-04-30 20:19:00.00'),

        # ISO formats
        ('1994-11-06 08:49:37', '1994-11-06 08:49:37.00'),
        ('010203', '2001-02-03 00:00:00.00'),
        ('2001-02-03 00:00:00.00', '2001-02-03 00:00:00.00'),
        ('2001-02 00:00:00.00', '2001-02-01 00:00:00.00'),
        ('2001-02-03', '2001-02-03 00:00:00.00'),
        ('2001-02', '2001-02-01 00:00:00.00'),
        ('20000824/2300', '2000-08-24 23:00:00.00'),
        ('20000824/0102', '2000-08-24 01:02:00.00'),
        ('20000824', '2000-08-24 00:00:00.00'),
        ('20000824/020301', '2000-08-24 02:03:01.00'),
        ('20000824 020301', '2000-08-24 02:03:01.00'),
        ('20000824T020301', '2000-08-24 02:03:01.00'),
        ('20000824 020301', '2000-08-24 02:03:01.00'),
        ('2000-08-24 02:03:01.00', '2000-08-24 02:03:01.00'),
        ('T020311', '%s 02:03:11.00' % _date),
        ('2003-12-9', '2003-12-09 00:00:00.00'),
        ('03-12-9', '2003-12-09 00:00:00.00'),
        ('003-12-9', '0003-12-09 00:00:00.00'),
        ('0003-12-9', '0003-12-09 00:00:00.00'),
        ('2003-1-9', '2003-01-09 00:00:00.00'),
        ('03-1-9', '2003-01-09 00:00:00.00'),
        ('003-1-9', '0003-01-09 00:00:00.00'),
        ('0003-1-9', '0003-01-09 00:00:00.00'),

        # US formats
        ('06/11/94 08:49:37', '1994-06-11 08:49:37.00'),
        ('11/06/94 08:49:37', '1994-11-06 08:49:37.00'),
        ('9/23/2001', '2001-09-23 00:00:00.00'),
        ('9-23-2001', '2001-09-23 00:00:00.00'),
        ('9/6', '%s-09-06 00:00:00.00' % t.year),
        ('09/6', '%s-09-06 00:00:00.00' % t.year),
        ('9/06', '%s-09-06 00:00:00.00' % t.year),
        ('09/06', '%s-09-06 00:00:00.00' % t.year),
        ('9/6/2001', '2001-09-06 00:00:00.00'),
        ('09/6/2001', '2001-09-06 00:00:00.00'),
        ('9/06/2001', '2001-09-06 00:00:00.00'),
        ('09/06/2001', '2001-09-06 00:00:00.00'),
        ('9-6-2001', '2001-09-06 00:00:00.00'),
        ('09-6-2001', '2001-09-06 00:00:00.00'),
        ('9-06-2001', '2001-09-06 00:00:00.00'),
        ('09-06-2001', '2001-09-06 00:00:00.00'),
        ('2002/05/28 13:10:56.114700 GMT+2', '2002-05-28 13:10:56.114700'),
        ('1970/01/01', '1970-01-01 00:00:00.00'),
        ('20021025 12:00 PM', '2002-10-25 12:00:00.00'),
        ('20021025 12:30 PM', '2002-10-25 12:30:00.00'),
        ('20021025 12:00 AM', '2002-10-25 00:00:00.00'),
        ('20021025 12:30 AM', '2002-10-25 00:30:00.00'),
        ('20021025 1:00 PM', '2002-10-25 13:00:00.00'),
        ('20021025 2:00 AM', '2002-10-25 02:00:00.00'),
        ('Thursday, February 06, 2003 12:40 PM', '2003-02-06 12:40:00.00'),
        ('Mon, 18 Sep 2006 23:03:00', '2006-09-18 23:03:00.00'),

        # European formats
        ('6.11.2001, 08:49:37', '2001-11-06 08:49:37.00'),
        ('06.11.2001, 08:49:37', '2001-11-06 08:49:37.00'),
        ('06.11. 08:49:37', '%s-11-06 08:49:37.00' % t.year),
        #('21/12/2002', '2002-12-21 00:00:00.00'),
        #('21/08/2002', '2002-08-21 00:00:00.00'),
        #('21-08-2002', '2002-08-21 00:00:00.00'),
        #('13/01/03', '2003-01-13 00:00:00.00'),
        #('13/1/03', '2003-01-13 00:00:00.00'),
        #('13/1/3', '2003-01-13 00:00:00.00'),
        #('13/01/3', '2003-01-13 00:00:00.00'),

        # Time only formats
        ('01:03', '%s 01:03:00.00' % _date),
        ('01:03:11', '%s 01:03:11.00' % _date),
        ('01:03:11.50', '%s 01:03:11.500000' % _date),
        ('01:03:11.50 AM', '%s 01:03:11.500000' % _date),
        ('01:03:11.50 PM', '%s 13:03:11.500000' % _date),
        ('01:03:11.50 a.m.', '%s 01:03:11.500000' % _date),
        ('01:03:11.50 p.m.', '%s 13:03:11.500000' % _date),

        # Invalid formats
        ('6..2001, 08:49:37', '%s 08:49:37.00' % _date),
        ('9//2001', 'ignore'),
        ('06--94 08:49:37', 'ignore'),
        ('20-03 00:00:00.00', 'ignore'),
        ('9/2001', 'ignore'),
        ('9-6', 'ignore'),
        ('09-6', 'ignore'),
        ('9-06', 'ignore'),
        ('09-06', 'ignore'),
        ('20000824/23', 'ignore'),
        ('November 1994 08:49:37', 'ignore'),
        ]

    # Add Unicode versions (only where a separate unicode type exists)
    try:
        unicode
    except NameError:
        pass
    else:
        k = []
        for text, result in l:
            k.append((unicode(text), result))
        l.extend(k)

    for text, reference in l:
        try:
            value = datetime_from_string(text)
        except Exception as why:
            if reference is None:
                continue
            else:
                # Compare the error message against the reference
                value = str(why)
        valid_datetime = validateDateTimeString(text)
        valid_date = validateDateString(text)

        # str(datetime) omits the fraction when it is zero
        if reference[-3:] == '.00':
            reference = reference[:-3]

        if str(value) != reference and \
           not reference == 'ignore':
            print('Failed to parse "%s"' % text)
            print('  expected: %s' % (reference or '<exception>'))
            print('  parsed:   %s' % value)
        elif _debug:
            print('Parsed "%s" successfully' % text)
        if _debug:
            if not valid_datetime:
                print('  "%s" failed date/time validation' % text)
            if not valid_date:
                print('  "%s" failed date validation' % text)

    et = dt.datetime.now()
    elapsed = et - t
    # Include days and microseconds; .seconds alone drops both
    print('done. (after %f seconds)' % (elapsed.days * 86400
                                        + elapsed.seconds
                                        + elapsed.microseconds / 1000000.0))


if __name__ == '__main__':
    _test()