Statistics
| Revision:

svn-gvsig-desktop / tags / v1_1_Build_1003 / extensions / extScripting / scripts / jython / Lib / rfc822.py @ 12271

History | View | Annotate | Download (30.5 KB)

1
"""RFC-822 message manipulation class.
2

3
XXX This is only a very rough sketch of a full RFC-822 parser;
4
in particular the tokenizing of addresses does not adhere to all the
5
quoting rules.
6

7
Directions for use:
8

9
To create a Message object: first open a file, e.g.:
10
  fp = open(file, 'r')
11
You can use any other legal way of getting an open file object, e.g. use
12
sys.stdin or call os.popen().
13
Then pass the open file object to the Message() constructor:
14
  m = Message(fp)
15

16
This class can work with any input object that supports a readline
17
method.  If the input object has seek and tell capability, the
18
rewindbody method will work; also illegal lines will be pushed back
19
onto the input stream.  If the input object lacks seek but has an
20
`unread' method that can push back a line of input, Message will use
21
that to push back illegal lines.  Thus this class can be used to parse
22
messages coming from a buffered stream.
23

24
The optional `seekable' argument is provided as a workaround for
25
certain stdio libraries in which tell() discards buffered data before
26
discovering that the lseek() system call doesn't work.  For maximum
27
portability, you should set the seekable argument to zero to prevent
28
that initial tell() call when passing in an unseekable object such as
29
a file object created from a socket object.  If it is 1 on entry --
30
which it is by default -- the tell() method of the open file object is
31
called once; if this raises an exception, seekable is reset to 0.  For
32
other nonzero values of seekable, this test is not made.
33

34
To get the text of a particular header there are several methods:
35
  str = m.getheader(name)
36
  str = m.getrawheader(name)
37
where name is the name of the header, e.g. 'Subject'.
38
The difference is that getheader() strips the leading and trailing
39
whitespace, while getrawheader() doesn't.  Both functions retain
40
embedded whitespace (including newlines) exactly as they are
41
specified in the header, and leave the case of the text unchanged.
42

43
For addresses and address lists there are functions
44
  realname, mailaddress = m.getaddr(name) and
45
  list = m.getaddrlist(name)
46
where the latter returns a list of (realname, mailaddr) tuples.
47

48
There is also a method
49
  time = m.getdate(name)
50
which parses a Date-like field and returns a time-compatible tuple,
51
i.e. a tuple such as returned by time.localtime() or accepted by
52
time.mktime().
53

54
See the class definition for lower level access methods.
55

56
There are also some utility functions here.
57
"""
58
# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
59

    
60
import time
61

    
62
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
63

    
64
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from `fp' by the constructor and kept in two
    forms:

    self.headers -- the raw header lines, in order, exactly as read
    self.dict    -- maps each lower-cased header name to its stripped
                    value (only the *last* occurrence of a name is kept)

    self.unixfrom holds any leading unix "From " envelope line(s) and
    self.status is '' when parsing went well, else an error message.
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  `seekable' says
        whether fp supports tell()/seek(); when it is exactly 1 the
        claim is verified by calling fp.tell() once.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # Deliberately broad best-effort probe: any failure,
                # including a missing tell attribute, just means
                # "treat the stream as unseekable".
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError when the underlying file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an unread() pushback hook; fall back on tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                # Remember where this line starts so it can be pushed
                # back if it turns out not to be a header.
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognize MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current match.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "name:" from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once;
        each value in the result list is stripped in the same way as the
        result of getheader().  If the header is not given, return an
        empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the current value.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) when the header holds no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    raw.append(', ')
                # Keep everything after the first colon.  find() gives -1
                # when there is no colon, so the whole line is kept then,
                # instead of reusing a stale or unbound value.
                raw.append(h[h.find(':') + 1:])
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime(), or None if the header
        is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        try:
            del self.dict[name]
        except KeyError:
            return
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Slice-assign so self.headers stays the same list object.
        self.headers[:] = kept

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        try:
            self.dict[name.lower()]
        except KeyError:
            return 0
        return 1

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back together."""
        return ''.join(self.headers)
448

    
449

    
450
# Utility functions
451
# -----------------
452

    
453
# XXX Should fix unquote() and quote() to be really conformant.
454
# XXX The inverses of the parse functions may also be useful.
455

    
456

    
457
def unquote(str):
    """Remove one pair of enclosing quotes or angle brackets from a string.

    If `str' is longer than one character and is wrapped in "..." or
    <...>, the wrapped text is returned; otherwise `str' is returned
    unchanged.  Backslash escapes inside the text are left as they are.
    """
    if len(str) > 1:
        for opener, closer in (('"', '"'), ('<', '>')):
            if str[0] == opener and str[-1:] == closer:
                return str[1:-1]
    return str
465

    
466

    
467
def quote(str):
    """Escape backslashes and double quotes in a string.

    Each backslash is doubled and each double quote gets a backslash in
    front of it.  No enclosing quote characters are added; the caller
    wraps the result in "..." if needed.
    """
    # Backslashes first, so the ones added for quotes are not doubled.
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
470

    
471

    
472
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned.  If nothing
    can be parsed the result is (None, None).
    """
    parsed = AddrlistClass(address).getaddrlist()
    if not parsed:
        return (None, None)
    return parsed[0]
480

    
481

    
482
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser walks self.field one character at a time; self.pos is the
    index of the next unread character and self.commentlist collects the
    text of the RFC-822 comments seen for the address being parsed.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; comments met on the way are
        stored in self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (realname, mailaddr) tuple.  Parsing stops at the first position
        where getaddress() yields nothing.
        """
        # Iterative on purpose: one stack frame per address would
        # overflow the interpreter stack on long recipient lists.
        result = []
        while 1:
            ad = self.getaddress()
            if not ad:
                break
            result = result + ad
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a
        plain address, several for a group, empty when no address was
        found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Keep a *copy*: getphraselist() appends to self.commentlist in
        # place, and the addr-spec branch below must start again from
        # the comments seen so far, not the ones met while looking ahead.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so parsing can go on.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when not positioned at a '<' or when the brackets
        hold no addr-spec.
        """
        if self.pos >= len(self.field) or self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a source route ("@domain"); discarded.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addr-spec
                # (normally the closing '>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # Local part only, no domain.
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.
        """
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos += 1
        while self.pos < len(self.field):
            if quote == 1:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom."""
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  The words are returned as a
        list; whitespace between them is dropped and comments go to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else: plist.append(self.getatom())

        return plist
739

    
740
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    self.addresslist holds the parsed (realname, mailaddr) tuples.  The
    +, -, += and -= operators combine two AddressList objects as sets of
    such tuples.
    """
    def __init__(self, field):
        """Parse `field'; an empty or None field gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses as one comma-separated string."""
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        """Set union: a new list with our entries plus other's new ones."""
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        """Set union, in-place."""
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        """Set difference: a new list with our entries not in other."""
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        """Set difference, in-place."""
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        """Index into the address list."""
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
789

    
790
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    Returns '"name" <address>' when the name is non-empty, otherwise
    just the address.  The name is inserted as-is, without escaping.
    """
    name, address = pair[0], pair[1]
    if not name:
        return address
    return '"' + name + '" <' + address + '>'
796

    
797
# Parse a date field
798

    
799
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 0, 0,
    tzoffset), or None if the string cannot be parsed.  tzoffset is the
    zone's offset from UTC in seconds, or None when the zone is missing
    or not recognized.  Zone names are looked up in _timezones; military
    zones other than Z are not handled.  The year is returned as
    written, so a two-digit year stays two digits.
    """
    data = data.split()
    if not data:
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, e.g. "08:49:37+0100".
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if not mm in _monthnames:
        # Maybe "month day" order rather than "day month".
        dd, mm = mm, dd.lower()
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12    # full month names follow the short ones
    # Slices ([-1:], [:1]) are used below so that empty fields fall
    # through to the int() check instead of raising IndexError.
    if dd[-1:] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Time and year are swapped (ctime()-style date).
        yy, tm = tm, yy
    if yy[-1:] == ',':
        yy = yy[:-1]
    if not yy[:1].isdigit():
        yy, tz = tz, yy
    if tm[-1:] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    try:
        tzoffset = _timezones[tz]
    except KeyError:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # divmod keeps the result an integer on every Python version.
        hours, minutes = divmod(tzoffset, 100)
        tzoffset = tzsign * (hours*3600 + minutes*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
897

    
898

    
899
def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine elements of what parsedate_tz() gives, i.e.
    without the time zone offset.  Any non-tuple result (None for an
    unparsable string) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if isinstance(parsed, type(())):
        return parsed[:9]
    return parsed
905

    
906

    
907
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds east of UTC, or None.  With
    None the first eight fields are taken as local time and converted
    with time.mktime().  Otherwise the date fields are converted as if
    they were UTC and the offset is subtracted, so the result does not
    depend on the local time zone.  Returns a float.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    # Local import: only needed on this path and keeps the edit
    # self-contained.
    import calendar
    # timegm() does plain UTC arithmetic; going through
    # time.mktime() - time.timezone is wrong whenever the local zone's
    # offset on that date differs from the current time.timezone.
    return float(calendar.timegm(tuple(data[:6])) - data[9])
915

    
916
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp in seconds since the epoch; the current
    time is used when it is None.  The result is always in GMT.
    """
    if timeval is None:
        timeval = time.time()
    tm = time.gmtime(timeval)
    # Day and month names are spelled out here rather than taken from
    # strftime('%a'/'%b'), which follow the current locale.
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[tm[6]],
        tm[2],
        ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[tm[1] - 1],
        tm[0], tm[3], tm[4], tm[5])
925

    
926

    
927
# When used as script, run a small test program.
928
# The first command line argument must be a filename containing one
929
# message in RFC-822 format.
930

    
931
if __name__ == '__main__':
932
    import sys, os
933
    file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
934
    if sys.argv[1:]: file = sys.argv[1]
935
    f = open(file, 'r')
936
    m = Message(f)
937
    print 'From:', m.getaddr('from')
938
    print 'To:', m.getaddrlist('to')
939
    print 'Subject:', m.getheader('subject')
940
    print 'Date:', m.getheader('date')
941
    date = m.getdate_tz('date')
942
    tz = date[-1]
943
    date = time.localtime(mktime_tz(date))
944
    if date:
945
        print 'ParsedDate:', time.asctime(date),
946
        hhmmss = tz
947
        hhmm, ss = divmod(hhmmss, 60)
948
        hh, mm = divmod(hhmm, 60)
949
        print "%+03d%02d" % (hh, mm),
950
        if ss: print ".%02d" % ss,
951
        print
952
    else:
953
        print 'ParsedDate:', None
954
    m.rewindbody()
955
    n = 0
956
    while f.readline():
957
        n = n + 1
958
    print 'Lines:', n
959
    print '-'*70
960
    print 'len =', len(m)
961
    if m.has_key('Date'): print 'Date =', m['Date']
962
    if m.has_key('X-Nonsense'): pass
963
    print 'keys =', m.keys()
964
    print 'values =', m.values()
965
    print 'items =', m.items()