email.internal: Add pristine from CPython 3.3.3.

This dist-package hosts few "package private" modules with names starting with "_". Maybe this is not the best grouping, but having a separate dist-package for each such module is a bit verbose.
2014-05-24 16:15:27 +03:00 · 2014-05-24 16:15:27 +03:00 · f93de4a4d5
commit f93de4a4d5
--- a/email.internal/email/_encoded_words.py
+++ b/email.internal/email/_encoded_words.py
@ -0,0 +1,221 @@
+""" Routines for manipulating RFC2047 encoded words.
+
+This is currently a package-private API, but will be considered for promotion
+to a public API if there is demand.
+
+"""
+
+# An ecoded word looks like this:
+#
+#        =?charset[*lang]?cte?encoded_string?=
+#
+# for more information about charset see the charset module.  Here it is one
+# of the preferred MIME charset names (hopefully; you never know when parsing).
+# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case).  In
+# theory other letters could be used for other encodings, but in practice this
+# (almost?) never happens.  There could be a public API for adding entries
+# to the CTE tables, but YAGNI for now.  'q' is Quoted Printable, 'b' is
+# Base64.  The meaning of encoded_string should be obvious.  'lang' is optional
+# as indicated by the brackets (they are not part of the syntax) but is almost
+# never encountered in practice.
+#
+# The general interface for a CTE decoder is that it takes the encoded_string
+# as its argument, and returns a tuple (cte_decoded_string, defects).  The
+# cte_decoded_string is the original binary that was encoded using the
+# specified cte.  'defects' is a list of MessageDefect instances indicating any
+# problems encountered during conversion.  'charset' and 'lang' are the
+# corresponding strings extracted from the EW, case preserved.
+#
+# The general interface for a CTE encoder is that it takes a binary sequence
+# as input and returns the cte_encoded_string, which is an ascii-only string.
+#
+# Each decoder must also supply a length function that takes the binary
+# sequence as its argument and returns the length of the resulting encoded
+# string.
+#
+# The main API functions for the module are decode, which calls the decoder
+# referenced by the cte specifier, and encode, which adds the appropriate
+# RFC 2047 "chrome" to the encoded string, and can optionally automatically
+# select the shortest possible encoding.  See their docstrings below for
+# details.
+
+import re
+import base64
+import binascii
+import functools
+from string import ascii_letters, digits
+from email import errors
+
+__all__ = ['decode_q',
+           'encode_q',
+           'decode_b',
+           'encode_b',
+           'len_q',
+           'len_b',
+           'decode',
+           'encode',
+           ]
+
+#
+# Quoted Printable
+#
+
+# regex based decoder.
+_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
+        lambda m: bytes([int(m.group(1), 16)]))
+
+def decode_q(encoded):
+    encoded = encoded.replace(b'_', b' ')
+    return _q_byte_subber(encoded), []
+
+
+# dict mapping bytes to their encoded form
+class _QByteMap(dict):
+
+    safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')
+
+    def __missing__(self, key):
+        if key in self.safe:
+            self[key] = chr(key)
+        else:
+            self[key] = "={:02X}".format(key)
+        return self[key]
+
+_q_byte_map = _QByteMap()
+
+# In headers spaces are mapped to '_'.
+_q_byte_map[ord(' ')] = '_'
+
+def encode_q(bstring):
+    return ''.join(_q_byte_map[x] for x in bstring)
+
+def len_q(bstring):
+    return sum(len(_q_byte_map[x]) for x in bstring)
+
+
+#
+# Base64
+#
+
+def decode_b(encoded):
+    defects = []
+    pad_err = len(encoded) % 4
+    if pad_err:
+        defects.append(errors.InvalidBase64PaddingDefect())
+        padded_encoded = encoded + b'==='[:4-pad_err]
+    else:
+        padded_encoded = encoded
+    try:
+        return base64.b64decode(padded_encoded, validate=True), defects
+    except binascii.Error:
+        # Since we had correct padding, this must an invalid char error.
+        defects = [errors.InvalidBase64CharactersDefect()]
+        # The non-alphabet characters are ignored as far as padding
+        # goes, but we don't know how many there are.  So we'll just
+        # try various padding lengths until something works.
+        for i in 0, 1, 2, 3:
+            try:
+                return base64.b64decode(encoded+b'='*i, validate=False), defects
+            except binascii.Error:
+                if i==0:
+                    defects.append(errors.InvalidBase64PaddingDefect())
+        else:
+            # This should never happen.
+            raise AssertionError("unexpected binascii.Error")
+
+def encode_b(bstring):
+    return base64.b64encode(bstring).decode('ascii')
+
+def len_b(bstring):
+    groups_of_3, leftover = divmod(len(bstring), 3)
+    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+    return groups_of_3 * 4 + (4 if leftover else 0)
+
+
+_cte_decoders = {
+    'q': decode_q,
+    'b': decode_b,
+    }
+
+def decode(ew):
+    """Decode encoded word and return (string, charset, lang, defects) tuple.
+
+    An RFC 2047/2243 encoded word has the form:
+
+        =?charset*lang?cte?encoded_string?=
+
+    where '*lang' may be omitted but the other parts may not be.
+
+    This function expects exactly such a string (that is, it does not check the
+    syntax and may raise errors if the string is not well formed), and returns
+    the encoded_string decoded first from its Content Transfer Encoding and
+    then from the resulting bytes into unicode using the specified charset.  If
+    the cte-decoded string does not successfully decode using the specified
+    character set, a defect is added to the defects list and the unknown octets
+    are replaced by the unicode 'unknown' character \uFDFF.
+
+    The specified charset and language are returned.  The default for language,
+    which is rarely if ever encountered, is the empty string.
+
+    """
+    _, charset, cte, cte_string, _ = ew.split('?')
+    charset, _, lang = charset.partition('*')
+    cte = cte.lower()
+    # Recover the original bytes and do CTE decoding.
+    bstring = cte_string.encode('ascii', 'surrogateescape')
+    bstring, defects = _cte_decoders[cte](bstring)
+    # Turn the CTE decoded bytes into unicode.
+    try:
+        string = bstring.decode(charset)
+    except UnicodeError:
+        defects.append(errors.UndecodableBytesDefect("Encoded word "
+            "contains bytes not decodable using {} charset".format(charset)))
+        string = bstring.decode(charset, 'surrogateescape')
+    except LookupError:
+        string = bstring.decode('ascii', 'surrogateescape')
+        if charset.lower() != 'unknown-8bit':
+            defects.append(errors.CharsetError("Unknown charset {} "
+                "in encoded word; decoded as unknown bytes".format(charset)))
+    return string, charset, lang, defects
+
+
+_cte_encoders = {
+    'q': encode_q,
+    'b': encode_b,
+    }
+
+_cte_encode_length = {
+    'q': len_q,
+    'b': len_b,
+    }
+
+def encode(string, charset='utf-8', encoding=None, lang=''):
+    """Encode string using the CTE encoding that produces the shorter result.
+
+    Produces an RFC 2047/2243 encoded word of the form:
+
+        =?charset*lang?cte?encoded_string?=
+
+    where '*lang' is omitted unless the 'lang' parameter is given a value.
+    Optional argument charset (defaults to utf-8) specifies the charset to use
+    to encode the string to binary before CTE encoding it.  Optional argument
+    'encoding' is the cte specifier for the encoding that should be used ('q'
+    or 'b'); if it is None (the default) the encoding which produces the
+    shortest encoded sequence is used, except that 'q' is preferred if it is up
+    to five characters longer.  Optional argument 'lang' (default '') gives the
+    RFC 2243 language string to specify in the encoded word.
+
+    """
+    if charset == 'unknown-8bit':
+        bstring = string.encode('ascii', 'surrogateescape')
+    else:
+        bstring = string.encode(charset)
+    if encoding is None:
+        qlen = _cte_encode_length['q'](bstring)
+        blen = _cte_encode_length['b'](bstring)
+        # Bias toward q.  5 is arbitrary.
+        encoding = 'q' if qlen - blen < 5 else 'b'
+    encoded = _cte_encoders[encoding](bstring)
+    if lang:
+        lang = '*' + lang
+    return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded)
--- a/email.internal/email/_parseaddr.py
+++ b/email.internal/email/_parseaddr.py
@ -0,0 +1,540 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Email address parsing code.
+
+Lifted directly from rfc822.py.  This should eventually be rewritten.
+"""
+
+__all__ = [
+    'mktime_tz',
+    'parsedate',
+    'parsedate_tz',
+    'quote',
+    ]
+
+import time, calendar
+
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
+
+# Parse a date field
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+               'aug', 'sep', 'oct', 'nov', 'dec',
+               'january', 'february', 'march', 'april', 'may', 'june', 'july',
+               'august', 'september', 'october', 'november', 'december']
+
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+# The timezone table does not include the military time zones defined
+# in RFC822, other than Z.  According to RFC1123, the description in
+# RFC822 gets the signs wrong, so we can't rely on any such time
+# zones.  RFC1123 recommends that numeric timezone indicators be used
+# instead of timezone names.
+
+_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
+              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
+              'EST': -500, 'EDT': -400,  # Eastern
+              'CST': -600, 'CDT': -500,  # Central
+              'MST': -700, 'MDT': -600,  # Mountain
+              'PST': -800, 'PDT': -700   # Pacific
+              }
+
+
+def parsedate_tz(data):
+    """Convert a date string to a time tuple.
+
+    Accounts for military timezones.
+    """
+    res = _parsedate_tz(data)
+    if not res:
+        return
+    if res[9] is None:
+        res[9] = 0
+    return tuple(res)
+
+def _parsedate_tz(data):
+    """Convert date to extended time tuple.
+
+    The last (additional) element is the time zone offset in seconds, except if
+    the timezone was specified as -0000.  In that case the last element is
+    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
+    the source timezone, as opposed to a +0000 timestamp that indicates the
+    source timezone really was UTC.
+
+    """
+    if not data:
+        return
+    data = data.split()
+    # The FWS after the comma after the day-of-week is optional, so search and
+    # adjust for this.
+    if data[0].endswith(',') or data[0].lower() in _daynames:
+        # There's a dayname here. Skip it
+        del data[0]
+    else:
+        i = data[0].rfind(',')
+        if i >= 0:
+            data[0] = data[0][i+1:]
+    if len(data) == 3: # RFC 850 date, deprecated
+        stuff = data[0].split('-')
+        if len(stuff) == 3:
+            data = stuff + data[1:]
+    if len(data) == 4:
+        s = data[3]
+        i = s.find('+')
+        if i == -1:
+            i = s.find('-')
+        if i > 0:
+            data[3:] = [s[:i], s[i:]]
+        else:
+            data.append('') # Dummy tz
+    if len(data) < 5:
+        return None
+    data = data[:5]
+    [dd, mm, yy, tm, tz] = data
+    mm = mm.lower()
+    if mm not in _monthnames:
+        dd, mm = mm, dd.lower()
+        if mm not in _monthnames:
+            return None
+    mm = _monthnames.index(mm) + 1
+    if mm > 12:
+        mm -= 12
+    if dd[-1] == ',':
+        dd = dd[:-1]
+    i = yy.find(':')
+    if i > 0:
+        yy, tm = tm, yy
+    if yy[-1] == ',':
+        yy = yy[:-1]
+    if not yy[0].isdigit():
+        yy, tz = tz, yy
+    if tm[-1] == ',':
+        tm = tm[:-1]
+    tm = tm.split(':')
+    if len(tm) == 2:
+        [thh, tmm] = tm
+        tss = '0'
+    elif len(tm) == 3:
+        [thh, tmm, tss] = tm
+    elif len(tm) == 1 and '.' in tm[0]:
+        # Some non-compliant MUAs use '.' to separate time elements.
+        tm = tm[0].split('.')
+        if len(tm) == 2:
+            [thh, tmm] = tm
+            tss = 0
+        elif len(tm) == 3:
+            [thh, tmm, tss] = tm
+    else:
+        return None
+    try:
+        yy = int(yy)
+        dd = int(dd)
+        thh = int(thh)
+        tmm = int(tmm)
+        tss = int(tss)
+    except ValueError:
+        return None
+    # Check for a yy specified in two-digit format, then convert it to the
+    # appropriate four-digit format, according to the POSIX standard. RFC 822
+    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
+    # mandates a 4-digit yy. For more information, see the documentation for
+    # the time module.
+    if yy < 100:
+        # The year is between 1969 and 1999 (inclusive).
+        if yy > 68:
+            yy += 1900
+        # The year is between 2000 and 2068 (inclusive).
+        else:
+            yy += 2000
+    tzoffset = None
+    tz = tz.upper()
+    if tz in _timezones:
+        tzoffset = _timezones[tz]
+    else:
+        try:
+            tzoffset = int(tz)
+        except ValueError:
+            pass
+        if tzoffset==0 and tz.startswith('-'):
+            tzoffset = None
+    # Convert a timezone offset into seconds ; -0500 -> -18000
+    if tzoffset:
+        if tzoffset < 0:
+            tzsign = -1
+            tzoffset = -tzoffset
+        else:
+            tzsign = 1
+        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
+    # Daylight Saving Time flag is set to -1, since DST is unknown.
+    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
+
+
+def parsedate(data):
+    """Convert a time string to a time tuple."""
+    t = parsedate_tz(data)
+    if isinstance(t, tuple):
+        return t[:9]
+    else:
+        return t
+
+
+def mktime_tz(data):
+    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
+    if data[9] is None:
+        # No zone info, so localtime is better assumption than GMT
+        return time.mktime(data[:8] + (-1,))
+    else:
+        t = calendar.timegm(data)
+        return t - data[9]
+
+
+def quote(str):
+    """Prepare string to be used in a quoted string.
+
+    Turns backslash and double quote characters into quoted pairs.  These
+    are the only characters that need to be quoted inside a quoted string.
+    Does not add the surrounding double quotes.
+    """
+    return str.replace('\\', '\\\\').replace('"', '\\"')
+
+
+class AddrlistClass:
+    """Address parser class by Ben Escoto.
+
+    To understand what this class does, it helps to have a copy of RFC 2822 in
+    front of you.
+
+    Note: this class interface is deprecated and may be removed in the future.
+    Use email.utils.AddressList instead.
+    """
+
+    def __init__(self, field):
+        """Initialize a new instance.
+
+        `field' is an unparsed address header field, containing
+        one or more addresses.
+        """
+        self.specials = '()<>@,:;.\"[]'
+        self.pos = 0
+        self.LWS = ' \t'
+        self.CR = '\r\n'
+        self.FWS = self.LWS + self.CR
+        self.atomends = self.specials + self.LWS + self.CR
+        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
+        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
+        # syntax, so allow dots in phrases.
+        self.phraseends = self.atomends.replace('.', '')
+        self.field = field
+        self.commentlist = []
+
+    def gotonext(self):
+        """Skip white space and extract comments."""
+        wslist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS + '\n\r':
+                if self.field[self.pos] not in '\n\r':
+                    wslist.append(self.field[self.pos])
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            else:
+                break
+        return EMPTYSTRING.join(wslist)
+
+    def getaddrlist(self):
+        """Parse all addresses.
+
+        Returns a list containing all of the addresses.
+        """
+        result = []
+        while self.pos < len(self.field):
+            ad = self.getaddress()
+            if ad:
+                result += ad
+            else:
+                result.append(('', ''))
+        return result
+
+    def getaddress(self):
+        """Parse the next address."""
+        self.commentlist = []
+        self.gotonext()
+
+        oldpos = self.pos
+        oldcl = self.commentlist
+        plist = self.getphraselist()
+
+        self.gotonext()
+        returnlist = []
+
+        if self.pos >= len(self.field):
+            # Bad email address technically, no domain.
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+
+        elif self.field[self.pos] in '.@':
+            # email address is just an addrspec
+            # this isn't very efficient since we start over
+            self.pos = oldpos
+            self.commentlist = oldcl
+            addrspec = self.getaddrspec()
+            returnlist = [(SPACE.join(self.commentlist), addrspec)]
+
+        elif self.field[self.pos] == ':':
+            # address is a group
+            returnlist = []
+
+            fieldlen = len(self.field)
+            self.pos += 1
+            while self.pos < len(self.field):
+                self.gotonext()
+                if self.pos < fieldlen and self.field[self.pos] == ';':
+                    self.pos += 1
+                    break
+                returnlist = returnlist + self.getaddress()
+
+        elif self.field[self.pos] == '<':
+            # Address is a phrase then a route addr
+            routeaddr = self.getrouteaddr()
+
+            if self.commentlist:
+                returnlist = [(SPACE.join(plist) + ' (' +
+                               ' '.join(self.commentlist) + ')', routeaddr)]
+            else:
+                returnlist = [(SPACE.join(plist), routeaddr)]
+
+        else:
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+            elif self.field[self.pos] in self.specials:
+                self.pos += 1
+
+        self.gotonext()
+        if self.pos < len(self.field) and self.field[self.pos] == ',':
+            self.pos += 1
+        return returnlist
+
+    def getrouteaddr(self):
+        """Parse a route address (Return-path value).
+
+        This method just skips all the route stuff and returns the addrspec.
+        """
+        if self.field[self.pos] != '<':
+            return
+
+        expectroute = False
+        self.pos += 1
+        self.gotonext()
+        adlist = ''
+        while self.pos < len(self.field):
+            if expectroute:
+                self.getdomain()
+                expectroute = False
+            elif self.field[self.pos] == '>':
+                self.pos += 1
+                break
+            elif self.field[self.pos] == '@':
+                self.pos += 1
+                expectroute = True
+            elif self.field[self.pos] == ':':
+                self.pos += 1
+            else:
+                adlist = self.getaddrspec()
+                self.pos += 1
+                break
+            self.gotonext()
+
+        return adlist
+
+    def getaddrspec(self):
+        """Parse an RFC 2822 addr-spec."""
+        aslist = []
+
+        self.gotonext()
+        while self.pos < len(self.field):
+            preserve_ws = True
+            if self.field[self.pos] == '.':
+                if aslist and not aslist[-1].strip():
+                    aslist.pop()
+                aslist.append('.')
+                self.pos += 1
+                preserve_ws = False
+            elif self.field[self.pos] == '"':
+                aslist.append('"%s"' % quote(self.getquote()))
+            elif self.field[self.pos] in self.atomends:
+                if aslist and not aslist[-1].strip():
+                    aslist.pop()
+                break
+            else:
+                aslist.append(self.getatom())
+            ws = self.gotonext()
+            if preserve_ws and ws:
+                aslist.append(ws)
+
+        if self.pos >= len(self.field) or self.field[self.pos] != '@':
+            return EMPTYSTRING.join(aslist)
+
+        aslist.append('@')
+        self.pos += 1
+        self.gotonext()
+        return EMPTYSTRING.join(aslist) + self.getdomain()
+
+    def getdomain(self):
+        """Get the complete domain name from an address."""
+        sdlist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS:
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] == '[':
+                sdlist.append(self.getdomainliteral())
+            elif self.field[self.pos] == '.':
+                self.pos += 1
+                sdlist.append('.')
+            elif self.field[self.pos] in self.atomends:
+                break
+            else:
+                sdlist.append(self.getatom())
+        return EMPTYSTRING.join(sdlist)
+
+    def getdelimited(self, beginchar, endchars, allowcomments=True):
+        """Parse a header fragment delimited by special characters.
+
+        `beginchar' is the start character for the fragment.
+        If self is not looking at an instance of `beginchar' then
+        getdelimited returns the empty string.
+
+        `endchars' is a sequence of allowable end-delimiting characters.
+        Parsing stops when one of these is encountered.
+
+        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
+        within the parsed fragment.
+        """
+        if self.field[self.pos] != beginchar:
+            return ''
+
+        slist = ['']
+        quote = False
+        self.pos += 1
+        while self.pos < len(self.field):
+            if quote:
+                slist.append(self.field[self.pos])
+                quote = False
+            elif self.field[self.pos] in endchars:
+                self.pos += 1
+                break
+            elif allowcomments and self.field[self.pos] == '(':
+                slist.append(self.getcomment())
+                continue        # have already advanced pos from getcomment
+            elif self.field[self.pos] == '\\':
+                quote = True
+            else:
+                slist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(slist)
+
+    def getquote(self):
+        """Get a quote-delimited fragment from self's field."""
+        return self.getdelimited('"', '"\r', False)
+
+    def getcomment(self):
+        """Get a parenthesis-delimited fragment from self's field."""
+        return self.getdelimited('(', ')\r', True)
+
+    def getdomainliteral(self):
+        """Parse an RFC 2822 domain-literal."""
+        return '[%s]' % self.getdelimited('[', ']\r', False)
+
+    def getatom(self, atomends=None):
+        """Parse an RFC 2822 atom.
+
+        Optional atomends specifies a different set of end token delimiters
+        (the default is to use self.atomends).  This is used e.g. in
+        getphraselist() since phrase endings must not include the `.' (which
+        is legal in phrases)."""
+        atomlist = ['']
+        if atomends is None:
+            atomends = self.atomends
+
+        while self.pos < len(self.field):
+            if self.field[self.pos] in atomends:
+                break
+            else:
+                atomlist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(atomlist)
+
+    def getphraselist(self):
+        """Parse a sequence of RFC 2822 phrases.
+
+        A phrase is a sequence of words, which are in turn either RFC 2822
+        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
+        runs of continuous whitespace into one space.
+        """
+        plist = []
+
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.FWS:
+                self.pos += 1
+            elif self.field[self.pos] == '"':
+                plist.append(self.getquote())
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] in self.phraseends:
+                break
+            else:
+                plist.append(self.getatom(self.phraseends))
+
+        return plist
+
+class AddressList(AddrlistClass):
+    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
+    def __init__(self, field):
+        AddrlistClass.__init__(self, field)
+        if field:
+            self.addresslist = self.getaddrlist()
+        else:
+            self.addresslist = []
+
+    def __len__(self):
+        return len(self.addresslist)
+
+    def __add__(self, other):
+        # Set union
+        newaddr = AddressList(None)
+        newaddr.addresslist = self.addresslist[:]
+        for x in other.addresslist:
+            if not x in self.addresslist:
+                newaddr.addresslist.append(x)
+        return newaddr
+
+    def __iadd__(self, other):
+        # Set union, in-place
+        for x in other.addresslist:
+            if not x in self.addresslist:
+                self.addresslist.append(x)
+        return self
+
+    def __sub__(self, other):
+        # Set difference
+        newaddr = AddressList(None)
+        for x in self.addresslist:
+            if not x in other.addresslist:
+                newaddr.addresslist.append(x)
+        return newaddr
+
+    def __isub__(self, other):
+        # Set difference, in-place
+        for x in other.addresslist:
+            if x in self.addresslist:
+                self.addresslist.remove(x)
+        return self
+
+    def __getitem__(self, index):
+        # Make indexing, slices, and 'in' work
+        return self.addresslist[index]
--- a/email.internal/email/_policybase.py
+++ b/email.internal/email/_policybase.py
@ -0,0 +1,358 @@
+"""Policy framework for the email package.
+
+Allows fine grained feature control of how the package parses and emits data.
+"""
+
+import abc
+from email import header
+from email import charset as _charset
+from email.utils import _has_surrogates
+
+__all__ = [
+    'Policy',
+    'Compat32',
+    'compat32',
+    ]
+
+
+class _PolicyBase:
+
+    """Policy Object basic framework.
+
+    This class is useless unless subclassed.  A subclass should define
+    class attributes with defaults for any values that are to be
+    managed by the Policy object.  The constructor will then allow
+    non-default values to be set for these attributes at instance
+    creation time.  The instance will be callable, taking these same
+    attributes keyword arguments, and returning a new instance
+    identical to the called instance except for those values changed
+    by the keyword arguments.  Instances may be added, yielding new
+    instances with any non-default values from the right hand
+    operand overriding those in the left hand operand.  That is,
+
+        A + B == A(<non-default values of B>)
+
+    The repr of an instance can be used to reconstruct the object
+    if and only if the repr of the values can be used to reconstruct
+    those values.
+
+    """
+
+    def __init__(self, **kw):
+        """Create new Policy, possibly overriding some defaults.
+
+        See class docstring for a list of overridable attributes.
+
+        """
+        for name, value in kw.items():
+            if hasattr(self, name):
+                super(_PolicyBase,self).__setattr__(name, value)
+            else:
+                raise TypeError(
+                    "{!r} is an invalid keyword argument for {}".format(
+                        name, self.__class__.__name__))
+
+    def __repr__(self):
+        args = [ "{}={!r}".format(name, value)
+                 for name, value in self.__dict__.items() ]
+        return "{}({})".format(self.__class__.__name__, ', '.join(args))
+
+    def clone(self, **kw):
+        """Return a new instance with specified attributes changed.
+
+        The new instance has the same attribute values as the current object,
+        except for the changes passed in as keyword arguments.
+
+        """
+        newpolicy = self.__class__.__new__(self.__class__)
+        for attr, value in self.__dict__.items():
+            object.__setattr__(newpolicy, attr, value)
+        for attr, value in kw.items():
+            if not hasattr(self, attr):
+                raise TypeError(
+                    "{!r} is an invalid keyword argument for {}".format(
+                        attr, self.__class__.__name__))
+            object.__setattr__(newpolicy, attr, value)
+        return newpolicy
+
+    def __setattr__(self, name, value):
+        if hasattr(self, name):
+            msg = "{!r} object attribute {!r} is read-only"
+        else:
+            msg = "{!r} object has no attribute {!r}"
+        raise AttributeError(msg.format(self.__class__.__name__, name))
+
+    def __add__(self, other):
+        """Non-default values from right operand override those from left.
+
+        The object returned is a new instance of the subclass.
+
+        """
+        return self.clone(**other.__dict__)
+
+
+def _append_doc(doc, added_doc):
+    doc = doc.rsplit('\n', 1)[0]
+    added_doc = added_doc.split('\n', 1)[1]
+    return doc + '\n' + added_doc
+
+def _extend_docstrings(cls):
+    if cls.__doc__ and cls.__doc__.startswith('+'):
+        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
+    for name, attr in cls.__dict__.items():
+        if attr.__doc__ and attr.__doc__.startswith('+'):
+            for c in (c for base in cls.__bases__ for c in base.mro()):
+                doc = getattr(getattr(c, name), '__doc__')
+                if doc:
+                    attr.__doc__ = _append_doc(doc, attr.__doc__)
+                    break
+    return cls
+
+
+class Policy(_PolicyBase, metaclass=abc.ABCMeta):
+
+    r"""Controls for how messages are interpreted and formatted.
+
+    Most of the classes and many of the methods in the email package accept
+    Policy objects as parameters.  A Policy object contains a set of values and
+    functions that control how input is interpreted and how output is rendered.
+    For example, the parameter 'raise_on_defect' controls whether or not an RFC
+    violation results in an error being raised or not, while 'max_line_length'
+    controls the maximum length of output lines when a Message is serialized.
+
+    Any valid attribute may be overridden when a Policy is created by passing
+    it as a keyword argument to the constructor.  Policy objects are immutable,
+    but a new Policy object can be created with only certain values changed by
+    calling the Policy instance with keyword arguments.  Policy objects can
+    also be added, producing a new Policy object in which the non-default
+    attributes set in the right hand operand overwrite those specified in the
+    left operand.
+
+    Settable attributes:
+
+    raise_on_defect     -- If true, then defects should be raised as errors.
+                           Default: False.
+
+    linesep             -- string containing the value to use as separation
+                           between output lines.  Default '\n'.
+
+    cte_type            -- Type of allowed content transfer encodings
+
+                           7bit  -- ASCII only
+                           8bit  -- Content-Transfer-Encoding: 8bit is allowed
+
+                           Default: 8bit.  Also controls the disposition of
+                           (RFC invalid) binary data in headers; see the
+                           documentation of the binary_fold method.
+
+    max_line_length     -- maximum length of lines, excluding 'linesep',
+                           during serialization.  None or 0 means no line
+                           wrapping is done.  Default is 78.
+
+    """
+
+    raise_on_defect = False
+    linesep = '\n'
+    cte_type = '8bit'
+    max_line_length = 78
+
+    def handle_defect(self, obj, defect):
+        """Based on policy, either raise defect or call register_defect.
+
+            handle_defect(obj, defect)
+
+        defect should be a Defect subclass, but in any case must be an
+        Exception subclass.  obj is the object on which the defect should be
+        registered if it is not raised.  If the raise_on_defect is True, the
+        defect is raised as an error, otherwise the object and the defect are
+        passed to register_defect.
+
+        This method is intended to be called by parsers that discover defects.
+        The email package parsers always call it with Defect instances.
+
+        """
+        if self.raise_on_defect:
+            raise defect
+        self.register_defect(obj, defect)
+
+    def register_defect(self, obj, defect):
+        """Record 'defect' on 'obj'.
+
+        Called by handle_defect if raise_on_defect is False.  This method is
+        part of the Policy API so that Policy subclasses can implement custom
+        defect handling.  The default implementation calls the append method of
+        the defects attribute of obj.  The objects used by the email package by
+        default that get passed to this method will always have a defects
+        attribute with an append method.
+
+        """
+        obj.defects.append(defect)
+
+    def header_max_count(self, name):
+        """Return the maximum allowed number of headers named 'name'.
+
+        Called when a header is added to a Message object.  If the returned
+        value is not 0 or None, and there are already a number of headers with
+        the name 'name' equal to the value returned, a ValueError is raised.
+
+        Because the default behavior of Message's __setitem__ is to append the
+        value to the list of headers, it is easy to create duplicate headers
+        without realizing it.  This method allows certain headers to be limited
+        in the number of instances of that header that may be added to a
+        Message programmatically.  (The limit is not observed by the parser,
+        which will faithfully produce as many headers as exist in the message
+        being parsed.)
+
+        The default implementation returns None for all header names.
+        """
+        return None
+
+    @abc.abstractmethod
+    def header_source_parse(self, sourcelines):
+        """Given a list of linesep terminated strings constituting the lines of
+        a single header, return the (name, value) tuple that should be stored
+        in the model.  The input lines should retain their terminating linesep
+        characters.  The lines passed in by the email package may contain
+        surrogateescaped binary data.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def header_store_parse(self, name, value):
+        """Given the header name and the value provided by the application
+        program, return the (name, value) that should be stored in the model.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def header_fetch_parse(self, name, value):
+        """Given the header name and the value from the model, return the value
+        to be returned to the application program that is requesting that
+        header.  The value passed in by the email package may contain
+        surrogateescaped binary data if the lines were parsed by a BytesParser.
+        The returned value should not contain any surrogateescaped data.
+
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def fold(self, name, value):
+        """Given the header name and the value from the model, return a string
+        containing linesep characters that implement the folding of the header
+        according to the policy controls.  The value passed in by the email
+        package may contain surrogateescaped binary data if the lines were
+        parsed by a BytesParser.  The returned value should not contain any
+        surrogateescaped data.
+
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def fold_binary(self, name, value):
+        """Given the header name and the value from the model, return binary
+        data containing linesep characters that implement the folding of the
+        header according to the policy controls.  The value passed in by the
+        email package may contain surrogateescaped binary data.
+
+        """
+        raise NotImplementedError
+
+
+@_extend_docstrings
+class Compat32(Policy):
+
+    """+
+    This particular policy is the backward compatibility Policy.  It
+    replicates the behavior of the email package version 5.1.
+    """
+
+    def _sanitize_header(self, name, value):
+        # If the header value contains surrogates, return a Header using
+        # the unknown-8bit charset to encode the bytes as encoded words.
+        if not isinstance(value, str):
+            # Assume it is already a header object
+            return value
+        if _has_surrogates(value):
+            return header.Header(value, charset=_charset.UNKNOWN8BIT,
+                                 header_name=name)
+        else:
+            return value
+
+    def header_source_parse(self, sourcelines):
+        """+
+        The name is parsed as everything up to the ':' and returned unmodified.
+        The value is determined by stripping leading whitespace off the
+        remainder of the first line, joining all subsequent lines together, and
+        stripping any trailing carriage return or linefeed characters.
+
+        """
+        name, value = sourcelines[0].split(':', 1)
+        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        return (name, value.rstrip('\r\n'))
+
+    def header_store_parse(self, name, value):
+        """+
+        The name and value are returned unmodified.
+        """
+        return (name, value)
+
+    def header_fetch_parse(self, name, value):
+        """+
+        If the value contains binary data, it is converted into a Header object
+        using the unknown-8bit charset.  Otherwise it is returned unmodified.
+        """
+        return self._sanitize_header(name, value)
+
+    def fold(self, name, value):
+        """+
+        Headers are folded using the Header folding algorithm, which preserves
+        existing line breaks in the value, and wraps each resulting line to the
+        max_line_length.  Non-ASCII binary data are CTE encoded using the
+        unknown-8bit charset.
+
+        """
+        return self._fold(name, value, sanitize=True)
+
+    def fold_binary(self, name, value):
+        """+
+        Headers are folded using the Header folding algorithm, which preserves
+        existing line breaks in the value, and wraps each resulting line to the
+        max_line_length.  If cte_type is 7bit, non-ascii binary data is CTE
+        encoded using the unknown-8bit charset.  Otherwise the original source
+        header is used, with its existing line breaks and/or binary data.
+
+        """
+        folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
+        return folded.encode('ascii', 'surrogateescape')
+
+    def _fold(self, name, value, sanitize):
+        parts = []
+        parts.append('%s: ' % name)
+        if isinstance(value, str):
+            if _has_surrogates(value):
+                if sanitize:
+                    h = header.Header(value,
+                                      charset=_charset.UNKNOWN8BIT,
+                                      header_name=name)
+                else:
+                    # If we have raw 8bit data in a byte string, we have no idea
+                    # what the encoding is.  There is no safe way to split this
+                    # string.  If it's ascii-subset, then we could do a normal
+                    # ascii split, but if it's multibyte then we could break the
+                    # string.  There's no way to know so the least harm seems to
+                    # be to not split the string and risk it being too long.
+                    parts.append(value)
+                    h = None
+            else:
+                h = header.Header(value, header_name=name)
+        else:
+            # Assume it is a Header-like object.
+            h = value
+        if h is not None:
+            parts.append(h.encode(linesep=self.linesep,
+                                  maxlinelen=self.max_line_length))
+        parts.append(self.linesep)
+        return ''.join(parts)
+
+
+compat32 = Compat32()