kopia lustrzana https://github.com/micropython/micropython-lib
email.internal: Add pristine from CPython 3.3.3.
This dist-package hosts a few "package private" modules with names starting with "_". Maybe this is not the best grouping, but having a separate dist-package for each such module is a bit verbose. (pull/118/head)
rodzic
67cf02d715
commit
f93de4a4d5
|
@ -0,0 +1,221 @@
|
||||||
|
""" Routines for manipulating RFC2047 encoded words.
|
||||||
|
|
||||||
|
This is currently a package-private API, but will be considered for promotion
|
||||||
|
to a public API if there is demand.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# An encoded word looks like this:
|
||||||
|
#
|
||||||
|
# =?charset[*lang]?cte?encoded_string?=
|
||||||
|
#
|
||||||
|
# for more information about charset see the charset module. Here it is one
|
||||||
|
# of the preferred MIME charset names (hopefully; you never know when parsing).
|
||||||
|
# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
|
||||||
|
# theory other letters could be used for other encodings, but in practice this
|
||||||
|
# (almost?) never happens. There could be a public API for adding entries
|
||||||
|
# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
|
||||||
|
# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
|
||||||
|
# as indicated by the brackets (they are not part of the syntax) but is almost
|
||||||
|
# never encountered in practice.
|
||||||
|
#
|
||||||
|
# The general interface for a CTE decoder is that it takes the encoded_string
|
||||||
|
# as its argument, and returns a tuple (cte_decoded_string, defects). The
|
||||||
|
# cte_decoded_string is the original binary that was encoded using the
|
||||||
|
# specified cte. 'defects' is a list of MessageDefect instances indicating any
|
||||||
|
# problems encountered during conversion. 'charset' and 'lang' are the
|
||||||
|
# corresponding strings extracted from the EW, case preserved.
|
||||||
|
#
|
||||||
|
# The general interface for a CTE encoder is that it takes a binary sequence
|
||||||
|
# as input and returns the cte_encoded_string, which is an ascii-only string.
|
||||||
|
#
|
||||||
|
# Each encoder must also supply a length function that takes the binary
|
||||||
|
# sequence as its argument and returns the length of the resulting encoded
|
||||||
|
# string.
|
||||||
|
#
|
||||||
|
# The main API functions for the module are decode, which calls the decoder
|
||||||
|
# referenced by the cte specifier, and encode, which adds the appropriate
|
||||||
|
# RFC 2047 "chrome" to the encoded string, and can optionally automatically
|
||||||
|
# select the shortest possible encoding. See their docstrings below for
|
||||||
|
# details.
|
||||||
|
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
|
import functools
|
||||||
|
from string import ascii_letters, digits
|
||||||
|
from email import errors
|
||||||
|
|
||||||
|
__all__ = ['decode_q',
|
||||||
|
'encode_q',
|
||||||
|
'decode_b',
|
||||||
|
'encode_b',
|
||||||
|
'len_q',
|
||||||
|
'len_b',
|
||||||
|
'decode',
|
||||||
|
'encode',
|
||||||
|
]
|
||||||
|
|
||||||
|
#
|
||||||
|
# Quoted Printable
|
||||||
|
#
|
||||||
|
|
||||||
|
# Regex based decoder: each "=XX" hex escape is replaced by the byte 0xXX.
_Q_ESCAPE = re.compile(br'=([a-fA-F0-9]{2})')


def _unescape_q_byte(match):
    """Return the single byte named by the two hex digits of a "=XX" match."""
    return bytes([int(match.group(1), 16)])


_q_byte_subber = functools.partial(_Q_ESCAPE.sub, _unescape_q_byte)


def decode_q(encoded):
    """Decode the Q-encoded bytes *encoded*.

    Underscores are turned into spaces and every "=XX" hex escape into the
    corresponding byte; anything else is passed through unchanged.

    Returns a (decoded_bytes, defects) tuple.  The defects list is always
    empty for this decoder.
    """
    with_spaces = encoded.replace(b'_', b' ')
    return _q_byte_subber(with_spaces), []
|
||||||
|
|
||||||
|
|
||||||
|
# dict mapping byte values (ints) to their Q-encoded text, filled on demand.
class _QByteMap(dict):
    """Lazily populated table from a byte value to its Q-encoded string."""

    # Byte values that are emitted literally instead of as "=XX".
    safe = b'-!*+/' + (ascii_letters + digits).encode('ascii')

    def __missing__(self, key):
        """Compute, cache and return the encoded form of byte value *key*."""
        if key in self.safe:
            encoded = chr(key)
        else:
            encoded = "={:02X}".format(key)
        self[key] = encoded
        return encoded


# In headers spaces are mapped to '_', so that entry is seeded up front.
_q_byte_map = _QByteMap({ord(' '): '_'})


def encode_q(bstring):
    """Return the Q encoding of the byte sequence *bstring* as a str."""
    return ''.join([_q_byte_map[byte] for byte in bstring])


def len_q(bstring):
    """Return the length of the string encode_q(bstring) would produce."""
    total = 0
    for byte in bstring:
        total += len(_q_byte_map[byte])
    return total
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Base64
|
||||||
|
#
|
||||||
|
|
||||||
|
def decode_b(encoded):
    """Decode the base64 bytes *encoded*, tolerating common damage.

    Returns a (decoded_bytes, defects) tuple, where defects is a list of
    defect instances from email.errors describing what had to be repaired:

    * missing '=' padding is added and reported with
      InvalidBase64PaddingDefect;
    * if strict decoding still fails, the input is decoded again with
      non-alphabet characters ignored and InvalidBase64CharactersDefect is
      reported (plus InvalidBase64PaddingDefect if padding had to be guessed).

    If no amount of padding makes the input decodable (for instance when the
    number of data characters is one more than a multiple of four), the
    input bytes are returned unchanged together with the defects collected
    so far, instead of raising AssertionError on malformed header data.
    """
    defects = []
    pad_err = len(encoded) % 4
    if pad_err:
        defects.append(errors.InvalidBase64PaddingDefect())
        padded_encoded = encoded + b'==='[:4 - pad_err]
    else:
        padded_encoded = encoded
    try:
        return base64.b64decode(padded_encoded, validate=True), defects
    except binascii.Error:
        # The padding was correct, so treat this as an invalid character
        # error and retry below in lenient mode.
        pass

    defects = [errors.InvalidBase64CharactersDefect()]
    # The non-alphabet characters are ignored as far as padding goes, but we
    # don't know how many there are.  So we'll just try various padding
    # lengths until something works.
    for i in range(4):
        try:
            return base64.b64decode(encoded + b'=' * i, validate=False), defects
        except binascii.Error:
            if i == 0:
                defects.append(errors.InvalidBase64PaddingDefect())
    # Nothing worked: the data cannot be base64 decoded at all.  Hand the
    # bytes back undecoded; the defects list tells the caller why.
    return encoded, defects
|
||||||
|
|
||||||
|
def encode_b(bstring):
    """Return the base64 encoding of the byte sequence *bstring* as a str."""
    encoded_bytes = base64.b64encode(bstring)
    return str(encoded_bytes, 'ascii')
|
||||||
|
|
||||||
|
def len_b(bstring):
    """Return the length of the base64 encoding of *bstring*.

    Base64 emits 4 characters for every 3 input bytes, and a trailing
    partial group of 1 or 2 bytes is padded out to a full 4 characters.
    """
    # Ceiling division by 3 counts the (possibly partial) 3-byte groups.
    group_count = (len(bstring) + 2) // 3
    return 4 * group_count
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table used by decode(): maps the lower-case cte letter of an
# encoded word to the function that decodes its payload bytes.
_cte_decoders = {
    'q': decode_q,
    'b': decode_b,
    }
|
||||||
|
|
||||||
|
def decode(ew):
    """Decode an encoded word; return a (string, charset, lang, defects) tuple.

    *ew* is expected to be an RFC 2047 encoded word, optionally carrying an
    RFC 2231 language tag:

        =?charset*lang?cte?encoded_string?=

    where '*lang' may be omitted but the other parts may not be.

    No syntax checking is done.  The word is split on '?' into exactly five
    fields, so a string that is not well formed may raise (for example
    ValueError from the unpacking, or KeyError when the cte has no decoder).

    The encoded_string is first decoded from its Content Transfer Encoding by
    the decoder registered for the lower-cased cte, and the resulting bytes
    are then decoded to str using the charset:

      * if the bytes are not valid in that charset, an UndecodableBytesDefect
        is added and they are decoded again with the 'surrogateescape' error
        handler;
      * if the charset is not known, the bytes are decoded as ASCII with
        'surrogateescape', and a CharsetError defect is added unless the
        charset is 'unknown-8bit' (compared case-insensitively).

    The charset and language are returned as they appear in the word.  The
    language, which is rarely if ever encountered, defaults to the empty
    string.

    """
    _, charset_field, cte, payload, _ = ew.split('?')
    charset, _, lang = charset_field.partition('*')
    cte = cte.lower()
    # Recover the original bytes and do CTE decoding.
    raw = payload.encode('ascii', 'surrogateescape')
    cte_decoder = _cte_decoders[cte]
    bstring, defects = cte_decoder(raw)
    # Turn the CTE decoded bytes into unicode.
    try:
        string = bstring.decode(charset)
    except LookupError:
        # Unknown charset: keep the raw bytes recoverable.
        string = bstring.decode('ascii', 'surrogateescape')
        if charset.lower() != 'unknown-8bit':
            message = ("Unknown charset {} "
                       "in encoded word; decoded as unknown bytes")
            defects.append(errors.CharsetError(message.format(charset)))
    except UnicodeError:
        message = ("Encoded word "
                   "contains bytes not decodable using {} charset")
        defects.append(errors.UndecodableBytesDefect(message.format(charset)))
        string = bstring.decode(charset, 'surrogateescape')
    return string, charset, lang, defects
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table used by encode(): maps a cte letter to the function that
# turns a byte sequence into its encoded text.
_cte_encoders = {
    'q': encode_q,
    'b': encode_b,
    }

# Companion table: maps a cte letter to the function that returns the length
# of the encoded text for a byte sequence.  encode() consults it to choose
# an encoding when none is specified.
_cte_encode_length = {
    'q': len_q,
    'b': len_b,
    }
|
||||||
|
|
||||||
|
def encode(string, charset='utf-8', encoding=None, lang=''):
    """Encode string using the CTE encoding that produces the shorter result.

    Produces an RFC 2047 encoded word, optionally carrying an RFC 2231
    language tag, of the form:

        =?charset*lang?cte?encoded_string?=

    where '*lang' is omitted unless the 'lang' parameter is given a value.

    Optional argument charset (defaults to utf-8) specifies the charset to use
    to encode the string to binary before CTE encoding it.  The special
    charset 'unknown-8bit' (matched case-insensitively, as decode() does)
    encodes the string as ASCII with the 'surrogateescape' error handler.

    Optional argument 'encoding' is the cte specifier for the encoding that
    should be used ('q' or 'b'); if it is None (the default) the encoding
    which produces the shortest encoded sequence is used, except that 'q' is
    preferred as long as it is fewer than five characters longer than 'b'.

    Optional argument 'lang' (default '') gives the language string to
    specify in the encoded word.

    Raises KeyError if 'encoding' names an encoding with no registered
    encoder, and whatever str.encode raises for a bad charset or for
    characters the charset cannot represent.

    """
    # Charset names are case-insensitive; decode() already lower-cases the
    # charset before comparing it with 'unknown-8bit', so do the same here.
    if charset.lower() == 'unknown-8bit':
        bstring = string.encode('ascii', 'surrogateescape')
    else:
        bstring = string.encode(charset)
    if encoding is None:
        qlen = _cte_encode_length['q'](bstring)
        blen = _cte_encode_length['b'](bstring)
        # Bias toward q.  5 is arbitrary.
        encoding = 'q' if qlen - blen < 5 else 'b'
    encoded = _cte_encoders[encoding](bstring)
    if lang:
        lang = '*' + lang
    return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded)
|
|
@ -0,0 +1,540 @@
|
||||||
|
# Copyright (C) 2002-2007 Python Software Foundation
|
||||||
|
# Contact: email-sig@python.org
|
||||||
|
|
||||||
|
"""Email address parsing code.
|
||||||
|
|
||||||
|
Lifted directly from rfc822.py. This should eventually be rewritten.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'mktime_tz',
|
||||||
|
'parsedate',
|
||||||
|
'parsedate_tz',
|
||||||
|
'quote',
|
||||||
|
]
|
||||||
|
|
||||||
|
import time, calendar
|
||||||
|
|
||||||
|
# String constants used as separators when joining parsed pieces.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
|
||||||
|
|
||||||
|
# Parse a date field

# Abbreviated month names followed by the full names.  An index of 12 or
# more therefore refers to a full name and is folded back into 1..12 by the
# parser.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are written like numeric zones: sign, hours, minutes (-500 == -05:00).
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 1, -1,
    tzoffset) where tzoffset is the zone offset in seconds, or None if
    *data* is empty or cannot be parsed.

    A zone given as -0000, or a zone that is not recognized, is reported as
    an offset of 0.  Of the military zones only 'Z' is recognized (see the
    comment on the timezone table).
    """
    res = _parsedate_tz(data)
    if not res:
        return
    if res[9] is None:
        res[9] = 0
    return tuple(res)


def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns a 10-element list, or None when *data* is empty, consists only
    of whitespace, or cannot be parsed.

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.

    """
    if not data:
        return
    data = data.split()
    if not data:
        # Whitespace-only input splits to nothing; there is no date to parse.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("10:00:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the day and month are the other way round.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    # endswith() and slicing are used instead of [-1] and [0] so that an
    # empty field (possible after splitting an RFC 850 date on '-') falls
    # through to the int() conversion and yields None instead of IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The year and time fields are swapped.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # The year and zone fields are swapped.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = 0
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Neither hh.mm nor hh.mm.ss: not a time we understand.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        if tzoffset == 0 and tz.startswith('-'):
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
|
||||||
|
|
||||||
|
|
||||||
|
def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    This is the result of parsedate_tz() with the trailing timezone offset
    dropped.  Any non-tuple result of parsedate_tz() is returned unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
|
||||||
|
|
||||||
|
|
||||||
|
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    When the offset in data[9] is None the first eight fields are
    interpreted as local time with an unknown DST flag; otherwise they are
    interpreted as UTC and the offset (in seconds) is subtracted.
    """
    tz_offset = data[9]
    if tz_offset is not None:
        return calendar.timegm(data) - tz_offset
    # No zone info, so localtime is better assumption than GMT
    return time.mktime(data[:8] + (-1,))
|
||||||
|
|
||||||
|
|
||||||
|
def quote(str):
    """Prepare string to be used in a quoted string.

    Backslashes and double quotes are each preceded by a backslash, making
    them quoted pairs; these are the only characters that need quoting
    inside a quoted string.  The surrounding double quotes are not added.
    """
    # Backslashes must be doubled first, otherwise the backslashes added in
    # front of the double quotes would be doubled as well.
    backslashes_escaped = str.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')
|
||||||
|
|
||||||
|
|
||||||
|
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (self.pos) into the header text (self.field);
    each get* method consumes text at the cursor and advances it.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Comments are appended to self.commentlist.  Returns the skipped
        spaces and tabs as a string; CR and LF are skipped but not returned.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return ''.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each as a
        (name, address) tuple.  A position at which no address could be
        parsed contributes an ('', '') entry.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) tuples: one entry for a single
        address, several for a group, or an empty list if nothing usable was
        found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # NOTE: this is the same list object as self.commentlist, not a copy.
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not at a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The domain of a source route is parsed and discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the address as a string.  If an '@' is present but the
        domain after it is empty or invalid, the empty string is returned
        rather than a bare local part, so that a failed parse cannot be
        mistaken for an address.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second '@' is met while reading the
        domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't parse domains with two `@' like
                # `a@malicious.org@important.com': accepting the first part
                # would let a crafted header pass as a different address.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        The returned text excludes the delimiters, and a backslash is dropped
        while the character following it is kept literally.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = False
        self.pos += 1
        while self.pos < len(self.field):
            if quote:
                slist.append(self.field[self.pos])
                quote = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                quote = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Whitespace between words is skipped, and
        comments are collected into self.commentlist.

        Returns the list of words.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
|
||||||
|
|
||||||
|
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed addresses are kept in self.addresslist.  The +, -, += and -=
    operators combine two AddressList objects as unions and differences of
    their address lists.
    """

    def __init__(self, field):
        """Parse *field*; a false value gives an empty address list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __add__(self, other):
        """Return a new AddressList: self's entries plus other's new ones."""
        # Set union
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        for entry in other.addresslist:
            if entry not in self.addresslist:
                union.addresslist.append(entry)
        return union

    def __iadd__(self, other):
        """Append the entries of other that self does not already hold."""
        # Set union, in-place
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        """Return a new AddressList of self's entries that are not in other."""
        # Set difference
        difference = AddressList(None)
        for entry in self.addresslist:
            if entry not in other.addresslist:
                difference.addresslist.append(entry)
        return difference

    def __isub__(self, other):
        """Remove from self the entries that also appear in other."""
        # Set difference, in-place
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        """Return the address (or slice of addresses) at *index*."""
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
|
|
@ -0,0 +1,358 @@
|
||||||
|
"""Policy framework for the email package.
|
||||||
|
|
||||||
|
Allows fine grained feature control of how the package parses and emits data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import abc
|
||||||
|
from email import header
|
||||||
|
from email import charset as _charset
|
||||||
|
from email.utils import _has_surrogates
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'Policy',
|
||||||
|
'Compat32',
|
||||||
|
'compat32',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class _PolicyBase:

    """Policy Object basic framework.

    This class is useless unless subclassed.  A subclass should define
    class attributes with defaults for any values that are to be
    managed by the Policy object.  The constructor will then allow
    non-default values to be set for these attributes at instance
    creation time.  Instances are read-only once created: assigning to an
    attribute raises AttributeError.  Use clone() to obtain a new instance
    that is identical except for the attributes passed as keyword
    arguments.  Instances may be added, yielding new instances with any
    non-default values from the right hand operand overriding those in
    the left hand operand.  That is,

        A + B == A.clone(<non-default values of B>)

    The repr of an instance can be used to reconstruct the object
    if and only if the repr of the values can be used to reconstruct
    those values.

    """

    def __init__(self, **kw):
        """Create new Policy, possibly overriding some defaults.

        Every keyword must name an existing attribute; anything else raises
        TypeError.

        """
        for name, value in kw.items():
            if not hasattr(self, name):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        name, self.__class__.__name__))
            # Go around our own __setattr__, which refuses every assignment.
            super().__setattr__(name, value)

    def __repr__(self):
        """Return 'ClassName(attr=value, ...)' for the instance attributes."""
        overrides = ', '.join(
            "{}={!r}".format(attr, value)
            for attr, value in self.__dict__.items())
        return "{}({})".format(self.__class__.__name__, overrides)

    def clone(self, **kw):
        """Return a new instance with specified attributes changed.

        The new instance has the same attribute values as the current object,
        except for the changes passed in as keyword arguments.  A keyword
        that does not name an existing attribute raises TypeError.

        """
        cls = self.__class__
        # Created with __new__ so that __init__ is not run again.
        newpolicy = cls.__new__(cls)
        for attr, value in self.__dict__.items():
            object.__setattr__(newpolicy, attr, value)
        for attr, value in kw.items():
            if not hasattr(self, attr):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        attr, cls.__name__))
            object.__setattr__(newpolicy, attr, value)
        return newpolicy

    def __setattr__(self, name, value):
        """Refuse assignment: always raises AttributeError."""
        msg = ("{!r} object attribute {!r} is read-only"
               if hasattr(self, name)
               else "{!r} object has no attribute {!r}")
        raise AttributeError(msg.format(self.__class__.__name__, name))

    def __add__(self, other):
        """Non-default values from right operand override those from left.

        The object returned is a new instance of the subclass.

        """
        return self.clone(**other.__dict__)
|
||||||
|
|
||||||
|
|
||||||
|
def _append_doc(doc, added_doc):
|
||||||
|
doc = doc.rsplit('\n', 1)[0]
|
||||||
|
added_doc = added_doc.split('\n', 1)[1]
|
||||||
|
return doc + '\n' + added_doc
|
||||||
|
|
||||||
|
def _extend_docstrings(cls):
|
||||||
|
if cls.__doc__ and cls.__doc__.startswith('+'):
|
||||||
|
cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
|
||||||
|
for name, attr in cls.__dict__.items():
|
||||||
|
if attr.__doc__ and attr.__doc__.startswith('+'):
|
||||||
|
for c in (c for base in cls.__bases__ for c in base.mro()):
|
||||||
|
doc = getattr(getattr(c, name), '__doc__')
|
||||||
|
if doc:
|
||||||
|
attr.__doc__ = _append_doc(doc, attr.__doc__)
|
||||||
|
break
|
||||||
|
return cls
|
||||||
|
|
||||||
|
|
||||||
|
class Policy(_PolicyBase, metaclass=abc.ABCMeta):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters.  A Policy object contains a set of values and
    functions that control how input is interpreted and how output is rendered.
    For example, the parameter 'raise_on_defect' controls whether an RFC
    violation results in an error being raised, while 'max_line_length'
    controls the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by passing
    it as a keyword argument to the constructor.  Policy objects are immutable,
    but a new Policy object can be created with only certain values changed by
    calling the clone() method with keyword arguments.  Policy objects can
    also be added, producing a new Policy object in which the non-default
    attributes set in the right hand operand overwrite those specified in the
    left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit.  Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    """

    # Class-level defaults for the settable attributes described in the
    # class docstring.
    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

            handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect should be
        registered if it is not raised.  If raise_on_defect is true, the
        defect is raised as an error, otherwise the object and the defect are
        passed to register_defect.

        This method is intended to be called by parsers that discover defects.
        The email package parsers always call it with Defect instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False.  This method is
        part of the Policy API so that Policy subclasses can implement custom
        defect handling.  The default implementation calls the append method of
        the defects attribute of obj.  The objects used by the email package by
        default that get passed to this method will always have a defects
        attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object.  If the returned
        value is not 0 or None, and there are already a number of headers with
        the name 'name' equal to the value returned, a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to append the
        value to the list of headers, it is easy to create duplicate headers
        without realizing it.  This method allows certain headers to be limited
        in the number of instances of that header that may be added to a
        Message programmatically.  (The limit is not observed by the parser,
        which will faithfully produce as many headers as exist in the message
        being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    # The methods below are abstract: a concrete policy must implement all
    # of them.  The bodies here only raise NotImplementedError.

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines of
        a single header, return the (name, value) tuple that should be stored
        in the model.  The input lines should retain their terminating linesep
        characters.  The lines passed in by the email package may contain
        surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the value
        to be returned to the application program that is requesting that
        header.  The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a BytesParser.
        The returned value should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a string
        containing linesep characters that implement the folding of the header
        according to the policy controls.  The value passed in by the email
        package may contain surrogateescaped binary data if the lines were
        parsed by a BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return binary
        data containing linesep characters that implement the folding of the
        header according to the policy controls.  The value passed in by the
        email package may contain surrogateescaped binary data.

        """
        raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
@_extend_docstrings
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy.  It
    replicates the behavior of the email package version 5.1.
    """

    # NOTE: docstrings that begin with '+' are rewritten by the
    # _extend_docstrings decorator, so their first line must stay a bare '+'.

    def _sanitize_header(self, name, value):
        """Return 'value', wrapped in a Header if it is a str with surrogates.

        A str containing surrogates becomes a Header using the unknown-8bit
        charset so the bytes are emitted as encoded words.  A non-str value
        is assumed to already be a header object and is returned as is.
        """
        if isinstance(value, str) and _has_surrogates(value):
            return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
        return value

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.

        """
        name, remainder = sourcelines[0].split(':', 1)
        continuation = ''.join(sourcelines[1:])
        value = (remainder.lstrip(' \t') + continuation).rstrip('\r\n')
        return (name, value)

    def header_store_parse(self, name, value):
        """+
        The name and value are returned unmodified.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value contains binary data, it is converted into a Header object
        using the unknown-8bit charset.  Otherwise it is returned unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  Non-ASCII binary data are CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  If cte_type is 7bit, non-ascii binary data is CTE
        encoded using the unknown-8bit charset.  Otherwise the original source
        header is used, with its existing line breaks and/or binary data.

        """
        sanitize = self.cte_type == '7bit'
        text = self._fold(name, value, sanitize=sanitize)
        return text.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        """Build the 'name: value' header text terminated by self.linesep.

        'value' is either a str or a Header-like object with an encode()
        method.  'sanitize' selects what happens to a str that contains
        surrogates: encode it with the unknown-8bit charset (true) or emit
        it untouched and unwrapped (false).
        """
        pieces = ['%s: ' % name]
        if not isinstance(value, str):
            # Assume it is a Header-like object.
            hdr = value
        elif not _has_surrogates(value):
            hdr = header.Header(value, header_name=name)
        elif sanitize:
            hdr = header.Header(value,
                                charset=_charset.UNKNOWN8BIT,
                                header_name=name)
        else:
            # If we have raw 8bit data in a byte string, we have no idea
            # what the encoding is.  There is no safe way to split this
            # string.  If it's ascii-subset, then we could do a normal
            # ascii split, but if it's multibyte then we could break the
            # string.  There's no way to know so the least harm seems to
            # be to not split the string and risk it being too long.
            pieces.append(value)
            hdr = None
        if hdr is not None:
            pieces.append(hdr.encode(linesep=self.linesep,
                                     maxlinelen=self.max_line_length))
        pieces.append(self.linesep)
        return ''.join(pieces)
||||||
|
|
||||||
|
|
||||||
|
# Module-level instance of the Compat32 policy, created with no overrides.
compat32 = Compat32()
|
Ładowanie…
Reference in New Issue