[Python-checkins] python/dist/src/Lib/email _parseaddr.py,NONE,1.5.2.1 Charset.py,1.7.2.3,1.7.2.4 Generator.py,1.6.10.3,1.6.10.4 Header.py,1.13.2.2,1.13.2.3 MIMEText.py,1.3.10.1,1.3.10.2 Message.py,1.9.6.2,1.9.6.3 Parser.py,1.5.10.3,1.5.10.4 Utils.py,1.9.6.1,1.9.6.2 __init__.py,1.4.10.4,1.4.10.5 _compat21.py,1.4.2.1,1.4.2.2 _compat22.py,1.4.2.1,1.4.2.2 base64MIME.py,1.5.2.1,1.5.2.2 quopriMIME.py,1.4.2.1,1.4.2.2

bwarsaw@users.sourceforge.net bwarsaw@users.sourceforge.net
21 Mar 2003 13:09:34 -0800


Update of /cvsroot/python/python/dist/src/Lib/email
In directory sc8-pr-cvs1:/tmp/cvs-serv27730/Lib/email
Modified Files:
 Tag: release22-maint
	Charset.py Generator.py Header.py MIMEText.py Message.py 
	Parser.py Utils.py __init__.py _compat21.py _compat22.py 
	base64MIME.py quopriMIME.py 
Added Files:
 Tag: release22-maint
	_parseaddr.py 
Log Message:
Backporting email 2.5 to Python 2.2 maint branch.
--- NEW FILE: _parseaddr.py ---
# Copyright (C) 2002 Python Software Foundation
"""Email address parsing code.
Lifted directly from rfc822.py. This should eventually be rewritten.
"""
import time
from types import TupleType
try:
 True, False
except NameError:
 True = 1
 False = 0
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
 'aug', 'sep', 'oct', 'nov', 'dec',
 'january', 'february', 'march', 'april', 'may', 'june', 'july',
 'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
# The timezone table does not include the military time zones defined
# in RFC822, other than Z. According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones. RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
 'EST': -500, 'EDT': -400, # Eastern
 'CST': -600, 'CDT': -500, # Central
 'MST': -700, 'MDT': -600, # Mountain
 'PST': -800, 'PDT': -700 # Pacific
 }
def parsedate_tz(data):
 """Convert a date string to a time tuple.
 Accounts for military timezones.
 """
 data = data.split()
 # The FWS after the comma after the day-of-week is optional, so search and
 # adjust for this.
 if data[0].endswith(',') or data[0].lower() in _daynames:
 # There's a dayname here. Skip it
 del data[0]
 else:
 i = data[0].rfind(',')
 if i < 0:
 return None
 data[0] = data[0][i+1:]
 if len(data) == 3: # RFC 850 date, deprecated
 stuff = data[0].split('-')
 if len(stuff) == 3:
 data = stuff + data[1:]
 if len(data) == 4:
 s = data[3]
 i = s.find('+')
 if i > 0:
 data[3:] = [s[:i], s[i+1:]]
 else:
 data.append('') # Dummy tz
 if len(data) < 5:
 return None
 data = data[:5]
 [dd, mm, yy, tm, tz] = data
 mm = mm.lower()
 if mm not in _monthnames:
 dd, mm = mm, dd.lower()
 if mm not in _monthnames:
 return None
 mm = _monthnames.index(mm) + 1
 if mm > 12:
 mm -= 12
 if dd[-1] == ',':
 dd = dd[:-1]
 i = yy.find(':')
 if i > 0:
 yy, tm = tm, yy
 if yy[-1] == ',':
 yy = yy[:-1]
 if not yy[0].isdigit():
 yy, tz = tz, yy
 if tm[-1] == ',':
 tm = tm[:-1]
 tm = tm.split(':')
 if len(tm) == 2:
 [thh, tmm] = tm
 tss = '0'
 elif len(tm) == 3:
 [thh, tmm, tss] = tm
 else:
 return None
 try:
 yy = int(yy)
 dd = int(dd)
 thh = int(thh)
 tmm = int(tmm)
 tss = int(tss)
 except ValueError:
 return None
 tzoffset = None
 tz = tz.upper()
 if _timezones.has_key(tz):
 tzoffset = _timezones[tz]
 else:
 try:
 tzoffset = int(tz)
 except ValueError:
 pass
 # Convert a timezone offset into seconds ; -0500 -> -18000
 if tzoffset:
 if tzoffset < 0:
 tzsign = -1
 tzoffset = -tzoffset
 else:
 tzsign = 1
 tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
 tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
 return tuple
def parsedate(data):
    """Convert a time string to a time tuple.

    Like parsedate_tz(), but strips the timezone element, returning a
    9-tuple suitable for time.mktime(), or None on a parse failure.
    """
    t = parsedate_tz(data)
    # TupleType from the deprecated `types' module is just the builtin
    # `tuple', so test against that directly.
    if isinstance(t, tuple):
        return t[:9]
    else:
        return t
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    If the timezone element (data[9]) is None, the tuple is assumed to be
    in local time and DST is guessed by the platform.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Interpret the tuple as local standard time (isdst=0), then
        # cancel the local standard-time offset and apply the tuple's own
        # offset to obtain a UTC epoch value.
        t = time.mktime(data[:8] + (0,))
        return t - data[9] - time.timezone
def quote(str):
    """Escape a string for inclusion inside double quotes.

    Despite the historical name, no quotes are added: backslashes and
    double-quote characters in `str' are backslash-escaped so the result
    can be safely wrapped in a quoted-string by the caller.
    """
    return str.replace('\\', '\\\\').replace('"', '\\"')
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list of (display-name, addrspec) pairs, one per address.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                # Unparseable chunk; emit an empty pair as a placeholder.
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address; returns a list of (name, addrspec) pairs."""
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group; recursively parse members up to the `;'
            returnlist = []
            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()
            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so parsing can make progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Consume (and discard) the route domain after an `@'.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()
        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec (local-part@domain)."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())

        self.gotonext()
        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # No domain part; return just the local part.
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True while the previous character was a backslash, i.e. the next
        # character is escaped and taken literally.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1
        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""

    def __init__(self, field):
        """Parse `field' (which may be None or empty) into self.addresslist."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        # NOTE(review): dump_address_pair is not defined anywhere in this
        # module, so str() on an AddressList raises NameError.  The name
        # presumably refers to rfc822.dump_address_pair -- confirm and
        # import or define it.
        return COMMASPACE.join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if not x in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if not x in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if not x in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Index: Charset.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Charset.py,v
retrieving revision 1.7.2.3
retrieving revision 1.7.2.4
diff -C2 -d -r1.7.2.3 -r1.7.2.4
*** Charset.py	14 Oct 2002 17:26:00 -0000	1.7.2.3
--- Charset.py	21 Mar 2003 21:09:31 -0000	1.7.2.4
***************
*** 36,39 ****
--- 36,53 ----
 'iso-8859-1': (QP, QP, None),
 'iso-8859-2': (QP, QP, None),
+ 'iso-8859-3': (QP, QP, None),
+ 'iso-8859-4': (QP, QP, None),
+ # iso-8859-5 is Cyrillic, and not especially used
+ # iso-8859-6 is Arabic, also not particularly used
+ # iso-8859-7 is Greek, QP will not make it readable
+ # iso-8859-8 is Hebrew, QP will not make it readable
+ 'iso-8859-9': (QP, QP, None),
+ 'iso-8859-10': (QP, QP, None),
+ # iso-8859-11 is Thai, QP will not make it readable
+ 'iso-8859-13': (QP, QP, None),
+ 'iso-8859-14': (QP, QP, None),
+ 'iso-8859-15': (QP, QP, None),
+ 'windows-1252':(QP, QP, None),
+ 'viscii': (QP, QP, None),
 'us-ascii': (None, None, None),
 'big5': (BASE64, BASE64, None),
***************
*** 53,56 ****
--- 67,89 ----
 'latin_1': 'iso-8859-1',
 'latin-1': 'iso-8859-1',
+ 'latin_2': 'iso-8859-2',
+ 'latin-2': 'iso-8859-2',
+ 'latin_3': 'iso-8859-3',
+ 'latin-3': 'iso-8859-3',
+ 'latin_4': 'iso-8859-4',
+ 'latin-4': 'iso-8859-4',
+ 'latin_5': 'iso-8859-9',
+ 'latin-5': 'iso-8859-9',
+ 'latin_6': 'iso-8859-10',
+ 'latin-6': 'iso-8859-10',
+ 'latin_7': 'iso-8859-13',
+ 'latin-7': 'iso-8859-13',
+ 'latin_8': 'iso-8859-14',
+ 'latin-8': 'iso-8859-14',
+ 'latin_9': 'iso-8859-15',
+ 'latin-9': 'iso-8859-15',
+ 'cp949': 'ks_c_5601-1987',
+ 'euc_jp': 'euc-jp',
+ 'euc_kr': 'euc-kr',
 'ascii': 'us-ascii',
 }
***************
*** 70,73 ****
--- 103,110 ----
 'iso-2022-jp': 'japanese.iso-2022-jp',
 'shift_jis': 'japanese.shift_jis',
+ 'euc-kr': 'korean.euc-kr',
+ 'ks_c_5601-1987': 'korean.cp949',
+ 'iso-2022-kr': 'korean.iso-2022-kr',
+ 'johab': 'korean.johab',
 'gb2132': 'eucgb2312_cn',
 'big5': 'big5_tw',
***************
*** 198,201 ****
--- 235,240 ----
 return self.input_charset.lower()
 
+ __repr__ = __str__
+ 
 def __eq__(self, other):
 return str(self) == str(other).lower()
***************
*** 322,326 ****
 return email.base64MIME.header_encode(s, cset)
 elif self.header_encoding == QP:
! return email.quopriMIME.header_encode(s, cset)
 elif self.header_encoding == SHORTEST:
 lenb64 = email.base64MIME.base64_len(s)
--- 361,365 ----
 return email.base64MIME.header_encode(s, cset)
 elif self.header_encoding == QP:
! return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
 elif self.header_encoding == SHORTEST:
 lenb64 = email.base64MIME.base64_len(s)
***************
*** 329,333 ****
 return email.base64MIME.header_encode(s, cset)
 else:
! return email.quopriMIME.header_encode(s, cset)
 else:
 return s
--- 368,372 ----
 return email.base64MIME.header_encode(s, cset)
 else:
! return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
 else:
 return s
***************
*** 349,353 ****
 if self.body_encoding is BASE64:
 return email.base64MIME.body_encode(s)
! elif self.header_encoding is QP:
 return email.quopriMIME.body_encode(s)
 else:
--- 388,392 ----
 if self.body_encoding is BASE64:
 return email.base64MIME.body_encode(s)
! elif self.body_encoding is QP:
 return email.quopriMIME.body_encode(s)
 else:
Index: Generator.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Generator.py,v
retrieving revision 1.6.10.3
retrieving revision 1.6.10.4
diff -C2 -d -r1.6.10.3 -r1.6.10.4
*** Generator.py	14 Oct 2002 17:26:01 -0000	1.6.10.3
--- Generator.py	21 Mar 2003 21:09:31 -0000	1.6.10.4
***************
*** 5,10 ****
 """
 
- import time
 import re
 import random
 
--- 5,11 ----
 """
 
 import re
+ import time
+ import locale
 import random
 
***************
*** 13,16 ****
--- 14,18 ----
 
 from email.Header import Header
+ from email.Parser import NLCRE
 
 try:
***************
*** 160,201 ****
 def _write_headers(self, msg):
 for h, v in msg.items():
! # RFC 2822 says that lines SHOULD be no more than maxheaderlen
! # characters wide, so we're well within our rights to split long
! # headers.
! text = '%s: %s' % (h, v)
! if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
! text = self._split_header(text)
! print >> self._fp, text
 # A blank line always separates headers from body
 print >> self._fp
 
- def _split_header(self, text):
- maxheaderlen = self.__maxheaderlen
- # Find out whether any lines in the header are really longer than
- # maxheaderlen characters wide. There could be continuation lines
- # that actually shorten it. Also, replace hard tabs with 8 spaces.
- lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
- for line in lines:
- if len(line) > maxheaderlen:
- break
- else:
- # No line was actually longer than maxheaderlen characters, so
- # just return the original unchanged.
- return text
- # If we have raw 8bit data in a byte string, we have no idea what the
- # encoding is. I think there is no safe way to split this string. If
- # it's ascii-subset, then we could do a normal ascii split, but if
- # it's multibyte then we could break the string. There's no way to
- # know so the least harm seems to be to not split the string and risk
- # it being too long.
- if _is8bitstring(text):
- return text
- # The `text' argument already has the field name prepended, so don't
- # provide it here or the first line will get folded too short.
- h = Header(text, maxlinelen=maxheaderlen,
- # For backwards compatibility, we use a hard tab here
- continuation_ws='\t')
- return h.encode()
- 
 #
 # Handlers for writing types and subtypes
--- 162,188 ----
 def _write_headers(self, msg):
 for h, v in msg.items():
! print >> self._fp, '%s:' % h,
! if self.__maxheaderlen == 0:
! # Explicit no-wrapping
! print >> self._fp, v
! elif isinstance(v, Header):
! # Header instances know what to do
! print >> self._fp, v.encode()
! elif _is8bitstring(v):
! # If we have raw 8bit data in a byte string, we have no idea
! # what the encoding is. There is no safe way to split this
! # string. If it's ascii-subset, then we could do a normal
! # ascii split, but if it's multibyte then we could break the
! # string. There's no way to know so the least harm seems to
! # be to not split the string and risk it being too long.
! print >> self._fp, v
! else:
! # Header's got lots of smarts, so use it.
! print >> self._fp, Header(
! v, maxlinelen=self.__maxheaderlen,
! header_name=h, continuation_ws='\t').encode()
 # A blank line always separates headers from body
 print >> self._fp
 
 #
 # Handlers for writing types and subtypes
***************
*** 259,262 ****
--- 246,257 ----
 if msg.preamble is not None:
 self._fp.write(msg.preamble)
+ # If preamble is the empty string, the length of the split will be
+ # 1, but the last element will be the empty string. If it's
+ # anything else but does not end in a line separator, the length
+ # will be > 1 and not end in an empty string. We need to
+ # guarantee a newline after the preamble, but don't add too many.
+ plines = NLCRE.split(msg.preamble)
+ if plines <> [''] and plines[-1] <> '':
+ self._fp.write('\n')
 # First boundary is a bit different; it doesn't have a leading extra
 # newline.
***************
*** 365,369 ****
 # Craft a random boundary. If text is given, ensure that the chosen
 # boundary doesn't appear in the text.
! boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
 if text is None:
 return boundary
--- 360,365 ----
 # Craft a random boundary. If text is given, ensure that the chosen
 # boundary doesn't appear in the text.
! dp = locale.localeconv().get('decimal_point', '.')
! boundary = ('=' * 15) + repr(random.random()).split(dp)[1] + '=='
 if text is None:
 return boundary
Index: Header.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Header.py,v
retrieving revision 1.13.2.2
retrieving revision 1.13.2.3
diff -C2 -d -r1.13.2.2 -r1.13.2.3
*** Header.py	14 Oct 2002 17:26:02 -0000	1.13.2.2
--- Header.py	21 Mar 2003 21:09:31 -0000	1.13.2.3
***************
*** 5,12 ****
--- 5,14 ----
 
 import re
+ import binascii
 from types import StringType, UnicodeType
 
 import email.quopriMIME
 import email.base64MIME
+ from email.Errors import HeaderParseError
 from email.Charset import Charset
 
***************
*** 26,31 ****
--- 28,36 ----
 CRLF = '\r\n'
 NL = '\n'
+ SPACE = ' '
+ USPACE = u' '
 SPACE8 = ' ' * 8
 EMPTYSTRING = ''
+ UEMPTYSTRING = u''
 
 MAXLINELEN = 76
***************
*** 48,51 ****
--- 53,63 ----
 ''', re.VERBOSE | re.IGNORECASE)
 
+ pcre = re.compile('([,;])')
+ 
+ # Field name regexp, including trailing colon, but not separating whitespace,
+ # according to RFC 2822. Character range is from tilde to exclamation mark.
+ # For use with .match()
+ fcre = re.compile(r'[041円-176円]+:$')
+ 
 
 
***************
*** 62,65 ****
--- 74,80 ----
 header, otherwise a lower-case string containing the name of the character
 set specified in the encoded string.
+ 
+ An email.Errors.HeaderParseError may be raised when certain decoding error
+ occurs (e.g. a base64 decoding exception).
 """
 # If no encoding, just return the header
***************
*** 80,84 ****
 # Should we continue a long line?
 if decoded and decoded[-1][1] is None:
! decoded[-1] = (decoded[-1][0] + dec, None)
 else:
 decoded.append((unenc, None))
--- 95,99 ----
 # Should we continue a long line?
 if decoded and decoded[-1][1] is None:
! decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
 else:
 decoded.append((unenc, None))
***************
*** 86,95 ****
 charset, encoding = [s.lower() for s in parts[0:2]]
 encoded = parts[2]
! dec = ''
 if encoding == 'q':
 dec = email.quopriMIME.header_decode(encoded)
 elif encoding == 'b':
! dec = email.base64MIME.decode(encoded)
! else:
 dec = encoded
 
--- 101,116 ----
 charset, encoding = [s.lower() for s in parts[0:2]]
 encoded = parts[2]
! dec = None
 if encoding == 'q':
 dec = email.quopriMIME.header_decode(encoded)
 elif encoding == 'b':
! try:
! dec = email.base64MIME.decode(encoded)
! except binascii.Error:
! # Turn this into a higher level exception. BAW: Right
! # now we throw the lower level exception away but
! # when/if we get exception chaining, we'll preserve it.
! raise HeaderParseError
! if dec is None:
 dec = encoded
 
***************
*** 127,132 ****
 
 class Header:
! def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
! continuation_ws=' '):
 """Create a MIME-compliant header that can contain many character sets.
 
--- 148,154 ----
 
 class Header:
! def __init__(self, s=None, charset=None,
! maxlinelen=None, header_name=None,
! continuation_ws=' ', errors='strict'):
 """Create a MIME-compliant header that can contain many character sets.
 
***************
*** 151,154 ****
--- 173,178 ----
 either a space or a hard tab) which will be prepended to continuation
 lines.
+ 
+ errors is passed through to the .append() call.
 """
 if charset is None:
***************
*** 162,166 ****
 self._chunks = []
 if s is not None:
! self.append(s, charset)
 if maxlinelen is None:
 maxlinelen = MAXLINELEN
--- 186,190 ----
 self._chunks = []
 if s is not None:
! self.append(s, charset, errors)
 if maxlinelen is None:
 maxlinelen = MAXLINELEN
***************
*** 183,189 ****
 def __unicode__(self):
 """Helper for the built-in unicode function."""
! # charset item is a Charset instance so we need to stringify it.
! uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
! return u''.join(uchunks)
 
 # Rich comparison operators for equality only. BAW: does it make sense to
--- 207,228 ----
 def __unicode__(self):
 """Helper for the built-in unicode function."""
! uchunks = []
! lastcs = None
! for s, charset in self._chunks:
! # We must preserve spaces between encoded and non-encoded word
! # boundaries, which means for us we need to add a space when we go
! # from a charset to None/us-ascii, or from None/us-ascii to a
! # charset. Only do this for the second and subsequent chunks.
! nextcs = charset
! if uchunks:
! if lastcs is not None:
! if nextcs is None or nextcs == 'us-ascii':
! uchunks.append(USPACE)
! nextcs = None
! elif nextcs is not None and nextcs <> 'us-ascii':
! uchunks.append(USPACE)
! lastcs = nextcs
! uchunks.append(unicode(s, str(charset)))
! return UEMPTYSTRING.join(uchunks)
 
 # Rich comparison operators for equality only. BAW: does it make sense to
***************
*** 197,201 ****
 return not self == other
 
! def append(self, s, charset=None):
 """Append a string to the MIME header.
 
--- 236,240 ----
 return not self == other
 
! def append(self, s, charset=None, errors='strict'):
 """Append a string to the MIME header.
 
***************
*** 214,217 ****
--- 253,259 ----
 following charsets in order: us-ascii, the charset hint, utf-8. The
 first character set not to provoke a UnicodeError is used.
+ 
+ Optional `errors' is passed as the third argument to any unicode() or
+ ustr.encode() call.
 """
 if charset is None:
***************
*** 228,237 ****
 # converted to a unicode with the input codec of the charset.
 incodec = charset.input_codec or 'us-ascii'
! ustr = unicode(s, incodec)
 # Now make sure that the unicode could be converted back to a
 # byte string with the output codec, which may be different
 # than the iput coded. Still, use the original byte string.
 outcodec = charset.output_codec or 'us-ascii'
! ustr.encode(outcodec)
 elif isinstance(s, UnicodeType):
 # Now we have to be sure the unicode string can be converted
--- 270,279 ----
 # converted to a unicode with the input codec of the charset.
 incodec = charset.input_codec or 'us-ascii'
! ustr = unicode(s, incodec, errors)
 # Now make sure that the unicode could be converted back to a
 # byte string with the output codec, which may be different
 # than the iput coded. Still, use the original byte string.
 outcodec = charset.output_codec or 'us-ascii'
! ustr.encode(outcodec, errors)
 elif isinstance(s, UnicodeType):
 # Now we have to be sure the unicode string can be converted
***************
*** 241,245 ****
 try:
 outcodec = charset.output_codec or 'us-ascii'
! s = s.encode(outcodec)
 break
 except UnicodeError:
--- 283,287 ----
 try:
 outcodec = charset.output_codec or 'us-ascii'
! s = s.encode(outcodec, errors)
 break
 except UnicodeError:
***************
*** 249,259 ****
 self._chunks.append((s, charset))
 
! def _split(self, s, charset, firstline=False):
 # Split up a header safely for use with encode_chunks.
 splittable = charset.to_splittable(s)
! encoded = charset.from_splittable(splittable)
 elen = charset.encoded_header_len(encoded)
! 
! if elen <= self._maxlinelen:
 return [(encoded, charset)]
 # If we have undetermined raw 8bit characters sitting in a byte
--- 291,301 ----
 self._chunks.append((s, charset))
 
! def _split(self, s, charset, maxlinelen, splitchars):
 # Split up a header safely for use with encode_chunks.
 splittable = charset.to_splittable(s)
! encoded = charset.from_splittable(splittable, True)
 elen = charset.encoded_header_len(encoded)
! # If the line's encoded length first, just return it
! if elen <= maxlinelen:
 return [(encoded, charset)]
 # If we have undetermined raw 8bit characters sitting in a byte
***************
*** 263,267 ****
 # be to not split the header at all, but that means they could go out
 # longer than maxlinelen.
! elif charset == '8bit':
 return [(s, charset)]
 # BAW: I'm not sure what the right test here is. What we're trying to
--- 305,309 ----
 # be to not split the header at all, but that means they could go out
 # longer than maxlinelen.
! if charset == '8bit':
 return [(s, charset)]
 # BAW: I'm not sure what the right test here is. What we're trying to
***************
*** 276,374 ****
 # although it's possible that other charsets may also benefit from the
 # higher-level syntactic breaks.
- #
 elif charset == 'us-ascii':
! return self._ascii_split(s, charset, firstline)
 # BAW: should we use encoded?
 elif elen == len(s):
 # We can split on _maxlinelen boundaries because we know that the
 # encoding won't change the size of the string
! splitpnt = self._maxlinelen
 first = charset.from_splittable(splittable[:splitpnt], False)
 last = charset.from_splittable(splittable[splitpnt:], False)
 else:
! # Divide and conquer.
! halfway = _floordiv(len(splittable), 2)
! first = charset.from_splittable(splittable[:halfway], False)
! last = charset.from_splittable(splittable[halfway:], False)
! # Do the split
! return self._split(first, charset, firstline) + \
! self._split(last, charset)
 
! def _ascii_split(self, s, charset, firstline):
! # Attempt to split the line at the highest-level syntactic break
! # possible. Note that we don't have a lot of smarts about field
! # syntax; we just try to break on semi-colons, then whitespace.
! rtn = []
! lines = s.splitlines()
! while lines:
! line = lines.pop(0)
! if firstline:
! maxlinelen = self._firstlinelen
! firstline = False
! else:
! #line = line.lstrip()
! maxlinelen = self._maxlinelen
! # Short lines can remain unchanged
! if len(line.replace('\t', SPACE8)) <= maxlinelen:
! rtn.append(line)
! else:
! oldlen = len(line)
! # Try to break the line on semicolons, but if that doesn't
! # work, try to split on folding whitespace.
! while len(line) > maxlinelen:
! i = line.rfind(';', 0, maxlinelen)
! if i < 0:
! break
! rtn.append(line[:i] + ';')
! line = line[i+1:]
! # Is the remaining stuff still longer than maxlinelen?
! if len(line) <= maxlinelen:
! # Splitting on semis worked
! rtn.append(line)
! continue
! # Splitting on semis didn't finish the job. If it did any
! # work at all, stick the remaining junk on the front of the
! # `lines' sequence and let the next pass do its thing.
! if len(line) <> oldlen:
! lines.insert(0, line)
! continue
! # Otherwise, splitting on semis didn't help at all.
! parts = re.split(r'(\s+)', line)
! if len(parts) == 1 or (len(parts) == 3 and
! parts[0].endswith(':')):
! # This line can't be split on whitespace. There's now
! # little we can do to get this into maxlinelen. BAW:
! # We're still potentially breaking the RFC by possibly
! # allowing lines longer than the absolute maximum of 998
! # characters. For now, let it slide.
! #
! # len(parts) will be 1 if this line has no `Field: '
! # prefix, otherwise it will be len(3).
! rtn.append(line)
! continue
! # There is whitespace we can split on.
! first = parts.pop(0)
! sublines = [first]
! acc = len(first)
! while parts:
! len0 = len(parts[0])
! len1 = len(parts[1])
! if acc + len0 + len1 <= maxlinelen:
! sublines.append(parts.pop(0))
! sublines.append(parts.pop(0))
! acc += len0 + len1
! else:
! # Split it here, but don't forget to ignore the
! # next whitespace-only part
! if first <> '':
! rtn.append(EMPTYSTRING.join(sublines))
! del parts[0]
! first = parts.pop(0)
! sublines = [first]
! acc = len(first)
! rtn.append(EMPTYSTRING.join(sublines))
! return [(chunk, charset) for chunk in rtn]
 
! def _encode_chunks(self, newchunks):
 # MIME-encode a header with many different charsets and/or encodings.
 #
--- 318,346 ----
 # although it's possible that other charsets may also benefit from the
 # higher-level syntactic breaks.
 elif charset == 'us-ascii':
! return self._split_ascii(s, charset, maxlinelen, splitchars)
 # BAW: should we use encoded?
 elif elen == len(s):
 # We can split on _maxlinelen boundaries because we know that the
 # encoding won't change the size of the string
! splitpnt = maxlinelen
 first = charset.from_splittable(splittable[:splitpnt], False)
 last = charset.from_splittable(splittable[splitpnt:], False)
 else:
! # Binary search for split point
! first, last = _binsplit(splittable, charset, maxlinelen)
! # first is of the proper length so just wrap it in the appropriate
! # chrome. last must be recursively split.
! fsplittable = charset.to_splittable(first)
! fencoded = charset.from_splittable(fsplittable, True)
! chunk = [(fencoded, charset)]
! return chunk + self._split(last, charset, self._maxlinelen, splitchars)
 
! def _split_ascii(self, s, charset, firstlen, splitchars):
! chunks = _split_ascii(s, firstlen, self._maxlinelen,
! self._continuation_ws, splitchars)
! return zip(chunks, [charset]*len(chunks))
 
! def _encode_chunks(self, newchunks, maxlinelen):
 # MIME-encode a header with many different charsets and/or encodings.
 #
***************
*** 388,404 ****
 # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
 # =?charset2?b?SvxyZ2VuIEL2aW5n?="
- #
 chunks = []
 for header, charset in newchunks:
 if charset is None or charset.header_encoding is None:
! # There's no encoding for this chunk's charsets
! _max_append(chunks, header, self._maxlinelen)
 else:
! _max_append(chunks, charset.header_encode(header),
! self._maxlinelen, ' ')
 joiner = NL + self._continuation_ws
 return joiner.join(chunks)
 
! def encode(self):
 """Encode a message header into an RFC-compliant format.
 
--- 360,381 ----
 # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
 # =?charset2?b?SvxyZ2VuIEL2aW5n?="
 chunks = []
 for header, charset in newchunks:
+ if not header:
+ continue
 if charset is None or charset.header_encoding is None:
! s = header
 else:
! s = charset.header_encode(header)
! # Don't add more folding whitespace than necessary
! if chunks and chunks[-1].endswith(' '):
! extra = ''
! else:
! extra = ' '
! _max_append(chunks, s, maxlinelen, extra)
 joiner = NL + self._continuation_ws
 return joiner.join(chunks)
 
! def encode(self, splitchars=';, '):
 """Encode a message header into an RFC-compliant format.
 
***************
*** 417,423 ****
 If the given charset is not known or an error occurs during
 conversion, this function will return the header untouched.
 """
 newchunks = []
 for s, charset in self._chunks:
! newchunks += self._split(s, charset, True)
! return self._encode_chunks(newchunks)
--- 394,515 ----
 If the given charset is not known or an error occurs during
 conversion, this function will return the header untouched.
+ 
+ Optional splitchars is a string containing characters to split long
+ ASCII lines on, in rough support of RFC 2822's `highest level
+ syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
 """
 newchunks = []
+ maxlinelen = self._firstlinelen
+ lastlen = 0
 for s, charset in self._chunks:
! # The first bit of the next chunk should be just long enough to
! # fill the next line. Don't forget the space separating the
! # encoded words.
! targetlen = maxlinelen - lastlen - 1
! if targetlen < charset.encoded_header_len(''):
! # Stick it on the next line
! targetlen = maxlinelen
! newchunks += self._split(s, charset, targetlen, splitchars)
! lastchunk, lastcharset = newchunks[-1]
! lastlen = lastcharset.encoded_header_len(lastchunk)
! return self._encode_chunks(newchunks, maxlinelen)
! 
! 
! 
! def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
! lines = []
! maxlen = firstlen
! for line in s.splitlines():
! # Ignore any leading whitespace (i.e. continuation whitespace) already
! # on the line, since we'll be adding our own.
! line = line.lstrip()
! if len(line) < maxlen:
! lines.append(line)
! maxlen = restlen
! continue
! # Attempt to split the line at the highest-level syntactic break
! # possible. Note that we don't have a lot of smarts about field
! # syntax; we just try to break on semi-colons, then commas, then
! # whitespace.
! for ch in splitchars:
! if line.find(ch) >= 0:
! break
! else:
! # There's nothing useful to split the line on, not even spaces, so
! # just append this line unchanged
! lines.append(line)
! maxlen = restlen
! continue
! # Now split the line on the character plus trailing whitespace
! cre = re.compile(r'%s\s*' % ch)
! if ch in ';,':
! eol = ch
! else:
! eol = ''
! joiner = eol + ' '
! joinlen = len(joiner)
! wslen = len(continuation_ws.replace('\t', SPACE8))
! this = []
! linelen = 0
! for part in cre.split(line):
! curlen = linelen + max(0, len(this)-1) * joinlen
! partlen = len(part)
! onfirstline = not lines
! # We don't want to split after the field name, if we're on the
! # first line and the field name is present in the header string.
! if ch == ' ' and onfirstline and \
! len(this) == 1 and fcre.match(this[0]):
! this.append(part)
! linelen += partlen
! elif curlen + partlen > maxlen:
! if this:
! lines.append(joiner.join(this) + eol)
! # If this part is longer than maxlen and we aren't already
! # splitting on whitespace, try to recursively split this line
! # on whitespace.
! if partlen > maxlen and ch <> ' ':
! subl = _split_ascii(part, maxlen, restlen,
! continuation_ws, ' ')
! lines.extend(subl[:-1])
! this = [subl[-1]]
! else:
! this = [part]
! linelen = wslen + len(this[-1])
! maxlen = restlen
! else:
! this.append(part)
! linelen += partlen
! # Put any left over parts on a line by themselves
! if this:
! lines.append(joiner.join(this))
! return lines
! 
! 
! 
! def _binsplit(splittable, charset, maxlinelen):
! i = 0
! j = len(splittable)
! while i < j:
! # Invariants:
! # 1. splittable[:k] fits for all k <= i (note that we *assume*,
! # at the start, that splittable[:0] fits).
! # 2. splittable[:k] does not fit for any k > j (at the start,
! # this means we shouldn't look at any k > len(splittable)).
! # 3. We don't know about splittable[:k] for k in i+1..j.
! # 4. We want to set i to the largest k that fits, with i <= k <= j.
! #
! m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j
! chunk = charset.from_splittable(splittable[:m], True)
! chunklen = charset.encoded_header_len(chunk)
! if chunklen <= maxlinelen:
! # m is acceptable, so is a new lower bound.
! i = m
! else:
! # m is not acceptable, so final i must be < m.
! j = m - 1
! # i == j. Invariant #1 implies that splittable[:i] fits, and
! # invariant #2 implies that splittable[:i+1] does not fit, so i
! # is what we're looking for.
! first = charset.from_splittable(splittable[:i], False)
! last = charset.from_splittable(splittable[i:], False)
! return first, last
Index: MIMEText.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/MIMEText.py,v
retrieving revision 1.3.10.1
retrieving revision 1.3.10.2
diff -C2 -d -r1.3.10.1 -r1.3.10.2
*** MIMEText.py	4 Oct 2002 17:24:24 -0000	1.3.10.1
--- MIMEText.py	21 Mar 2003 21:09:31 -0000	1.3.10.2
***************
*** 18,23 ****
 """Create a text/* type MIME document.
 
! _text is the string for this message object. If the text does not end
! in a newline, one is added.
 
 _subtype is the MIME sub content type, defaulting to "plain".
--- 18,22 ----
 """Create a text/* type MIME document.
 
! _text is the string for this message object.
 
 _subtype is the MIME sub content type, defaulting to "plain".
***************
*** 36,41 ****
 MIMENonMultipart.__init__(self, 'text', _subtype,
 **{'charset': _charset})
- if _text and not _text.endswith('\n'):
- _text += '\n'
 self.set_payload(_text, _charset)
 if _encoder is not None:
--- 35,38 ----
Index: Message.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Message.py,v
retrieving revision 1.9.6.2
retrieving revision 1.9.6.3
diff -C2 -d -r1.9.6.2 -r1.9.6.3
*** Message.py	10 Oct 2002 19:09:24 -0000	1.9.6.2
--- Message.py	21 Mar 2003 21:09:31 -0000	1.9.6.3
***************
*** 6,9 ****
--- 6,11 ----
 
 import re
+ import uu
+ import binascii
 import warnings
 from cStringIO import StringIO
***************
*** 11,16 ****
 
 # Intrapackage imports
- from email import Errors
 from email import Utils
 from email import Charset
 
--- 13,18 ----
 
 # Intrapackage imports
 from email import Utils
+ from email import Errors
 from email import Charset
 
***************
*** 165,176 ****
 i returns that index into the payload.
 
! Optional decode is a flag (defaulting to False) indicating whether the
! payload should be decoded or not, according to the
! Content-Transfer-Encoding header. When True and the message is not a
! multipart, the payload will be decoded if this header's value is
! `quoted-printable' or `base64'. If some other encoding is used, or
! the header is missing, the payload is returned as-is (undecoded). If
! the message is a multipart and the decode flag is True, then None is
! returned.
 """
 if i is None:
--- 167,182 ----
 i returns that index into the payload.
 
! Optional decode is a flag indicating whether the payload should be
! decoded or not, according to the Content-Transfer-Encoding header
! (default is False).
! 
! When True and the message is not a multipart, the payload will be
! decoded if this header's value is `quoted-printable' or `base64'. If
! some other encoding is used, or the header is missing, or if the
! payload has bogus data (i.e. bogus base64 or uuencoded data), the
! payload is returned as-is.
! 
! If the message is a multipart and the decode flag is True, then None
! is returned.
 """
 if i is None:
***************
*** 183,191 ****
 if self.is_multipart():
 return None
! cte = self.get('content-transfer-encoding', '')
! if cte.lower() == 'quoted-printable':
 return Utils._qdecode(payload)
! elif cte.lower() == 'base64':
! return Utils._bdecode(payload)
 # Everything else, including encodings with 8bit or 7bit are returned
 # unchanged.
--- 189,209 ----
 if self.is_multipart():
 return None
! cte = self.get('content-transfer-encoding', '').lower()
! if cte == 'quoted-printable':
 return Utils._qdecode(payload)
! elif cte == 'base64':
! try:
! return Utils._bdecode(payload)
! except binascii.Error:
! # Incorrect padding
! return payload
! elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
! sfp = StringIO()
! try:
! uu.decode(StringIO(payload+'\n'), sfp)
! payload = sfp.getvalue()
! except uu.Error:
! # Some decoding problem
! return payload
 # Everything else, including encodings with 8bit or 7bit are returned
 # unchanged.
Index: Parser.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Parser.py,v
retrieving revision 1.5.10.3
retrieving revision 1.5.10.4
diff -C2 -d -r1.5.10.3 -r1.5.10.4
*** Parser.py	7 Oct 2002 17:02:40 -0000	1.5.10.3
--- Parser.py	21 Mar 2003 21:09:31 -0000	1.5.10.4
***************
*** 21,25 ****
 False = 0
 
! nlcre = re.compile('\r\n|\r|\n')
 
 
--- 21,25 ----
 False = 0
 
! NLCRE = re.compile('\r\n|\r|\n')
 
 
***************
*** 60,66 ****
 """
 root = self._class()
! self._parseheaders(root, fp)
 if not headersonly:
! self._parsebody(root, fp)
 return root
 
--- 60,66 ----
 """
 root = self._class()
! firstbodyline = self._parseheaders(root, fp)
 if not headersonly:
! self._parsebody(root, fp, firstbodyline)
 return root
 
***************
*** 81,84 ****
--- 81,85 ----
 lastvalue = []
 lineno = 0
+ firstbodyline = None
 while True:
 # Don't strip the line before we test for the end condition,
***************
*** 121,131 ****
 if self._strict:
 raise Errors.HeaderParseError(
! "Not a header, not a continuation: ``%s''"%line)
 elif lineno == 1 and line.startswith('--'):
 # allow through duplicate boundary tags.
 continue
 else:
! raise Errors.HeaderParseError(
! "Not a header, not a continuation: ``%s''"%line)
 if lastheader:
 container[lastheader] = NL.join(lastvalue)
--- 122,135 ----
 if self._strict:
 raise Errors.HeaderParseError(
! "Not a header, not a continuation: ``%s''" % line)
 elif lineno == 1 and line.startswith('--'):
 # allow through duplicate boundary tags.
 continue
 else:
! # There was no separating blank line as mandated by RFC
! # 2822, but we're in non-strict mode. So just offer up
! # this current line as the first body line.
! firstbodyline = line
! break
 if lastheader:
 container[lastheader] = NL.join(lastvalue)
***************
*** 135,140 ****
 if lastheader:
 container[lastheader] = NL.join(lastvalue)
 
! def _parsebody(self, container, fp):
 # Parse the body, but first split the payload on the content-type
 # boundary if present.
--- 139,145 ----
 if lastheader:
 container[lastheader] = NL.join(lastvalue)
+ return firstbodyline
 
! def _parsebody(self, container, fp, firstbodyline=None):
 # Parse the body, but first split the payload on the content-type
 # boundary if present.
***************
*** 153,156 ****
--- 158,163 ----
 separator = '--' + boundary
 payload = fp.read()
+ if firstbodyline is not None:
+ payload = firstbodyline + '\n' + payload
 # We use an RE here because boundaries can have trailing
 # whitespace.
***************
*** 170,174 ****
 # Find out what kind of line endings we're using
 start += len(mo.group('sep')) + len(mo.group('ws'))
! mo = nlcre.search(payload, start)
 if mo:
 start += len(mo.group(0))
--- 177,181 ----
 # Find out what kind of line endings we're using
 start += len(mo.group('sep')) + len(mo.group('ws'))
! mo = NLCRE.search(payload, start)
 if mo:
 start += len(mo.group(0))
***************
*** 222,228 ****
 msgobj = self.parsestr(parthdrs, headersonly=1)
 # while submsgobj is the message itself
- submsgobj = self.parsestr(part)
- msgobj.attach(submsgobj)
 msgobj.set_default_type('message/rfc822')
 else:
 msgobj = self.parsestr(part)
--- 229,239 ----
 msgobj = self.parsestr(parthdrs, headersonly=1)
 # while submsgobj is the message itself
 msgobj.set_default_type('message/rfc822')
+ maintype = msgobj.get_content_maintype()
+ if maintype in ('message', 'multipart'):
+ submsgobj = self.parsestr(part)
+ msgobj.attach(submsgobj)
+ else:
+ msgobj.set_payload(part)
 else:
 msgobj = self.parsestr(part)
***************
*** 257,261 ****
 container.attach(msg)
 else:
! container.set_payload(fp.read())
 
 
--- 268,275 ----
 container.attach(msg)
 else:
! text = fp.read()
! if firstbodyline is not None:
! text = firstbodyline + '\n' + text
! container.set_payload(text)
 
 
***************
*** 271,275 ****
 interested in is the message headers.
 """
! def _parsebody(self, container, fp):
 # Consume but do not parse, the body
! container.set_payload(fp.read())
--- 285,292 ----
 interested in is the message headers.
 """
! def _parsebody(self, container, fp, firstbodyline=None):
 # Consume but do not parse, the body
! text = fp.read()
! if firstbodyline is not None:
! text = firstbodyline + '\n' + text
! container.set_payload(text)
Index: Utils.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Utils.py,v
retrieving revision 1.9.6.1
retrieving revision 1.9.6.2
diff -C2 -d -r1.9.6.1 -r1.9.6.2
*** Utils.py	4 Oct 2002 17:24:24 -0000	1.9.6.1
--- Utils.py	21 Mar 2003 21:09:31 -0000	1.9.6.2
***************
*** 14,24 ****
 from types import ListType
 
! from rfc822 import quote
! from rfc822 import AddressList as _AddressList
! from rfc822 import mktime_tz
 
 # We need wormarounds for bugs in these methods in older Pythons (see below)
! from rfc822 import parsedate as _parsedate
! from rfc822 import parsedate_tz as _parsedate_tz
 
 try:
--- 14,24 ----
 from types import ListType
 
! from email._parseaddr import quote
! from email._parseaddr import AddressList as _AddressList
! from email._parseaddr import mktime_tz
 
 # We need wormarounds for bugs in these methods in older Pythons (see below)
! from email._parseaddr import parsedate as _parsedate
! from email._parseaddr import parsedate_tz as _parsedate_tz
 
 try:
***************
*** 55,60 ****
 CRLF = '\r\n'
 
! specialsre = re.compile(r'[][\()<>@,:;".]')
! escapesre = re.compile(r'[][\()"]')
 
 
--- 55,60 ----
 CRLF = '\r\n'
 
! specialsre = re.compile(r'[][\\()<>@,:;".]')
! escapesre = re.compile(r'[][\\()"]')
 
 
***************
*** 67,72 ****
 
 def _bdecode(s):
- if not s:
- return s
 # We can't quite use base64.encodestring() since it tacks on a "courtesy
 # newline". Blech!
--- 67,70 ----
***************
*** 281,287 ****
 """Decode string according to RFC 2231"""
 import urllib
! charset, language, s = s.split("'", 2)
! s = urllib.unquote(s)
! return charset, language, s
 
 
--- 279,287 ----
 """Decode string according to RFC 2231"""
 import urllib
! parts = s.split("'", 2)
! if len(parts) == 1:
! return None, None, s
! charset, language, s = parts
! return charset, language, urllib.unquote(s)
 
 
***************
*** 336,340 ****
 value.append(continuation)
 charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
! new_params.append((name,
! (charset, language, '"%s"' % quote(value))))
 return new_params
--- 336,340 ----
 value.append(continuation)
 charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
! new_params.append(
! (name, (charset, language, '"%s"' % quote(value))))
 return new_params
Index: __init__.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/__init__.py,v
retrieving revision 1.4.10.4
retrieving revision 1.4.10.5
diff -C2 -d -r1.4.10.4 -r1.4.10.5
*** __init__.py	14 Oct 2002 17:26:02 -0000	1.4.10.4
--- __init__.py	21 Mar 2003 21:09:31 -0000	1.4.10.5
***************
*** 5,9 ****
 """
 
! __version__ = '2.4.3'
 
 __all__ = [
--- 5,9 ----
 """
 
! __version__ = '2.5'
 
 __all__ = [
Index: _compat21.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/_compat21.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** _compat21.py	4 Oct 2002 17:24:24 -0000	1.4.2.1
--- _compat21.py	21 Mar 2003 21:09:31 -0000	1.4.2.2
***************
*** 8,11 ****
--- 8,14 ----
 from types import StringType, UnicodeType
 
+ False = 0
+ True = 1
+ 
 
 
***************
*** 32,36 ****
 
 def _isstring(obj):
! return isinstance(obj, StringType) or isinstance(obj, UnicodeType) 
 
 
--- 35,39 ----
 
 def _isstring(obj):
! return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
 
 
***************
*** 38,46 ****
 # These two functions are imported into the Iterators.py interface module.
 # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg):
! """Iterate over the parts, returning string payloads line-by-line."""
 lines = []
 for subpart in msg.walk():
! payload = subpart.get_payload()
 if _isstring(payload):
 for line in StringIO(payload).readlines():
--- 41,52 ----
 # These two functions are imported into the Iterators.py interface module.
 # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg, decode=False):
! """Iterate over the parts, returning string payloads line-by-line.
! 
! Optional decode (default False) is passed through to .get_payload().
! """
 lines = []
 for subpart in msg.walk():
! payload = subpart.get_payload(decode=decode)
 if _isstring(payload):
 for line in StringIO(payload).readlines():
Index: _compat22.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/_compat22.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** _compat22.py	4 Oct 2002 17:24:24 -0000	1.4.2.1
--- _compat22.py	21 Mar 2003 21:09:31 -0000	1.4.2.2
***************
*** 39,46 ****
 # These two functions are imported into the Iterators.py interface module.
 # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg):
! """Iterate over the parts, returning string payloads line-by-line."""
 for subpart in msg.walk():
! payload = subpart.get_payload()
 if _isstring(payload):
 for line in StringIO(payload):
--- 39,49 ----
 # These two functions are imported into the Iterators.py interface module.
 # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg, decode=False):
! """Iterate over the parts, returning string payloads line-by-line.
! 
! Optional decode (default False) is passed through to .get_payload().
! """
 for subpart in msg.walk():
! payload = subpart.get_payload(decode=decode)
 if _isstring(payload):
 for line in StringIO(payload):
Index: base64MIME.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/base64MIME.py,v
retrieving revision 1.5.2.1
retrieving revision 1.5.2.2
diff -C2 -d -r1.5.2.1 -r1.5.2.2
*** base64MIME.py	4 Oct 2002 17:24:24 -0000	1.5.2.1
--- base64MIME.py	21 Mar 2003 21:09:31 -0000	1.5.2.2
***************
*** 103,109 ****
 max_unencoded = _floordiv(max_encoded * 3, 4)
 
- # BAW: Ben's original code used a step of max_unencoded, but I think it
- # ought to be max_encoded. Otherwise, where's max_encoded used? I'm
- # still not sure what the
 for i in range(0, len(header), max_unencoded):
 base64ed.append(b2a_base64(header[i:i+max_unencoded]))
--- 103,106 ----
Index: quopriMIME.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/quopriMIME.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** quopriMIME.py	4 Oct 2002 17:24:24 -0000	1.4.2.1
--- quopriMIME.py	21 Mar 2003 21:09:31 -0000	1.4.2.2
***************
*** 83,87 ****
 if not L:
 L.append(s.lstrip())
! elif len(L[-1]) + len(s) < maxlen:
 L[-1] += extra + s
 else:
--- 83,87 ----
 if not L:
 L.append(s.lstrip())
! elif len(L[-1]) + len(s) <= maxlen:
 L[-1] += extra + s
 else:
***************
*** 117,121 ****
 
 with each line wrapped safely at, at most, maxlinelen characters (defaults
! to 76 characters).
 
 End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
--- 117,122 ----
 
 with each line wrapped safely at, at most, maxlinelen characters (defaults
! to 76 characters). If maxlinelen is None, the entire string is encoded in
! one chunk with no splitting.
 
 End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
***************
*** 135,141 ****
 
 # Quopri encode each line, in encoded chunks no greater than maxlinelen in
! # lenght, after the RFC chrome is added in.
 quoted = []
! max_encoded = maxlinelen - len(charset) - MISC_LEN
 
 for c in header:
--- 136,146 ----
 
 # Quopri encode each line, in encoded chunks no greater than maxlinelen in
! # length, after the RFC chrome is added in.
 quoted = []
! if maxlinelen is None:
! # An obnoxiously large number that's good enough
! max_encoded = 100000
! else:
! max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
 
 for c in header:

AltStyle によって変換されたページ (->オリジナル) /