Random notes from mg
a blog by Marius Gedminas

Sending Unicode emails in Python

Sending a properly encoded email that contains non-ASCII characters is not as trivial as it should be. Here's more or less what I want:

# Desired usage: every argument is a Unicode string, and the display
# names, the subject and the body all contain non-ASCII characters.
# U+263A and U+263B are smiley faces (☺ and ☻)
sender = u'Sender \u263A <sender@example.com>'
recipient = u'Recipient \u263B <recipient@example.com>'
subject = u'Smile! \u263A'
body = u'Smile!\n\u263B'
send_email(sender, recipient, subject, body)

The hard part is getting all the Unicode strings to be properly encoded in the email. Details like multiple recipients, additional headers, attachments, SMTP configuration and error handling are ignored for the purposes of this article.

Here's the solution:

from smtplib import SMTP
from email.MIMEText import MIMEText
from email.Header import Header
from email.Utils import parseaddr, formataddr
def send_email(sender, recipient, subject, body):
    """Send an email.

    All arguments should be Unicode strings (plain ASCII works as well).

    Only the real name part of sender and recipient addresses may contain
    non-ASCII characters.

    The email will be properly MIME encoded and delivered through SMTP to
    localhost port 25.  This is easy to change if you want something
    different.

    The charset of the email will be the first one out of US-ASCII,
    ISO-8859-1 and UTF-8 that can represent all the characters occurring
    in the email.

    Args:
        sender: sender in "Real Name <address>" (or bare address) form.
        recipient: recipient in the same form as ``sender``.
        subject: text of the Subject header.
        body: plain-text message body.

    Raises:
        UnicodeError: if the address part of ``sender`` or ``recipient``
            contains non-ASCII characters.
    """
    # Header class is smart enough to try US-ASCII, then the charset we
    # provide, then fall back to UTF-8.
    header_charset = 'ISO-8859-1'

    # We must choose the body charset manually: take the first candidate
    # that can encode the whole body.
    for body_charset in 'US-ASCII', 'ISO-8859-1', 'UTF-8':
        try:
            body.encode(body_charset)
        except UnicodeError:
            pass
        else:
            break

    # Split real name (which is optional) and email address parts
    sender_name, sender_addr = parseaddr(sender)
    recipient_name, recipient_addr = parseaddr(recipient)

    # We must always pass Unicode strings to Header, otherwise it will
    # use RFC 2047 encoding even on plain ASCII strings.
    sender_name = str(Header(unicode(sender_name), header_charset))
    recipient_name = str(Header(unicode(recipient_name), header_charset))

    # Make sure email addresses do not contain non-ASCII characters
    sender_addr = sender_addr.encode('ascii')
    recipient_addr = recipient_addr.encode('ascii')

    # Create the message ('plain' stands for Content-Type: text/plain)
    msg = MIMEText(body.encode(body_charset), 'plain', body_charset)
    msg['From'] = formataddr((sender_name, sender_addr))
    msg['To'] = formataddr((recipient_name, recipient_addr))
    msg['Subject'] = Header(unicode(subject), header_charset)

    # Send the message via SMTP to localhost:25.  The SMTP envelope only
    # needs the bare addresses, so pass the ASCII-validated ones instead
    # of the raw Unicode "Name <address>" strings.
    smtp = SMTP("localhost")
    try:
        smtp.sendmail(sender_addr, recipient_addr, msg.as_string())
    finally:
        # Always end the session, even when delivery fails, so the
        # connection is not leaked.
        smtp.quit()

I wish I could write it like this:

from smtplib import SMTP
from email.MIMEText import MIMEText
def send_email(sender, recipient, subject, body):
    """Science-fictional simple version of send_email."""
    # Ideally the email module would accept Unicode message bodies and
    # header values directly and pick a suitable charset on its own.
    # Today (on Python 2.3) it just bombs out with a Unicode error when
    # as_string() is called.
    message = MIMEText(body)  # won't work
    unicode_headers = (
        ('From', sender),
        ('To', recipient),
        ('Subject', subject),
    )
    for header_name, header_value in unicode_headers:
        message[header_name] = header_value  # won't work
    # At least the SMTP module is smart enough to discard the real name
    # part that it doesn't need.
    connection = SMTP("localhost")
    connection.sendmail(sender, recipient, message.as_string())
    connection.quit()

AltStyle によって変換されたページ (->オリジナル) /