#! /usr/bin/env python
# -*- coding: utf-8 -*-
import email
import imaplib
import os
import poplib
import re
import sys
import time
from email.header import decode_header
from hashlib import md5

try:
    import cStringIO
except ImportError:  # Python 3 has no cStringIO module; io provides StringIO too
    import io as cStringIO
# Configuration
# -------------
# Mailbox address
MAILADDR = "username@163.com"

# Password of the mailbox
PASSWORD = "password"

# Mail server, written as "host" or "host:port"
SERVER = "pop.163.com"

# Transfer protocol (pop3/imap4)
PROTOCOL = "pop3"

# Whether to connect over SSL (True/False)
USE_SSL = True

# Main output directory
OUTDIR = "result"

# Static variable
# ---------------
# Default port of each protocol, keyed by protocol name and then by the SSL flag
DEFAULT_PORT = {
    "pop3": {False: 110, True: 995},
    "imap4": {False: 143, True: 993},
}
# Function
# --------
def exit_script(reason, e=""):
    """Print the error reason, clean up the output directory and exit.

    :param reason: exit error reason
    :param e: exception (or any extra detail) appended in parentheses
    :raises SystemExit: always, with exit status -1
    """
    # Print exit string
    exit_str = "[-] {0}".format(reason)
    if e:
        exit_str += " ({0})".format(e)
    print(exit_str)

    # result_path is only assigned by the script entry point, so look it up
    # defensively: failing here with a NameError would hide the real error.
    output_dir = globals().get("result_path")
    if output_dir:
        remove_dir(output_dir)

    # sys.exit() instead of the interactive helper exit(), which is injected
    # by the site module and is not guaranteed to exist.
    print("[-] Fetch email failed!")
    sys.exit(-1)
def parse_protocol(protocol):
    """Normalize a transfer protocol name.

    The name is matched case-insensitively and surrounding whitespace is
    ignored, so "POP3", " imap " and "imap4" are all accepted.

    :param protocol: transfer protocol (pop/pop3/imap/imap4)
    :return: handled protocol, either "pop3" or "imap4"; for anything else
        the script is terminated through exit_script
    """
    aliases = {
        "pop": "pop3",
        "pop3": "pop3",
        "imap": "imap4",
        "imap4": "imap4",
    }
    # Only string-like values can be normalized; everything else is rejected.
    if hasattr(protocol, "lower"):
        handled = aliases.get(protocol.strip().lower())
        if handled:
            return handled
    exit_script("Parse protocol failed: {0}".format(protocol))
def parse_server(server, use_ssl, protocol):
    """Change server to host and port. If no port specified, use default value.

    Any malformed input terminates the script through exit_script.

    :param server: mail server (host, host:port)
    :param use_ssl: True if use SSL else False
    :param protocol: transfer protocol (pop3/imap4)
    :return: host and port
    """
    if not server:
        exit_script("No available server")

    server_item = server.split(":")
    if len(server_item) > 2:
        exit_script("Too many colons in server: {0}".format(server))

    host = server_item[0].strip()
    if not host:
        exit_script("No host in server: {0}".format(server))

    try:
        if len(server_item) == 1:
            # No explicit port: fall back to the protocol's default one
            port = DEFAULT_PORT[protocol][use_ssl]
        else:
            port = int(server_item[1])
            if not 0 < port < 65536:
                raise ValueError("port out of range: {0}".format(port))
    except (KeyError, TypeError, ValueError) as e:
        # KeyError/TypeError: unknown protocol or SSL flag; ValueError: bad port
        exit_script("Parse server format failed: {0}".format(server), e)
    return host, port
def create_dir(result_path):
    """Create output directory (including missing parents) if not exist.

    If the path is an existing file, or the directory cannot be created, the
    script is terminated through exit_script.

    :param result_path: main result path
    """
    if os.path.exists(result_path):
        # Checked outside any try block so that the SystemExit raised by
        # exit_script is not caught and reported a second time.
        if os.path.isfile(result_path):
            exit_script("{0} is file".format(result_path))
        print("[*] Directory {0} has already existed".format(result_path))
        return

    try:
        # makedirs, not mkdir: the result path is nested below the main
        # output directory, which may not exist yet.
        os.makedirs(result_path)
    except OSError as e:
        exit_script("Create directory {0} failed".format(result_path), e)
    print("[*] Create directory {0} successfully".format(result_path))
def remove_dir(result_path):
    """Remove output directory if no file in this directory.

    Failures are reported on stdout and never raised, because this function
    is also used while the script is already exiting.

    :param result_path: main result path
    """
    try:
        if not os.path.isdir(result_path):
            print("[*] No directory {0}".format(result_path))
        elif os.listdir(result_path):
            print("[*] Directory {0} is not empty, no need remove".format(result_path))
        else:
            os.rmdir(result_path)
            print("[*] Remove directory {0} successfully".format(result_path))
    except OSError as e:
        # Only filesystem errors are expected here; KeyboardInterrupt and
        # SystemExit must keep propagating.
        print("[-] Remove directory {0} failed: {1}".format(result_path, e))
def protocol_manager(protocol, host, port, usr, pwd, use_ssl):
    """Choose handle function according to transfer protocol.

    :param protocol: transfer protocol (pop3/imap4)
    :param host: host
    :param port: port
    :param usr: username
    :param pwd: password
    :param use_ssl: True if use ssl else False
    """
    # Explicit dispatch table: looking the name up on __main__ would call any
    # attribute that happens to carry that name, and fails when this module is
    # imported instead of being run as a script.
    handlers = {
        "pop3": pop3,
        "imap4": imap4,
    }
    handler = handlers.get(protocol)
    if handler is None:
        exit_script("Wrong protocol: {0}".format(protocol))
    else:
        handler(host, port, usr, pwd, use_ssl)
def pop3(host, port, usr, pwd, use_ssl):
    """Pop3 handler: download every message of the mailbox.

    A connection or listing failure terminates the script through
    exit_script; a failure on a single message is reported and skipped.

    :param host: host
    :param port: port
    :param usr: username
    :param pwd: password
    :param use_ssl: True if use SSL else False
    """
    # Connect to mail server
    try:
        conn = poplib.POP3_SSL(host, port) if use_ssl else poplib.POP3(host, port)
        conn.user(usr)
        conn.pass_(pwd)
        print("[+] Connect to {0}:{1} successfully".format(host, port))
    except Exception as e:
        exit_script("Connect to {0}:{1} failed".format(host, port), e)

    # Message lines are bytes on Python 3 and str on Python 2; pick the
    # parser that matches what the running interpreter returns.
    parse_message = getattr(email, "message_from_bytes", email.message_from_string)

    try:
        # Get email message number
        try:
            msg_num = len(conn.list()[1])
            print("[*] {0} emails found in {1}".format(msg_num, usr))
        except Exception as e:
            exit_script("Can't get email number", e)

        # Get email content and attachments
        for i in range(1, msg_num + 1):
            print("[*] Downloading email {0}/{1}".format(i, msg_num))

            # Retrieve email message lines and join them into one raw message
            try:
                msg_lines = conn.retr(i)[1]
                raw_msg = b"\n".join(msg_lines) + b"\n"
            except Exception as e:
                print("[-] Retrieve email {0} failed: {1}".format(i, e))
                continue

            # Read the raw message
            try:
                msg = parse_message(raw_msg)
            except Exception as e:
                print("[-] Read buffer of email {0} failed: {1}".format(i, e))
                continue

            # Parse and save email content/attachments
            try:
                parse_email(msg, i)
            except Exception as e:
                print("[-] Parse email {0} failed: {1}".format(i, e))
    finally:
        # Quit mail server, also when the loop above is aborted
        try:
            conn.quit()
        except Exception as e:
            print("[-] Quit mail server failed: {0}".format(e))
def imap4(host, port, usr, pwd, use_ssl):
    """Imap4 handler: download every message of every folder once.

    Messages are de-duplicated across folders by the md5 of their raw
    content. A connection or folder-listing failure terminates the script
    through exit_script; a failure on a single folder or message is reported
    and skipped.

    :param host: host
    :param port: port
    :param usr: username
    :param pwd: password
    :param use_ssl: True if use SSL else False
    """
    # Connect to mail server
    try:
        conn = imaplib.IMAP4_SSL(host, port) if use_ssl else imaplib.IMAP4(host, port)
        conn.login(usr, pwd)
        print("[+] Connect to {0}:{1} successfully".format(host, port))
    except Exception as e:
        exit_script("Connect to {0}:{1} failed".format(host, port), e)

    # Initial some variable
    list_pattern = re.compile(r'\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)')
    # Raw messages are bytes on Python 3 and str on Python 2
    parse_message = getattr(email, "message_from_bytes", email.message_from_string)
    download_num = 0
    download_hash = set()

    try:
        # Get all folders
        try:
            type_, folders = conn.list()
        except Exception as e:
            exit_script("Get folder list failed", e)

        for folder in folders:
            # Parse folder info and get folder name
            try:
                if not isinstance(folder, str):
                    folder = folder.decode("utf-8", "replace")
                raw_name = list_pattern.match(folder).group("name")
                folder_name = raw_name.strip('"')
                print("[*] Handling folder: {0}".format(folder_name))
            except Exception as e:
                print("[-] Parse folder {0} failed: {1}".format(folder, e))
                continue

            # Select and search folder. imaplib reports a refused command
            # through the returned status instead of raising, so the status
            # has to be checked; otherwise the later SEARCH fails with an
            # unrelated "illegal in state AUTH" error.
            try:
                # Select with the name exactly as the server listed it, so a
                # quoted name (e.g. one containing spaces) stays quoted.
                type_, data = conn.select(raw_name, readonly=True)
                if type_ != "OK":
                    raise conn.error("SELECT returned {0}: {1}".format(type_, data))
                type_, data = conn.search(None, "ALL")
                if type_ != "OK":
                    raise conn.error("SEARCH returned {0}: {1}".format(type_, data))
            except Exception as e:
                print("[-] Search folder {0} failed: {1}".format(folder_name, e))
                continue

            # Get email number of this folder
            try:
                msg_id_list = [int(token) for token in data[0].split()]
                msg_num = len(msg_id_list)
                print("[*] {0} emails found in {1} ({2})".format(msg_num, usr, folder_name))
            except Exception as e:
                print("[-] Can't get email number of {0}: {1}".format(folder_name, e))
                continue

            # Get email content and attachments
            for i in msg_id_list:
                print("[*] Downloading email {0}/{1}".format(i, msg_num))

                # Get email message
                try:
                    # The message set is passed as text: Python 3's imaplib
                    # does not accept an int argument.
                    type_, data = conn.fetch(str(i), "(RFC822)")
                    if type_ != "OK":
                        raise conn.error("FETCH returned {0}: {1}".format(type_, data))
                    raw_msg = data[0][1]
                    msg = parse_message(raw_msg)
                except Exception as e:
                    print("[-] Retrieve email {0} failed: {1}".format(i, e))
                    continue

                # If message already exist, skip this message
                try:
                    msg_md5 = md5(raw_msg).hexdigest()
                except Exception as e:
                    print("[-] Parse message md5 failed: {0}".format(e))
                    continue
                if msg_md5 in download_hash:
                    print("[-] This email has been downloaded in other folder")
                    continue
                download_hash.add(msg_md5)
                download_num += 1

                # Parse and save email content/attachments
                try:
                    parse_email(msg, download_num)
                except Exception as e:
                    print("[-] Parse email {0} failed: {1}".format(i, e))
    finally:
        # Logout this account, also when the loop above is aborted
        try:
            conn.logout()
        except Exception as e:
            print("[-] Logout failed: {0}".format(e))
def _safe_attachment_name(filename):
    """Decode an attachment file name and strip any directory component."""
    pieces = []
    for text, charset in decode_header(filename):
        if isinstance(text, bytes):
            try:
                text = text.decode(charset or "ascii", "replace")
            except LookupError:
                # Unknown charset label in the header
                text = text.decode("ascii", "replace")
        pieces.append(text)
    decoded = u"".join(pieces)
    # The name comes from the email itself: keep only its last path
    # component so it cannot point outside the output directory.
    name = os.path.basename(decoded.replace("\\", "/"))
    return name or u"unnamed"


def parse_email(msg, i, out_dir=None):
    """Parse email message and save content & attachments to file.

    Parts with a file name are written to "mail<i>_attach_<name>". The first
    part without a file name is written to "mail<i>_text"; further unnamed
    parts (for example an HTML alternative or an inline image) are written
    to "mail<i>_text_<n>" instead of overwriting the first one.

    :param msg: mail message
    :param i: ordinal number
    :param out_dir: directory to write into; defaults to the module-level
        result_path assigned by the script entry point
    """
    if out_dir is None:
        out_dir = result_path

    unnamed_parts = 0
    # Parse and save email content and attachments
    for part in msg.walk():
        if part.is_multipart():
            continue

        filename = part.get_filename()
        content = part.get_payload(decode=True)
        if filename:  # Attachment
            result_name = u"mail{0}_attach_{1}".format(i, _safe_attachment_name(filename))
        else:  # Main content
            unnamed_parts += 1
            if unnamed_parts == 1:
                result_name = "mail{0}_text".format(i)
            else:
                result_name = "mail{0}_text_{1}".format(i, unnamed_parts)
        result_file = os.path.join(out_dir, result_name)

        try:
            with open(result_file, "wb") as f:
                f.write(content)
        except Exception as e:
            print("[-] Write file of email {0} failed: {1}".format(i, e))
if __name__ == "__main__":
    # Script entry point: download every email of the configured account.
    print("[*] Start download email script")
    start_time = time.time()

    # Working copies of the configuration
    mailaddr = MAILADDR
    password = PASSWORD
    server = SERVER
    protocol = PROTOCOL
    use_ssl = USE_SSL
    outdir = OUTDIR

    # result_path has to stay a module-level name: exit_script and
    # parse_email read it as a global.
    result_path = os.path.join(outdir, mailaddr)

    protocol = parse_protocol(protocol)
    host, port = parse_server(server, use_ssl, protocol)
    create_dir(result_path)
    protocol_manager(protocol, host, port, mailaddr, password, use_ssl)
    # Drop the output directory again if nothing was downloaded into it
    remove_dir(result_path)

    end_time = time.time()
    exec_time = end_time - start_time
    print("[*] Finish download email of {0} in {1:.2f}s".format(mailaddr, exec_time))
[em_sbq] 貌似代码挺长,略复杂。。
() "/" "INBOX"
('OK', ['() "/" "INBOX"', '(\\Drafts) "/" "&g0l6P3ux-"', '(\\Sent) "/" "&XfJT0ZAB-"', '(\\Trash) "/" "&XfJSIJZk-"', '(\\Junk) "/" "&V4NXPpCuTvY-"', '() "/" "&Xn9USpCuTvY-"', '() "/" "&i6KWBZCuTvY-"'])
[*] Handling folder: INBOX
333
[-] Search folder INBOX failed: command SEARCH illegal in state AUTH, only allowed in states SELECTED
有人怀疑163只能用邮箱大师收,不能用其他客户端收。
163 邮箱今天屏蔽了所有 “未知” 客户端的邮件收发功能
http://www.v2ex.com/t/152504
不是这样的,你是需要在邮箱客户端里面进行设置的。我的163邮箱进行设置后就能收发邮件了
端口定义中IMAP的键名写错了,应该为imap4:
DEFAULT_PORT = {
"pop3": {False: 110, True: 995},
"imap3": {False: 143, True: 993},
}
源代码中为imap3,使用imap收取邮件会报错的.
谢谢,确实是
有没有更高效的方式验证已下载的邮件,例如用邮件的唯一性标识,而不是抓取邮件内容用md5来验证?
可以看下IMAP4.uid方法(https://docs.python.org/2/library/imaplib.html#imaplib.IMAP4.uid)
正文有图片怎么处理? [em_sbq] [em_sbq] [em_sbq] [em_sbq] [em_sbq] [em_sbq] [em_sbq] [em_sbq]
暂无 Trackback