# src/fc2rss/fc2rss.py

"""fc2rss
Screen scrape an OU FirstClass conference or two into RSS files.
Avoid the appalling FC interface!
Stuart Langridge, http://www.kryogenix.org/
v1.0 2003-10-11
"""
import urllib,re,urlparse,os,cgi
# Required variables
# Credentials returned whenever the server asks for authentication.
OU_USERNAME = 'YOUR_OU_USERNAME'
OU_PASSWORD = 'YOUR_OU_PASSWORD'

# Login URLs of the conference pages to scrape.
CONFERENCES = [
    'http://oufcnt2.open.ac.uk/Login/0007584D-80000001/',  # m874
    'http://oufcnt2.open.ac.uk/Login/00075E75-80000001/',  # m879
]

# Directory the generated .xml files are written into.
RSS_PATH = '/home/httpd/test/ou/rss'

# Set to a true value to print page titles while scraping.
debugmode = 0

# A line holding nothing but a <title> element; captures its text.
reTitle = re.compile(r'^\s*<title>(?P<title>[^<]+)</title>\s*$')

# A JavaScript "leaf[n]=new Array(...)" line; captures the item id and author.
reItem = re.compile(r'\s*leaf\[\d+\]=new Array\(\d+,\d+,\d+,\d+,"(?P<id>[^"]+)", "(?P<author>[^"]+)"\);\s*$')

# A line that begins (after optional whitespace) with an opening <div ...>.
reDiv = re.compile(r'\s*<div ')

# A closing </div>, optionally preceded by whitespace.
reEndDiv = re.compile(r'\s*</div>')

# Any single HTML tag.
reHTML = re.compile('<[^>]+>')

# Line separators used to split a fetched page: VT, FF, LF and CR.
ws = re.compile(r'[\x0b\x0c\n\r]')
# Handle required authentication
class myOpener(urllib.FancyURLopener):
    """URL opener that supplies the configured OU credentials on demand."""

    def prompt_user_passwd(self, host, realm):
        """Return the (username, password) pair to authenticate with.

        ``host`` and ``realm`` are ignored: the same configured credentials
        are returned for every request.
        """
        return OU_USERNAME, OU_PASSWORD


# Install an instance as urllib's module-level opener.
urllib._urlopener = myOpener()
def _fetchLines(url):
    """Download ``url`` and return its body split on the ``ws`` pattern.

    The handle is closed even when reading or splitting raises, so a failed
    fetch does not leak the connection.
    """
    fp = urllib.urlopen(url)
    try:
        return ws.split(fp.read())
    finally:
        fp.close()


def _parseItem(itemuri):
    """Fetch one item page and return its ``(title, text)``.

    The title is taken from the last line matching ``reTitle``. The text is
    every line from one that starts with ``<div `` up to and including the
    next line containing ``</div>``, concatenated with no separator.
    """
    title = ''
    captured = []
    capture = 0
    for line in _fetchLines(itemuri):
        t = reTitle.match(line)
        if t:
            title = t.group('title')
            if debugmode:
                print(' %s' % title)
        if reDiv.match(line):
            capture = 1
        if capture:
            captured.append(line)
        # Checked after appending so the closing line itself is kept.
        if reEndDiv.search(line):
            capture = 0
    text = ''.join(captured)
    # Protect <br> as newlines while all other tags are stripped, then
    # turn the newlines back into <br>.
    text = text.replace('<br>', '\n')
    text = reHTML.sub('', text)
    text = text.replace('\n', '<br>')
    # Drop NUL characters from the scraped text.
    text = text.replace('\x00', '')
    return title, text


def parseConf(conf):
    """Scrape one conference page and every item it lists.

    Args:
        conf: URL of the conference page.

    Returns:
        A ``(title, items)`` tuple. ``title`` is the text of the last line
        matching ``reTitle`` ('' if none matched). ``items`` is a list of
        ``(id, author, uri, title, text)`` tuples, one per line matching
        ``reItem``, in page order; ``uri`` is the id joined onto ``conf``.

    Any error raised while fetching a page propagates to the caller.
    """
    title = ''
    items = []
    for line in _fetchLines(conf):
        t = reTitle.match(line)
        if t:
            title = t.group('title')
            if debugmode:
                print('---%s---' % title)
        i = reItem.match(line)
        if not i:
            continue
        iid = i.group('id')
        iauth = i.group('author')
        itemuri = urlparse.urljoin(conf, iid)
        title2, text2 = _parseItem(itemuri)
        items.append((iid, iauth, itemuri, title2, text2))
    return title, items
# Walk the conferences
confdata = {}
for conf in CONFERENCES:
    title, items = parseConf(conf)
    # Keep a conference only if it yielded both a title and at least one
    # item. Results are keyed by title, so a later conference with the same
    # title replaces an earlier one.
    if title and items:
        confdata[title] = (conf, items)

# Write one RSS 2.0 file per conference into RSS_PATH. The file name is the
# conference title with spaces replaced by underscores, plus '.xml'.
for conf, (url, items) in confdata.items():
    fp = open(os.path.join(RSS_PATH, conf.replace(' ', '_')) + '.xml', 'w')
    try:
        fp.write('''<rss version="2.0">
 <channel>
 <title>%s</title>
 <link>%s</link>
 <description>%s</description>
''' % (conf, url, conf))
        for item in items:
            iid, auth, uri, title, text = item
            # Only the item text is escaped here.
            # NOTE(review): author, link and title are written as scraped;
            # confirm the source pages never put a raw '&' or '<' in them.
            fp.write(''' <item>
 <author>%s</author>
 <description>%s</description>
 <link>%s</link>
 <title>%s</title>
 </item>''' % (auth, cgi.escape(text), uri, title))
        fp.write(' </channel>\n</rss>\n')
    finally:
        # Close explicitly so the file is flushed and the handle released
        # even if a write fails part-way through.
        fp.close()

# Page converted by AltStyle (-> original) /