#!/usr/bin/python2 # Thomas Heller, 2002年04月04日 # # A script to look up keywords in the Python manuals index # # $Id: pyhelp.py,v 1.9 2002年04月05日 19:45:14 thomas Exp $ # # $Log: pyhelp.py,v $ # Revision 1.9 2002年04月05日 19:45:14 thomas # More text in the CGI HTML page. # # Revision 1.5 2002年04月05日 19:33:19 thomas # More HTML code. # # Revision 1.4 2002年04月05日 16:15:07 thomas # Valid HTML 4.01. charset is utf-8, is this correct? # # Revision 1.3 2002年04月05日 14:51:26 thomas # Now also works as CGI script (has even been tested on starship) # # Revision 1.2 2002年04月05日 14:49:34 thomas # Pickles the found links to disk instead of downloading the index pages # everytime. # # Revision 1.1 2002年04月05日 14:48:15 thomas # First version, posted to python-dev asking for comments. # import htmllib, formatter, re import urllib, webbrowser import sys, os if __name__ == '__main__': __file__ = sys.argv[0] __version__ = "$Revision: 1.9 $"[11:-2] DOCMAP = { "2.0": "http://www.python.org/doc/2.0/", "2.1": "http://www.python.org/doc/2.1/", "2.2": "http://www.python.org/doc/2.2/", "devel": "http://www.python.org/dev/doc/devel/", "current": "http://www.python.org/doc/current/", # Can alternatively use local documentation! ## "2.2": "file:c:/python22/doc/", ## "2.1": "file:c:/python21/doc/", ## "2.0": "file:c:/python20/doc/", } INDEXPAGE = "genindex.html" # XXX Only valid for 2.0 and above SECTIONS = "api/ ref/ lib/".split() # modified from an example in the eff-bot guide to the Python Library... class Parser(htmllib.HTMLParser): def __init__(self, url, verbose=0): self.anchors = {} f = formatter.NullFormatter() htmllib.HTMLParser.__init__(self, f, verbose) self.last_text = "" self.url = url def anchor_bgn(self, href, name, type): self.save_bgn() self.anchor = self.url + href def anchor_end(self): text = self.save_end().strip() if text == "[Link]" and self.last_text: text = self.last_text if self.anchor and text: self.anchors[text] = self.anchors.get(text, []) + [self.anchor] self.last_text = text def get_anchors(version, rebuild): # returns a list of (topic, url) pairs # if rebuild is true, the index is rebuilt # if rebuild is false, the index is rebuild if not present import cPickle baseurl = DOCMAP[version] pathname = baseurl for char in ":/\\": pathname = pathname.replace(char, "-") pathname = pathname + version + ".index" if not rebuild: try: file = open(pathname, "rb") data = cPickle.load(file) return data except (IOError, cPickle.PickleError): pass a = [] for sec in SECTIONS: print "Downloading", baseurl + sec file = urllib.urlopen(baseurl + sec + INDEXPAGE) html = file.read() file.close() print "Parsing", baseurl + sec p = Parser(baseurl + sec) p.feed(html) p.close() a.extend(p.anchors.items()) try: file = open(pathname, "wb") except IOError, detail: print detail print os.path.abspath("index" + version) else: cPickle.dump(a, file, 1) return a def find_topics(topic, version, regexp, rebuild): v = [] if regexp: pat = re.compile(topic) for key, urls in get_anchors(version, rebuild): if pat.match(key): for url in urls: v.append((key, url)) else: for key, urls in get_anchors(version, rebuild): if key.startswith(topic): for url in urls: v.append((key, url)) v.sort() return v def get_tempdir(): import tempfile tempfile.mktemp() return tempfile.tempdir def help(topic, version="2.2", regexp=0, rebuild=0): baseurl = DOCMAP[version] v = find_topics(topic, version, regexp, rebuild) if len(v) == 0: print "Not found" elif len(v) == 1: # only one topic found, display directly webbrowser.open(v[0][1]) else: # create a temporary HTML page displaying links to the # search results. Unfortunately the file cannot be deleted, # because it may still be needed by the browser. path = os.path.join(get_tempdir(), "pyhelp-results.html") print path file = open(path, "w") file.write("
%d search results for '%s':
" % (len(v), topic) for topic, url in v: print '%s