#!/usr/bin/python2 # Thomas Heller, 2002年04月04日 # # A script to look up keywords in the Python manuals index # # $Id: pyhelp.py,v 1.9 2002年04月05日 19:45:14 thomas Exp $ # # $Log: pyhelp.py,v $ # Revision 1.9 2002年04月05日 19:45:14 thomas # More text in the CGI HTML page. # # Revision 1.5 2002年04月05日 19:33:19 thomas # More HTML code. # # Revision 1.4 2002年04月05日 16:15:07 thomas # Valid HTML 4.01. charset is utf-8, is this correct? # # Revision 1.3 2002年04月05日 14:51:26 thomas # Now also works as CGI script (has even been tested on starship) # # Revision 1.2 2002年04月05日 14:49:34 thomas # Pickles the found links to disk instead of downloading the index pages # everytime. # # Revision 1.1 2002年04月05日 14:48:15 thomas # First version, posted to python-dev asking for comments. # import htmllib, formatter, re import urllib, webbrowser import sys, os if __name__ == '__main__': __file__ = sys.argv[0] __version__ = "$Revision: 1.9 $"[11:-2] DOCMAP = { "2.0": "http://www.python.org/doc/2.0/", "2.1": "http://www.python.org/doc/2.1/", "2.2": "http://www.python.org/doc/2.2/", "devel": "http://www.python.org/dev/doc/devel/", "current": "http://www.python.org/doc/current/", # Can alternatively use local documentation! ## "2.2": "file:c:/python22/doc/", ## "2.1": "file:c:/python21/doc/", ## "2.0": "file:c:/python20/doc/", } INDEXPAGE = "genindex.html" # XXX Only valid for 2.0 and above SECTIONS = "api/ ref/ lib/".split() # modified from an example in the eff-bot guide to the Python Library... class Parser(htmllib.HTMLParser): def __init__(self, url, verbose=0): self.anchors = {} f = formatter.NullFormatter() htmllib.HTMLParser.__init__(self, f, verbose) self.last_text = "" self.url = url def anchor_bgn(self, href, name, type): self.save_bgn() self.anchor = self.url + href def anchor_end(self): text = self.save_end().strip() if text == "[Link]" and self.last_text: text = self.last_text if self.anchor and text: self.anchors[text] = self.anchors.get(text, []) + [self.anchor] self.last_text = text def get_anchors(version, rebuild): # returns a list of (topic, url) pairs # if rebuild is true, the index is rebuilt # if rebuild is false, the index is rebuild if not present import cPickle baseurl = DOCMAP[version] pathname = baseurl for char in ":/\\": pathname = pathname.replace(char, "-") pathname = pathname + version + ".index" if not rebuild: try: file = open(pathname, "rb") data = cPickle.load(file) return data except (IOError, cPickle.PickleError): pass a = [] for sec in SECTIONS: print "Downloading", baseurl + sec file = urllib.urlopen(baseurl + sec + INDEXPAGE) html = file.read() file.close() print "Parsing", baseurl + sec p = Parser(baseurl + sec) p.feed(html) p.close() a.extend(p.anchors.items()) try: file = open(pathname, "wb") except IOError, detail: print detail print os.path.abspath("index" + version) else: cPickle.dump(a, file, 1) return a def find_topics(topic, version, regexp, rebuild): v = [] if regexp: pat = re.compile(topic) for key, urls in get_anchors(version, rebuild): if pat.match(key): for url in urls: v.append((key, url)) else: for key, urls in get_anchors(version, rebuild): if key.startswith(topic): for url in urls: v.append((key, url)) v.sort() return v def get_tempdir(): import tempfile tempfile.mktemp() return tempfile.tempdir def help(topic, version="2.2", regexp=0, rebuild=0): baseurl = DOCMAP[version] v = find_topics(topic, version, regexp, rebuild) if len(v) == 0: print "Not found" elif len(v) == 1: # only one topic found, display directly webbrowser.open(v[0][1]) else: # create a temporary HTML page displaying links to the # search results. Unfortunately the file cannot be deleted, # because it may still be needed by the browser. path = os.path.join(get_tempdir(), "pyhelp-results.html") print path file = open(path, "w") file.write("

\n") if regexp: file.write("

Search results for '%s':

\n" % topic) else: file.write("

Regexp search results for '%s':

\n" % topic) for topic, url in v: file.write("%s
\n" % (url, topic)) file.write("

\n") file.close() webbrowser.open(path) def cgi_help(): import cgi, cgitb cgitb.enable() form = cgi.FieldStorage() print "Content-type: text/html; charset=utf-8" print print '' print " Search Python Manual" print "" version = "2.2" if form.has_key("version"): version = form["version"].value regexp = 0 if form.has_key("regexp"): regexp = form["regexp"].value if form.has_key("keyword"): baseurl = DOCMAP[version] v = find_topics(form["keyword"].value, version=version, regexp=regexp, rebuild=0) print "

" topic = form["keyword"].value print "

%d search results for '%s':

" % (len(v), topic) for topic, url in v: print '%s
\n' % (url, topic) print '''

This script looks up keywords in the Python Library Reference , Language Reference , and Python/C API manuals.
For full text searches better consult the usual web search engines. ''' print '''

Valid HTML 4.01! Powered by Python ''' print '''
%s, version %s
Please send any comments/suggestions to Thomas Heller. ''' % (os.path.basename(__file__), __version__) print "" def main(): os.chdir(os.path.dirname(os.path.abspath(sys.argv[0]))) if os.environ.has_key("SCRIPT_NAME"): cgi_help() sys.exit() import getopt try: opts, args = getopt.getopt(sys.argv[1:], "v:rb", ["version=", "useregexp", "rebuild"]) except getopt.GetoptError: print "Usage: %s [-b] [-r] [-v version] topic" % sys.argv[0] sys.exit(1) version = "2.2" regexp = 0 rebuild = 0 for o, a in opts: if o in ("-v", "--version"): if a not in DOCMAP.keys(): print "version must be one of %s" % ", ".join(DOCMAP.keys()) sys.exit(1) version = a if o in ("-r", "--useregexpg"): regexp = 1 if o in ("-b", "--rebuild"): rebuild = 1 if len(args) != 1: print "Usage: %s [-r] [-v version] topic" % sys.argv[0] sys.exit(1) os.chdir(os.path.dirname(os.path.abspath(sys.argv[0]))) help(args[0], version, regexp, rebuild) if __name__ == '__main__': main() # -- EOF --