[Python-checkins] commit of r41650 - sandbox/trunk/seealso/make-seealso.py sandbox/trunk/seealso/parse-seealso.py
andrew.kuchling
python-checkins at python.org
Mon Dec 12 15:59:17 CET 2005
Author: andrew.kuchling
Date: Mon Dec 12 15:59:17 2005
New Revision: 41650
Modified:
sandbox/trunk/seealso/make-seealso.py
sandbox/trunk/seealso/parse-seealso.py
Log:
Parse document author, URL, and title, and include them in the generated output
Modified: sandbox/trunk/seealso/make-seealso.py
==============================================================================
--- sandbox/trunk/seealso/make-seealso.py (original)
+++ sandbox/trunk/seealso/make-seealso.py Mon Dec 12 15:59:17 2005
@@ -8,7 +8,7 @@
def main ():
if len(sys.argv) < 3:
- print 'Usage: %s database-filename example-dir'
+ print 'Usage: %s database-filename example-dir' % sys.argv[0]
sys.exit(1)
db_file = sys.argv[1]
@@ -18,7 +18,6 @@
for fn in os.listdir(example_dir):
if fn.endswith('.tex'):
p = os.path.join(example_dir, fn)
- print p
os.remove(p)
# Read dictionary
@@ -40,14 +39,30 @@
# Write file containing examples for this module
p = os.path.join(example_dir, module + '.tex')
output = open(p, 'w')
- for title, url, excerpt in examples:
+ for (url, document_title, document_url,
+ author, title, excerpt) in examples:
+
+ attribution = ""
+ if document_title:
+ attribution += ' from "%s"' % tex_escape(document_title)
+ if document_url:
+ attribution += ' (\url{%s})' % (tex_escape(document_url))
+ if author:
+ attribution += " by %s" % (tex_escape(author))
+
+ if attribution:
+ attribution = ',' + attribution
+
if excerpt is None:
- output.write("\seeurl{%s}{%s}\n" % (tex_escape(url),
- tex_escape(title)))
+ output.write("\seeurl{%s}{%s%s.}\n" % (tex_escape(url),
+ tex_escape(title),
+ attribution,
+ ))
else:
- output.write("\seeurl{%s}{%s\n\n%s}\n" % (tex_escape(url),
- tex_escape(title),
- tex_escape(excerpt)))
+ output.write("\seeurl{%s}{%s%s.\n\n%s}\n" % (tex_escape(url),
+ tex_escape(title),
+ attribution,
+ tex_escape(excerpt)))
output.close()
Modified: sandbox/trunk/seealso/parse-seealso.py
==============================================================================
--- sandbox/trunk/seealso/parse-seealso.py (original)
+++ sandbox/trunk/seealso/parse-seealso.py Mon Dec 12 15:59:17 2005
@@ -6,6 +6,9 @@
import urllib, pickle
from xml.dom import minidom
+# Dublin Core namespace
+DC_NS = 'http://purl.org/dc/elements/1.1/'
+
def main ():
if len(sys.argv) < 3:
print 'Usage: %s URL database-filename'
@@ -30,7 +33,27 @@
elif c.nodeType == c.ELEMENT_NODE:
t += get_text(c)
return t
-
+
+ # Get document title and author
+ author = document_title = document_url = None
+
+ document_title_nodes = list(dom.getElementsByTagNameNS(DC_NS, 'title'))
+ document_url_nodes = list(dom.getElementsByTagNameNS(DC_NS, 'identifier'))
+ author_nodes = list(dom.getElementsByTagNameNS(DC_NS, 'creator'))
+
+ assert len(document_title_nodes) <= 1
+ assert len(document_url_nodes) <= 1
+ assert len(author_nodes) <= 1
+
+ if document_title_nodes:
+ document_title = get_text(document_title_nodes[0])
+ if author_nodes:
+ author = get_text(author_nodes[0])
+ if document_url_nodes:
+ document_url = get_text(document_url_nodes[0])
+
+
+ # Loop over items
for item in dom.getElementsByTagNameNS(None, 'item'):
href = item.getAttributeNS(None, 'href')
title_node = item.getElementsByTagNameNS(None, 'title')[0]
@@ -47,7 +70,8 @@
target_nodes = item.getElementsByTagNameNS(None, 'target')
for t in target_nodes:
target = get_text(t)
- L.append((target, title, href, excerpt))
+ L.append((target, href, document_title, document_url, author,
+ title, excerpt))
@@ -61,18 +85,20 @@
input = open(db_file, 'rb')
db = pickle.load(input)
input.close()
- for module, title, url, excerpt in L:
+ for entry in L:
# Check if URL is already listed; if yes, delete the old entry
- exlist = db.setdefault(module, [])
- exlist = [t for t in exlist if t[1] != url]
+ module = entry[0]
+ url = entry[1]
+ exlist = db.get(module, [])
+ exlist = [t for t in exlist if t[0] != url]
# Append to list
- exlist.append((title, url, excerpt))
+ exlist.append(entry[1:])
# Reinsert (since the list-comp created a new list)
db[module] = exlist
-
- ##import pprint ; pprint.pprint(db)
+
+ import pprint ; pprint.pprint(db)
output = open(db_file, 'wb')
pickle.dump(db, output)
output.close()
More information about the Python-checkins mailing list