[Python-checkins] commit of r41650 - sandbox/trunk/seealso/make-seealso.py sandbox/trunk/seealso/parse-seealso.py

Mon Dec 12 15:59:17 CET 2005

Author: andrew.kuchling
Date: Mon Dec 12 15:59:17 2005
New Revision: 41650
Modified:
 sandbox/trunk/seealso/make-seealso.py
 sandbox/trunk/seealso/parse-seealso.py
Log:
Parse document author, URL, and title, and include them in the generated output.
Modified: sandbox/trunk/seealso/make-seealso.py
==============================================================================

--- sandbox/trunk/seealso/make-seealso.py	(original)
+++ sandbox/trunk/seealso/make-seealso.py	Mon Dec 12 15:59:17 2005
@@ -8,7 +8,7 @@
 
 def main ():
 if len(sys.argv) < 3:
- print 'Usage: %s database-filename example-dir'
+ print 'Usage: %s database-filename example-dir' % sys.argv[0]
 sys.exit(1)
 
 db_file = sys.argv[1]
@@ -18,7 +18,6 @@
 for fn in os.listdir(example_dir):
 	if fn.endswith('.tex'):
 	 p = os.path.join(example_dir, fn)
-	 print p
 	 os.remove(p)
 	 
 # Read dictionary
@@ -40,14 +39,30 @@
 	# Write file containing examples for this module
 	p = os.path.join(example_dir, module + '.tex')
 	output = open(p, 'w')
-	for title, url, excerpt in examples:
+	for (url, document_title, document_url,
+ author, title, excerpt) in examples:
+
+ attribution = ""
+ if document_title:
+ attribution += ' from "%s"' % tex_escape(document_title)
+ if document_url:
+ attribution += ' (\url{%s})' % (tex_escape(document_url))
+ if author:
+ attribution += " by %s" % (tex_escape(author))
+
+ if attribution:
+ attribution = ',' + attribution
+ 
 	 if excerpt is None:
-		output.write("\seeurl{%s}{%s}\n" % (tex_escape(url),
-		 tex_escape(title)))
+		output.write("\seeurl{%s}{%s%s.}\n" % (tex_escape(url),
+ tex_escape(title),
+ attribution,
+ ))
 	 else:
-		output.write("\seeurl{%s}{%s\n\n%s}\n" % (tex_escape(url),
-		 tex_escape(title),
-						 tex_escape(excerpt)))
+		output.write("\seeurl{%s}{%s%s.\n\n%s}\n" % (tex_escape(url),
+ tex_escape(title),
+ attribution,
+ tex_escape(excerpt)))
 		
 
 	output.close()
Modified: sandbox/trunk/seealso/parse-seealso.py
==============================================================================
--- sandbox/trunk/seealso/parse-seealso.py	(original)
+++ sandbox/trunk/seealso/parse-seealso.py	Mon Dec 12 15:59:17 2005
@@ -6,6 +6,9 @@
 import urllib, pickle
 from xml.dom import minidom
 
+# Dublin Core namespace
+DC_NS = 'http://purl.org/dc/elements/1.1/'
+
 def main ():
 if len(sys.argv) < 3:
 print 'Usage: %s URL database-filename'
@@ -30,7 +33,27 @@
 elif c.nodeType == c.ELEMENT_NODE:
 t += get_text(c)
 return t
- 
+
+ # Get document title and author
+ author = document_title = document_url = None
+
+ document_title_nodes = list(dom.getElementsByTagNameNS(DC_NS, 'title'))
+ document_url_nodes = list(dom.getElementsByTagNameNS(DC_NS, 'identifier'))
+ author_nodes = list(dom.getElementsByTagNameNS(DC_NS, 'creator'))
+
+ assert len(document_title_nodes) <= 1
+ assert len(document_url_nodes) <= 1
+ assert len(author_nodes) <= 1
+
+ if document_title_nodes:
+ document_title = get_text(document_title_nodes[0])
+ if author_nodes:
+ author = get_text(author_nodes[0])
+ if document_url_nodes:
+ document_url = get_text(document_url_nodes[0])
+ 
+ 
+ # Loop over items
 for item in dom.getElementsByTagNameNS(None, 'item'):
 href = item.getAttributeNS(None, 'href')
 title_node = item.getElementsByTagNameNS(None, 'title')[0]
@@ -47,7 +70,8 @@
 target_nodes = item.getElementsByTagNameNS(None, 'target')
 for t in target_nodes:
 target = get_text(t)
- L.append((target, title, href, excerpt))
+ L.append((target, href, document_title, document_url, author,
+ title, excerpt))
 
 
 
@@ -61,18 +85,20 @@
 input = open(db_file, 'rb')
 db = pickle.load(input)
 input.close()
- for module, title, url, excerpt in L:
+ for entry in L:
 # Check if URL is already listed; if yes, delete the old entry
- exlist = db.setdefault(module, [])
- exlist = [t for t in exlist if t[1] != url]
+ module = entry[0]
+ url = entry[1]
+ exlist = db.get(module, [])
+ exlist = [t for t in exlist if t[0] != url]
 
 # Append to list
- exlist.append((title, url, excerpt))
+ exlist.append(entry[1:])
 
 # Reinsert (since the list-comp created a new list)
 db[module] = exlist
- 
- ##import pprint ; pprint.pprint(db)
+ 
+ import pprint ; pprint.pprint(db)
 output = open(db_file, 'wb')
 pickle.dump(db, output)
 output.close()