[Python-checkins] CVS: python/dist/src/Tools/scripts dutree.doc,NONE,1.1.12.1 dutree.py,NONE,1.10.6.1 eptags.py,NONE,1.8.8.1 findlinksto.py,NONE,1.8.6.1 fixcid.py,NONE,1.9.6.1 fixheader.py,NONE,1.3.6.1 fixnotice.py,NONE,1.5.6.1 fixps.py,NONE,1.5.6.1 ftpmirror.py,NONE,1.14.2.1 gencodec.py,NONE,1.6.2.1 h2py.py,NONE,1.12.6.1 ifdef.py,NONE,1.4.6.1 lfcr.py,NONE,1.3.10.1 linktree.py,NONE,1.6.6.1 lll.py,NONE,1.5.6.1 logmerge.py,NONE,1.7.4.1 mailerdaemon.py,NONE,1.9.6.1 md5sum.py,NONE,1.1.2.1 methfix.py,NONE,1.6.6.1 mkreal.py,NONE,1.6.6.1 ndiff.py,NONE,1.8.6.1 nm2def.py,NONE,1.4.6.1 objgraph.py,NONE,1.5.6.1 parseentities.py,NONE,1.3.6.1 pathfix.py,NONE,1.4.6.1 pdeps.py,NONE,1.5.6.1 pindent.py,NONE,1.10.6.1 ptags.py,NONE,1.7.6.1 pydoc.pyw,NONE,1.1.4.1 redemo.py,NONE,1.2.6.1 reindent.py,NONE,1.1.8.1 rgrep.py,NONE,1.1.12.1 suff.py,NONE,1.5.6.1 sum5.py,NONE,1.3.6.1 texi2html.py,NONE,1.12.6.1 trace.py,NONE,1.4.2.1 treesync.py,NONE,1.5.6.1 untabify.py,NONE,1.2.12.1 which.py,NONE,1.9.6.1 xxci.py,NONE,1.14.6.1

Tim Peters tim_one@users.sourceforge.net
06 Jul 2001 10:08:51 -0700


Update of /cvsroot/python/python/dist/src/Tools/scripts
In directory usw-pr-cvs1:/tmp/cvs-serv3084
Added Files:
 Tag: descr-branch
	dutree.doc dutree.py eptags.py findlinksto.py fixcid.py 
	fixheader.py fixnotice.py fixps.py ftpmirror.py gencodec.py 
	h2py.py ifdef.py lfcr.py linktree.py lll.py logmerge.py 
	mailerdaemon.py md5sum.py methfix.py mkreal.py ndiff.py 
	nm2def.py objgraph.py parseentities.py pathfix.py pdeps.py 
	pindent.py ptags.py pydoc.pyw redemo.py reindent.py rgrep.py 
	suff.py sum5.py texi2html.py trace.py treesync.py untabify.py 
	which.py xxci.py 
Log Message:
Adding "the rest" of Tools/scripts to descr-branch.
--- NEW FILE: dutree.doc ---
Path: cwi.nl!sun4nl!mcsun!uunet!cs.utexas.edu!convex!usenet
From: tchrist@convex.COM (Tom Christiansen)
Newsgroups: comp.lang.perl
Subject: Re: The problems of Perl (Re: Question (silly?))
Message-ID: <1992Jan17.053115.4220@convex.com>
Date: 17 Jan 92 05:31:15 GMT
References: <17458@ector.cs.purdue.edu> <1992Jan16.165347.25583@cherokee.uswest.com> <=#Hues+4@cs.psu.edu>
Sender: usenet@convex.com (news access account)
Reply-To: tchrist@convex.COM (Tom Christiansen)
Organization: CONVEX Realtime Development, Colorado Springs, CO
Lines: 83
Nntp-Posting-Host: pixel.convex.com
From the keyboard of flee@cs.psu.edu (Felix Lee):
:And Perl is definitely awkward with data types. I haven't yet found a
:pleasant way of shoving non-trivial data types into Perl's grammar.

Yes, it's pretty awful at that, alright. Sometimes I write perl programs
that need them, and sometimes it just takes a little creativity. But
sometimes it's not worth it. I actually wrote a C program the other day
(gasp) because I didn't want to deal with a game matrix with six links per node.
:Here's a very simple problem that's tricky to express in Perl: process
:the output of "du" to produce output that's indented to reflect the
:tree structure, and with each subtree sorted by size. Something like:
:    434 /etc
:      |  344 .
:      |   50 install
:      |   35 uucp
:      |    3 nserve
:      |      | 2 .
:      |      | 1 auth.info
:      |    1 sm
:      |    1 sm.bak

At first I thought I could just keep one local list around
at once, but this seems inherently recursive. Which means 
I need a real recursive data structure. Maybe you could
do it with one of the %assoc arrays Larry uses in the begat
programs, but I broke down and got dirty. I think the hardest
part was matching Felix's desired output exactly. It's not 
blazingly fast: I should probably inline the &childof routine,
but it *was* faster to write than I could have written the 
equivalent C program.
--tom
--
"GUIs normally make it simple to accomplish simple actions and impossible
to accomplish complex actions." --Doug Gwyn (22/Jun/91 in comp.unix.wizards)
 Tom Christiansen tchrist@convex.com convex!tchrist
--- NEW FILE: dutree.py ---
#! /usr/bin/env python
# Format du output in a tree shape
import os, string, sys, errno

def main():
    p = os.popen('du ' + string.join(sys.argv[1:]), 'r')
    total, d = None, {}
    for line in p.readlines():
        i = 0
        while line[i] in '0123456789': i = i+1
        size = eval(line[:i])
        while line[i] in ' \t': i = i+1
        file = line[i:-1]
        comps = string.splitfields(file, '/')
        if comps[0] == '': comps[0] = '/'
        if comps[len(comps)-1] == '': del comps[len(comps)-1]
        total, d = store(size, comps, total, d)
    try:
        display(total, d)
    except IOError, e:
        if e.errno != errno.EPIPE:
            raise

def store(size, comps, total, d):
    if comps == []:
        return size, d
    if not d.has_key(comps[0]):
        d[comps[0]] = None, {}
    t1, d1 = d[comps[0]]
    d[comps[0]] = store(size, comps[1:], t1, d1)
    return total, d

def display(total, d):
    show(total, d, '')

def show(total, d, prefix):
    if not d: return
    list = []
    sum = 0
    for key in d.keys():
        tsub, dsub = d[key]
        list.append((tsub, key))
        if tsub is not None: sum = sum + tsub
##  if sum < total:
##      list.append((total - sum, os.curdir))
    list.sort()
    list.reverse()
    width = len(`list[0][0]`)
    for tsub, key in list:
        if tsub is None:
            psub = prefix
        else:
            print prefix + string.rjust(`tsub`, width) + ' ' + key
            psub = prefix + ' '*(width-1) + '|' + ' '*(len(key)+1)
        if d.has_key(key):
            show(tsub, d[key][1], psub)

main()
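
(A quick illustration of the recursive structure store() builds -- an
editorial sketch, not part of the checked-in file. Each node is a
(size, subtree-dict) pair keyed by path component; for du lines
"10 /a" and "4 /a/b", main() ends up with:

    total = None    # no line for the root itself
    d     = {'/': (None, {'a': (10, {'b': (4, {})})})}

show() then prints each level with sizes right-justified, prefixing
children with '|'.)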
--- NEW FILE: eptags.py ---
#! /usr/bin/env python
"""Create a TAGS file for Python programs, usable with GNU Emacs.
usage: eptags pyfiles...
The output TAGS file is usable with Emacs version 18, 19, 20.
Tagged are:
 - functions (even inside other defs or classes)
 - classes
eptags warns about files it cannot open.
eptags will not give warnings about duplicate tags.
BUGS:
 Because of tag duplication (methods with the same name in different
 classes), TAGS files are not very useful for most object-oriented
 python projects.
"""
import sys,re
expr = r'^[ \t]*(def|class)[ \t]+([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*[:\(]'
matcher = re.compile(expr)
def treat_file(file, outfp):
 """Append tags found in file named 'file' to the open file 'outfp'"""
 try:
 fp = open(file, 'r')
 except:
 sys.stderr.write('Cannot open %s\n'%file)
 return
 charno = 0
 lineno = 0
 tags = []
 size = 0
 while 1:
 line = fp.readline()
 if not line:
 break
 lineno = lineno + 1
 m = matcher.search(line)
 if m:
 tag = m.group(0) + '177円%d,%d\n'%(lineno,charno)
 tags.append(tag)
 size = size + len(tag)
 charno = charno + len(line)
 outfp.write('\f\n%s,%d\n'%(file,size))
 for tag in tags:
 outfp.write(tag)
def main():
 outfp = open('TAGS', 'w')
 for file in sys.argv[1:]:
 treat_file(file, outfp)
if __name__=="__main__":
 main()
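
(For reference, the TAGS section layout this produces -- illustrative
sketch with hypothetical numbers, not part of the file above: each file
gets a section headed by a form feed, then one line per tag holding the
matched source text, a DEL byte (octal 177), the line number and the
byte offset of that line:

    \f
    spam.py,29
    def spam(177円3,45
    class Spam(177円9,120
)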
--- NEW FILE: findlinksto.py ---
#! /usr/bin/env python
# findlinksto
#
# find symbolic links to a path matching a regular expression
import os
import sys
import regex
import getopt

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], '')
        if len(args) < 2:
            raise getopt.error, 'not enough arguments'
    except getopt.error, msg:
        sys.stdout = sys.stderr
        print msg
        print 'usage: findlinksto pattern directory ...'
        sys.exit(2)
    pat, dirs = args[0], args[1:]
    prog = regex.compile(pat)
    for dirname in dirs:
        os.path.walk(dirname, visit, prog)

def visit(prog, dirname, names):
    if os.path.islink(dirname):
        names[:] = []
        return
    if os.path.ismount(dirname):
        print 'descend into', dirname
    for name in names:
        name = os.path.join(dirname, name)
        try:
            linkto = os.readlink(name)
            if prog.search(linkto) >= 0:
                print name, '->', linkto
        except os.error:
            pass

main()
--- NEW FILE: fixcid.py ---
#! /usr/bin/env python
# Perform massive identifier substitution on C source files.
# This actually tokenizes the files (to some extent) so it can
# avoid making substitutions inside strings or comments.
# Inside strings, substitutions are never made; inside comments,
# it is a user option (off by default).
#
# The substitutions are read from one or more files whose lines,
# when not empty, after stripping comments starting with #,
# must contain exactly two words separated by whitespace: the
# old identifier and its replacement.
#
# The option -r reverses the sense of the substitutions (this may be
# useful to undo a particular substitution).
#
# If the old identifier is prefixed with a '*' (with no intervening
# whitespace), then it will not be substituted inside comments.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a C file (ends in .h or .c). The special filename '-' means
# operate in filter mode: read stdin, write stdout.
#
# Symbolic links are always ignored (except as explicit directory
# arguments).
#
# The original files are kept as back-up with a "~" suffix.
#
# Changes made are reported to stdout in a diff-like format.
#
# NB: by changing only the function fixline() you can turn this
# into a program for different changes to C source files; by
# changing the function wanted() you can make a different selection of
# files.
import sys
import regex
import string
import os
from stat import *
import getopt

err = sys.stderr.write
dbg = err
rep = sys.stdout.write

def usage():
    progname = sys.argv[0]
    err('Usage: ' + progname +
        ' [-c] [-r] [-s file] ... file-or-directory ...\n')
    err('\n')
    err('-c : substitute inside comments\n')
    err('-r : reverse direction for following -s options\n')
    err('-s substfile : add a file of substitutions\n')
    err('\n')
    err('Each non-empty non-comment line in a substitution file must\n')
    err('contain exactly two words: an identifier and its replacement.\n')
    err('Comments start with a # character and end at end of line.\n')
    err('If an identifier is preceded with a *, it is not substituted\n')
    err('inside a comment even when -c is specified.\n')

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'crs:')
    except getopt.error, msg:
        err('Options error: ' + str(msg) + '\n')
        usage()
        sys.exit(2)
    bad = 0
    if not args: # No arguments
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-c':
            setdocomments()
        if opt == '-r':
            setreverse()
        if opt == '-s':
            addsubst(arg)
    for arg in args:
        if os.path.isdir(arg):
            if recursedown(arg): bad = 1
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            bad = 1
        else:
            if fix(arg): bad = 1
    sys.exit(bad)

# Change this regular expression to select a different set of files
Wanted = '^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
    return regex.match(Wanted, name) >= 0

def recursedown(dirname):
    dbg('recursedown(' + `dirname` + ')\n')
    bad = 0
    try:
        names = os.listdir(dirname)
    except os.error, msg:
        err(dirname + ': cannot list directory: ' + str(msg) + '\n')
        return 1
    names.sort()
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir): continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname): pass
        elif os.path.isdir(fullname):
            subdirs.append(fullname)
        elif wanted(name):
            if fix(fullname): bad = 1
    for fullname in subdirs:
        if recursedown(fullname): bad = 1
    return bad

def fix(filename):
##  dbg('fix(' + `filename` + ')\n')
    if filename == '-':
        # Filter mode
        f = sys.stdin
        g = sys.stdout
    else:
        # File replacement mode
        try:
            f = open(filename, 'r')
        except IOError, msg:
            err(filename + ': cannot open: ' + str(msg) + '\n')
            return 1
        head, tail = os.path.split(filename)
        tempname = os.path.join(head, '@' + tail)
        g = None
    # If we find a match, we rewind the file and start over but
    # now copy everything to a temp file.
    lineno = 0
    initfixline()
    while 1:
        line = f.readline()
        if not line: break
        lineno = lineno + 1
        while line[-2:] == '\\\n':
            nextline = f.readline()
            if not nextline: break
            line = line + nextline
            lineno = lineno + 1
        newline = fixline(line)
        if newline != line:
            if g is None:
                try:
                    g = open(tempname, 'w')
                except IOError, msg:
                    f.close()
                    err(tempname+': cannot create: '+
                        str(msg)+'\n')
                    return 1
                f.seek(0)
                lineno = 0
                initfixline()
                rep(filename + ':\n')
                continue # restart from the beginning
            rep(`lineno` + '\n')
            rep('< ' + line)
            rep('> ' + newline)
        if g is not None:
            g.write(newline)
    # End of file
    if filename == '-': return 0 # Done in filter mode
    f.close()
    if not g: return 0 # No changes
    # Finishing touch -- move files
    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 07777)
    except os.error, msg:
        err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except os.error, msg:
        err(filename + ': warning: backup failed (' + str(msg) + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except os.error, msg:
        err(filename + ': rename failed (' + str(msg) + ')\n')
        return 1
    # Return success
    return 0

# Tokenizing ANSI C (partly)

Identifier = '\(struct \)?[a-zA-Z_][a-zA-Z0-9_]+'
String = '"\([^\n\\"]\|\\\\.\)*"'
Char = '\'\([^\n\\\']\|\\\\.\)*\''
CommentStart = '/\*'
CommentEnd = '\*/'

Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = Hexnumber + '\|' + Octnumber + '\|' + Decnumber
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = '\([0-9]+\.[0-9]*\|\.[0-9]+\)\(' + Exponent + '\)?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = Pointfloat + '\|' + Expfloat
Number = Floatnumber + '\|' + Intnumber

# Anything else is an operator -- don't list this explicitly because of '/*'

OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '\(' + string.joinfields(OutsideComment, '\|') + '\)'
OutsideCommentProgram = regex.compile(OutsideCommentPattern)

InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '\(' + string.joinfields(InsideComment, '\|') + '\)'
InsideCommentProgram = regex.compile(InsideCommentPattern)

def initfixline():
    global Program
    Program = OutsideCommentProgram

def fixline(line):
    global Program
##  print '-->', `line`
    i = 0
    while i < len(line):
        i = Program.search(line, i)
        if i < 0: break
        found = Program.group(0)
##      if Program is InsideCommentProgram: print '...',
##      else: print '   ',
##      print found
        if len(found) == 2:
            if found == '/*':
                Program = InsideCommentProgram
            elif found == '*/':
                Program = OutsideCommentProgram
        n = len(found)
        if Dict.has_key(found):
            subst = Dict[found]
            if Program is InsideCommentProgram:
                if not Docomments:
                    print 'Found in comment:', found
                    i = i + n
                    continue
                if NotInComment.has_key(found):
##                  print 'Ignored in comment:',
##                  print found, '-->', subst
##                  print 'Line:', line,
                    subst = found
##              else:
##                  print 'Substituting in comment:',
##                  print found, '-->', subst
##                  print 'Line:', line,
            line = line[:i] + subst + line[i+n:]
            n = len(subst)
        i = i + n
    return line

Docomments = 0
def setdocomments():
    global Docomments
    Docomments = 1

Reverse = 0
def setreverse():
    global Reverse
    Reverse = (not Reverse)

Dict = {}
NotInComment = {}
def addsubst(substfile):
    try:
        fp = open(substfile, 'r')
    except IOError, msg:
        err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
        sys.exit(1)
    lineno = 0
    while 1:
        line = fp.readline()
        if not line: break
        lineno = lineno + 1
        try:
            i = string.index(line, '#')
        except string.index_error:
            i = -1 # Happens to delete trailing \n
        words = string.split(line[:i])
        if not words: continue
        if len(words) == 3 and words[0] == 'struct':
            words[:2] = [words[0] + ' ' + words[1]]
        elif len(words) <> 2:
            err(substfile + ':' + `lineno` +
                ': warning: bad line: ' + line)
            continue
        if Reverse:
            [value, key] = words
        else:
            [key, value] = words
        if value[0] == '*':
            value = value[1:]
        if key[0] == '*':
            key = key[1:]
            NotInComment[key] = value
        if Dict.has_key(key):
            err(substfile + ':' + `lineno` +
                ': warning: overriding: ' +
                key + ' ' + value + '\n')
            err(substfile + ':' + `lineno` +
                ': warning: previous: ' + Dict[key] + '\n')
        Dict[key] = value
    fp.close()

main()
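
(To make the substitution-file format concrete, a minimal example with
hypothetical identifiers -- illustrative only, not part of the file:

    # old            new
    getobject        PyObject_GetItem   # plain rename
    *object          PyObject           # '*': left alone inside comments
    struct methodlist PyMethodDef       # 'struct x' counts as one identifier

and a typical run: fixcid.py -s subst Modules/.)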
--- NEW FILE: fixheader.py ---
#! /usr/bin/env python
# Add some standard cpp magic to a header file
import sys
import string

def main():
    args = sys.argv[1:]
    for file in args:
        process(file)

def process(file):
    try:
        f = open(file, 'r')
    except IOError, msg:
        sys.stderr.write('%s: can\'t open: %s\n' % (file, str(msg)))
        return
    data = f.read()
    f.close()
    if data[:2] <> '/*':
        sys.stderr.write('%s does not begin with C comment\n' % file)
        return
    try:
        f = open(file, 'w')
    except IOError, msg:
        sys.stderr.write('%s: can\'t write: %s\n' % (file, str(msg)))
        return
    sys.stderr.write('Processing %s ...\n' % file)
    magic = 'Py_'
    for c in file:
        if c in string.letters + string.digits:
            magic = magic + string.upper(c)
        else: magic = magic + '_'
    sys.stdout = f
    print '#ifndef', magic
    print '#define', magic
    print '#ifdef __cplusplus'
    print 'extern "C" {'
    print '#endif'
    print
    f.write(data)
    print
    print '#ifdef __cplusplus'
    print '}'
    print '#endif'
    print '#endif /*', '!'+magic, '*/'

main()
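
(So, for a header named spam.h -- an illustrative example -- the file
body gets bracketed like this; every filename character that isn't a
letter or digit becomes '_':

    #ifndef Py_SPAM_H
    #define Py_SPAM_H
    #ifdef __cplusplus
    extern "C" {
    #endif

    ...original contents...

    #ifdef __cplusplus
    }
    #endif
    #endif /* !Py_SPAM_H */
)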
--- NEW FILE: fixnotice.py ---
#! /usr/bin/env python

OLD_NOTICE = """/***********************************************************
Copyright (c) 2000, BeOpen.com.
Copyright (c) 1995-2000, Corporation for National Research Initiatives.
Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
All rights reserved.
See the file "Misc/COPYRIGHT" for information on usage and
redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
******************************************************************/
"""

NEW_NOTICE = ""

# " <-- Help Emacs

import os, sys, string

def main():
    args = sys.argv[1:]
    if not args:
        print "No arguments."
    for arg in args:
        process(arg)

def process(arg):
    f = open(arg)
    data = f.read()
    f.close()
    i = string.find(data, OLD_NOTICE)
    if i < 0:
##      print "No old notice in", arg
        return
    data = data[:i] + NEW_NOTICE + data[i+len(OLD_NOTICE):]
    new = arg + ".new"
    backup = arg + ".bak"
    print "Replacing notice in", arg, "...",
    sys.stdout.flush()
    f = open(new, "w")
    f.write(data)
    f.close()
    os.rename(arg, backup)
    os.rename(new, arg)
    print "done"

if __name__ == '__main__':
    main()
--- NEW FILE: fixps.py ---
#!/usr/bin/env python
# Fix Python script(s) to reference the interpreter via /usr/bin/env python.
# Warning: this overwrites the file without making a backup.
import sys
import re

def main():
    for file in sys.argv[1:]:
        try:
            f = open(file, 'r')
        except IOError, msg:
            print file, ': can\'t open :', msg
            continue
        line = f.readline()
        if not re.match('^#! */usr/local/bin/python', line):
            print file, ': not a /usr/local/bin/python script'
            f.close()
            continue
        rest = f.read()
        f.close()
        line = re.sub('/usr/local/bin/python',
                      '/usr/bin/env python', line)
        print file, ':', `line`
        f = open(file, "w")
        f.write(line)
        f.write(rest)
        f.close()

main()
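
(E.g. -- illustrative -- a first line of

    #! /usr/local/bin/python

is rewritten in place to

    #! /usr/bin/env python

and the rest of the file is copied back unchanged.)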
--- NEW FILE: ftpmirror.py ---
#! /usr/bin/env python
"""Mirror a remote ftp subtree into a local directory tree.
usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
 [-l username [-p passwd [-a account]]]
 hostname [remotedir [localdir]]
-v: verbose
-q: quiet
-i: interactive mode
-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
-n: don't log in
-r: remove local files/directories no longer pertinent
-l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
-s pat: skip files matching pattern
hostname: remote host
remotedir: remote directory (default initial)
localdir: local directory (default current)
"""
import os
import sys
import time
import getopt
import string
import ftplib
import netrc
from fnmatch import fnmatch
# Print usage message and exit
def usage(*args):
 sys.stdout = sys.stderr
 for msg in args: print msg
 print __doc__
 sys.exit(2)
verbose = 1 # 0 for -q, 2 for -v
interactive = 0
mac = 0
rmok = 0
nologin = 0
skippats = ['.', '..', '.mirrorinfo']
# Main program: parse command line and start processing
def main():
 global verbose, interactive, mac, rmok, nologin
 try:
 opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
 except getopt.error, msg:
 usage(msg)
 login = ''
 passwd = ''
 account = ''
 if not args: usage('hostname missing')
 host = args[0]
 try:
 auth = netrc.netrc().authenticators(host)
 if auth is not None:
 login, account, passwd = auth
 except (netrc.NetrcParseError, IOError):
 pass
 for o, a in opts:
 if o == '-l': login = a
 if o == '-p': passwd = a
 if o == '-a': account = a
 if o == '-v': verbose = verbose + 1
 if o == '-q': verbose = 0
 if o == '-i': interactive = 1
 if o == '-m': mac = 1; nologin = 1; skippats.append('*.o')
 if o == '-n': nologin = 1
 if o == '-r': rmok = 1
 if o == '-s': skippats.append(a)
 remotedir = ''
 localdir = ''
 if args[1:]:
 remotedir = args[1]
 if args[2:]:
 localdir = args[2]
 if args[3:]: usage('too many arguments')
 #
 f = ftplib.FTP()
 if verbose: print 'Connecting to %s...' % `host`
 f.connect(host)
 if not nologin:
 if verbose:
 print 'Logging in as %s...' % `login or 'anonymous'`
 f.login(login, passwd, account)
 if verbose: print 'OK.'
 pwd = f.pwd()
 if verbose > 1: print 'PWD =', `pwd`
 if remotedir:
 if verbose > 1: print 'cwd(%s)' % `remotedir`
 f.cwd(remotedir)
 if verbose > 1: print 'OK.'
 pwd = f.pwd()
 if verbose > 1: print 'PWD =', `pwd`
 #
 mirrorsubdir(f, localdir)
# Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
 pwd = f.pwd()
 if localdir and not os.path.isdir(localdir):
 if verbose: print 'Creating local directory', `localdir`
 try:
 makedir(localdir)
 except os.error, msg:
 print "Failed to establish local directory", `localdir`
 return
 infofilename = os.path.join(localdir, '.mirrorinfo')
 try:
 text = open(infofilename, 'r').read()
 except IOError, msg:
 text = '{}'
 try:
 info = eval(text)
 except (SyntaxError, NameError):
 print 'Bad mirror info in %s' % `infofilename`
 info = {}
 subdirs = []
 listing = []
 if verbose: print 'Listing remote directory %s...' % `pwd`
 f.retrlines('LIST', listing.append)
 filesfound = []
 for line in listing:
 if verbose > 1: print '-->', `line`
 if mac:
 # Mac listing has just filenames;
 # trailing / means subdirectory
 filename = string.strip(line)
 mode = '-'
 if filename[-1:] == '/':
 filename = filename[:-1]
 mode = 'd'
 infostuff = ''
 else:
 # Parse, assuming a UNIX listing
 words = string.split(line, None, 8)
 if len(words) < 6:
 if verbose > 1: print 'Skipping short line'
 continue
 filename = string.lstrip(words[-1])
 i = string.find(filename, " -> ")
 if i >= 0:
 # words[0] had better start with 'l'...
 if verbose > 1:
 print 'Found symbolic link %s' % `filename`
 linkto = filename[i+4:]
 filename = filename[:i]
 infostuff = words[-5:-1]
 mode = words[0]
 skip = 0
 for pat in skippats:
 if fnmatch(filename, pat):
 if verbose > 1:
 print 'Skip pattern', `pat`,
 print 'matches', `filename`
 skip = 1
 break
 if skip:
 continue
 if mode[0] == 'd':
 if verbose > 1:
 print 'Remembering subdirectory', `filename`
 subdirs.append(filename)
 continue
 filesfound.append(filename)
 if info.has_key(filename) and info[filename] == infostuff:
 if verbose > 1:
 print 'Already have this version of',`filename`
 continue
 fullname = os.path.join(localdir, filename)
 tempname = os.path.join(localdir, '@'+filename)
 if interactive:
 doit = askabout('file', filename, pwd)
 if not doit:
 if not info.has_key(filename):
 info[filename] = 'Not retrieved'
 continue
 try:
 os.unlink(tempname)
 except os.error:
 pass
 if mode[0] == 'l':
 if verbose:
 print "Creating symlink %s -> %s" % (
 `filename`, `linkto`)
 try:
 os.symlink(linkto, tempname)
 except IOError, msg:
 print "Can't create %s: %s" % (
 `tempname`, str(msg))
 continue
 else:
 try:
 fp = open(tempname, 'wb')
 except IOError, msg:
 print "Can't create %s: %s" % (
 `tempname`, str(msg))
 continue
 if verbose:
 print 'Retrieving %s from %s as %s...' % \
 (`filename`, `pwd`, `fullname`)
 if verbose:
 fp1 = LoggingFile(fp, 1024, sys.stdout)
 else:
 fp1 = fp
 t0 = time.time()
 try:
 f.retrbinary('RETR ' + filename,
 fp1.write, 8*1024)
 except ftplib.error_perm, msg:
 print msg
 t1 = time.time()
 bytes = fp.tell()
 fp.close()
 if fp1 != fp:
 fp1.close()
 try:
 os.unlink(fullname)
 except os.error:
 pass # Ignore the error
 try:
 os.rename(tempname, fullname)
 except os.error, msg:
 print "Can't rename %s to %s: %s" % (`tempname`,
 `fullname`,
 str(msg))
 continue
 info[filename] = infostuff
 writedict(info, infofilename)
 if verbose and mode[0] != 'l':
 dt = t1 - t0
 kbytes = bytes / 1024.0
 print int(round(kbytes)),
 print 'Kbytes in',
 print int(round(dt)),
 print 'seconds',
 if t1 > t0:
 print '(~%d Kbytes/sec)' % \
 int(round(kbytes/dt),)
 print
 #
 # Remove files from info that are no longer remote
 deletions = 0
 for filename in info.keys():
 if filename not in filesfound:
 if verbose:
 print "Removing obsolete info entry for",
 print `filename`, "in", `localdir or "."`
 del info[filename]
 deletions = deletions + 1
 if deletions:
 writedict(info, infofilename)
 #
 # Remove local files that are no longer in the remote directory
 try:
 if not localdir: names = os.listdir(os.curdir)
 else: names = os.listdir(localdir)
 except os.error:
 names = []
 for name in names:
 if name[0] == '.' or info.has_key(name) or name in subdirs:
 continue
 skip = 0
 for pat in skippats:
 if fnmatch(name, pat):
 if verbose > 1:
 print 'Skip pattern', `pat`,
 print 'matches', `name`
 skip = 1
 break
 if skip:
 continue
 fullname = os.path.join(localdir, name)
 if not rmok:
 if verbose:
 print 'Local file', `fullname`,
 print 'is no longer pertinent'
 continue
 if verbose: print 'Removing local file/dir', `fullname`
 remove(fullname)
 #
 # Recursively mirror subdirectories
 for subdir in subdirs:
 if interactive:
 doit = askabout('subdirectory', subdir, pwd)
 if not doit: continue
 if verbose: print 'Processing subdirectory', `subdir`
 localsubdir = os.path.join(localdir, subdir)
 pwd = f.pwd()
 if verbose > 1:
 print 'Remote directory now:', `pwd`
 print 'Remote cwd', `subdir`
 try:
 f.cwd(subdir)
 except ftplib.error_perm, msg:
 print "Can't chdir to", `subdir`, ":", `msg`
 else:
 if verbose: print 'Mirroring as', `localsubdir`
 mirrorsubdir(f, localsubdir)
 if verbose > 1: print 'Remote cwd ..'
 f.cwd('..')
 newpwd = f.pwd()
 if newpwd != pwd:
 print 'Ended up in wrong directory after cd + cd ..'
 print 'Giving up now.'
 break
 else:
 if verbose > 1: print 'OK.'
# Helper to remove a file or directory tree
def remove(fullname):
 if os.path.isdir(fullname) and not os.path.islink(fullname):
 try:
 names = os.listdir(fullname)
 except os.error:
 names = []
 ok = 1
 for name in names:
 if not remove(os.path.join(fullname, name)):
 ok = 0
 if not ok:
 return 0
 try:
 os.rmdir(fullname)
 except os.error, msg:
 print "Can't remove local directory %s: %s" % \
 (`fullname`, str(msg))
 return 0
 else:
 try:
 os.unlink(fullname)
 except os.error, msg:
 print "Can't remove local file %s: %s" % \
 (`fullname`, str(msg))
 return 0
 return 1
# Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
 def __init__(self, fp, blocksize, outfp):
 self.fp = fp
 self.bytes = 0
 self.hashes = 0
 self.blocksize = blocksize
 self.outfp = outfp
 def write(self, data):
 self.bytes = self.bytes + len(data)
 hashes = int(self.bytes) / self.blocksize
 while hashes > self.hashes:
 self.outfp.write('#')
 self.outfp.flush()
 self.hashes = self.hashes + 1
 self.fp.write(data)
 def close(self):
 self.outfp.write('\n')
# Ask permission to download a file.
def askabout(filetype, filename, pwd):
 prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
 while 1:
 reply = string.lower(string.strip(raw_input(prompt)))
 if reply in ['y', 'ye', 'yes']:
 return 1
 if reply in ['', 'n', 'no', 'nop', 'nope']:
 return 0
 print 'Please answer yes or no.'
# Create a directory if it doesn't exist. Recursively create the
# parent directory as well if needed.
def makedir(pathname):
 if os.path.isdir(pathname):
 return
 dirname = os.path.dirname(pathname)
 if dirname: makedir(dirname)
 os.mkdir(pathname, 0777)
# Write a dictionary to a file in a way that can be read back using
# rval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
 dir, file = os.path.split(filename)
 tempname = os.path.join(dir, '@' + file)
 backup = os.path.join(dir, file + '~')
 try:
 os.unlink(backup)
 except os.error:
 pass
 fp = open(tempname, 'w')
 fp.write('{\n')
 for key, value in dict.items():
 fp.write('%s: %s,\n' % (`key`, `value`))
 fp.write('}\n')
 fp.close()
 try:
 os.rename(filename, backup)
 except os.error:
 pass
 os.rename(tempname, filename)
if __name__ == '__main__':
 main()
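
(The .mirrorinfo file that writedict() produces is just a dict literal
that eval() can read back, one entry per line -- illustrative sketch
with hypothetical listing data, not part of the file:

    {
    'README': ['1024', 'Jan', '1', '2001'],
    'notyet.gz': 'Not retrieved',
    }

infostuff is words[-5:-1] of the LIST line, so any change in size or
date triggers a fresh download.)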
--- NEW FILE: gencodec.py ---
""" Unicode Mapping Parser and Codec Generator.
This script parses Unicode mapping files as available from the Unicode
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
modules from them. The codecs use the standard character mapping codec
to actually apply the mapping.
Synopsis: gencodec.py dir codec_prefix
All files in dir are scanned and those producing non-empty mappings
will be written to <codec_prefix><mapname>.py with <mapname> being the
first part of the map's filename ('a' in a.b.c.txt) converted to
lowercase with hyphens replaced by underscores.
The tool also writes marshalled versions of the mapping tables to the
same location (with .mapping extension).
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright Guido van Rossum, 2000.
"""#"
import string,re,os,time,marshal
# Create numeric tables or character based ones ?
numeric = 1
mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
 '\s+'
 '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
 '\s*'
 '(#.+)?')
def parsecodes(codes,
 split=string.split,atoi=string.atoi,len=len,
 filter=filter,range=range):
 """ Converts code combinations to either a single code integer
 or a tuple of integers.
 meta-codes (in angular brackets, e.g. <LR> and <RL>) are
 ignored.
 Empty codes or illegal ones are returned as None.
 """
 if not codes:
 return None
 l = split(codes,'+')
 if len(l) == 1:
 return atoi(l[0],16)
 for i in range(len(l)):
 try:
 l[i] = atoi(l[i],16)
 except ValueError:
 l[i] = None
 l = filter(lambda x: x is not None, l)
 if len(l) == 1:
 return l[0]
 else:
 return tuple(l)
def readmap(filename,
 strip=string.strip):
 f = open(filename,'r')
 lines = f.readlines()
 f.close()
 enc2uni = {}
 identity = []
 unmapped = range(256)
 for i in range(256):
 unmapped[i] = i
 for line in lines:
 line = strip(line)
 if not line or line[0] == '#':
 continue
 m = mapRE.match(line)
 if not m:
 #print '* not matched: %s' % repr(line)
 continue
 enc,uni,comment = m.groups()
 enc = parsecodes(enc)
 uni = parsecodes(uni)
 if not comment:
 comment = ''
 else:
 comment = comment[1:]
 if enc < 256:
 unmapped.remove(enc)
 if enc == uni:
 identity.append(enc)
 else:
 enc2uni[enc] = (uni,comment)
 else:
 enc2uni[enc] = (uni,comment)
 # If there are more identity-mapped entries than unmapped entries,
 # it pays to generate an identity dictionary first, add add explicit
 # mappings to None for the rest
 if len(identity)>=len(unmapped):
 for enc in unmapped:
 enc2uni[enc] = (None, "")
 enc2uni['IDENTITY'] = 256
 return enc2uni
def hexrepr(t,
 join=string.join):
 if t is None:
 return 'None'
 try:
 len(t)
 except:
 return '0x%04x' % t
 return '(' + join(map(lambda t: '0x%04x' % t, t),', ') + ')'
def unicoderepr(t,
 join=string.join):
 if t is None:
 return 'None'
 if numeric:
 return hexrepr(t)
 else:
 try:
 len(t)
 except:
 return repr(unichr(t))
 return repr(join(map(unichr, t),''))
def keyrepr(t,
 join=string.join):
 if t is None:
 return 'None'
 if numeric:
 return hexrepr(t)
 else:
 try:
 len(t)
 except:
 if t < 256:
 return repr(chr(t))
 else:
 return repr(unichr(t))
 return repr(join(map(chr, t),''))
def codegen(name,map,comments=1):
 """ Returns Python source for the given map.
 Comments are included in the source, if comments is true (default).
 """
 l = [
 '''\
""" Python Character Mapping Codec generated from '%s' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
 def encode(self,input,errors='strict'):
 return codecs.charmap_encode(input,errors,encoding_map)
 def decode(self,input,errors='strict'):
 return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
 pass
class StreamReader(Codec,codecs.StreamReader):
 pass
### encodings module API
def getregentry():
 return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
''' % name,
 ]
 if map.has_key("IDENTITY"):
 l.append("decoding_map = codecs.make_identity_dict(range(%d))"
 % map["IDENTITY"])
 l.append("decoding_map.update({")
 splits = 1
 del map["IDENTITY"]
 else:
 l.append("decoding_map = {")
 splits = 0
 mappings = map.items()
 mappings.sort()
 append = l.append
 i = 0
 for e,value in mappings:
 try:
 (u,c) = value
 except TypeError:
 u = value
 c = ''
 key = keyrepr(e)
 if c and comments:
 append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
 else:
 append('\t%s: %s,' % (key,unicoderepr(u)))
 i += 1
 if i == 4096:
 # Split the definition into parts to that the Python
 # parser doesn't dump core
 if splits == 0:
 append('}')
 else:
 append('})')
 append('decoding_map.update({')
 i = 0
 splits = splits + 1
 if splits == 0:
 append('}')
 else:
 append('})')
 append('''
### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map)
''')
 return string.join(l,'\n')
def pymap(name,map,pyfile,comments=1):
 code = codegen(name,map,comments)
 f = open(pyfile,'w')
 f.write(code)
 f.close()
def marshalmap(name,map,marshalfile):
 d = {}
 for e,(u,c) in map.items():
 d[e] = (u,c)
 f = open(marshalfile,'wb')
 marshal.dump(d,f)
 f.close()
def convertdir(dir,prefix='',comments=1):
 mapnames = os.listdir(dir)
 for mapname in mapnames:
 name = os.path.split(mapname)[1]
 name = string.replace(name,'-','_')
 name = string.split(name, '.')[0]
 name = string.lower(name)
 codefile = name + '.py'
 marshalfile = name + '.mapping'
 print 'converting %s to %s and %s' % (mapname,
 prefix + codefile,
 prefix + marshalfile)
 try:
 map = readmap(os.path.join(dir,mapname))
 if not map:
 print '* map is empty; skipping'
 else:
 pymap(mapname, map, prefix + codefile,comments)
 marshalmap(mapname, map, prefix + marshalfile)
 except ValueError:
 print '* conversion failed'
def rewritepythondir(dir,prefix='',comments=1):
 mapnames = os.listdir(dir)
 for mapname in mapnames:
 if not mapname.endswith('.mapping'):
 continue
 codefile = mapname[:-len('.mapping')] + '.py'
 print 'converting %s to %s' % (mapname,
 prefix + codefile)
 try:
 map = marshal.load(open(os.path.join(dir,mapname),
 'rb'))
 if not map:
 print '* map is empty; skipping'
 else:
 pymap(mapname, map, prefix + codefile,comments)
 except ValueError, why:
 print '* conversion failed: %s' % why
if __name__ == '__main__':
 import sys
 if 1:
 apply(convertdir,tuple(sys.argv[1:]))
 else:
 apply(rewritepythondir,tuple(sys.argv[1:]))
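
(Concretely -- an illustrative mapping-file line and what it turns
into, not part of the file above: a line such as

    0x2A  0x002A  # ASTERISK

is parsed by readmap() into 0x2a -> (0x002a, 'ASTERISK'); since the two
codes are equal it lands in the identity list. A non-identity line like
'0x80 0x20AC # EURO SIGN' instead yields a decoding_map entry of the
form

    0x0080: 0x20ac, # EURO SIGN
)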
--- NEW FILE: h2py.py ---
#! /usr/bin/env python
# Read #define's and translate to Python code.
# Handle #include statements.
# Handle #define macros with one argument.
# Anything that isn't recognized or doesn't translate into valid
# Python is ignored.
# Without filename arguments, acts as a filter.
# If one or more filenames are given, output is written to corresponding
# filenames in the local directory, translated to all uppercase, with
# the extension replaced by ".py".
# By passing one or more options of the form "-i regular_expression"
# you can specify additional strings to be ignored. This is useful
# e.g. to ignore casts to u_long: simply specify "-i '(u_long)'".
# XXX To do:
# - turn trailing C comments into Python comments
# - turn C Boolean operators "&& || !" into Python "and or not"
# - what to do about #if(def)?
# - what to do about macros with multiple parameters?
import sys, regex, regsub, string, getopt, os

p_define = regex.compile('^[\t ]*#[\t ]*define[\t ]+\([a-zA-Z0-9_]+\)[\t ]+')

p_macro = regex.compile(
  '^[\t ]*#[\t ]*define[\t ]+'
  '\([a-zA-Z0-9_]+\)(\([_a-zA-Z][_a-zA-Z0-9]*\))[\t ]+')

p_include = regex.compile('^[\t ]*#[\t ]*include[\t ]+<\([a-zA-Z0-9_/\.]+\)')

p_comment = regex.compile('/\*\([^*]+\|\*+[^/]\)*\(\*+/\)?')
p_cpp_comment = regex.compile('//.*')

ignores = [p_comment, p_cpp_comment]

p_char = regex.compile("'\(\\\\.[^\\\\]*\|[^\\\\]\)'")

filedict = {}

try:
    searchdirs = string.splitfields(os.environ['include'], ';')
except KeyError:
    try:
        searchdirs = string.splitfields(os.environ['INCLUDE'], ';')
    except KeyError:
        try:
            if string.find(sys.platform, "beos") == 0:
                searchdirs = string.splitfields(os.environ['BEINCLUDES'], ';')
            else:
                raise KeyError
        except KeyError:
            searchdirs = ['/usr/include']

def main():
    global filedict
    opts, args = getopt.getopt(sys.argv[1:], 'i:')
    for o, a in opts:
        if o == '-i':
            ignores.append(regex.compile(a))
    if not args:
        args = ['-']
    for filename in args:
        if filename == '-':
            sys.stdout.write('# Generated by h2py from stdin\n')
            process(sys.stdin, sys.stdout)
        else:
            fp = open(filename, 'r')
            outfile = os.path.basename(filename)
            i = string.rfind(outfile, '.')
            if i > 0: outfile = outfile[:i]
            outfile = string.upper(outfile)
            outfile = outfile + '.py'
            outfp = open(outfile, 'w')
            outfp.write('# Generated by h2py from %s\n' % filename)
            filedict = {}
            for dir in searchdirs:
                if filename[:len(dir)] == dir:
                    filedict[filename[len(dir)+1:]] = None # no trailing '/'
                    break
            process(fp, outfp)
            outfp.close()
            fp.close()

def process(fp, outfp, env = {}):
    lineno = 0
    while 1:
        line = fp.readline()
        if not line: break
        lineno = lineno + 1
        n = p_define.match(line)
        if n >= 0:
            # gobble up continuation lines
            while line[-2:] == '\\\n':
                nextline = fp.readline()
                if not nextline: break
                lineno = lineno + 1
                line = line + nextline
            name = p_define.group(1)
            body = line[n:]
            # replace ignored patterns by spaces
            for p in ignores:
                body = regsub.gsub(p, ' ', body)
            # replace char literals by ord(...)
            body = regsub.gsub(p_char, 'ord(\0円)', body)
            stmt = '%s = %s\n' % (name, string.strip(body))
            ok = 0
            try:
                exec stmt in env
            except:
                sys.stderr.write('Skipping: %s' % stmt)
            else:
                outfp.write(stmt)
        n = p_macro.match(line)
        if n >= 0:
            macro, arg = p_macro.group(1, 2)
            body = line[n:]
            for p in ignores:
                body = regsub.gsub(p, ' ', body)
            body = regsub.gsub(p_char, 'ord(\0円)', body)
            stmt = 'def %s(%s): return %s\n' % (macro, arg, body)
            try:
                exec stmt in env
            except:
                sys.stderr.write('Skipping: %s' % stmt)
            else:
                outfp.write(stmt)
        if p_include.match(line) >= 0:
            regs = p_include.regs
            a, b = regs[1]
            filename = line[a:b]
            if not filedict.has_key(filename):
                filedict[filename] = None
                inclfp = None
                for dir in searchdirs:
                    try:
                        inclfp = open(dir + '/' + filename, 'r')
                        break
                    except IOError:
                        pass
                if inclfp:
                    outfp.write(
                        '\n# Included from %s\n' % filename)
                    process(inclfp, outfp, env)
                else:
                    sys.stderr.write('Warning - could not find file %s\n'
                                     % filename)

main()
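
(For example -- illustrative -- given a header containing

    #define BLOCKSIZE 1024
    #define SQUARE(x) ((x)*(x))

h2py writes out

    BLOCKSIZE = 1024
    def SQUARE(x): return ((x)*(x))

Each statement is exec'ed in env first, so a definition that doesn't
survive as Python, say one containing a cast, is skipped with a
warning.)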
--- NEW FILE: ifdef.py ---
#! /usr/bin/env python
# Selectively preprocess #ifdef / #ifndef statements.
# Usage:
# ifdef [-Dname] ... [-Uname] ... [file] ...
#
# This scans the file(s), looking for #ifdef and #ifndef preprocessor
# commands that test for one of the names mentioned in the -D and -U
# options. On standard output it writes a copy of the input file(s)
# minus those code sections that are suppressed by the selected
# combination of defined/undefined symbols. The #if(n)def/#else/#endif
# lines themselves (if the #if(n)def tests for one of the mentioned
# names) are removed as well.

# Features: Arbitrary nesting of recognized and unrecognized
# preprocessor statements works correctly. Unrecognized #if* commands
# are left in place, so it will never remove too much, only too
# little. It does accept whitespace around the '#' character.

# Restrictions: There should be no comments or other symbols on the
# #if(n)def lines. The effect of #define/#undef commands in the input
# file or in included files is not taken into account. Tests using
# #if and the defined() pseudo function are not recognized. The #elif
# command is not recognized. Improper nesting is not detected.
# Lines that look like preprocessor commands but which are actually
# part of comments or string literals will be mistaken for
# preprocessor commands.

import sys
import regex
import getopt
import string

defs = []
undefs = []

def main():
    opts, args = getopt.getopt(sys.argv[1:], 'D:U:')
    for o, a in opts:
        if o == '-D':
            defs.append(a)
        if o == '-U':
            undefs.append(a)
    if not args:
        args = ['-']
    for file in args:
        if file == '-':
            process(sys.stdin, sys.stdout)
        else:
            f = open(file, 'r')
            process(f, sys.stdout)
            f.close()

def process(fpi, fpo):
    keywords = ('if', 'ifdef', 'ifndef', 'else', 'endif')
    ok = 1
    stack = []
    while 1:
        line = fpi.readline()
        if not line: break
        while line[-2:] == '\\\n':
            nextline = fpi.readline()
            if not nextline: break
            line = line + nextline
        tmp = string.strip(line)
        if tmp[:1] != '#':
            if ok: fpo.write(line)
            continue
        tmp = string.strip(tmp[1:])
        words = string.split(tmp)
        keyword = words[0]
        if keyword not in keywords:
            if ok: fpo.write(line)
            continue
        if keyword in ('ifdef', 'ifndef') and len(words) == 2:
            if keyword == 'ifdef':
                ko = 1
            else:
                ko = 0
            word = words[1]
            if word in defs:
                stack.append((ok, ko, word))
                if not ko: ok = 0
            elif word in undefs:
                stack.append((ok, not ko, word))
                if ko: ok = 0
            else:
                stack.append((ok, -1, word))
                if ok: fpo.write(line)
        elif keyword == 'if':
            stack.append((ok, -1, ''))
            if ok: fpo.write(line)
        elif keyword == 'else' and stack:
            s_ok, s_ko, s_word = stack[-1]
            if s_ko < 0:
                if ok: fpo.write(line)
            else:
                s_ko = not s_ko
                ok = s_ok
                if not s_ko: ok = 0
                stack[-1] = s_ok, s_ko, s_word
        elif keyword == 'endif' and stack:
            s_ok, s_ko, s_word = stack[-1]
            if s_ko < 0:
                if ok: fpo.write(line)
            del stack[-1]
            ok = s_ok
        else:
            sys.stderr.write('Unknown keyword %s\n' % keyword)
    if stack:
        sys.stderr.write('stack: %s\n' % stack)

main()
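
(A small worked example -- illustrative only: with -DUNIX, the input

    #ifdef UNIX
    unix code
    #else
    other code
    #endif
    #ifdef WINDOWS
    win code
    #endif

comes out as just 'unix code' plus the untouched WINDOWS block, since
WINDOWS was mentioned in neither -D nor -U.)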
--- NEW FILE: lfcr.py ---
#! /usr/bin/env python
"Replace LF with CRLF in argument files. Print names of changed files."
import sys, re, os
for file in sys.argv[1:]:
 if os.path.isdir(file):
 print file, "Directory!"
 continue
 data = open(file, "rb").read()
 if '0円' in data:
 print file, "Binary!"
 continue
 newdata = re.sub("\r?\n", "\r\n", data)
 if newdata != data:
 print file
 f = open(file, "wb")
 f.write(newdata)
 f.close()
--- NEW FILE: linktree.py ---
#! /usr/bin/env python
# linktree
#
# Make a copy of a directory tree with symbolic links to all files in the
# original tree.
# All symbolic links go to a special symbolic link at the top, so you
# can easily fix things if the original source tree moves.
# See also "mkreal".
#
# usage: mklinks oldtree newtree
import sys, os

LINK = '.LINK' # Name of special symlink at the top.

debug = 0

def main():
    if not 3 <= len(sys.argv) <= 4:
        print 'usage:', sys.argv[0], 'oldtree newtree [linkto]'
        return 2
    oldtree, newtree = sys.argv[1], sys.argv[2]
    if len(sys.argv) > 3:
        link = sys.argv[3]
        link_may_fail = 1
    else:
        link = LINK
        link_may_fail = 0
    if not os.path.isdir(oldtree):
        print oldtree + ': not a directory'
        return 1
    try:
        os.mkdir(newtree, 0777)
    except os.error, msg:
        print newtree + ': cannot mkdir:', msg
        return 1
    linkname = os.path.join(newtree, link)
    try:
        os.symlink(os.path.join(os.pardir, oldtree), linkname)
    except os.error, msg:
        if not link_may_fail:
            print linkname + ': cannot symlink:', msg
            return 1
        else:
            print linkname + ': warning: cannot symlink:', msg
    linknames(oldtree, newtree, link)
    return 0

def linknames(old, new, link):
    if debug: print 'linknames', (old, new, link)
    try:
        names = os.listdir(old)
    except os.error, msg:
        print old + ': warning: cannot listdir:', msg
        return
    for name in names:
        if name not in (os.curdir, os.pardir):
            oldname = os.path.join(old, name)
            linkname = os.path.join(link, name)
            newname = os.path.join(new, name)
            if debug > 1: print oldname, newname, linkname
            if os.path.isdir(oldname) and \
               not os.path.islink(oldname):
                try:
                    os.mkdir(newname, 0777)
                    ok = 1
                except:
                    print newname + \
                          ': warning: cannot mkdir:', msg
                    ok = 0
                if ok:
                    linkname = os.path.join(os.pardir,
                                            linkname)
                    linknames(oldname, newname, linkname)
            else:
                os.symlink(linkname, newname)

sys.exit(main())
--- NEW FILE: lll.py ---
#! /usr/bin/env python
# Find symbolic links and show where they point to.
# Arguments are directories to search; default is current directory.
# No recursion.
# (This is a totally different program from "findsymlinks.py"!)
import sys, os

def lll(dirname):
    for name in os.listdir(dirname):
        if name not in (os.curdir, os.pardir):
            full = os.path.join(dirname, name)
            if os.path.islink(full):
                print name, '->', os.readlink(full)

args = sys.argv[1:]
if not args: args = [os.curdir]
first = 1
for arg in args:
    if len(args) > 1:
        if not first: print
        first = 0
        print arg + ':'
    lll(arg)
--- NEW FILE: logmerge.py ---
#! /usr/bin/env python
"""Consolidate a bunch of CVS or RCS logs read from stdin.
Input should be the output of a CVS or RCS logging command, e.g.
 cvs log -rrelease14:
which dumps all log messages from release1.4 upwards (assuming that
release 1.4 was tagged with tag 'release14'). Note the trailing
colon!
This collects all the revision records and outputs them sorted by date
rather than by file, collapsing duplicate revision record, i.e.,
records with the same message for different files.
The -t option causes it to truncate (discard) the last revision log
entry; this is useful when using something like the above cvs log
command, which shows the revisions including the given tag, while you
probably want everything *since* that tag.
XXX This code was created by reverse engineering CVS 1.9 and RCS 5.7
from their output.
"""
import os, sys, getopt, string, re
sep1 = '='*77 + '\n' # file separator
sep2 = '-'*28 + '\n' # revision separator
def main():
 """Main program"""
 truncate_last = 0
 reverse = 0
 opts, args = getopt.getopt(sys.argv[1:], "tr")
 for o, a in opts:
 if o == '-t':
 truncate_last = 1
 elif o == '-r':
 reverse = 1
 database = []
 while 1:
 chunk = read_chunk(sys.stdin)
 if not chunk:
 break
 records = digest_chunk(chunk)
 if truncate_last:
 del records[-1]
 database[len(database):] = records
 database.sort()
 if not reverse:
 database.reverse()
 format_output(database)
def read_chunk(fp):
 """Read a chunk -- data for one file, ending with sep1.
 Split the chunk in parts separated by sep2.
 """
 chunk = []
 lines = []
 while 1:
 line = fp.readline()
 if not line:
 break
 if line == sep1:
 if lines:
 chunk.append(lines)
 break
 if line == sep2:
 if lines:
 chunk.append(lines)
 lines = []
 else:
 lines.append(line)
 return chunk
def digest_chunk(chunk):
 """Digest a chunk -- extrach working file name and revisions"""
 lines = chunk[0]
 key = 'Working file:'
 keylen = len(key)
 for line in lines:
 if line[:keylen] == key:
 working_file = string.strip(line[keylen:])
 break
 else:
 working_file = None
 records = []
 for lines in chunk[1:]:
 revline = lines[0]
 dateline = lines[1]
 text = lines[2:]
 words = string.split(dateline)
 author = None
 if len(words) >= 3 and words[0] == 'date:':
 dateword = words[1]
 timeword = words[2]
 if timeword[-1:] == ';':
 timeword = timeword[:-1]
 date = dateword + ' ' + timeword
 if len(words) >= 5 and words[3] == 'author:':
 author = words[4]
 if author[-1:] == ';':
 author = author[:-1]
 else:
 date = None
 text.insert(0, revline)
 words = string.split(revline)
 if len(words) >= 2 and words[0] == 'revision':
 rev = words[1]
 else:
 rev = None
 text.insert(0, revline)
 records.append((date, working_file, rev, author, text))
 return records
def format_output(database):
 prevtext = None
 prev = []
 database.append((None, None, None, None, None)) # Sentinel
 for (date, working_file, rev, author, text) in database:
 if text != prevtext:
 if prev:
 print sep2,
 for (p_date, p_working_file, p_rev, p_author) in prev:
 print p_date, p_author, p_working_file, p_rev
 sys.stdout.writelines(prevtext)
 prev = []
 prev.append((date, working_file, rev, author))
 prevtext = text
main()
--- NEW FILE: mailerdaemon.py ---
"""mailerdaemon - classes to parse mailer-daemon messages"""
import string
import rfc822
import calendar
import re
import os
import sys
Unparseable = 'mailerdaemon.Unparseable'
class ErrorMessage(rfc822.Message):
 def __init__(self, fp):
 rfc822.Message.__init__(self, fp)
 self.sub = ''
 def is_warning(self):
 sub = self.getheader('Subject')
 if not sub:
 return 0
 sub = string.lower(sub)
 if sub[:12] == 'waiting mail': return 1
 if string.find(sub, 'warning') >= 0: return 1
 self.sub = sub
 return 0
 def get_errors(self):
 for p in EMPARSERS:
 self.rewindbody()
 try:
 return p(self.fp, self.sub)
 except Unparseable:
 pass
 raise Unparseable
# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address. The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's. The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses. The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
emparse_list_list = [
 'error: (?P<reason>unresolvable): (?P<email>.+)',
 ('----- The following addresses had permanent fatal errors -----\n',
 '(?P<email>[^ \n].*)\n( .*\n)?'),
 'remote execution.*\n.*rmail (?P<email>.+)',
 ('The following recipients did not receive your message:\n\n',
 ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
 '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
 '^<(?P<email>.*)>:\n(?P<reason>.*)',
 '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
 '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
 '^Original-Recipient: rfc822;(?P<email>.*)',
 '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
 '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
 '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
 '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
 '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
 ]
# compile the re's in the list and store them in-place.
for i in range(len(emparse_list_list)):
 x = emparse_list_list[i]
 if type(x) is type(''):
 x = re.compile(x, re.MULTILINE)
 else:
 xl = []
 for x in x:
 xl.append(re.compile(x, re.MULTILINE))
 x = tuple(xl)
 del xl
 emparse_list_list[i] = x
 del x
del i
# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order. After the first match,
# no more expressions are searched for. So, order is important.
emparse_list_reason = [
 r'^5\d{2} <>\.\.\. (?P<reason>.*)',
 '<>\.\.\. (?P<reason>.*)',
 re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
 re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
 re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
 ]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
 data = fp.read()
 res = emparse_list_from.search(data)
 if res is None:
 from_index = len(data)
 else:
 from_index = res.start(0)
 errors = []
 emails = []
 reason = None
 for regexp in emparse_list_list:
 if type(regexp) is type(()):
 res = regexp[0].search(data, 0, from_index)
 if res is not None:
 try:
 reason = res.group('reason')
 except IndexError:
 pass
 while 1:
 res = regexp[1].match(data, res.end(0), from_index)
 if res is None:
 break
 emails.append(res.group('email'))
 break
 else:
 res = regexp.search(data, 0, from_index)
 if res is not None:
 emails.append(res.group('email'))
 try:
 reason = res.group('reason')
 except IndexError:
 pass
 break
 if not emails:
 raise Unparseable
 if not reason:
 reason = sub
 if reason[:15] == 'returned mail: ':
 reason = reason[15:]
 for regexp in emparse_list_reason:
 if type(regexp) is type(''):
 for i in range(len(emails)-1,-1,-1):
 email = emails[i]
 exp = re.compile(string.join(string.split(regexp, '<>'), re.escape(email)), re.MULTILINE)
 res = exp.search(data)
 if res is not None:
 errors.append(string.join(string.split(string.strip(email)+': '+res.group('reason'))))
 del emails[i]
 continue
 res = regexp.search(data)
 if res is not None:
 reason = res.group('reason')
 break
 for email in emails:
 errors.append(string.join(string.split(string.strip(email)+': '+reason)))
 return errors
EMPARSERS = [emparse_list, ]
def sort_numeric(a, b):
 a = string.atoi(a)
 b = string.atoi(b)
 if a < b: return -1
 elif a > b: return 1
 else: return 0
def parsedir(dir, modify):
 os.chdir(dir)
 pat = re.compile('^[0-9]*$')
 errordict = {}
 errorfirst = {}
 errorlast = {}
 nok = nwarn = nbad = 0
 # find all numeric file names and sort them
 files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
 files.sort(sort_numeric)
 for fn in files:
 # Lets try to parse the file.
 fp = open(fn)
 m = ErrorMessage(fp)
 sender = m.getaddr('From')
 print '%s\t%-40s\t'%(fn, sender[1]),
 if m.is_warning():
 fp.close()
 print 'warning only'
 nwarn = nwarn + 1
 if modify:
 os.rename(fn, ','+fn)
## os.unlink(fn)
 continue
 try:
 errors = m.get_errors()
 except Unparseable:
 print '** Not parseable'
 nbad = nbad + 1
 fp.close()
 continue
 print len(errors), 'errors'
 # Remember them
 for e in errors:
 try:
 mm, dd = m.getdate('date')[1:1+2]
 date = '%s %02d' % (calendar.month_abbr[mm], dd)
 except:
 date = '??????'
 if not errordict.has_key(e):
 errordict[e] = 1
 errorfirst[e] = '%s (%s)' % (fn, date)
 else:
 errordict[e] = errordict[e] + 1
 errorlast[e] = '%s (%s)' % (fn, date)
 fp.close()
 nok = nok + 1
 if modify:
 os.rename(fn, ','+fn)
## os.unlink(fn)
 print '--------------'
 print nok, 'files parsed,',nwarn,'files warning-only,',
 print nbad,'files unparseable'
 print '--------------'
 list = []
 for e in errordict.keys():
 list.append((errordict[e], errorfirst[e], errorlast[e], e))
 list.sort()
 for num, first, last, e in list:
 print '%d %s - %s\t%s' % (num, first, last, e)
def main():
 modify = 0
 if len(sys.argv) > 1 and sys.argv[1] == '-d':
 modify = 1
 del sys.argv[1]
 if len(sys.argv) > 1:
 for folder in sys.argv[1:]:
 parsedir(folder, modify)
 else:
 parsedir('/ufs/jack/Mail/errorsinbox', modify)
if __name__ == '__main__' or sys.argv[0] == __name__:
 main()
--- NEW FILE: md5sum.py ---
#! /usr/bin/env python
"""Python utility to print MD5 checksums of argument files.
Works with Python 1.5.2 and later.
"""
import sys, md5
BLOCKSIZE = 1024*1024
def hexify(s):
 return ("%02x"*len(s)) % tuple(map(ord, s))
def main():
 args = sys.argv[1:]
 if not args:
 sys.stderr.write("usage: %s file ...\n" % sys.argv[0])
 sys.exit(2)
 for file in sys.argv[1:]:
 f = open(file, "rb")
 sum = md5.new()
 while 1:
 block = f.read(BLOCKSIZE)
 if not block:
 break
 sum.update(block)
 f.close()
 print hexify(sum.digest()), file
if __name__ == "__main__":
 main()
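
(hexify() just spells each byte as two hex digits -- illustrative
example, not part of the file:

    >>> hexify('\x00\xabM')
    '00ab4d'

since "%02x"*3 gives '%02x%02x%02x', applied to the ord() of each
byte.)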
--- NEW FILE: methfix.py ---
#! /usr/bin/env python
# Fix Python source files to avoid using
# def method(self, (arg1, ..., argn)):
# instead of the more rational
# def method(self, arg1, ..., argn):
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments). Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
# It complains about binaries (files containing null bytes)
# and about files that are ostensibly not Python files: if the first
# line starts with '#!' and does not contain the string 'python'.
#
# Changes made are reported to stdout in a diff-like format.
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions. Also note several subtleties like
# preserving the file's mode and avoiding even writing a temp file
# when no changes are needed for a file.
#
# NB: by changing only the function fixline() you can turn this
# into a program for a different change to Python programs...
import sys
import regex
import os
from stat import *
import string
err = sys.stderr.write
dbg = err
rep = sys.stdout.write
def main():
 bad = 0
 if not sys.argv[1:]: # No arguments
 err('usage: ' + sys.argv[0] + ' file-or-directory ...\n')
 sys.exit(2)
 for arg in sys.argv[1:]:
 if os.path.isdir(arg):
 if recursedown(arg): bad = 1
 elif os.path.islink(arg):
 err(arg + ': will not process symbolic links\n')
 bad = 1
 else:
 if fix(arg): bad = 1
 sys.exit(bad)
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
 return ispythonprog.match(name) >= 0
def recursedown(dirname):
 dbg('recursedown(' + `dirname` + ')\n')
 bad = 0
 try:
 names = os.listdir(dirname)
 except os.error, msg:
 err(dirname + ': cannot list directory: ' + `msg` + '\n')
 return 1
 names.sort()
 subdirs = []
 for name in names:
 if name in (os.curdir, os.pardir): continue
 fullname = os.path.join(dirname, name)
 if os.path.islink(fullname): pass
 elif os.path.isdir(fullname):
 subdirs.append(fullname)
 elif ispython(name):
 if fix(fullname): bad = 1
 for fullname in subdirs:
 if recursedown(fullname): bad = 1
 return bad
def fix(filename):
## dbg('fix(' + `filename` + ')\n')
 try:
 f = open(filename, 'r')
 except IOError, msg:
 err(filename + ': cannot open: ' + `msg` + '\n')
 return 1
 head, tail = os.path.split(filename)
 tempname = os.path.join(head, '@' + tail)
 g = None
 # If we find a match, we rewind the file and start over but
 # now copy everything to a temp file.
 lineno = 0
 while 1:
 line = f.readline()
 if not line: break
 lineno = lineno + 1
 if g is None and '\0' in line:
 # Check for binary files
 err(filename + ': contains null bytes; not fixed\n')
 f.close()
 return 1
 if lineno == 1 and g is None and line[:2] == '#!':
 # Check for non-Python scripts
 words = string.split(line[2:])
 if words and regex.search('[pP]ython', words[0]) < 0:
 msg = filename + ': ' + words[0]
 msg = msg + ' script; not fixed\n'
 err(msg)
 f.close()
 return 1
 while line[-2:] == '\\\n':
 nextline = f.readline()
 if not nextline: break
 line = line + nextline
 lineno = lineno + 1
 newline = fixline(line)
 if newline != line:
 if g is None:
 try:
 g = open(tempname, 'w')
 except IOError, msg:
 f.close()
 err(tempname+': cannot create: '+\
 `msg`+'\n')
 return 1
 f.seek(0)
 lineno = 0
 rep(filename + ':\n')
 continue # restart from the beginning
 rep(`lineno` + '\n')
 rep('< ' + line)
 rep('> ' + newline)
 if g is not None:
 g.write(newline)
 # End of file
 f.close()
 if not g: return 0 # No changes
 # Finishing touch -- move files
 # First copy the file's mode to the temp file
 try:
 statbuf = os.stat(filename)
 os.chmod(tempname, statbuf[ST_MODE] & 07777)
 except os.error, msg:
 err(tempname + ': warning: chmod failed (' + `msg` + ')\n')
 # Then make a backup of the original file as filename~
 try:
 os.rename(filename, filename + '~')
 except os.error, msg:
 err(filename + ': warning: backup failed (' + `msg` + ')\n')
 # Now move the temp file to the original file
 try:
 os.rename(tempname, filename)
 except os.error, msg:
 err(filename + ': rename failed (' + `msg` + ')\n')
 return 1
 # Return success
 return 0
fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *\(( *\(.*\) *)\) *) *:'
fixprog = regex.compile(fixpat)
def fixline(line):
 if fixprog.match(line) >= 0:
 (a, b), (c, d) = fixprog.regs[1:3]
 line = line[:a] + line[c:d] + line[b:]
 return line
main()
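A quick illustration (hypothetical, not part of the checked-in file) of the
rewrite fixline() performs; methfix runs main() on import, so the regexp is
repeated standalone here:
import regex
fixprog = regex.compile(
    '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *\(( *\(.*\) *)\) *) *:')
line = '\tdef method(self, (arg1, arg2)):\n'
if fixprog.match(line) >= 0:
    (a, b), (c, d) = fixprog.regs[1:3]   # spans of (argtuple) and argtuple
    line = line[:a] + line[c:d] + line[b:]
print line,   # prints (leading tab kept): def method(self, arg1, arg2):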
--- NEW FILE: mkreal.py ---
#! /usr/bin/env python
# mkreal
#
# turn a symlink to a directory into a real directory
import sys
import os
from stat import *
join = os.path.join
error = 'mkreal error'
BUFSIZE = 32*1024
def mkrealfile(name):
 st = os.stat(name) # Get the mode
 mode = S_IMODE(st[ST_MODE])
 linkto = os.readlink(name) # Make sure again it's a symlink
 f_in = open(name, 'r') # This ensures it's a file
 os.unlink(name)
 f_out = open(name, 'w')
 while 1:
 buf = f_in.read(BUFSIZE)
 if not buf: break
 f_out.write(buf)
 del f_out # Flush data to disk before changing mode
 os.chmod(name, mode)
def mkrealdir(name):
 st = os.stat(name) # Get the mode
 mode = S_IMODE(st[ST_MODE])
 linkto = os.readlink(name)
 files = os.listdir(name)
 os.unlink(name)
 os.mkdir(name, mode)
 os.chmod(name, mode)
 linkto = join(os.pardir, linkto)
 #
 for file in files:
 if file not in (os.curdir, os.pardir):
 os.symlink(join(linkto, file), join(name, file))
def main():
 sys.stdout = sys.stderr
 progname = os.path.basename(sys.argv[0])
 if progname == '-c': progname = 'mkreal'
 args = sys.argv[1:]
 if not args:
 print 'usage:', progname, 'path ...'
 sys.exit(2)
 status = 0
 for name in args:
 if not os.path.islink(name):
 print progname+':', name+':', 'not a symlink'
 status = 1
 else:
 if os.path.isdir(name):
 mkrealdir(name)
 else:
 mkrealfile(name)
 sys.exit(status)
main()
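A quick illustration (hypothetical, not part of the checked-in file) of the
path arithmetic in mkrealdir(): os.readlink() is relative to the symlink's
own directory, so entries created inside the new real directory need an
extra os.pardir hop to reach the old target:
import os
linkto = 'real/target'                    # what os.readlink() might return
linkto = os.path.join(os.pardir, linkto)  # entries live one level deeper
print os.path.join(linkto, 'file.txt')    # ../real/target/file.txt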
--- NEW FILE: ndiff.py ---
#! /usr/bin/env python
# Module ndiff version 1.6.0
# Released to the public domain 08-Dec-2000,
# by Tim Peters (tim.one@home.com).
# Provided as-is; use at your own risk; no warranty; no promises; enjoy!
"""ndiff [-q] file1 file2
 or
ndiff (-r1 | -r2) < ndiff_output > file1_or_file2
Print a human-friendly file difference report to stdout. Both inter-
and intra-line differences are noted. In the second form, recreate file1
(-r1) or file2 (-r2) on stdout, from an ndiff report on stdin.
In the first form, if -q ("quiet") is not specified, the first two lines
of output are
-: file1
+: file2
Each remaining line begins with a two-letter code:
 "- " line unique to file1
 "+ " line unique to file2
 " " line common to both files
 "? " line not present in either input file
Lines beginning with "? " attempt to guide the eye to intraline
differences, and were not present in either input file. These lines can be
confusing if the source files contain tab characters.
The first file can be recovered by retaining only lines that begin with
" " or "- ", and deleting those 2-character prefixes; use ndiff with -r1.
The second file can be recovered similarly, but by retaining only " " and
"+ " lines; use ndiff with -r2; or, on Unix, the second file can be
recovered by piping the output through
 sed -n '/^[+ ] /s/^..//p'
See module comments for details and programmatic interface.
"""
__version__ = 1, 6, 0
# SequenceMatcher tries to compute a "human-friendly diff" between
# two sequences (chiefly picturing a file as a sequence of lines,
# and a line as a sequence of characters, here). Unlike e.g. UNIX(tm)
# diff, the fundamental notion is the longest *contiguous* & junk-free
# matching subsequence. That's what catches people's eyes. The
# Windows(tm) windiff has another interesting notion, pairing up elements
# that appear uniquely in each sequence. That, and the method here,
# appear to yield more intuitive difference reports than does diff. This
# method appears to be the least vulnerable to synching up on blocks
# of "junk lines", though (like blank lines in ordinary text files,
# or maybe "<P>" lines in HTML files). That may be because this is
# the only method of the 3 that has a *concept* of "junk" <wink>.
#
# Note that ndiff makes no claim to produce a *minimal* diff. To the
# contrary, minimal diffs are often counter-intuitive, because they
# synch up anywhere possible, sometimes on accidental matches 100 pages
# apart. Restricting synch points to contiguous matches preserves some
# notion of locality, at the occasional cost of producing a longer diff.
#
# With respect to junk, an earlier version of ndiff simply refused to
# *start* a match with a junk element. The result was cases like this:
# before: private Thread currentThread;
# after: private volatile Thread currentThread;
# If you consider whitespace to be junk, the longest contiguous match
# not starting with junk is "e Thread currentThread". So ndiff reported
# that "e volatil" was inserted between the 't' and the 'e' in "private".
# While an accurate view, to people that's absurd. The current version
# looks for matching blocks that are entirely junk-free, then extends the
# longest one of those as far as possible but only with matching junk.
# So now "currentThread" is matched, then extended to suck up the
# preceding blank; then "private" is matched, and extended to suck up the
# following blank; then "Thread" is matched; and finally ndiff reports
# that "volatile " was inserted before "Thread". The only quibble
# remaining is that perhaps it was really the case that " volatile"
# was inserted after "private". I can live with that <wink>.
#
# NOTE on junk: the module-level names
# IS_LINE_JUNK
# IS_CHARACTER_JUNK
# can be set to any functions you like. The first one should accept
# a single string argument, and return true iff the string is junk.
# The default is whether the regexp r"\s*#?\s*$" matches (i.e., a
# line without visible characters, except for at most one splat).
# The second should accept a single character (a string of length 1)
# and likewise return true iff it is junk. The default is
# whether the character is a blank or tab (note: bad idea to include
# newline in this!).
#
# After setting those, you can call fcompare(f1name, f2name) with the
# names of the files you want to compare. The difference report
# is sent to stdout. Or you can call main(args), passing what would
# have been in sys.argv[1:] had the cmd-line form been used.
from difflib import SequenceMatcher
import string
TRACE = 0
# define what "junk" means
import re
def IS_LINE_JUNK(line, pat=re.compile(r"\s*#?\s*$").match):
 return pat(line) is not None
def IS_CHARACTER_JUNK(ch, ws=" \t"):
 return ch in ws
del re
# meant for dumping lines
def dump(tag, x, lo, hi):
 for i in xrange(lo, hi):
 print tag, x[i],
def plain_replace(a, alo, ahi, b, blo, bhi):
 assert alo < ahi and blo < bhi
 # dump the shorter block first -- reduces the burden on short-term
 # memory if the blocks are of very different sizes
 if bhi - blo < ahi - alo:
 dump('+', b, blo, bhi)
 dump('-', a, alo, ahi)
 else:
 dump('-', a, alo, ahi)
 dump('+', b, blo, bhi)
# When replacing one block of lines with another, this guy searches
# the blocks for *similar* lines; the best-matching pair (if any) is
# used as a synch point, and intraline difference marking is done on
# the similar pair. Lots of work, but often worth it.
def fancy_replace(a, alo, ahi, b, blo, bhi):
 if TRACE:
 print '*** fancy_replace', alo, ahi, blo, bhi
 dump('>', a, alo, ahi)
 dump('<', b, blo, bhi)
 # don't synch up unless the lines have a similarity score of at
 # least cutoff; best_ratio tracks the best score seen so far
 best_ratio, cutoff = 0.74, 0.75
 cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
 eqi, eqj = None, None # 1st indices of equal lines (if any)
 # search for the pair that matches best without being identical
 # (identical lines must be junk lines, & we don't want to synch up
 # on junk -- unless we have to)
 for j in xrange(blo, bhi):
 bj = b[j]
 cruncher.set_seq2(bj)
 for i in xrange(alo, ahi):
 ai = a[i]
 if ai == bj:
 if eqi is None:
 eqi, eqj = i, j
 continue
 cruncher.set_seq1(ai)
 # computing similarity is expensive, so use the quick
 # upper bounds first -- have seen this speed up messy
 # compares by a factor of 3.
 # note that ratio() is only expensive to compute the first
 # time it's called on a sequence pair; the expensive part
 # of the computation is cached by cruncher
 if cruncher.real_quick_ratio() > best_ratio and \
 cruncher.quick_ratio() > best_ratio and \
 cruncher.ratio() > best_ratio:
 best_ratio, best_i, best_j = cruncher.ratio(), i, j
 if best_ratio < cutoff:
 # no non-identical "pretty close" pair
 if eqi is None:
 # no identical pair either -- treat it as a straight replace
 plain_replace(a, alo, ahi, b, blo, bhi)
 return
 # no close pair, but an identical pair -- synch up on that
 best_i, best_j, best_ratio = eqi, eqj, 1.0
 else:
 # there's a close pair, so forget the identical pair (if any)
 eqi = None
 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
 # identical
 if TRACE:
 print '*** best_ratio', best_ratio, best_i, best_j
 dump('>', a, best_i, best_i+1)
 dump('<', b, best_j, best_j+1)
 # pump out diffs from before the synch point
 fancy_helper(a, alo, best_i, b, blo, best_j)
 # do intraline marking on the synch pair
 aelt, belt = a[best_i], b[best_j]
 if eqi is None:
 # pump out a '-', '?', '+', '?' quad for the synched lines
 atags = btags = ""
 cruncher.set_seqs(aelt, belt)
 for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
 la, lb = ai2 - ai1, bj2 - bj1
 if tag == 'replace':
 atags += '^' * la
 btags += '^' * lb
 elif tag == 'delete':
 atags += '-' * la
 elif tag == 'insert':
 btags += '+' * lb
 elif tag == 'equal':
 atags += ' ' * la
 btags += ' ' * lb
 else:
 raise ValueError, 'unknown tag ' + `tag`
 printq(aelt, belt, atags, btags)
 else:
 # the synch pair is identical
 print ' ', aelt,
 # pump out diffs from after the synch point
 fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
def fancy_helper(a, alo, ahi, b, blo, bhi):
 if alo < ahi:
 if blo < bhi:
 fancy_replace(a, alo, ahi, b, blo, bhi)
 else:
 dump('-', a, alo, ahi)
 elif blo < bhi:
 dump('+', b, blo, bhi)
# Crap to deal with leading tabs in "?" output. Can hurt, but will
# probably help most of the time.
def printq(aline, bline, atags, btags):
 common = min(count_leading(aline, "\t"),
 count_leading(bline, "\t"))
 common = min(common, count_leading(atags[:common], " "))
 print "-", aline,
 if count_leading(atags, " ") < len(atags):
 print "?", "\t" * common + atags[common:]
 print "+", bline,
 if count_leading(btags, " ") < len(btags):
 print "?", "\t" * common + btags[common:]
def count_leading(line, ch):
 i, n = 0, len(line)
 while i < n and line[i] == ch:
 i += 1
 return i
def fail(msg):
 import sys
 out = sys.stderr.write
 out(msg + "\n\n")
 out(__doc__)
 return 0
# open a file & return the file object; gripe and return 0 if it
# couldn't be opened
def fopen(fname):
 try:
 return open(fname, 'r')
 except IOError, detail:
 return fail("couldn't open " + fname + ": " + str(detail))
# open two files & spray the diff to stdout; return false iff a problem
def fcompare(f1name, f2name):
 f1 = fopen(f1name)
 f2 = fopen(f2name)
 if not f1 or not f2:
 return 0
 a = f1.readlines(); f1.close()
 b = f2.readlines(); f2.close()
 cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
 for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
 if tag == 'replace':
 fancy_replace(a, alo, ahi, b, blo, bhi)
 elif tag == 'delete':
 dump('-', a, alo, ahi)
 elif tag == 'insert':
 dump('+', b, blo, bhi)
 elif tag == 'equal':
 dump(' ', a, alo, ahi)
 else:
 raise ValueError, 'unknown tag ' + `tag`
 return 1
# crack args (sys.argv[1:] is normal) & compare;
# return false iff a problem
def main(args):
 import getopt
 try:
 opts, args = getopt.getopt(args, "qr:")
 except getopt.error, detail:
 return fail(str(detail))
 noisy = 1
 qseen = rseen = 0
 for opt, val in opts:
 if opt == "-q":
 qseen = 1
 noisy = 0
 elif opt == "-r":
 rseen = 1
 whichfile = val
 if qseen and rseen:
 return fail("can't specify both -q and -r")
 if rseen:
 if args:
 return fail("no args allowed with -r option")
 if whichfile in "12":
 restore(whichfile)
 return 1
 return fail("-r value must be 1 or 2")
 if len(args) != 2:
 return fail("need 2 filename args")
 f1name, f2name = args
 if noisy:
 print '-:', f1name
 print '+:', f2name
 return fcompare(f1name, f2name)
def restore(which):
 import sys
 tag = {"1": "- ", "2": "+ "}[which]
 prefixes = (" ", tag)
 for line in sys.stdin.readlines():
 if line[:2] in prefixes:
 print line[2:],
if __name__ == '__main__':
 import sys
 args = sys.argv[1:]
 if "-profile" in args:
 import profile, pstats
 args.remove("-profile")
 statf = "ndiff.pro"
 profile.run("main(args)", statf)
 stats = pstats.Stats(statf)
 stats.strip_dirs().sort_stats('time').print_stats()
 else:
 main(args)
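A quick illustration (hypothetical line lists, not part of the checked-in
file) of the opcodes that drive fcompare(), assuming the script is
importable as ndiff.py:
import ndiff
from difflib import SequenceMatcher
a = ['one\n', 'two\n', 'three\n']
b = ['one\n', 'tree\n', 'three\n']
cruncher = SequenceMatcher(ndiff.IS_LINE_JUNK, a, b)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
    print tag, a[alo:ahi], b[blo:bhi]
# 'equal' runs are dumped with ' ', 'delete' with '-', 'insert' with '+',
# and 'replace' runs go through fancy_replace() for intraline marking.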
--- NEW FILE: nm2def.py ---
#! /usr/bin/env python
"""nm2def.py
Helpers to extract symbols from Unix libs and auto-generate
Windows definition files from them. Depends on nm(1). Tested
on Linux and Solaris only (-p option to nm is for Solaris only).
By Marc-Andre Lemburg, Aug 1998.
Additional notes: the output of nm is supposed to look like this:
acceler.o:
000001fd T PyGrammar_AddAccelerators
 U PyGrammar_FindDFA
00000237 T PyGrammar_RemoveAccelerators
 U _IO_stderr_
 U exit
 U fprintf
 U free
 U malloc
 U printf
grammar1.o:
00000000 T PyGrammar_FindDFA
00000034 T PyGrammar_LabelRepr
 U _PyParser_TokenNames
 U abort
 U printf
 U sprintf
...
Even if this isn't the default output of your nm, there is generally an
option to produce this format (since it is the original v7 Unix format).
"""
import os,re,string,sys
PYTHONLIB = 'libpython'+sys.version[:3]+'.a'
PC_PYTHONLIB = 'Python'+sys.version[0]+sys.version[2]+'.dll'
NM = 'nm -p -g %s' # For Linux, use "nm -g %s"
def symbols(lib=PYTHONLIB,types=('T','C','D')):
 lines = os.popen(NM % lib).readlines()
 lines = map(string.strip,lines)
 symbols = {}
 for line in lines:
 if len(line) == 0 or ':' in line:
 continue
 items = string.split(line)
 if len(items) != 3:
 continue
 address, type, name = items
 if type not in types:
 continue
 symbols[name] = address,type
 return symbols
def export_list(symbols):
 data = []
 code = []
 for name,(addr,type) in symbols.items():
 if type in ('C','D'):
 data.append('\t'+name)
 else:
 code.append('\t'+name)
 data.sort()
 data.append('')
 code.sort()
 return string.join(data,' DATA\n')+'\n'+string.join(code,'\n')
# Definition file template
DEF_TEMPLATE = """\
EXPORTS
%s
"""
# Special symbols that have to be included even though they don't
# pass the filter
SPECIALS = (
 )
def filter_Python(symbols,specials=SPECIALS):
 for name in symbols.keys():
 if name[:2] == 'Py' or name[:3] == '_Py':
 pass
 elif name not in specials:
 del symbols[name]
def main():
 s = symbols(PYTHONLIB)
 filter_Python(s)
 exports = export_list(s)
 f = sys.stdout # open('PC/python_nt.def','w')
 f.write(DEF_TEMPLATE % (exports))
 f.close()
if __name__ == '__main__':
 main()
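A quick illustration (hypothetical symbol table with made-up addresses, not
part of the checked-in file), assuming the script is importable as
nm2def.py:
import nm2def
syms = {'Py_Main': ('000001fd', 'T'), '_Py_NoneStruct': ('00000237', 'D')}
print nm2def.DEF_TEMPLATE % nm2def.export_list(syms)
# EXPORTS
#         _Py_NoneStruct DATA
#
#         Py_Main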
--- NEW FILE: objgraph.py ---
#! /usr/bin/env python
# objgraph
#
# Read "nm -o" input (on IRIX: "nm -Bo") of a set of libraries or modules
# and print various interesting listings, such as:
#
# - which names are used but not defined in the set (and used where),
# - which names are defined in the set (and where),
# - which modules use which other modules,
# - which modules are used by which other modules.
#
# Usage: objgraph [-cdu] [file] ...
# -c: print callers per objectfile
# -d: print callees per objectfile
# -u: print usage of undefined symbols
# If none of -cdu is specified, all are assumed.
# Use "nm -o" to generate the input (on IRIX: "nm -Bo"),
# e.g.: nm -o /lib/libc.a | objgraph
import sys
import string
import os
import getopt
import regex
# Types of symbols.
#
definitions = 'TRGDSBAEC'
externals = 'UV'
ignore = 'Nntrgdsbavuc'
# Regular expression to parse "nm -o" output.
#
matcher = regex.compile('\(.*\):\t?........ \(.\) \(.*\)$')
# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
# If there is no list for the key yet, it is created.
#
def store(dict, key, item):
 if dict.has_key(key):
 dict[key].append(item)
 else:
 dict[key] = [item]
# Return a flattened version of a list of strings: the concatenation
# of its elements with intervening spaces.
#
def flat(list):
 s = ''
 for item in list:
 s = s + ' ' + item
 return s[1:]
# Global variables mapping defined/undefined names to files and back.
#
file2undef = {}
def2file = {}
file2def = {}
undef2file = {}
# Read one input file and merge the data into the tables.
# Argument is an open file.
#
def readinput(file):
 while 1:
 s = file.readline()
 if not s:
 break
 # If you get any output from this line,
 # it is probably caused by an unexpected input line:
 if matcher.search(s) < 0: print s,; continue # Shouldn't happen
 (ra, rb), (r1a, r1b), (r2a, r2b), (r3a, r3b) = matcher.regs[:4]
 fn, name, type = s[r1a:r1b], s[r3a:r3b], s[r2a:r2b]
 if type in definitions:
 store(def2file, name, fn)
 store(file2def, fn, name)
 elif type in externals:
 store(file2undef, fn, name)
 store(undef2file, name, fn)
 elif not type in ignore:
 print fn + ':' + name + ': unknown type ' + type
# Print all names that were undefined in some module and where they are
# defined.
#
def printcallee():
 flist = file2undef.keys()
 flist.sort()
 for file in flist:
 print file + ':'
 elist = file2undef[file]
 elist.sort()
 for ext in elist:
 if len(ext) >= 8:
 tabs = '\t'
 else:
 tabs = '\t\t'
 if not def2file.has_key(ext):
 print '\t' + ext + tabs + ' *undefined'
 else:
 print '\t' + ext + tabs + flat(def2file[ext])
# Print for each module the names of the other modules that use it.
#
def printcaller():
 files = file2def.keys()
 files.sort()
 for file in files:
 callers = []
 for label in file2def[file]:
 if undef2file.has_key(label):
 callers = callers + undef2file[label]
 if callers:
 callers.sort()
 print file + ':'
 lastfn = ''
 for fn in callers:
 if fn <> lastfn:
 print '\t' + fn
 lastfn = fn
 else:
 print file + ': unused'
# Print undefined names and where they are used.
#
def printundef():
 undefs = {}
 for file in file2undef.keys():
 for ext in file2undef[file]:
 if not def2file.has_key(ext):
 store(undefs, ext, file)
 elist = undefs.keys()
 elist.sort()
 for ext in elist:
 print ext + ':'
 flist = undefs[ext]
 flist.sort()
 for file in flist:
 print '\t' + file
# Print warning messages about names defined in more than one file.
#
def warndups():
 savestdout = sys.stdout
 sys.stdout = sys.stderr
 names = def2file.keys()
 names.sort()
 for name in names:
 if len(def2file[name]) > 1:
 print 'warning:', name, 'multiply defined:',
 print flat(def2file[name])
 sys.stdout = savestdout
# Main program
#
def main():
 try:
 optlist, args = getopt.getopt(sys.argv[1:], 'cdu')
 except getopt.error:
 sys.stdout = sys.stderr
 print 'Usage:', os.path.basename(sys.argv[0]),
 print '[-cdu] [file] ...'
 print '-c: print callers per objectfile'
 print '-d: print callees per objectfile'
 print '-u: print usage of undefined symbols'
 print 'If none of -cdu is specified, all are assumed.'
 print 'Use "nm -o" to generate the input (on IRIX: "nm -Bo"),'
 print 'e.g.: nm -o /lib/libc.a | objgraph'
 return 1
 optu = optc = optd = 0
 for opt, void in optlist:
 if opt == '-u':
 optu = 1
 elif opt == '-c':
 optc = 1
 elif opt == '-d':
 optd = 1
 if optu == optc == optd == 0:
 optu = optc = optd = 1
 if not args:
 args = ['-']
 for file in args:
 if file == '-':
 readinput(sys.stdin)
 else:
 readinput(open(file, 'r'))
 #
 warndups()
 #
 more = (optu + optc + optd > 1)
 if optd:
 if more:
 print '---------------All callees------------------'
 printcallee()
 if optu:
 if more:
 print '---------------Undefined callees------------'
 printundef()
 if optc:
 if more:
 print '---------------All Callers------------------'
 printcaller()
 return 0
# Call the main program.
# Use its return value as exit status.
# Catch interrupts to avoid stack trace.
#
try:
 sys.exit(main())
except KeyboardInterrupt:
 sys.exit(1)
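A quick illustration (hypothetical, not part of the checked-in file;
objgraph runs its main program on import, so store() is repeated
standalone): the multimap helper behind all four global tables.
def store(dict, key, item):
    if dict.has_key(key):
        dict[key].append(item)
    else:
        dict[key] = [item]
def2file = {}
store(def2file, 'PyGrammar_FindDFA', 'grammar1.o')
store(def2file, 'PyGrammar_FindDFA', 'fakemod.o')   # made-up second definer
print def2file['PyGrammar_FindDFA']   # ['grammar1.o', 'fakemod.o']
# A name with two definers like this is exactly what warndups() reports.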
--- NEW FILE: parseentities.py ---
#!/usr/local/bin/python
""" Utility for parsing HTML entity definitions available from:
 http://www.w3.org/ as e.g.
 http://www.w3.org/TR/REC-html40/HTMLlat1.ent
 Input is read from stdin, output is written to stdout in the form of a
 Python snippet defining a dictionary "entitydefs" mapping literal
 entity name to character or numeric entity.
 Marc-Andre Lemburg, mal@lemburg.com, 1999.
 Use as you like. NO WARRANTIES.
"""
import re,sys
import TextTools
entityRE = re.compile('<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')
def parse(text,pos=0,endpos=None):
 if endpos is None:
 endpos = len(text)
 d = {}
 while 1:
 m = entityRE.search(text,pos,endpos)
 if not m:
 break
 name,charcode,comment = m.groups()
 d[name] = charcode,comment
 pos = m.end()
 return d
def writefile(f,defs):
 f.write("entitydefs = {\n")
 items = defs.items()
 items.sort()
 for name,(charcode,comment) in items:
 if charcode[:2] == '&#':
 code = int(charcode[2:-1])
 if code < 256:
 charcode = "'\%o'" % code
 else:
 charcode = repr(charcode)
 else:
 charcode = repr(charcode)
 comment = TextTools.collapse(comment)
 f.write(" '%s':\t%s, \t# %s\n" % (name,charcode,comment))
 f.write('\n}\n')
if __name__ == '__main__':
 if len(sys.argv) > 1:
 infile = open(sys.argv[1])
 else:
 infile = sys.stdin
 if len(sys.argv) > 2:
 outfile = open(sys.argv[2],'w')
 else:
 outfile = sys.stdout
 text = infile.read()
 defs = parse(text)
 writefile(outfile,defs)
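A quick illustration (hypothetical definition line, not part of the
checked-in file; the module itself needs mxTextTools, so the regexp is
repeated standalone):
import re
entityRE = re.compile('<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')
m = entityRE.search('<!ENTITY nbsp CDATA "&#160;" -- no-break space -->')
print m.groups()   # ('nbsp', '&#160;', 'no-break space')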
--- NEW FILE: pathfix.py ---
#! /usr/bin/env python
# Change the #! line occurring in Python scripts. The new interpreter
# pathname must be given with a -i option.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments). Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions. Also note several subtleties like
# preserving the file's mode and not even writing a temp file
# when no changes are needed for a file.
#
# NB: by changing only the function fixfile() you can turn this
# into a program for a different change to Python programs...
import sys
import regex
import os
from stat import *
import string
import getopt
err = sys.stderr.write
dbg = err
rep = sys.stdout.write
new_interpreter = None
def main():
 global new_interpreter
 usage = ('usage: %s -i /interpreter file-or-directory ...\n' %
 sys.argv[0])
 try:
 opts, args = getopt.getopt(sys.argv[1:], 'i:')
 except getopt.error, msg:
 err(msg + '\n')
 err(usage)
 sys.exit(2)
 for o, a in opts:
 if o == '-i':
 new_interpreter = a
 if not new_interpreter or new_interpreter[0] != '/' or not args:
 err('-i option or file-or-directory missing\n')
 err(usage)
 sys.exit(2)
 bad = 0
 for arg in args:
 if os.path.isdir(arg):
 if recursedown(arg): bad = 1
 elif os.path.islink(arg):
 err(arg + ': will not process symbolic links\n')
 bad = 1
 else:
 if fix(arg): bad = 1
 sys.exit(bad)
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
 return ispythonprog.match(name) >= 0
def recursedown(dirname):
 dbg('recursedown(' + `dirname` + ')\n')
 bad = 0
 try:
 names = os.listdir(dirname)
 except os.error, msg:
 err(dirname + ': cannot list directory: ' + `msg` + '\n')
 return 1
 names.sort()
 subdirs = []
 for name in names:
 if name in (os.curdir, os.pardir): continue
 fullname = os.path.join(dirname, name)
 if os.path.islink(fullname): pass
 elif os.path.isdir(fullname):
 subdirs.append(fullname)
 elif ispython(name):
 if fix(fullname): bad = 1
 for fullname in subdirs:
 if recursedown(fullname): bad = 1
 return bad
def fix(filename):
## dbg('fix(' + `filename` + ')\n')
 try:
 f = open(filename, 'r')
 except IOError, msg:
 err(filename + ': cannot open: ' + `msg` + '\n')
 return 1
 line = f.readline()
 fixed = fixline(line)
 if line == fixed:
 rep(filename+': no change\n')
 f.close()
 return
 head, tail = os.path.split(filename)
 tempname = os.path.join(head, '@' + tail)
 try:
 g = open(tempname, 'w')
 except IOError, msg:
 f.close()
 err(tempname+': cannot create: '+`msg`+'\n')
 return 1
 rep(filename + ': updating\n')
 g.write(fixed)
 BUFSIZE = 8*1024
 while 1:
 buf = f.read(BUFSIZE)
 if not buf: break
 g.write(buf)
 g.close()
 f.close()
 # Finishing touch -- move files
 # First copy the file's mode to the temp file
 try:
 statbuf = os.stat(filename)
 os.chmod(tempname, statbuf[ST_MODE] & 07777)
 except os.error, msg:
 err(tempname + ': warning: chmod failed (' + `msg` + ')\n')
 # Then make a backup of the original file as filename~
 try:
 os.rename(filename, filename + '~')
 except os.error, msg:
 err(filename + ': warning: backup failed (' + `msg` + ')\n')
 # Now move the temp file to the original file
 try:
 os.rename(tempname, filename)
 except os.error, msg:
 err(filename + ': rename failed (' + `msg` + ')\n')
 return 1
 # Return success
 return 0
def fixline(line):
 if line[:2] != '#!':
 return line
 if string.find(line, "python") < 0:
 return line
 return '#! %s\n' % new_interpreter
main()
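A quick illustration (hypothetical, not part of the checked-in file;
pathfix runs main() on import, so fixline() is repeated standalone with a
made-up interpreter path):
import string
new_interpreter = '/usr/local/bin/python2.1'
def fixline(line):
    if line[:2] != '#!':
        return line
    if string.find(line, "python") < 0:
        return line
    return '#! %s\n' % new_interpreter
print fixline('#! /usr/bin/python\n'),   # rewritten to the new interpreter
print fixline('#! /bin/sh\n'),           # left alone: not a python script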
--- NEW FILE: pdeps.py ---
#! /usr/bin/env python
# pdeps
#
# Find dependencies between a bunch of Python modules.
#
# Usage:
# pdeps file1.py file2.py ...
#
# Output:
# Four tables separated by lines like '--- Closure ---':
# 1) Direct dependencies, listing which module imports which other modules
# 2) The inverse of (1)
# 3) Indirect dependencies, or the closure of the above
# 4) The inverse of (3)
#
# To do:
# - command line options to select output type
# - option to automatically scan the Python library for referenced modules
# - option to limit output to particular modules
import sys
import regex
import os
import string
# Main program
#
def main():
 args = sys.argv[1:]
 if not args:
 print 'usage: pdeps file.py file.py ...'
 return 2
 #
 table = {}
 for arg in args:
 process(arg, table)
 #
 print '--- Uses ---'
 printresults(table)
 #
 print '--- Used By ---'
 inv = inverse(table)
 printresults(inv)
 #
 print '--- Closure of Uses ---'
 reach = closure(table)
 printresults(reach)
 #
 print '--- Closure of Used By ---'
 invreach = inverse(reach)
 printresults(invreach)
 #
 return 0
# Compiled regular expressions to search for import statements
#
m_import = regex.compile('^[ \t]*from[ \t]+\([^ \t]+\)[ \t]+')
m_from = regex.compile('^[ \t]*import[ \t]+\([^#]+\)')
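# (Note: the two names above are historically swapped -- m_import matches
# "from ... import" lines and m_from matches plain "import" lines; the
# mix-up is harmless because process() below treats both hits the same way.)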
# Collect data from one file
#
def process(filename, table):
 fp = open(filename, 'r')
 mod = os.path.basename(filename)
 if mod[-3:] == '.py':
 mod = mod[:-3]
 table[mod] = list = []
 while 1:
 line = fp.readline()
 if not line: break
 while line[-1:] == '\\':
 nextline = fp.readline()
 if not nextline: break
 line = line[:-1] + nextline
 if m_import.match(line) >= 0:
 (a, b), (a1, b1) = m_import.regs[:2]
 elif m_from.match(line) >= 0:
 (a, b), (a1, b1) = m_from.regs[:2]
 else: continue
 words = string.splitfields(line[a1:b1], ',')
 # print '#', line, words
 for word in words:
 word = string.strip(word)
 if word not in list:
 list.append(word)
# Compute closure (this is in fact totally general)
#
def closure(table):
 modules = table.keys()
 #
 # Initialize reach with a copy of table
 #
 reach = {}
 for mod in modules:
 reach[mod] = table[mod][:]
 #
 # Iterate until no more change
 #
 change = 1
 while change:
 change = 0
 for mod in modules:
 for mo in reach[mod]:
 if mo in modules:
 for m in reach[mo]:
 if m not in reach[mod]:
 reach[mod].append(m)
 change = 1
 #
 return reach
# Invert a table (this is again totally general).
# All keys of the original table are made keys of the inverse,
# so there may be empty lists in the inverse.
#
def inverse(table):
 inv = {}
 for key in table.keys():
 if not inv.has_key(key):
 inv[key] = []
 for item in table[key]:
 store(inv, item, key)
 return inv
# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
# If there is no list for the key yet, it is created.
#
def store(dict, key, item):
 if dict.has_key(key):
 dict[key].append(item)
 else:
 dict[key] = [item]
# Tabulate results neatly
#
def printresults(table):
 modules = table.keys()
 maxlen = 0
 for mod in modules: maxlen = max(maxlen, len(mod))
 modules.sort()
 for mod in modules:
 list = table[mod]
 list.sort()
 print string.ljust(mod, maxlen), ':',
 if mod in list:
 print '(*)',
 for ref in list:
 print ref,
 print
# Call main and honor exit status
try:
 sys.exit(main())
except KeyboardInterrupt:
 sys.exit(1)
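A quick illustration (hypothetical module names, not part of the checked-in
file; pdeps runs its main program on import, so the closure loop is
repeated standalone):
table = {'a': ['b'], 'b': ['c'], 'c': []}
reach = {}
for mod in table.keys():
    reach[mod] = table[mod][:]
change = 1
while change:
    change = 0
    for mod in table.keys():
        for mo in reach[mod]:
            if mo in table.keys():
                for m in reach[mo]:
                    if m not in reach[mod]:
                        reach[mod].append(m)
                        change = 1
print reach['a']   # ['b', 'c'] -- a reaches c indirectly via b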
--- NEW FILE: pindent.py ---
#! /usr/bin/env python
# This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
# programs. When called as "pindent -c", it takes a valid Python
# program as input and outputs a version augmented with block-closing
# comments. When called as "pindent -d", it assumes its input is a
# Python program with block-closing comments and outputs a commentless
# version. When called as "pindent -r" it assumes its input is a
# Python program with block-closing comments but with its indentation
# messed up, and outputs a properly indented version.
# A "block-closing comment" is a comment of the form '# end <keyword>'
# where <keyword> is the keyword that opened the block. If the
# opening keyword is 'def' or 'class', the function or class name may
# be repeated in the block-closing comment as well. Here is an
# example of a program fully augmented with block-closing comments:
# def foobar(a, b):
# if a == b:
# a = a+1
# elif a < b:
# b = b-1
# if b > a: a = a-1
# # end if
# else:
# print 'oops!'
# # end if
# # end def foobar
# Note that only the last part of an if...elif...else... block needs a
# block-closing comment; the same is true for other compound
# statements (e.g. try...except). Also note that "short-form" blocks
# like the second 'if' in the example must be closed as well;
# otherwise the 'else' in the example would be ambiguous (remember
# that indentation is not significant when interpreting block-closing
# comments).
# The operations are idempotent (i.e. applied to their own output
# they yield an identical result). Running first "pindent -c" and
# then "pindent -r" on a valid Python program produces a program that
# is semantically identical to the input (though its indentation may
# be different). Running "pindent -e" on that output produces a
# program that only differs from the original in indentation.
# Other options:
# -s stepsize: set the indentation step size (default 8)
# -t tabsize : set the number of spaces a tab character is worth (default 8)
# -e : expand TABs into spaces
# file ... : input file(s) (default standard input)
# The results always go to standard output
# Caveats:
# - comments ending in a backslash will be mistaken for continued lines
# - continuations using backslash are always left unchanged
# - continuations inside parentheses are not extra indented by -r
# but must be indented for -c to work correctly (this breaks
# idempotency!)
# - continued lines inside triple-quoted strings are totally garbled
# Secret feature:
# - On input, a block may also be closed with an "end statement" --
# this is a block-closing comment without the '#' sign.
# Possible improvements:
# - check syntax based on transitions in 'next' table
# - better error reporting
# - better error recovery
# - check identifier after class/def
# The following wishes need a more complete tokenization of the source:
# - Don't get fooled by comments ending in backslash
# - reindent continuation lines indicated by backslash
# - handle continuation lines inside parentheses/braces/brackets
# - handle triple quoted strings spanning lines
# - realign comments
# - optionally do much more thorough reformatting, a la C indent
# Defaults
STEPSIZE = 8
TABSIZE = 8
EXPANDTABS = 0
import os
import re
import string
import sys
next = {}
next['if'] = next['elif'] = 'elif', 'else', 'end'
next['while'] = next['for'] = 'else', 'end'
next['try'] = 'except', 'finally'
next['except'] = 'except', 'else', 'end'
next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
next['end'] = ()
start = 'if', 'while', 'for', 'try', 'def', 'class'
class PythonIndenter:
 def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 self.fpi = fpi
 self.fpo = fpo
 self.indentsize = indentsize
 self.tabsize = tabsize
 self.lineno = 0
 self.expandtabs = expandtabs
 self._write = fpo.write
 self.kwprog = re.compile(
 r'^\s*(?P<kw>[a-z]+)'
 r'(\s+(?P<id>[a-zA-Z_]\w*))?'
 r'[^\w]')
 self.endprog = re.compile(
 r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
 r'(\s+(?P<id>[a-zA-Z_]\w*))?'
 r'[^\w]')
 self.wsprog = re.compile(r'^[ \t]*')
 # end def __init__
 def write(self, line):
 if self.expandtabs:
 self._write(string.expandtabs(line, self.tabsize))
 else:
 self._write(line)
 # end if
 # end def write
 def readline(self):
 line = self.fpi.readline()
 if line: self.lineno = self.lineno + 1
 # end if
 return line
 # end def readline
 def error(self, fmt, *args):
 if args: fmt = fmt % args
 # end if
 sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
 self.write('### %s ###\n' % fmt)
 # end def error
 def getline(self):
 line = self.readline()
 while line[-2:] == '\\\n':
 line2 = self.readline()
 if not line2: break
 # end if
 line = line + line2
 # end while
 return line
 # end def getline
 def putline(self, line, indent = None):
 if indent is None:
 self.write(line)
 return
 # end if
 tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
 i = 0
 m = self.wsprog.match(line)
 if m: i = m.end()
 # end if
 self.write('\t'*tabs + ' '*spaces + line[i:])
 # end def putline
 def reformat(self):
 stack = []
 while 1:
 line = self.getline()
 if not line: break # EOF
 # end if
 m = self.endprog.match(line)
 if m:
 kw = 'end'
 kw2 = m.group('kw')
 if not stack:
 self.error('unexpected end')
 elif stack[-1][0] != kw2:
 self.error('unmatched end')
 # end if
 del stack[-1:]
 self.putline(line, len(stack))
 continue
 # end if
 m = self.kwprog.match(line)
 if m:
 kw = m.group('kw')
 if kw in start:
 self.putline(line, len(stack))
 stack.append((kw, kw))
 continue
 # end if
 if next.has_key(kw) and stack:
 self.putline(line, len(stack)-1)
 kwa, kwb = stack[-1]
 stack[-1] = kwa, kw
 continue
 # end if
 # end if
 self.putline(line, len(stack))
 # end while
 if stack:
 self.error('unterminated keywords')
 for kwa, kwb in stack:
 self.write('\t%s\n' % kwa)
 # end for
 # end if
 # end def reformat
 def delete(self):
 begin_counter = 0
 end_counter = 0
 while 1:
 line = self.getline()
 if not line: break # EOF
 # end if
 m = self.endprog.match(line)
 if m:
 end_counter = end_counter + 1
 continue
 # end if
 m = self.kwprog.match(line)
 if m:
 kw = m.group('kw')
 if kw in start:
 begin_counter = begin_counter + 1
 # end if
 # end if
 self.putline(line)
 # end while
 if begin_counter - end_counter < 0:
 sys.stderr.write('Warning: input contained more end tags than expected\n')
 elif begin_counter - end_counter > 0:
 sys.stderr.write('Warning: input contained fewer end tags than expected\n')
 # end if
 # end def delete
 def complete(self):
 self.indentsize = 1
 stack = []
 todo = []
 current, firstkw, lastkw, topid = 0, '', '', ''
 while 1:
 line = self.getline()
 i = 0
 m = self.wsprog.match(line)
 if m: i = m.end()
 # end if
 m = self.endprog.match(line)
 if m:
 thiskw = 'end'
 endkw = m.group('kw')
 thisid = m.group('id')
 else:
 m = self.kwprog.match(line)
 if m:
 thiskw = m.group('kw')
 if not next.has_key(thiskw):
 thiskw = ''
 # end if
 if thiskw in ('def', 'class'):
 thisid = m.group('id')
 else:
 thisid = ''
 # end if
 elif line[i:i+1] in ('\n', '#'):
 todo.append(line)
 continue
 else:
 thiskw = ''
 # end if
 # end if
 indent = len(string.expandtabs(line[:i], self.tabsize))
 while indent < current:
 if firstkw:
 if topid:
 s = '# end %s %s\n' % (
 firstkw, topid)
 else:
 s = '# end %s\n' % firstkw
 # end if
 self.putline(s, current)
 firstkw = lastkw = ''
 # end if
 current, firstkw, lastkw, topid = stack[-1]
 del stack[-1]
 # end while
 if indent == current and firstkw:
 if thiskw == 'end':
 if endkw != firstkw:
 self.error('mismatched end')
 # end if
 firstkw = lastkw = ''
 elif not thiskw or thiskw in start:
 if topid:
 s = '# end %s %s\n' % (
 firstkw, topid)
 else:
 s = '# end %s\n' % firstkw
 # end if
 self.putline(s, current)
 firstkw = lastkw = topid = ''
 # end if
 # end if
 if indent > current:
 stack.append((current, firstkw, lastkw, topid))
 if thiskw and thiskw not in start:
 # error
 thiskw = ''
 # end if
 current, firstkw, lastkw, topid = \
 indent, thiskw, thiskw, thisid
 # end if
 if thiskw:
 if thiskw in start:
 firstkw = lastkw = thiskw
 topid = thisid
 else:
 lastkw = thiskw
 # end if
 # end if
 for l in todo: self.write(l)
 # end for
 todo = []
 if not line: break
 # end if
 self.write(line)
 # end while
 # end def complete
# end class PythonIndenter
# Simplified user interface
# - xxx_filter(input, output): read and write file objects
# - xxx_string(s): take and return string object
# - xxx_file(filename): process file in place, return true iff changed
def complete_filter(input = sys.stdin, output = sys.stdout,
 stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
 pi.complete()
# end def complete_filter
def delete_filter(input= sys.stdin, output = sys.stdout,
 stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
 pi.delete()
# end def delete_filter
def reformat_filter(input = sys.stdin, output = sys.stdout,
 stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
 pi.reformat()
# end def reformat_filter
class StringReader:
 def __init__(self, buf):
 self.buf = buf
 self.pos = 0
 self.len = len(self.buf)
 # end def __init__
 def read(self, n = 0):
 if n <= 0:
 n = self.len - self.pos
 else:
 n = min(n, self.len - self.pos)
 # end if
 r = self.buf[self.pos : self.pos + n]
 self.pos = self.pos + n
 return r
 # end def read
 def readline(self):
 i = string.find(self.buf, '\n', self.pos)
 return self.read(i + 1 - self.pos)
 # end def readline
 def readlines(self):
 lines = []
 line = self.readline()
 while line:
 lines.append(line)
 line = self.readline()
 # end while
 return lines
 # end def readlines
 # seek/tell etc. are left as an exercise for the reader
# end class StringReader
class StringWriter:
 def __init__(self):
 self.buf = ''
 # end def __init__
 def write(self, s):
 self.buf = self.buf + s
 # end def write
 def getvalue(self):
 return self.buf
 # end def getvalue
# end class StringWriter
def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 input = StringReader(source)
 output = StringWriter()
 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
 pi.complete()
 return output.getvalue()
# end def complete_string
def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 input = StringReader(source)
 output = StringWriter()
 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
 pi.delete()
 return output.getvalue()
# end def delete_string
def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 input = StringReader(source)
 output = StringWriter()
 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
 pi.reformat()
 return output.getvalue()
# end def reformat_string
def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 source = open(filename, 'r').read()
 result = complete_string(source, stepsize, tabsize, expandtabs)
 if source == result: return 0
 # end if
 import os
 try: os.rename(filename, filename + '~')
 except os.error: pass
 # end try
 f = open(filename, 'w')
 f.write(result)
 f.close()
 return 1
# end def complete_file
def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 source = open(filename, 'r').read()
 result = delete_string(source, stepsize, tabsize, expandtabs)
 if source == result: return 0
 # end if
 import os
 try: os.rename(filename, filename + '~')
 except os.error: pass
 # end try
 f = open(filename, 'w')
 f.write(result)
 f.close()
 return 1
# end def delete_file
def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
 source = open(filename, 'r').read()
 result = reformat_string(source, stepsize, tabsize, expandtabs)
 if source == result: return 0
 # end if
 import os
 try: os.rename(filename, filename + '~')
 except os.error: pass
 # end try
 f = open(filename, 'w')
 f.write(result)
 f.close()
 return 1
# end def reformat_file
# Test program when called as a script
usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c : complete a correctly indented program (add #end directives)
-d : delete #end directives
-r : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()
def error_both(op1, op2):
 sys.stderr.write('Error: You cannot specify both '+op1+' and -'+op2[0]+' at the same time\n')
 sys.stderr.write(usage)
 sys.exit(2)
# end def error_both
def test():
 import getopt
 try:
 opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
 except getopt.error, msg:
 sys.stderr.write('Error: %s\n' % msg)
 sys.stderr.write(usage)
 sys.exit(2)
 # end try
 action = None
 stepsize = STEPSIZE
 tabsize = TABSIZE
 expandtabs = EXPANDTABS
 for o, a in opts:
 if o == '-c':
 if action: error_both(o, action)
 # end if
 action = 'complete'
 elif o == '-d':
 if action: error_both(o, action)
 # end if
 action = 'delete'
 elif o == '-r':
 if action: error_both(o, action)
 # end if
 action = 'reformat'
 elif o == '-s':
 stepsize = string.atoi(a)
 elif o == '-t':
 tabsize = string.atoi(a)
 elif o == '-e':
 expandtabs = 1
 # end if
 # end for
 if not action:
 sys.stderr.write(
 'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
 sys.stderr.write(usage)
 sys.exit(2)
 # end if
 if not args or args == ['-']:
 action = eval(action + '_filter')
 action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
 else:
 action = eval(action + '_file')
 for file in args:
 action(file, stepsize, tabsize, expandtabs)
 # end for
 # end if
# end def test
if __name__ == '__main__':
 test()
# end if
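A quick illustration (hypothetical input, not part of the checked-in file)
of the string interface, assuming the script is importable as pindent.py:
the # end marker lets -r restore the body's indentation even after it was
lost.
import pindent
src = "if x:\ny = 1\n# end if\n"      # body indentation was lost
print pindent.reformat_string(src),   # y = 1 comes back tab-indented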
--- NEW FILE: ptags.py ---
#! /usr/bin/env python
# ptags
#
# Create a tags file for Python programs, usable with vi.
# Tagged are:
# - functions (even inside other defs or classes)
# - classes
# - filenames
# Warns about files it cannot open.
# No warnings about duplicate tags.
import sys, re, os
tags = [] # Modified global variable!
def main():
 args = sys.argv[1:]
 for file in args: treat_file(file)
 if tags:
 fp = open('tags', 'w')
 tags.sort()
 for s in tags: fp.write(s)
expr = '^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*[:\(]'
matcher = re.compile(expr)
def treat_file(file):
 try:
 fp = open(file, 'r')
 except:
 sys.stderr.write('Cannot open %s\n' % file)
 return
 base = os.path.basename(file)
 if base[-3:] == '.py':
 base = base[:-3]
 s = base + '\t' + file + '\t' + '1\n'
 tags.append(s)
 while 1:
 line = fp.readline()
 if not line:
 break
 m = matcher.match(line)
 if m:
 content = m.group(0)
 name = m.group(2)
 s = name + '\t' + file + '\t/^' + content + '/\n'
 tags.append(s)
main()
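A quick illustration (hypothetical, not part of the checked-in file; ptags
runs main() on import, so the tag regexp is repeated standalone):
import re
matcher = re.compile('^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*[:\(]')
m = matcher.match('def treat_file(file):\n')
print m.group(2)   # treat_file -- the name that ends up in the tags file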
--- NEW FILE: pydoc.pyw ---
import pydoc
pydoc.gui()
--- NEW FILE: redemo.py ---
"""Basic regular expression demostration facility (Perl style syntax)."""
from Tkinter import *
import re
class ReDemo:
 def __init__(self, master):
 self.master = master
 self.promptdisplay = Label(self.master, anchor=W,
 text="Enter a Perl-style regular expression:")
 self.promptdisplay.pack(side=TOP, fill=X)
 self.regexdisplay = Entry(self.master)
 self.regexdisplay.pack(fill=X)
 self.regexdisplay.focus_set()
 self.addoptions()
 self.statusdisplay = Label(self.master, text="", anchor=W)
 self.statusdisplay.pack(side=TOP, fill=X)
 self.labeldisplay = Label(self.master, anchor=W,
 text="Enter a string to search:")
 self.labeldisplay.pack(fill=X)
 self.showframe = Frame(master)
 self.showframe.pack(fill=X, anchor=W)
 self.showvar = StringVar(master)
 self.showvar.set("first")
 self.showfirstradio = Radiobutton(self.showframe,
 text="Highlight first match",
 variable=self.showvar,
 value="first",
 command=self.recompile)
 self.showfirstradio.pack(side=LEFT)
 self.showallradio = Radiobutton(self.showframe,
 text="Highlight all matches",
 variable=self.showvar,
 value="all",
 command=self.recompile)
 self.showallradio.pack(side=LEFT)
 self.stringdisplay = Text(self.master, width=60, height=4)
 self.stringdisplay.pack(fill=BOTH, expand=1)
 self.stringdisplay.tag_configure("hit", background="yellow")
 self.grouplabel = Label(self.master, text="Groups:", anchor=W)
 self.grouplabel.pack(fill=X)
 self.grouplist = Listbox(self.master)
 self.grouplist.pack(expand=1, fill=BOTH)
 self.regexdisplay.bind('<Key>', self.recompile)
 self.stringdisplay.bind('<Key>', self.reevaluate)
 self.compiled = None
 self.recompile()
 btags = self.regexdisplay.bindtags()
 self.regexdisplay.bindtags(btags[1:] + btags[:1])
 btags = self.stringdisplay.bindtags()
 self.stringdisplay.bindtags(btags[1:] + btags[:1])
 def addoptions(self):
 self.frames = []
 self.boxes = []
 self.vars = []
 for name in ('IGNORECASE',
 'LOCALE',
 'MULTILINE',
 'DOTALL',
 'VERBOSE'):
 if len(self.boxes) % 3 == 0:
 frame = Frame(self.master)
 frame.pack(fill=X)
 self.frames.append(frame)
 val = getattr(re, name)
 var = IntVar()
 box = Checkbutton(frame,
 variable=var, text=name,
 offvalue=0, onvalue=val,
 command=self.recompile)
 box.pack(side=LEFT)
 self.boxes.append(box)
 self.vars.append(var)
 def getflags(self):
 flags = 0
 for var in self.vars:
 flags = flags | var.get()
 return flags
 def recompile(self, event=None):
 try:
 self.compiled = re.compile(self.regexdisplay.get(),
 self.getflags())
 bg = self.promptdisplay['background']
 self.statusdisplay.config(text="", background=bg)
 except re.error, msg:
 self.compiled = None
 self.statusdisplay.config(
 text="re.error: %s" % str(msg),
 background="red")
 self.reevaluate()
 def reevaluate(self, event=None):
 try:
 self.stringdisplay.tag_remove("hit", "1.0", END)
 except TclError:
 pass
 try:
 self.stringdisplay.tag_remove("hit0", "1.0", END)
 except TclError:
 pass
 self.grouplist.delete(0, END)
 if not self.compiled:
 return
 self.stringdisplay.tag_configure("hit", background="yellow")
 self.stringdisplay.tag_configure("hit0", background="orange")
 text = self.stringdisplay.get("1.0", END)
 last = 0
 nmatches = 0
 while last <= len(text):
 m = self.compiled.search(text, last)
 if m is None:
 break
 first, last = m.span()
 if last == first:
 last = first+1
 tag = "hit0"
 else:
 tag = "hit"
 pfirst = "1.0 + %d chars" % first
 plast = "1.0 + %d chars" % last
 self.stringdisplay.tag_add(tag, pfirst, plast)
 if nmatches == 0:
 self.stringdisplay.yview_pickplace(pfirst)
 groups = list(m.groups())
 groups.insert(0, m.group())
 for i in range(len(groups)):
 g = "%2d: %s" % (i, `groups[i]`)
 self.grouplist.insert(END, g)
 nmatches = nmatches + 1
 if self.showvar.get() == "first":
 break
 if nmatches == 0:
 self.statusdisplay.config(text="(no match)",
 background="yellow")
 else:
 self.statusdisplay.config(text="")
# Main function, run when invoked as a stand-alone Python program.
def main():
 root = Tk()
 demo = ReDemo(root)
 root.protocol('WM_DELETE_WINDOW', root.quit)
 root.mainloop()
if __name__ == '__main__':
 main()
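A quick illustration (hypothetical, not part of the checked-in file) of why
reevaluate() bumps `last` past zero-width matches -- without the bump,
search() would return the same empty match forever:
import re
p, text = re.compile('x*'), 'ab'
last, spans = 0, []
while last <= len(text):
    m = p.search(text, last)
    if m is None:
        break
    first, last = m.span()
    if last == first:          # zero-width match: advance by hand
        last = first + 1
    spans.append(m.span())
print spans   # [(0, 0), (1, 1), (2, 2)]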
--- NEW FILE: reindent.py ---
#! /usr/bin/env python
# Released to the public domain, by Tim Peters, 03 October 2000.
"""reindent [-d][-r][-v] path ...
-d Dry run. Analyze, but don't make any changes to, files.
-r Recurse. Search for all .py files in subdirectories too.
-v Verbose. Print informative msgs; else no output.
Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess whitespace from ends of lines, and empty lines at the ends
of files. Ensure the last line ends with a newline.
Pass one or more file and/or directory paths. When a directory path, all
.py files within the directory will be examined, and, if the -r option is
given, likewise recursively for subdirectories.
Overwrites files in place, renaming the originals with a .bak extension.
If reindent finds nothing to change, the file is left alone. If reindent
does change a file, the changed file is a fixed-point for reindent (i.e.,
running reindent on the resulting .py file won't change it again).
The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
"""
__version__ = "1"
import tokenize
import os
import sys
verbose = 0
recurse = 0
dryrun = 0
def errprint(*args):
 sep = ""
 for arg in args:
 sys.stderr.write(sep + str(arg))
 sep = " "
 sys.stderr.write("\n")
def main():
 import getopt
 global verbose, recurse, dryrun
 try:
 opts, args = getopt.getopt(sys.argv[1:], "drv")
 except getopt.error, msg:
 errprint(msg)
 return
 for o, a in opts:
 if o == '-d':
 dryrun += 1
 elif o == '-r':
 recurse += 1
 elif o == '-v':
 verbose += 1
 if not args:
 errprint("Usage:", __doc__)
 return
 for arg in args:
 check(arg)
def check(file):
 if os.path.isdir(file) and not os.path.islink(file):
 if verbose:
 print "listing directory", file
 names = os.listdir(file)
 for name in names:
 fullname = os.path.join(file, name)
 if ((recurse and os.path.isdir(fullname) and
 not os.path.islink(fullname))
 or name.lower().endswith(".py")):
 check(fullname)
 return
 if verbose:
 print "checking", file, "...",
 try:
 f = open(file)
 except IOError, msg:
 errprint("%s: I/O Error: %s" % (file, str(msg)))
 return
 r = Reindenter(f)
 f.close()
 if r.run():
 if verbose:
 print "changed."
 if dryrun:
 print "But this is a dry run, so leaving it alone."
 if not dryrun:
 bak = file + ".bak"
 if os.path.exists(bak):
 os.remove(bak)
 os.rename(file, bak)
 if verbose:
 print "renamed", file, "to", bak
 f = open(file, "w")
 r.write(f)
 f.close()
 if verbose:
 print "wrote new", file
 else:
 if verbose:
 print "unchanged."
class Reindenter:
 def __init__(self, f):
 self.find_stmt = 1 # next token begins a fresh stmt?
 self.level = 0 # current indent level
 # Raw file lines.
 self.raw = f.readlines()
 # File lines, rstripped & tab-expanded. Dummy at start is so
 # that we can use tokenize's 1-based line numbering easily.
 # Note that a line is all-blank iff it's "\n".
 self.lines = [line.rstrip().expandtabs() + "\n"
 for line in self.raw]
 self.lines.insert(0, None)
 self.index = 1 # index into self.lines of next line
 # List of (lineno, indentlevel) pairs, one for each stmt and
 # comment line. indentlevel is -1 for comment lines, as a
 # signal that tokenize doesn't know what to do about them;
 # indeed, they're our headache!
 self.stats = []
 def run(self):
 tokenize.tokenize(self.getline, self.tokeneater)
 # Remove trailing empty lines.
 lines = self.lines
 while lines and lines[-1] == "\n":
 lines.pop()
 # Sentinel.
 stats = self.stats
 stats.append((len(lines), 0))
 # Map count of leading spaces to # we want.
 have2want = {}
 # Program after transformation.
 after = self.after = []
 for i in range(len(stats)-1):
 thisstmt, thislevel = stats[i]
 nextstmt = stats[i+1][0]
 have = getlspace(lines[thisstmt])
 want = thislevel * 4
 if want < 0:
 # A comment line.
 if have:
 # An indented comment line. If we saw the same
 # indentation before, reuse what it most recently
 # mapped to.
 want = have2want.get(have, -1)
 if want < 0:
 # Then it probably belongs to the next real stmt.
 for j in xrange(i+1, len(stats)-1):
 jline, jlevel = stats[j]
 if jlevel >= 0:
 if have == getlspace(lines[jline]):
 want = jlevel * 4
 break
 if want < 0:
 # Maybe it's a hanging comment like this one, in
 # which case we should shift it like its base
 # line got shifted.
 for j in xrange(i-1, -1, -1):
 jline, jlevel = stats[j]
 if jlevel >= 0:
 want = have + getlspace(after[jline-1]) - \
 getlspace(lines[jline])
 break
 if want < 0:
 # Still no luck -- leave it alone.
 want = have
 else:
 want = 0
 assert want >= 0
 have2want[have] = want
 diff = want - have
 if diff == 0 or have == 0:
 after.extend(lines[thisstmt:nextstmt])
 else:
 for line in lines[thisstmt:nextstmt]:
 if diff > 0:
 if line == "\n":
 after.append(line)
 else:
 after.append(" " * diff + line)
 else:
 remove = min(getlspace(line), -diff)
 after.append(line[remove:])
 return self.raw != self.after
 def write(self, f):
 f.writelines(self.after)
 # Line-getter for tokenize.
 def getline(self):
 if self.index >= len(self.lines):
 line = ""
 else:
 line = self.lines[self.index]
 self.index += 1
 return line
 # Line-eater for tokenize.
 def tokeneater(self, type, token, (sline, scol), end, line,
 INDENT=tokenize.INDENT,
 DEDENT=tokenize.DEDENT,
 NEWLINE=tokenize.NEWLINE,
 COMMENT=tokenize.COMMENT,
 NL=tokenize.NL):
 if type == NEWLINE:
 # A program statement, or ENDMARKER, will eventually follow,
 # after some (possibly empty) run of tokens of the form
 # (NL | COMMENT)* (INDENT | DEDENT+)?
 self.find_stmt = 1
 elif type == INDENT:
 self.find_stmt = 1
 self.level += 1
 elif type == DEDENT:
 self.find_stmt = 1
 self.level -= 1
 elif type == COMMENT:
 if self.find_stmt:
 self.stats.append((sline, -1))
 # but we're still looking for a new stmt, so leave
 # find_stmt alone
 elif type == NL:
 pass
 elif self.find_stmt:
 # This is the first "real token" following a NEWLINE, so it
 # must be the first token of the next program statement, or an
 # ENDMARKER.
 self.find_stmt = 0
 if line: # not endmarker
 self.stats.append((sline, self.level))
# Count number of leading blanks.
def getlspace(line):
 i, n = 0, len(line)
 while i < n and line[i] == " ":
 i += 1
 return i
if __name__ == '__main__':
 main()
--- NEW FILE: rgrep.py ---
#! /usr/bin/env python
"""Reverse grep.
Usage: rgrep [-i] pattern file
"""
import sys
import re
import string
import getopt
def main():
 bufsize = 64*1024
 reflags = 0
 opts, args = getopt.getopt(sys.argv[1:], "i")
 for o, a in opts:
 if o == '-i':
 reflags = reflags | re.IGNORECASE
 if len(args) < 2:
 usage("not enough arguments")
 if len(args) > 2:
 usage("exactly one file argument required")
 pattern, filename = args
 try:
 prog = re.compile(pattern, reflags)
 except re.error, msg:
 usage("error in regular expression: %s" % str(msg))
 try:
 f = open(filename)
 except IOError, msg:
 usage("can't open %s: %s" % (repr(filename), str(msg)), 1)
 f.seek(0, 2)
 pos = f.tell()
 leftover = None
 while pos > 0:
 size = min(pos, bufsize)
 pos = pos - size
 f.seek(pos)
 buffer = f.read(size)
 lines = string.split(buffer, "\n")
 del buffer
 if leftover is None:
 if not lines[-1]:
 del lines[-1]
 else:
 lines[-1] = lines[-1] + leftover
 if pos > 0:
 leftover = lines[0]
 del lines[0]
 else:
 leftover = None
 lines.reverse()
 for line in lines:
 if prog.search(line):
 print line
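# How the leftover bookkeeping above stitches lines that straddle buffer
# boundaries (illustrative, with bufsize shrunk to 4): for a file holding
# "aa\nbb\ncc\n", the last chunk read is "\ncc\n" and prints "cc", with the
# empty head of the buffer carried as leftover; the next chunk "a\nbb"
# prints "bb" and carries "a"; the final chunk "a" completes and prints
# "aa".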
def usage(msg, code=2):
 sys.stdout = sys.stderr
 print msg
 print __doc__
 sys.exit(code)
if __name__ == '__main__':
 main()
--- NEW FILE: suff.py ---
#! /usr/bin/env python
# suff
#
# show different suffixes amongst arguments
import sys
def main():
 files = sys.argv[1:]
 suffixes = {}
 for file in files:
 suff = getsuffix(file)
 if not suffixes.has_key(suff):
 suffixes[suff] = []
 suffixes[suff].append(file)
 keys = suffixes.keys()
 keys.sort()
 for suff in keys:
 print `suff`, len(suffixes[suff])
def getsuffix(file):
 suff = ''
 for i in range(len(file)):
 if file[i] == '.':
 suff = file[i:]
 return suff
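# For example, getsuffix("archive.tar.gz") should return ".gz" and
# getsuffix("README") returns '' -- roughly what os.path.splitext reports
# for plain file names.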
main()
--- NEW FILE: sum5.py ---
#! /usr/bin/env python
# print md5 checksum for files
bufsize = 8096
fnfilter = None
rmode = 'r'
usage = """
usage: sum5 [-b] [-t] [-l] [-s bufsize] [file ...]
-b : read files in binary mode
-t : read files in text mode (default)
-l : print last pathname component only
-s bufsize: read buffer size (default %d)
file ... : files to sum; '-' or no files means stdin
""" % bufsize
import sys
import string
import os
import md5
StringType = type('')
FileType = type(sys.stdin)
def sum(*files):
 sts = 0
 if files and type(files[-1]) == FileType:
 out, files = files[-1], files[:-1]
 else:
 out = sys.stdout
 if len(files) == 1 and type(files[0]) != StringType:
 files = files[0]
 for f in files:
 if type(f) == StringType:
 if f == '-':
 sts = printsumfp(sys.stdin, '<stdin>', out) or sts
 else:
 sts = printsum(f, out) or sts
 else:
 sts = sum(f, out) or sts
 return sts
def printsum(file, out = sys.stdout):
 try:
 fp = open(file, rmode)
 except IOError, msg:
 sys.stderr.write('%s: Can\'t open: %s\n' % (file, msg))
 return 1
 if fnfilter:
 file = fnfilter(file)
 sts = printsumfp(fp, file, out)
 fp.close()
 return sts
def printsumfp(fp, file, out = sys.stdout):
 m = md5.md5()
 try:
 while 1:
 data = fp.read(bufsize)
 if not data: break
 m.update(data)
 except IOError, msg:
 sys.stderr.write('%s: I/O error: %s\n' % (file, msg))
 return 1
 out.write('%s %s\n' % (hexify(m.digest()), file))
 return 0
def hexify(s):
 res = ''
 for c in s:
 res = res + '%02x' % ord(c)
 return res
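# E.g. hexify('\001\253') should yield '01ab'. Newer Pythons spell this
# m.hexdigest() on the md5 object; hexify keeps the script working on
# older releases.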
def main(args = sys.argv[1:], out = sys.stdout):
 global fnfilter, rmode, bufsize
 import getopt
 try:
 opts, args = getopt.getopt(args, 'blts:')
 except getopt.error, msg:
 sys.stderr.write('%s: %s\n%s' % (sys.argv[0], msg, usage))
 return 2
 for o, a in opts:
 if o == '-l':
 fnfilter = os.path.basename
 if o == '-b':
 rmode = 'rb'
 if o == '-t':
 rmode = 'r'
 if o == '-s':
 bufsize = string.atoi(a)
 if not args: args = ['-']
 return sum(args, out)
if __name__ == '__main__' or __name__ == sys.argv[0]:
 sys.exit(main(sys.argv[1:], sys.stdout))
--- NEW FILE: texi2html.py ---
#! /usr/bin/env python
# Convert GNU texinfo files into HTML, one file per node.
# Based on Texinfo 2.14.
# Usage: texi2html [-d] [-d] [-c] inputfile outputdirectory
# The input file must be a complete texinfo file, e.g. emacs.texi.
# This creates many files (one per info node) in the output directory,
# overwriting existing files of the same name. All files created have
# ".html" as their extension.
# XXX To do:
# - handle @comment*** correctly
# - handle @xref {some words} correctly
# - handle @ftable correctly (items aren't indexed?)
# - handle @itemx properly
# - handle @exdent properly
# - add links directly to the proper line from indices
# - check against the definitive list of @-cmds; we still miss (among others):
[...1574 lines suppressed...]
 parser.print_headers = print_headers
 file = sys.argv[1]
 parser.setdirname(sys.argv[2])
 if file == '-':
 fp = sys.stdin
 else:
 parser.setincludedir(os.path.dirname(file))
 try:
 fp = open(file, 'r')
 except IOError, msg:
 print file, ':', msg
 sys.exit(1)
 parser.parse(fp)
 fp.close()
 parser.report()
if __name__ == "__main__":
 test()
--- NEW FILE: trace.py ---
#!/usr/bin/env python
# Copyright 2000, Mojam Media, Inc., all rights reserved.
# Author: Skip Montanaro
#
# Copyright 1999, Bioreason, Inc., all rights reserved.
# Author: Andrew Dalke
#
# Copyright 1995-1997, Automatrix, Inc., all rights reserved.
# Author: Skip Montanaro
#
# Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved.
#
#
# Permission to use, copy, modify, and distribute this Python software and
# its associated documentation for any purpose without fee is hereby
# granted, provided that the above copyright notice appears in all copies,
# and that both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of neither Automatrix,
# Bioreason or Mojam Media be used in advertising or publicity pertaining to
# distribution of the software without specific, written prior permission.
#
#
# Summary of recent changes:
# Support for files with the same basename (submodules in packages)
# Expanded the idea of how to ignore files or modules
# Split tracing and counting into different classes
# Extracted count information and reporting from the count class
# Added some ability to detect which missing lines could be executed
# Added pseudo-pragma to prohibit complaining about unexecuted lines
# Rewrote the main program
# Summary of older changes:
# Added run-time display of statements being executed
# Incorporated portability and performance fixes from Greg Stein
# Incorporated main program from Michael Scharf
"""
program/module to trace Python program or function execution
Sample use, command line:
 trace.py -c -f counts --ignore-dir '$prefix' spam.py eggs
 trace.py -t --ignore-dir '$prefix' spam.py eggs
Sample use, programmatically (still more complicated than it should be)
 # create an Ignore option, telling it what you want to ignore
 ignore = trace.Ignore(dirs = [sys.prefix, sys.exec_prefix])
 # create a Coverage object, telling it what to ignore
 coverage = trace.Coverage(ignore)
 # run the new command using the given trace
 trace.run(coverage.trace, 'main()')
 # make a report, telling it where you want output
 t = trace.create_results_log(coverage.results(),
 '/usr/local/Automatrix/concerts/coverage',
 show_missing = 1)
 The Trace class can be instantiated instead of the Coverage class if
 runtime display of executable lines is desired instead of statement
 coverage measurement.
"""
import sys, os, string, marshal, tempfile, copy, operator
def usage(outfile):
 outfile.write("""Usage: %s [OPTIONS] <file> [ARGS]
Execution:
 --help Display this help then exit.
 --version Output version information then exit.
 -t,--trace Print the line to be executed to sys.stdout.
 -c,--count Count the number of times a line is executed.
 Results are written in the results file, if given.
 -r,--report Generate a report from a results file; do not
 execute any code.
 (One of `-t', `-c' or `-r' must be specified)
 -s,--summary Generate a brief summary for each file. (Can only
 be used with -c or -r.)
I/O:
 -f,--file= File name for accumulating results over several runs.
 (No file name means do not archive results)
 -d,--logdir= Directory to use when writing annotated log files.
 Log files are the module __name__ with `.` replaced
 by os.sep and with '.pyl' added.
 -m,--missing Annotate all executable lines which were not executed
 with a '>>>>>> '.
 -R,--no-report Do not generate the annotated reports. Useful if
 you want to accumulate results over several runs.
 -C,--coverdir= Generate .cover files in this directory
Selection: Do not trace or log lines from ...
 --ignore-module=[string] modules with the given __name__, and submodules
 of that module
 --ignore-dir=[string] files in the stated directory (multiple
 directories can be joined by os.pathsep)
 The selection options can be listed multiple times to ignore different
modules.
""" % sys.argv[0])
class Ignore:
 def __init__(self, modules = None, dirs = None):
 self._mods = modules or []
 self._dirs = dirs or []
 self._ignore = { '<string>': 1 }
 def names(self, filename, modulename):
 if self._ignore.has_key(modulename):
 return self._ignore[modulename]
 # haven't seen this one before, so see if the module name is
 # on the ignore list. Need to take some care since ignoring
 # "cmp" musn't mean ignoring "cmpcache" but ignoring
 # "Spam" must also mean ignoring "Spam.Eggs".
 for mod in self._mods:
 if mod == modulename: # Identical names, so ignore
 self._ignore[modulename] = 1
 return 1
 # check if the module is a proper submodule of something on
 # the ignore list
 n = len(mod)
 # (will not overflow since if the first n characters are the
 # same and the name has not already occurred, then the size
 # of "name" is greater than that of "mod")
 if mod == modulename[:n] and modulename[n] == '.':
 self._ignore[modulename] = 1
 return 1
 # Now check that __file__ isn't in one of the directories
 if filename is None:
 # must be a built-in, so we must ignore
 self._ignore[modulename] = 1
 return 1
 # Ignore a file when it contains one of the ignorable paths
 for d in self._dirs:
 # The '+ os.sep' is to ensure that d is a parent directory,
 # as compared to cases like:
 # d = "/usr/local"
 # filename = "/usr/local.py"
 # or
 # d = "/usr/local.py"
 # filename = "/usr/local.py"
 if string.find(filename, d + os.sep) == 0:
 self._ignore[modulename] = 1
 return 1
 # Tried the different ways, so we don't ignore this module
 self._ignore[modulename] = 0
 return 0
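# Illustration of the matching rules above (file names are arbitrary):
# with ign = Ignore(modules=["Spam"]), both ign.names("Spam.py", "Spam")
# and ign.names("Eggs.py", "Spam.Eggs") return 1, while
# ign.names("Spammer.py", "Spammer") returns 0 -- ignoring "Spam" must not
# drag "Spammer" along with it.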
def run(trace, cmd):
 import __main__
 dict = __main__.__dict__
 sys.settrace(trace)
 try:
 exec cmd in dict, dict
 finally:
 sys.settrace(None)
def runctx(trace, cmd, globals=None, locals=None):
 if globals is None: globals = {}
 if locals is None: locals = {}
 sys.settrace(trace)
 try:
 exec cmd in globals, locals
 finally:
 sys.settrace(None)
def runfunc(trace, func, *args, **kw):
 result = None
 sys.settrace(trace)
 try:
 result = apply(func, args, kw)
 finally:
 sys.settrace(None)
 return result
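# A hypothetical programmatic use of the helpers above (some_function and
# its arguments are stand-ins for whatever you want traced):
# t = Trace()
# runfunc(t.trace, some_function, 1, 2) # echoes each line as it runs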
class CoverageResults:
 def __init__(self, counts = {}, modules = {}):
 self.counts = counts.copy() # map (filename, lineno) to count
 self.modules = modules.copy() # map filenames to modules
 def update(self, other):
 """Merge in the data from another CoverageResults"""
 counts = self.counts
 other_counts = other.counts
 modules = self.modules
 other_modules = other.modules
 for key in other_counts.keys():
 counts[key] = counts.get(key, 0) + other_counts[key]
 for key in other_modules.keys():
 if modules.has_key(key):
 # make sure they point to the same file
 assert modules[key] == other_modules[key], \
 "Strange! filename %s has two different module " \
 "names: %s and %s" % \
 (key, modules[key], other_modules[key])
 else:
 modules[key] = other_modules[key]
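# Merge semantics sketch: if self.counts is {("f.py", 1): 2} and
# other.counts is {("f.py", 1): 3, ("f.py", 2): 1}, then update(other)
# leaves self.counts as {("f.py", 1): 5, ("f.py", 2): 1}. Module maps are
# unioned, with the assert guarding against one filename claiming two
# different module names.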
# Given a code string, return the SET_LINENO information
def _find_LINENO_from_string(co_code):
 """return all of the SET_LINENO information from a code string"""
 import dis
 linenos = {}
 # This code was filched from the `dis' module then modified
 n = len(co_code)
 i = 0
 prev_op = None
 prev_lineno = 0
 while i < n:
 c = co_code[i]
 op = ord(c)
 if op == dis.SET_LINENO:
 if prev_op == op:
 # two SET_LINENO in a row, so the previous didn't
 # indicate anything. This occurs with triple
 # quoted strings (?). Remove the old one.
 del linenos[prev_lineno]
 prev_lineno = ord(co_code[i+1]) + ord(co_code[i+2])*256
 linenos[prev_lineno] = 1
 if op >= dis.HAVE_ARGUMENT:
 i = i + 3
 else:
 i = i + 1
 prev_op = op
 return linenos
def _find_LINENO(code):
 """return all of the SET_LINENO information from a code object"""
 import types
 # get all of the lineno information from the code of this scope level
 linenos = _find_LINENO_from_string(code.co_code)
 # and check the constants for references to other code objects
 for c in code.co_consts:
 if type(c) == types.CodeType:
 # find another code object, so recurse into it
 linenos.update(_find_LINENO(c))
 return linenos
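# Illustrative result, assuming an interpreter that still emits
# SET_LINENO (i.e. not running under -O):
# >>> _find_LINENO(compile("x = 1\ny = 2\n", "<example>", "exec"))
# {1: 1, 2: 1}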
def find_executable_linenos(filename):
 """return a dict of the line numbers from executable statements in a file
 Works by finding all of the code-like objects in the module then searching
 the byte code for 'SET_LINENO' terms (so this won't work on -O files).
 """
 import parser
 assert filename.endswith('.py')
 prog = open(filename).read()
 ast = parser.suite(prog)
 code = parser.compileast(ast, filename)
 # The only way I know to find line numbers is to look for the
 # SET_LINENO instructions. Isn't there some way to get it from
 # the AST?
 return _find_LINENO(code)
### XXX because os.path.commonprefix seems broken by my way of thinking...
def commonprefix(dirs):
 "Given a list of pathnames, returns the longest common leading component"
 if not dirs: return ''
 n = copy.copy(dirs)
 for i in range(len(n)):
 n[i] = n[i].split(os.sep)
 prefix = n[0]
 for item in n:
 for i in range(len(prefix)):
 if prefix[:i+1] <> item[:i+1]:
 prefix = prefix[:i]
 if i == 0: return ''
 break
 return os.sep.join(prefix)
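# Unlike the character-wise os.path.commonprefix, this works on whole path
# components: commonprefix(["/usr/lib", "/usr/lib2"]) should give "/usr",
# not the misleading "/usr/lib".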
def create_results_log(results, dirname = ".", show_missing = 1,
 save_counts = 0, summary = 0, coverdir = None):
 import re
 # turn the counts data ("(filename, lineno) = count") into something
 # accessible on a per-file basis
 per_file = {}
 for filename, lineno in results.counts.keys():
 lines_hit = per_file[filename] = per_file.get(filename, {})
 lines_hit[lineno] = results.counts[(filename, lineno)]
 # try and merge existing counts and modules file from dirname
 try:
 counts = marshal.load(open(os.path.join(dirname, "counts")))
 modules = marshal.load(open(os.path.join(dirname, "modules")))
 results.update(results.__class__(counts, modules))
 except IOError:
 pass
 # there are many places where this is insufficient, like a blank
 # line embedded in a multiline string.
 blank = re.compile(r'^\s*(#.*)?$')
 # accumulate summary info, if needed
 sums = {}
 # generate file paths for the coverage files we are going to write...
 fnlist = []
 tfdir = tempfile.gettempdir()
 for key in per_file.keys():
 filename = key
 # skip some "files" we don't care about...
 if filename == "<string>":
 continue
 # are these caused by code compiled using exec or something?
 if filename.startswith(tfdir):
 continue
 modulename = os.path.split(results.modules[key])[1]
 if filename.endswith(".pyc") or filename.endswith(".pyo"):
 filename = filename[:-1]
 if coverdir:
 listfilename = os.path.join(coverdir, modulename + ".cover")
 else:
 # XXX this is almost certainly not portable!!!
 fndir = os.path.dirname(filename)
 if os.path.isabs(filename):
 coverpath = fndir
 else:
 coverpath = os.path.join(dirname, fndir)
 # build list file name by appending a ".cover" to the module name
 # and sticking it into the specified directory
 if "." in modulename:
 # A module in a package
 finalname = modulename.split(".")[-1]
 listfilename = os.path.join(coverpath, finalname + ".cover")
 else:
 listfilename = os.path.join(coverpath, modulename + ".cover")
 # Get the original lines from the .py file
 try:
 lines = open(filename, 'r').readlines()
 except IOError, err:
 print >> sys.stderr, "trace: Could not open %s for reading " \
 "because: %s - skipping" % (`filename`, err.strerror)
 continue
 try:
 outfile = open(listfilename, 'w')
 except IOError, err:
 sys.stderr.write(
 '%s: Could not open %s for writing because: %s - skipping\n'
 % ("trace", `listfilename`, err.strerror))
 continue
 # If desired, get a list of the line numbers which represent
 # executable content (returned as a dict for better lookup speed)
 if show_missing:
 executable_linenos = find_executable_linenos(filename)
 else:
 executable_linenos = {}
 n_lines = 0
 n_hits = 0
 lines_hit = per_file[key]
 for i in range(len(lines)):
 line = lines[i]
 # do the blank/comment match to try to mark more lines
 # (help the reader find stuff that hasn't been covered)
 if lines_hit.has_key(i+1):
 # count precedes the lines that we captured
 outfile.write('%5d: ' % lines_hit[i+1])
 n_hits = n_hits + 1
 n_lines = n_lines + 1
 elif blank.match(line):
 # blank lines and comments are preceded by dots
 outfile.write(' . ')
 else:
 # lines preceded by no marks weren't hit
 # Highlight them if so indicated, unless the line contains
 # '#pragma: NO COVER' (it is possible to embed this into
 # the text as a non-comment; no easy fix)
 if executable_linenos.has_key(i+1) and \
 string.find(lines[i],
 string.join(['#pragma', 'NO COVER'])) == -1:
 outfile.write('>>>>>> ')
 else:
 outfile.write(' '*7)
 n_lines = n_lines + 1
 outfile.write(string.expandtabs(lines[i], 8))
 outfile.close()
 if summary and n_lines:
 percent = int(100 * n_hits / n_lines)
 sums[modulename] = n_lines, percent, modulename, filename
 if save_counts:
 # try and store counts and module info into dirname
 try:
 marshal.dump(results.counts,
 open(os.path.join(dirname, "counts"), "w"))
 marshal.dump(results.modules,
 open(os.path.join(dirname, "modules"), "w"))
 except IOError, err:
 sys.stderr.write("cannot save counts/modules " \
 "files because %s" % err.strerror)
 if summary and sums:
 mods = sums.keys()
 mods.sort()
 print "lines cov% module (path)"
 for m in mods:
 n_lines, percent, modulename, filename = sums[m]
 print "%5d %3d%% %s (%s)" % sums[m]
# There is a lot of code shared between these two classes even though
# it is straightforward to make a super class to share code. However,
# for performance reasons (remember, this is called at every step) I
# wanted to keep everything to a single function call. Also, by
# staying within a single scope, I don't have to temporarily nullify
# sys.settrace, which would slow things down even more.
class Coverage:
 def __init__(self, ignore = Ignore()):
 self.ignore = ignore
 self.ignore_names = ignore._ignore # access ignore's cache (speed hack)
 self.counts = {} # keys are (filename, linenumber)
 self.modules = {} # maps filename -> module name
 def trace(self, frame, why, arg):
 if why == 'line':
 # something is fishy about getting the file name
 filename = frame.f_globals.get("__file__", None)
 if filename is None:
 filename = frame.f_code.co_filename
 try:
 modulename = frame.f_globals["__name__"]
 except KeyError:
 # PyRun_String() for example
 # XXX what to do?
 modulename = None
 # We do this next block to keep from having to make method
 # calls, which would also require resetting the trace
 ignore_it = self.ignore_names.get(modulename, -1)
 if ignore_it == -1: # unknown module name
 sys.settrace(None)
 ignore_it = self.ignore.names(filename, modulename)
 sys.settrace(self.trace)
 # record the module name for every file
 self.modules[filename] = modulename
 if not ignore_it:
 lineno = frame.f_lineno
 # record the file name and line number of every trace
 key = (filename, lineno)
 self.counts[key] = self.counts.get(key, 0) + 1
 return self.trace
 def results(self):
 return CoverageResults(self.counts, self.modules)
class Trace:
 def __init__(self, ignore = Ignore()):
 self.ignore = ignore
 self.ignore_names = ignore._ignore # access ignore's cache (speed hack)
 self.files = {'<string>': None} # stores lines from the .py file,
 # or None
 def trace(self, frame, why, arg):
 if why == 'line':
 filename = frame.f_code.co_filename
 try:
 modulename = frame.f_globals["__name__"]
 except KeyError:
 # PyRun_String() for example
 # XXX what to do?
 modulename = None
 # We do this next block to keep from having to make method
 # calls, which would also require resetting the trace
 ignore_it = self.ignore_names.get(modulename, -1)
 if ignore_it == -1: # unknown module name
 sys.settrace(None)
 ignore_it = self.ignore.names(filename, modulename)
 sys.settrace(self.trace)
 if not ignore_it:
 lineno = frame.f_lineno
 files = self.files
 if filename != '<string>' and not files.has_key(filename):
 files[filename] = map(string.rstrip,
 open(filename).readlines())
 # If you want to see filenames (the original behaviour), try:
 # modulename = filename
 # or, prettier but confusing when several files have the
 # same name
 # modulename = os.path.basename(filename)
 if files[filename] != None:
 print '%s(%d): %s' % (os.path.basename(filename), lineno,
 files[filename][lineno-1])
 else:
 print '%s(%d): ??' % (modulename, lineno)
 return self.trace
def _err_exit(msg):
 print >> sys.stderr, "%s: %s" % (sys.argv[0], msg)
 sys.exit(1)
def main(argv = None):
 import getopt
 if argv is None:
 argv = sys.argv
 try:
 opts, prog_argv = getopt.getopt(argv[1:], "tcrRf:d:msC:",
 ["help", "version", "trace", "count",
 "report", "no-report",
 "file=", "logdir=", "missing",
 "ignore-module=", "ignore-dir=",
 "coverdir="])
 except getopt.error, msg:
 print >> sys.stderr, "%s: %s" % (sys.argv[0], msg)
 print >> sys.stderr, "Try `%s --help' for more information" \
 % sys.argv[0]
 sys.exit(1)
 trace = 0
 count = 0
 report = 0
 no_report = 0
 counts_file = None
 logdir = "."
 missing = 0
 ignore_modules = []
 ignore_dirs = []
 coverdir = None
 summary = 0
 for opt, val in opts:
 if opt == "--help":
 usage(sys.stdout)
 sys.exit(0)
 if opt == "--version":
 sys.stdout.write("trace 2.0\n")
 sys.exit(0)
 if opt == "-t" or opt == "--trace":
 trace = 1
 continue
 if opt == "-c" or opt == "--count":
 count = 1
 continue
 if opt == "-r" or opt == "--report":
 report = 1
 continue
 if opt == "-R" or opt == "--no-report":
 no_report = 1
 continue
 if opt == "-f" or opt == "--file":
 counts_file = val
 continue
 if opt == "-d" or opt == "--logdir":
 logdir = val
 continue
 if opt == "-m" or opt == "--missing":
 missing = 1
 continue
 if opt == "-C" or opt == "--coverdir":
 coverdir = val
 continue
 if opt == "-s" or opt == "--summary":
 summary = 1
 continue
 if opt == "--ignore-module":
 ignore_modules.append(val)
 continue
 if opt == "--ignore-dir":
 for s in string.split(val, os.pathsep):
 s = os.path.expandvars(s)
 # should I also call expanduser? (after all, could use $HOME)
 s = string.replace(s, "$prefix",
 os.path.join(sys.prefix, "lib",
 "python" + sys.version[:3]))
 s = string.replace(s, "$exec_prefix",
 os.path.join(sys.exec_prefix, "lib",
 "python" + sys.version[:3]))
 s = os.path.normpath(s)
 ignore_dirs.append(s)
 continue
 assert 0, "Should never get here"
 if len(prog_argv) == 0:
 _err_exit("missing name of file to run")
 if count + trace + report > 1:
 _err_exit("can only specify one of --trace, --count or --report")
 if count + trace + report == 0:
 _err_exit("must specify one of --trace, --count or --report")
 if report and counts_file is None:
 _err_exit("--report requires a --file")
 if report and no_report:
 _err_exit("cannot specify both --report and --no-report")
 if logdir is not None:
 # warn if the directory doesn't exist, but keep on going
 # (is this the correct behaviour?)
 if not os.path.isdir(logdir):
 sys.stderr.write(
 "trace: WARNING, --logdir directory %s is not available\n" %
 `logdir`)
 sys.argv = prog_argv
 progname = prog_argv[0]
 if eval(sys.version[:3])>1.3:
 sys.path[0] = os.path.split(progname)[0] # ???
 # everything is ready
 ignore = Ignore(ignore_modules, ignore_dirs)
 if trace:
 t = Trace(ignore)
 try:
 run(t.trace, 'execfile(' + `progname` + ')')
 except IOError, err:
 _err_exit("Cannot run file %s because: %s" % \
 (`sys.argv[0]`, err.strerror))
 elif count:
 t = Coverage(ignore)
 try:
 run(t.trace, 'execfile(' + `progname` + ')')
 except IOError, err:
 _err_exit("Cannot run file %s because: %s" % \
 (`sys.argv[0]`, err.strerror))
 except SystemExit:
 pass
 results = t.results()
 # Add another lookup from the program's file name to its import name
 # This gives the right results, but I'm not sure why ...
 results.modules[progname] = os.path.splitext(progname)[0]
 if counts_file:
 # add in archived data, if available
 try:
 old_counts, old_modules = marshal.load(open(counts_file, 'rb'))
 except IOError:
 pass
 else:
 results.update(CoverageResults(old_counts, old_modules))
 if not no_report:
 create_results_log(results, logdir, missing,
 summary=summary, coverdir=coverdir)
 if counts_file:
 try:
 marshal.dump( (results.counts, results.modules),
 open(counts_file, 'wb'))
 except IOError, err:
 _err_exit("Cannot save counts file %s because: %s" % \
 (`counts_file`, err.strerror))
 elif report:
 old_counts, old_modules = marshal.load(open(counts_file, 'rb'))
 results = CoverageResults(old_counts, old_modules)
 create_results_log(results, logdir, missing,
 summary=summary, coverdir=coverdir)
 else:
 assert 0, "Should never get here"
if __name__=='__main__':
 main()
--- NEW FILE: treesync.py ---
#! /usr/bin/env python
"""Script to synchronize two source trees.
Invoke with two arguments:
python treesync.py slave master
The assumption is that "master" contains CVS administration while
slave doesn't. All files in the slave tree that have a CVS/Entries
entry in the master tree are synchronized. This means:
 If the files differ:
 if the slave file is newer:
 normalize the slave file
 if the files still differ:
 copy the slave to the master
 else (the master is newer):
 copy the master to the slave
 normalizing the slave means replacing CRLF with LF when the master
 doesn't use CRLF
"""
import os, sys, stat, string, getopt
# Interactivity options
default_answer = "ask"
create_files = "yes"
create_directories = "no"
write_slave = "ask"
write_master = "ask"
def main():
 global default_answer, create_files
 global create_directories, write_master, write_slave
 opts, args = getopt.getopt(sys.argv[1:], "nym:s:d:f:a:")
 for o, a in opts:
 if o == '-y':
 default_answer = "yes"
 if o == '-n':
 default_answer = "no"
 if o == '-s':
 write_slave = a
 if o == '-m':
 write_master = a
 if o == '-d':
 create_directories = a
 if o == '-f':
 create_files = a
 if o == '-a':
 create_files = create_directories = write_slave = write_master = a
 try:
 [slave, master] = args
 except ValueError:
 print "usage: python", sys.argv[0] or "treesync.py",
 print "[-n] [-y] [-m y|n|a] [-s y|n|a] [-d y|n|a] [-f n|y|a]",
 print "slavedir masterdir"
 return
 process(slave, master)
def process(slave, master):
 cvsdir = os.path.join(master, "CVS")
 if not os.path.isdir(cvsdir):
 print "skipping master subdirectory", master
 print "-- not under CVS"
 return
 print "-"*40
 print "slave ", slave
 print "master", master
 if not os.path.isdir(slave):
 if not okay("create slave directory %s?" % slave,
 answer=create_directories):
 print "skipping master subdirectory", master
 print "-- no corresponding slave", slave
 return
 print "creating slave directory", slave
 try:
 os.mkdir(slave)
 except os.error, msg:
 print "can't make slave directory", slave, ":", msg
 return
 else:
 print "made slave directory", slave
 cvsdir = None
 subdirs = []
 names = os.listdir(master)
 for name in names:
 mastername = os.path.join(master, name)
 slavename = os.path.join(slave, name)
 if name == "CVS":
 cvsdir = mastername
 else:
 if os.path.isdir(mastername) and not os.path.islink(mastername):
 subdirs.append((slavename, mastername))
 if cvsdir:
 entries = os.path.join(cvsdir, "Entries")
 for e in open(entries).readlines():
 words = string.split(e, '/')
 if words[0] == '' and words[1:]:
 name = words[1]
 s = os.path.join(slave, name)
 m = os.path.join(master, name)
 compare(s, m)
 for (s, m) in subdirs:
 process(s, m)
def compare(slave, master):
 try:
 sf = open(slave, 'r')
 except IOError:
 sf = None
 try:
 mf = open(master, 'rb')
 except IOError:
 mf = None
 if not sf:
 if not mf:
 print "Neither master nor slave exists", master
 return
 print "Creating missing slave", slave
 copy(master, slave, answer=create_files)
 return
 if not mf:
 print "Not updating missing master", master
 return
 if sf and mf:
 if identical(sf, mf):
 return
 sft = mtime(sf)
 mft = mtime(mf)
 if mft > sft:
 # Master is newer -- copy master to slave
 sf.close()
 mf.close()
 print "Master ", master
 print "is newer than slave", slave
 copy(master, slave, answer=write_slave)
 return
 # Slave is newer -- copy slave to master
 print "Slave is", sft-mft, "seconds newer than master"
 # But first check what to do about CRLF
 mf.seek(0)
 fun = funnychars(mf)
 mf.close()
 sf.close()
 if fun:
 print "***UPDATING MASTER (BINARY COPY)***"
 copy(slave, master, "rb", answer=write_master)
 else:
 print "***UPDATING MASTER***"
 copy(slave, master, "r", answer=write_master)
BUFSIZE = 16*1024
def identical(sf, mf):
 while 1:
 sd = sf.read(BUFSIZE)
 md = mf.read(BUFSIZE)
 if sd != md: return 0
 if not sd: break
 return 1
def mtime(f):
 st = os.fstat(f.fileno())
 return st[stat.ST_MTIME]
def funnychars(f):
 while 1:
 buf = f.read(BUFSIZE)
 if not buf: break
 if '\r' in buf or '\0' in buf: return 1
 return 0
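# funnychars() returning 1 means the master holds a CR or NUL byte, i.e.
# it is CRLF-delimited or binary, so compare() copies the slave verbatim
# ("rb") rather than letting text-mode reading normalize its line ends.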
def copy(src, dst, rmode="rb", wmode="wb", answer='ask'):
 print "copying", src
 print " to", dst
 if not okay("okay to copy? ", answer):
 return
 f = open(src, rmode)
 g = open(dst, wmode)
 while 1:
 buf = f.read(BUFSIZE)
 if not buf: break
 g.write(buf)
 f.close()
 g.close()
def okay(prompt, answer='ask'):
 answer = string.lower(string.strip(answer))
 if not answer or answer[0] not in 'ny':
 answer = raw_input(prompt)
 answer = string.lower(string.strip(answer))
 if not answer:
 answer = default_answer
 if answer[:1] == 'y':
 return 1
 if answer[:1] == 'n':
 return 0
 print "Yes or No please -- try again:"
 return okay(prompt)
main()
--- NEW FILE: untabify.py ---
#! /usr/bin/env python
"Replace tabs with spaces in argument files. Print names of changed files."
import os
import sys
import string
import getopt
def main():
 tabsize = 8
 try:
 opts, args = getopt.getopt(sys.argv[1:], "t:")
 if not args:
 raise getopt.error, "At least one file argument required"
 except getopt.error, msg:
 print msg
 print "usage:", sys.argv[0], "[-t tabwidth] file ..."
 return
 for optname, optvalue in opts:
 if optname == '-t':
 tabsize = int(optvalue)
 for file in args:
 process(file, tabsize)
def process(file, tabsize):
 try:
 f = open(file)
 text = f.read()
 f.close()
 except IOError, msg:
 print "%s: I/O error: %s" % (`file`, str(msg))
 return
 newtext = string.expandtabs(text, tabsize)
 if newtext == text:
 return
 backup = file + "~"
 try:
 os.unlink(backup)
 except os.error:
 pass
 try:
 os.rename(file, backup)
 except os.error:
 pass
 f = open(file, "w")
 f.write(newtext)
 f.close()
 print file
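# The transformation itself is just string.expandtabs: each tab is padded
# out to the next multiple of tabsize columns. E.g. with tabsize 8,
# "a\tbc\td" becomes "a" + 7 spaces + "bc" + 6 spaces + "d".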
if __name__ == '__main__':
 main()
--- NEW FILE: which.py ---
#! /usr/bin/env python
# Variant of "which".
# On stderr, near and total misses are reported.
# '-l<flags>' argument adds ls -l<flags> of each file found.
import sys
if sys.path[0] in (".", ""): del sys.path[0]
import sys, os, string
from stat import *
def msg(str):
 sys.stderr.write(str + '\n')
pathlist = string.splitfields(os.environ['PATH'], ':')
sts = 0
longlist = ''
if sys.argv[1:] and sys.argv[1][:2] == '-l':
 longlist = sys.argv[1]
 del sys.argv[1]
for prog in sys.argv[1:]:
 ident = ()
 for dir in pathlist:
 file = os.path.join(dir, prog)
 try:
 st = os.stat(file)
 except os.error:
 continue
 if not S_ISREG(st[ST_MODE]):
 msg(file + ': not a disk file')
 else:
 mode = S_IMODE(st[ST_MODE])
 if mode & 0111:
 if not ident:
 print file
 ident = st[:3]
 else:
 if st[:3] == ident:
 s = 'same as: '
 else:
 s = 'also: '
 msg(s + file)
 else:
 msg(file + ': not executable')
 if longlist:
 sts = os.system('ls ' + longlist + ' ' + file)
 if sts: msg('"ls -l" exit status: ' + `sts`)
 if not ident:
 msg(prog + ': not found')
 sts = 1
sys.exit(sts)
--- NEW FILE: xxci.py ---
#! /usr/bin/env python
# xxci
#
# check in files for which rcsdiff returns nonzero exit status
import sys
import os
from stat import *
import commands
import fnmatch
import string
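# Magic prefix used by skipfile() to recognize binary executables --
# presumably the first four bytes of an executable on the platform this
# was written for.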
EXECMAGIC = '\001\140\000\010'
MAXSIZE = 200*1024 # Files this big must be binaries and are skipped.
def getargs():
 args = sys.argv[1:]
 if args:
 return args
 print 'No arguments, checking almost *, in "ls -t" order'
 list = []
 for file in os.listdir(os.curdir):
 if not skipfile(file):
 list.append((getmtime(file), file))
 if not list:
 print 'Nothing to do -- exit 1'
 sys.exit(1)
 list.sort()
 list.reverse()
 for mtime, file in list: args.append(file)
 return args
def getmtime(file):
 try:
 st = os.stat(file)
 return st[ST_MTIME]
 except os.error:
 return -1
badnames = ['tags', 'TAGS', 'xyzzy', 'nohup.out', 'core']
badprefixes = ['.', ',', '@', '#', 'o.']
badsuffixes = \
 ['~', '.a', '.o', '.old', '.bak', '.orig', '.new', '.prev', '.not', \
 '.pyc', '.fdc', '.rgb', '.elc', ',v']
ignore = []
def setup():
 ignore[:] = badnames
 for p in badprefixes:
 ignore.append(p + '*')
 for p in badsuffixes:
 ignore.append('*' + p)
 try:
 f = open('.xxcign', 'r')
 except IOError:
 return
 ignore[:] = ignore + string.split(f.read())
def skipfile(file):
 for p in ignore:
 if fnmatch.fnmatch(file, p): return 1
 try:
 st = os.lstat(file)
 except os.error:
 return 1 # Doesn't exist -- skip it
 # Skip non-plain files.
 if not S_ISREG(st[ST_MODE]): return 1
 # Skip huge files -- probably binaries.
 if st[ST_SIZE] >= MAXSIZE: return 1
 # Skip executables
 try:
 data = open(file, 'r').read(len(EXECMAGIC))
 if data == EXECMAGIC: return 1
 except:
 pass
 return 0
def badprefix(file):
 for bad in badprefixes:
 if file[:len(bad)] == bad: return 1
 return 0
def badsuffix(file):
 for bad in badsuffixes:
 if file[-len(bad):] == bad: return 1
 return 0
def go(args):
 for file in args:
 print file + ':'
 if differing(file):
 showdiffs(file)
 if askyesno('Check in ' + file + ' ? '):
 sts = os.system('rcs -l ' + file) # ignored
 sts = os.system('ci -l ' + file)
def differing(file):
 cmd = 'co -p ' + file + ' 2>/dev/null | cmp -s - ' + file
 sts = os.system(cmd)
 return sts != 0
def showdiffs(file):
 cmd = 'rcsdiff ' + file + ' 2>&1 | ${PAGER-more}'
 sts = os.system(cmd)
def askyesno(prompt):
 s = raw_input(prompt)
 return s in ['y', 'yes']
try:
 setup()
 go(getargs())
except KeyboardInterrupt:
 print '[Intr]'
