# CSV.py
#
# CSV 0.17 8 June 1999 Copyright (c) Laurence Tratt 1998 - 1999
# e-mail: tratt@dcs.kcl.ac.uk
# home-page: http://eh.org/~laurie/comp/python/csv/index.html
#
#
#
# CSV.py is copyright (c) 1998 - 1999 by Laurence Tratt
#
# All rights reserved
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted, provided that
# the above copyright notice appear in all copies and that both that copyright
# notice and this permission notice appear in supporting documentation.
#
# THE AUTHOR - LAURENCE TRATT - DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
# NO EVENT SHALL THE AUTHOR FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR
# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
import re, string, types, UserList
###################################################################################################
#
# CSV class
#
class CSV(UserList.UserList):
    """Manage a CSV (comma separated values) file.

    The rows are held in the list ``self.data``; ``input()`` stores each
    row as an ``Entry``.  Other instance attributes:

        separator            default field delimiter
        fields__title__have  true when the data has a title (header) row
        fields__title        list of field titles ([] or None when absent)

    Methods:
        __init__()
        load()                   load from file
        save()                   save to file
        input()                  input from string
        output()                 save to string
        line__process()          split one line into fields
        append()                 appends one entry
        field__append()          appends a computed field to every entry
        duplicates__eliminate()  sort the entries and drop duplicates
        __str__()                printable representation

    Limitation: a double quote only toggles "quoted" mode while parsing and
    is never escaped on output, so a field cannot contain a literal double
    quote.
    """

    def __init__(self, separator=','):
        """Initialise CSV class instance.

        Arguments:
            separator : The field delimiter. Defaults to ','
        """
        self.separator = separator
        self.data = []
        self.fields__title__have = self.fields__title = None

    def load(self, file__data__name, fields__title__have, convert_numbers=0,
             separator=None, comments=None):
        """Load up a CSV file.

        Arguments:
            file__data__name    : The name of the CSV file
            fields__title__have : 0         : file has no title fields
                                  otherwise : file has title fields
            convert_numbers     : 0         : store everything as strings
                                  otherwise : store fields that can be
                                              converted to ints or floats
                                              as that Python type
                                  defaults to 0
            separator           : The field delimiter (optional)
            comments            : A list of strings and regular expressions
                                  to remove comments (defaults to ["#"])
        """
        file__data = open(file__data__name, 'r')
        # Close the file even if reading fails.
        try:
            contents = file__data.read()
        finally:
            file__data.close()
        self.input(contents, fields__title__have, convert_numbers,
                   separator or self.separator, comments or ["#"])

    def save(self, file__data__name, separator=None):
        """Save data to CSV file.

        Arguments:
            file__data__name : The name of the CSV file to save to
            separator        : The field delimiter (optional)
        """
        # Build the text first so a failure in output() does not leave a
        # truncated file behind.
        contents = self.output(separator or self.separator)
        file__data = open(file__data__name, 'w')
        try:
            file__data.write(contents)
        finally:
            file__data.close()

    def _convert_number(self, field):
        """Return `field` as an int or float when it looks numeric, else unchanged."""
        for char in field:
            if char not in "0123456789.-":
                return field
        # Only digits, '.' and '-' remain, but the text may still be
        # malformed (e.g. "", "-", "1.2.3"); such fields stay strings.
        try:
            if "." not in field:
                return int(field)
            return float(field)
        except ValueError:
            return field

    def line__process(self, line, convert_numbers, separator):
        """Split one line of CSV text into a list of fields.

        Unquoted spaces at the start and end of a field are dropped.  A
        double quote toggles quoted mode, inside which spaces and separators
        are kept literally; the quote characters themselves are discarded.
        A line that ends with a separator yields a final empty field.

        Arguments:
            line            : The text of one record
            convert_numbers : if true, fields made up only of the characters
                              "0123456789.-" are converted to int (no '.')
                              or float; fields that fail to convert are
                              kept as strings
            separator       : The field delimiter (must not be empty)

        Returns:
            List of fields; an empty line gives an empty list.

        Raises:
            ValueError : if separator is empty
        """
        if not separator:
            # An empty separator would match at every position without ever
            # advancing, so the scan below would never terminate.
            raise ValueError("separator must be a non-empty string")

        fields = []
        sep_len = len(separator)
        line_len = len(line)
        pos = 0
        ended_on_separator = False
        while pos < line_len:
            # Skip any space at the beginning of the field (if there should
            # be leading space, the field should be quoted in the CSV file).
            while pos < line_len and line[pos] == " ":
                pos += 1
            chars = []
            in_quotes = False
            ended_on_separator = False
            while pos < line_len:
                if not in_quotes and line[pos] == " ":
                    # Look past a run of spaces: it is trailing padding if
                    # it is followed by the end of the line or a separator,
                    # otherwise it is part of the field.
                    look = pos
                    while look < line_len and line[look] == " ":
                        look += 1
                    if look >= line_len:
                        # Consume the whole run so that it is not mistaken
                        # for the start of another (empty) field.
                        pos = line_len
                        break
                    if line[look:look + sep_len] == separator:
                        pos = look
                if not in_quotes and line[pos:pos + sep_len] == separator:
                    ended_on_separator = True
                    pos += sep_len
                    break
                if line[pos] == '"':
                    in_quotes = not in_quotes
                else:
                    chars.append(line[pos])
                pos += 1
            field = "".join(chars)
            if convert_numbers:
                field = self._convert_number(field)
            fields.append(field)
        if ended_on_separator:
            # The line stopped right after a separator: the last field is
            # present but empty.
            fields.append("")
        return fields

    def _is_comment(self, line, prefixes):
        """Return true if `line`, ignoring leading spaces, starts with one of `prefixes`."""
        text = line.lstrip(" ")
        for prefix in prefixes:
            if text.startswith(prefix):
                return True
        return False

    def input(self, data, fields__title__have, convert_numbers=0,
              separator=None, comments=None):
        """Take a wodge of CSV data & convert it into internal format.

        Processing order: regular-expression comments are deleted from the
        whole text; the text is split into lines and each line is stripped
        of surrounding whitespace; a line ending in a backslash is joined to
        the following line with a newline in place of the backslash; lines
        starting with a string comment marker are dropped; the first
        remaining line becomes the titles (if requested) and every other
        non-empty line becomes an Entry in ``self.data``.

        Arguments:
            data                : A string containing the CSV data
            fields__title__have : 0         : data has no title fields
                                  otherwise : data has title fields
            convert_numbers     : 0         : store everything as strings
                                  otherwise : store fields that can be
                                              converted to ints or floats
                                              as that Python type
                                  defaults to 0
            separator           : The field delimiter (optional)
            comments            : A list of strings and compiled regular
                                  expressions to remove comments
                                  (defaults to ["#"])

        Raises:
            TypeError : if an item of `comments` is neither a string nor an
                        object with a sub() method
        """
        separator = separator or self.separator
        comments = comments or ["#"]
        self.fields__title__have = fields__title__have

        # Regular expressions are applied to the raw text straight away;
        # plain strings are collected and matched line by line below.
        comment_prefixes = []
        for comment in comments:
            if isinstance(comment, str):
                comment_prefixes.append(comment)
            elif hasattr(comment, "sub"):
                data = comment.sub("", data)
            else:
                raise TypeError("Invalid comment type '" + repr(comment) + "'")

        # Join long lines that were split with a trailing backslash.
        lines = []
        pending = ""
        for raw_line in data.split("\n"):
            stripped = raw_line.strip()
            if stripped[-1:] == "\\":
                pending = pending + stripped[:-1] + "\n"
            else:
                lines.append(pending + stripped)
                pending = ""
        if pending:
            # The data ended on a continuation line; keep what was read.
            lines.append(pending)

        # Remove all lines that start with a string comment marker.
        lines = [line for line in lines
                 if not self._is_comment(line, comment_prefixes)]

        # Process the input data.
        if fields__title__have:
            if lines:
                self.fields__title = self.line__process(
                    lines[0], convert_numbers, separator)
            else:
                self.fields__title = []
            rows = lines[1:]
        else:
            self.fields__title = []
            rows = lines
        self.data = [
            Entry(self.line__process(line, convert_numbers, separator),
                  self.fields__title)
            for line in rows if line != ""
        ]

    def _line_make(self, entry, separator):
        """Return the CSV text for one sequence of fields."""
        cells = []
        for field in entry:
            if not isinstance(field, str):
                field = repr(field)
            # Quote anything the parser would otherwise alter: an embedded
            # separator or newline, or leading/trailing space.
            if field != "" and (separator in field or "\n" in field
                                or field[0] == " " or field[-1] == " "):
                # Embedded newlines are written as backslash-newline, which
                # input() joins back into one record.
                field = '"' + field.replace("\n", "\\\n") + '"'
            cells.append(field)
        return separator.join(cells)

    def output(self, separator=None):
        """Convert internal data into CSV string.

        The title row (if any) is followed by a blank line; every entry is
        written on its own line and the text ends with a newline.
        Non-string fields are written using repr().

        Arguments:
            separator : The field delimiter (optional)

        Returns:
            String containing CSV data
        """
        separator = separator or self.separator
        rows = [self._line_make(entry, separator) for entry in self.data]
        body = "\n".join(rows) + "\n"
        if self.fields__title__have:
            return self._line_make(self.fields__title, separator) + "\n\n" + body
        return body

    def append(self, entry):
        """Add an entry.

        When field titles are set, the entry's ``fields__title`` attribute
        is pointed at this object's title list.
        """
        if self.fields__title:
            entry.fields__title = self.fields__title
        self.data.append(entry)

    def field__append(self, func, field__title=None):
        """Append a field with values specified by a function.

        Arguments:
            func         : Function to be called func(entry) to get the
                           value of the new field
            field__title : Name of new field (if applicable)
        """
        for entry in self.data:
            entry.append(func(entry))
        if self.fields__title__have:
            self.fields__title.append(field__title)

    def duplicates__eliminate(self):
        """Eliminate duplicates (this may result in a reordering of the entries)."""
        if not self.data:
            return
        # Sorting brings equal entries next to each other, so one pass that
        # compares each entry with the last one kept is enough.
        self.sort()
        unique = [self.data[0]]
        for entry in self.data[1:]:
            if not (entry == unique[-1]):
                unique.append(entry)
        # Update in place so other references to the list stay valid.
        self.data[:] = unique

    def __str__(self):
        """Construct a printable representation of the internal data.

        Columns are left-justified and separated by a single space; the
        title row (if any) is followed by a blank line.  Strings are shown
        as they are and other values with repr().  Returns "" when there is
        neither data nor a title row.
        """
        rows = [list(getattr(entry, "data", entry)) for entry in self.data]
        if self.fields__title__have:
            titles = list(self.fields__title or [])
        else:
            titles = []

        # Work out the maximum width of each column.  The width is measured
        # on repr() of each value, so string columns come out two characters
        # wider than their longest value; this is kept so that the layout
        # of existing output does not change.
        widths = []
        for cells in [titles] + rows:
            for column, cell in enumerate(cells):
                width = len(repr(cell))
                if column == len(widths):
                    widths.append(width)
                elif width > widths[column]:
                    widths[column] = width

        def render(cells):
            texts = []
            for cell, width in zip(cells, widths):
                if not isinstance(cell, str):
                    cell = repr(cell)
                texts.append(cell.ljust(width))
            return " ".join(texts)

        parts = []
        if self.fields__title__have:
            parts.append(render(titles) + "\n\n")
        for cells in rows:
            parts.append(render(cells) + "\n")
        return "".join(parts)
###################################################################################################
#
# CSV data entry class
#
#
class Entry(UserList.UserList):
    """CSV data entry, UserList subclass.

    Has the same properties as a list, but has a few dictionary-like
    properties for easy access of fields if they have titles: an item can
    be read, assigned or deleted either by integer position (or slice) or
    by its field title.

    Methods (override):
        __init__
        __getitem__
        __setitem__
        __delitem__
        __str__
    """

    def __init__(self, fields, fields__title=None):
        """Initialise with fields data and field titles.

        Both lists are stored by reference, not copied.

        Arguments:
            fields        : a list containing the data for each field
                            of this entry
            fields__title : a list with the titles of each field
                            (None or an empty list means there are no titles)
        """
        self.data = fields
        if fields__title is not None:
            self.fields__title = fields__title
        else:
            self.fields__title = []

    def _position(self, x):
        """Translate `x` into something usable as an index into ``self.data``.

        Integers and slices are used as they are; anything else is looked up
        as a field title, which raises ValueError if there is no such title.
        """
        if isinstance(x, (int, slice)):
            return x
        return self.fields__title.index(x)

    def __getitem__(self, x):
        """Return the field at position `x` or with title `x`."""
        return self.data[self._position(x)]

    def __setitem__(self, x, item):
        """Store `item` in the field at position `x` or with title `x`."""
        self.data[self._position(x)] = item

    def __delitem__(self, x):
        """Delete the field at position `x` or with title `x`.

        Only the data is removed; the title list is left untouched.
        """
        del self.data[self._position(x)]

    def __str__(self):
        """Return the repr() of the underlying list of fields."""
        return repr(self.data)