# CSV.py

#
# CSV 0.17 8 June 1999 Copyright (c) Laurence Tratt 1998 - 1999
# e-mail: tratt@dcs.kcl.ac.uk
# home-page: http://eh.org/~laurie/comp/python/csv/index.html
#
#
#
# CSV.py is copyright (c) 1998 - 1999 by Laurence Tratt
#
# All rights reserved
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted, provided that
# the above copyright notice appear in all copies and that both that copyright
# notice and this permission notice appear in supporting documentation.
#
# THE AUTHOR - LAURENCE TRATT - DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
# NO EVENT SHALL THE AUTHOR FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR
# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
import re, string, types, UserList
###################################################################################################
#
# CSV class
#
class CSV(UserList.UserList):
    """ Manage a CSV (comma separated values) file.

    The entries are held in the list 'self.data'; each entry is normally an
    'Entry' instance created by input().

    Attributes:
      separator           : default field delimiter
      fields__title__have : true if the data has a row of field titles
      fields__title       : list of field titles (shared with the entries
                            created by input())

    Methods:
      __init__()
      load()                  load from file
      save()                  save to file
      input()                 input from string
      output()                save to string
      append()                appends one entry
      field__append()         appends a computed field to every entry
      duplicates__eliminate() sorts the entries and removes duplicates
      __str__()               printable representation
    """

    def __init__(self, separator = ','):
        """ Initialise CSV class instance.

        Arguments:
          separator : The field delimiter. Defaults to ','
        """
        UserList.UserList.__init__(self)
        self.separator = separator
        self.data = []
        self.fields__title__have = self.fields__title = None

    def load(self, file__data__name, fields__title__have, convert_numbers = 0, separator = None, comments = None):
        """ Load up a CSV file.

        Arguments:
          file__data__name    : The name of the CSV file
          fields__title__have : 0         : file has no title fields
                                otherwise : file has title fields
          convert_numbers     : 0         : store everything as strings
                                otherwise : store fields that can be converted
                                            to ints or floats as that Python
                                            type
                                defaults to 0
          separator           : The field delimiter (optional)
          comments            : A list of strings and regular expressions to
                                remove comments (defaults to ["#"])
        """
        file__data = open(file__data__name, 'r')
        # Close the file even if reading fails
        try:
            contents = file__data.read()
        finally:
            file__data.close()
        self.input(contents, fields__title__have, convert_numbers, separator or self.separator, comments or ["#"])

    def save(self, file__data__name, separator = None):
        """ Save data to CSV file.

        Arguments:
          file__data__name : The name of the CSV file to save to
          separator        : The field delimiter (optional)
        """
        # Build the text before opening the file, so that a failure in output()
        # does not leave a truncated file behind
        contents = self.output(separator or self.separator)
        file__data = open(file__data__name, 'w')
        try:
            file__data.write(contents)
        finally:
            file__data.close()

    def _field__convert(self, field):
        """ Return 'field' as an int or float if it looks like a number,
            otherwise return it unchanged. """
        for char in field:
            if char not in "0123456789.-":
                return field
        # Only digits, '.' and '-' are present, but the text may still be
        # malformed (e.g. "-", "1.2.3" or ""), in which case it stays a string
        try:
            if "." not in field:
                return int(field)
            return float(field)
        except ValueError:
            return field

    def line__process(self, line, convert_numbers, separator):
        """ Split one line of CSV text into a list of fields.

        Unquoted spaces at the start and end of a field are dropped. Speech
        marks (") switch quoting on and off and are not part of the field;
        separators and spaces inside speech marks are kept. A line that ends
        with a separator yields a final empty field.

        Arguments:
          line            : The line of text (without line terminator)
          convert_numbers : 0         : every field is returned as a string
                            otherwise : fields made only of digits, '.' and
                                        '-' are converted to int or float
                                        where possible
          separator       : The field delimiter
        Returns:
          List of fields
        Raises:
          ValueError if separator is empty
        """
        if not separator:
            raise ValueError("separator must not be empty")
        fields = []
        length = len(line)
        step = len(separator)
        pos = 0
        while pos < length:
            # Skip any space at the beginning of the field (if there should be
            # leading space, it should be inside speech marks)
            while pos < length and line[pos] == " ":
                pos = pos + 1
            chars = []
            in_quotes = 0
            hit_separator = 0
            while pos < length:
                if not in_quotes and line[pos] == " ":
                    # Look past a run of spaces: if it leads to the end of the
                    # line or to a separator it is trailing space and is dropped
                    ahead = pos
                    while ahead < length and line[ahead] == " ":
                        ahead = ahead + 1
                    if ahead >= length:
                        pos = length
                        break
                    if line[ahead : ahead + step] == separator:
                        pos = ahead
                if not in_quotes and line[pos : pos + step] == separator:
                    hit_separator = 1
                    pos = pos + step
                    break
                if line[pos] == "\"":
                    in_quotes = not in_quotes
                else:
                    chars.append(line[pos])
                pos = pos + 1
            field = "".join(chars)
            if convert_numbers:
                field = self._field__convert(field)
            fields.append(field)
            # A separator right at the end of the line is followed by one more
            # (empty) field
            if hit_separator and pos >= length:
                fields.append("")
        return fields

    def input(self, data, fields__title__have, convert_numbers = 0, separator = None, comments = None):
        """ Take wodge of CSV data & convert it into internal format.

        Each line is stripped of surrounding whitespace. A line ending in a
        backslash is joined to the following line with a newline character.
        Lines which start with one of the comment strings, and empty lines,
        produce no entry.

        Arguments:
          data                : A string containing the CSV data
          fields__title__have : 0         : data has no title fields
                                otherwise : first remaining line holds the
                                            field titles
          convert_numbers     : 0         : store everything as strings
                                otherwise : store fields that can be converted
                                            to ints or floats as that Python
                                            type
                                defaults to 0
          separator           : The field delimiter (optional)
          comments            : A list of strings and regular expressions to
                                remove comments (defaults to ["#"]). Strings
                                mark whole-line comments; objects with a sub()
                                method (compiled regular expressions) have
                                every match removed from the data.
        Raises:
          TypeError if an item of comments is neither a string nor a regular
          expression
        """
        separator = separator or self.separator
        comments = comments or ["#"]
        self.fields__title__have = fields__title__have

        # Regular expression comments are removed from the data straight away;
        # string comments are collected and applied line by line further down
        comments__strings = []
        for comment in comments:
            if isinstance(comment, str):
                comments__strings.append(comment)
            elif hasattr(comment, "sub"):
                data = comment.sub("", data)
            else:
                raise TypeError("Invalid comment type '" + str(comment) + "'")

        # Join long lines split by backslashes
        lines = []
        pending = ''
        for raw in data.split("\n"):
            piece = raw.strip()
            if piece[-1:] == '\\':
                pending = pending + piece[:-1] + '\n'
            else:
                lines.append(pending + piece)
                pending = ''
        if pending:
            # The data ended in the middle of a continuation; keep what was
            # read, exactly as if an empty line had followed
            lines.append(pending)

        # Remove all comments that are of type string
        kept = []
        for line in lines:
            body = line.lstrip(" ")
            is_comment = 0
            for comment in comments__strings:
                if comment and body.startswith(comment):
                    is_comment = 1
                    break
            if not is_comment:
                kept.append(line)
        lines = kept

        # Process the input data
        if fields__title__have and lines:
            self.fields__title = self.line__process(lines[0], convert_numbers, separator)
            pos__start = 1
        else:
            self.fields__title = []
            pos__start = 0
        self.data = []
        for line in lines[pos__start : ]:
            if line != "":
                # Every entry shares the one title list, so that a later change
                # to the titles is seen by all of them
                self.data.append(Entry(self.line__process(line, convert_numbers, separator), self.fields__title))

    def _line__make(self, entry, separator):
        """ Return the CSV text (without line terminator) for one entry. """
        parts = []
        for field in entry:
            if not isinstance(field, str):
                field = repr(field)
            # Fields that would not survive being read back unquoted (they hold
            # a separator or newline, or start or end with a space) go inside
            # speech marks
            if len(field) > 0 and (field.find(separator) != -1 or field.find('\n') != -1 or field[0] == " " or field[-1] == " "):
                # Newlines are written as backslash + newline, which input()
                # joins back together
                field = "\"" + field.replace('\n', '\\\n') + "\""
            parts.append(field)
        return separator.join(parts)

    def output(self, separator = None):
        """ Convert internal data into CSV string.

        If there are title fields they are written first, followed by an empty
        line.

        Arguments:
          separator : The field delimiter (optional)
        Returns:
          String containing CSV data
        """
        separator = separator or self.separator
        if self.fields__title__have:
            text = self._line__make(self.fields__title or [], separator) + "\n\n"
        else:
            text = ""
        rows = []
        for entry in self.data:
            rows.append(self._line__make(entry, separator))
        return text + "\n".join(rows) + "\n"

    def append(self, entry):
        """ Add an entry.

        If this instance has field titles, the entry is given the same title
        list.
        """
        if self.fields__title:
            entry.fields__title = self.fields__title
        self.data.append(entry)

    def field__append(self, func, field__title = None):
        """ Append a field with values specified by a function.

        Arguments:
          func         : Function to be called func(entry) to get the value of
                         the new field
          field__title : Name of new field (if applicable)
        """
        for entry in self.data:
            entry.append(func(entry))
        if self.fields__title__have and self.fields__title is not None:
            self.fields__title.append(field__title)

    def duplicates__eliminate(self):
        """ Eliminate duplicates (this may result in a reordering of the entries) """
        # Sorting puts equal entries next to each other, so one pass comparing
        # each entry with the last one kept is enough
        self.sort()
        unique = []
        for entry in self.data:
            if unique and entry == unique[-1]:
                continue
            unique.append(entry)
        # Update the existing list in place rather than rebinding self.data
        self.data[:] = unique

    def _cell__text(self, value):
        """ Return the text shown for one value by __str__(). """
        if isinstance(value, str):
            return value
        return repr(value)

    def _row__format(self, row, columns__width):
        """ Return one row with each value padded to its column width. """
        cells = []
        for column, value in enumerate(row):
            cells.append(self._cell__text(value).ljust(columns__width[column]))
        return " ".join(cells)

    def __str__(self):
        """ Construct a printable representation of the internal data.

        The title fields (if any) are followed by an empty line, then there is
        one line per entry. Columns are padded with spaces to a common width.
        """
        if self.fields__title__have:
            titles = list(self.fields__title or [])
        else:
            titles = []
        rows = []
        for entry in self.data:
            rows.append(list(entry))

        # Work out the maximum width of each column. The width is taken from
        # repr(), so columns of strings get two spare characters; this keeps
        # the layout this method has always produced.
        columns__width = []
        for row in [titles] + rows:
            for column, value in enumerate(row):
                width = len(repr(value))
                if column >= len(columns__width):
                    columns__width.append(width)
                elif width > columns__width[column]:
                    columns__width[column] = width

        if self.fields__title__have:
            text = self._row__format(titles, columns__width) + "\n\n"
        else:
            text = ""
        for row in rows:
            text = text + self._row__format(row, columns__width) + "\n"
        return text
###################################################################################################
#
# CSV data entry class
#
#
class Entry(UserList.UserList):
    """ CSV data entry, UserList subclass.

    Has the same properties as a list, but has a few dictionary
    like properties for easy access of fields if they have titles:
    an item can be read, set or deleted either by position (an integer
    or a slice) or by the title of its field.

    Methods(Override):
      __init__
      __getitem__
      __setitem__
      __delitem__
      __str__
    """

    def __init__(self, fields, fields__title = None):
        """ Initialise with fields data and field title.

        Arguments:
          fields        : a list containing the data for each field
                          of this entry
          fields__title : a list with the titles of each field
                          (an empty list means there are no titles)
        """
        # Both lists are kept by reference, not copied
        self.data = fields
        if fields__title is not None:
            self.fields__title = fields__title
        else:
            self.fields__title = []

    def _index(self, x):
        """ Return the key to use on self.data for 'x'.

        Integers and slices are positions and are returned unchanged; anything
        else is looked up in the field titles (ValueError if it is not there).
        """
        if isinstance(x, int) or isinstance(x, slice):
            return x
        return self.fields__title.index(x)

    def __getitem__(self, x):
        """ Return the field at position 'x' or with title 'x'. """
        return self.data[self._index(x)]

    def __setitem__(self, x, item):
        """ Set the field at position 'x' or with title 'x' to 'item'. """
        self.data[self._index(x)] = item

    def __delitem__(self, x):
        """ Delete the field at position 'x' or with title 'x'.

        Only the data is removed; the list of titles is left as it is.
        """
        del self.data[self._index(x)]

    def __str__(self):
        """ Return the printable form of the list of fields. """
        return repr(self.data)