4
\$\begingroup\$

I am working on converting a mailing list that has longitude and latitude coordinates within the CSV file. This script I came up with does what I need, but this is my first real-world use of python. I want to know where I am making any mistakes, not using best-practices, and what can be optimized to make it faster.

The input.csv file has the following header:

"Email Address",MEMBER_RATING,OPTIN_TIME,OPTIN_IP,CONFIRM_TIME,CONFIRM_IP,LATITUDE,LONGITUDE,GMTOFF,DSTOFF,TIMEZONE,CC,REGION,LAST_CHANGED,LEID,EUID

And the script:

import sys
import os
import csv
import signal
import json
import urllib
import urllib2
import sqlite3
import codecs
import cStringIO
csv.field_size_limit(sys.maxsize)
class EmailList:
 WEB_SERVICE_URL = 'http://open.mapquestapi.com/nominatim/v1/reverse.php?format=json'
 def __init__(self, inputFile):
 signal.signal(signal.SIGINT, self.signal_handler)
 self.conn = None
 self.initialize_database(inputFile)
 self.convert_rows()
 self.db_to_csv()
 def signal_handler(self, signal, frame):
 try:
 self.conn.commit()
 self.conn.close()
 print '[DB changes committed and connection closed.]'
 except sqlite3.ProgrammingError as e:
 print '[script stopped]'
 print e.message
 sys.exit(0)
 def initialize_database(self, file):
 print 'checking for data.db...'
 if not os.path.isfile('data.db'):
 print 'data.db does not exist, converting csv to sqlite...'
 with open(file) as inputFile:
 reader = UnicodeReader(inputFile)
 header = reader.next()
 if self.conn is None:
 self.conn = sqlite3.connect('data.db')
 c = self.conn.cursor()
 c.execute("DROP TABLE IF EXISTS email_list")
 sql = """CREATE TABLE email_list (\n""" + \
 ",\n".join([("%s varchar" % name) for name in header]) \
 + ")"
 c.execute(sql)
 for line in reader:
 if line:
 try:
 c.execute('INSERT INTO email_list VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', line)
 except sqlite3.ProgrammingError as e:
 print e.message
 print line
 try:
 c.execute('ALTER TABLE email_list ADD COLUMN CITY varchar')
 c.execute('ALTER TABLE email_list ADD COLUMN STATE varchar')
 c.execute('ALTER TABLE email_list ADD COLUMN COUNTRY varchar')
 c.execute('ALTER TABLE email_list ADD COLUMN POSTAL_CODE varchar')
 except sqlite3.OperationalError as e:
 print 'error creating new columns: '
 print e.message
 self.conn.commit()
 self.conn.close()
 print 'converted csv to sqlite, stored in data.db'
 def convert_rows(self):
 print 'converting coordinates...'
 self.conn = sqlite3.connect('data.db')
 c = self.conn.cursor()
 results = c.execute('SELECT LATITUDE AS lat, LONGITUDE AS lon, Email as email FROM email_list WHERE POSTAL_CODE IS NULL AND CITY IS NULL AND COUNTRY IS NULL AND STATE IS NULL')
 rows = []
 for row in results:
 lat, lon, email = row
 data = {'lat': lat, 'lon': lon, 'email': email}
 rows.append(data)
 self.conn.commit()
 self.conn.close()
 for item in rows:
 try:
 converted = self.convert_coordinates(item['lat'], item['lon'])
 print str(rows.index(item) + 1) + '/' + str(len(rows))
 except TypeError:
 converted['city': '']
 converted['state': '']
 converted['country': '']
 converted['postal_code': '']
 self.conn = sqlite3.connect('data.db')
 c = self.conn.cursor()
 try:
 c.execute('UPDATE email_list SET CITY=?, STATE=?, COUNTRY=?, POSTAL_CODE=? WHERE Email=?', [converted['city'], converted['state'], converted['country'], converted['postal_code'], item['email']])
 except KeyboardInterrupt:
 print 'user quit'
 self.conn.commit()
 self.conn.close()
 print 'converted coordinates.'
 def convert_coordinates(self, lat, lon):
 if lat and long:
 try:
 values = {'lat': lat, 'lon': lon}
 data = urllib.urlencode(values)
 request = urllib2.Request(self.WEB_SERVICE_URL, data)
 response = urllib2.urlopen(request)
 except urllib2.HTTPError as e:
 print 'error loading web service'
 print e.message
 json_result = json.load(response)
 try:
 city = json_result['address']['city']
 except KeyError:
 city = ''
 try:
 state = json_result['address']['state']
 except KeyError:
 state = ''
 try:
 cc = json_result['address']['country_code']
 except KeyError:
 cc = ''
 try:
 postal_code = json_result['address']['postcode']
 except KeyError:
 postal_code = ''
 else:
 city = ''
 state = ''
 cc = ''
 postal_code = ''
 return {'city': city, 'state': state, 'country': cc, 'postal_code': postal_code}
 def db_to_csv(self):
 print 'beginning write to csv...'
 self.conn = sqlite3.connect('data.db')
 c = self.conn.cursor()
 c.execute('SELECT * FROM email_list')
 with open('output.csv', 'wb') as outputFile:
 writer = UnicodeWriter(outputFile)
 writer.writerow([i[0] for i in c.description])
 writer.writerows(c)
 print 'write finished.'
 self.conn.commit()
 self.conn.close()
 print 'done.'
# The CSV module has issues with reading/writing unicode,
# the following classes were taken from docs.python.org to
# help with that: http://docs.python.org/2/library/csv.html
class UTF8Recoder:
 """
 Iterator that reads an encoded stream and reencodes the input to UTF-8
 """
 def __init__(self, f, encoding):
 self.reader = codecs.getreader(encoding)(f)
 def __iter__(self):
 return self
 def next(self):
 return self.reader.next().encode("utf-8")
class UnicodeReader:
 """
 A CSV reader which will iterate over lines in the CSV file "f",
 which is encoded in the given encoding.
 """
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
 f = UTF8Recoder(f, encoding)
 self.reader = csv.reader(f, dialect=dialect, **kwds)
 def next(self):
 row = self.reader.next()
 return [unicode(s, "utf-8") for s in row]
 def __iter__(self):
 return self
class UnicodeWriter:
 """
 A CSV writer which will write rows to CSV file "f",
 which is encoded in the given encoding.
 """
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
 # Redirect output to a queue
 self.queue = cStringIO.StringIO()
 self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
 self.stream = f
 self.encoder = codecs.getincrementalencoder(encoding)()
 def writerow(self, row):
 self.writer.writerow([s.encode("utf-8") for s in row])
 # Fetch UTF-8 output from the queue ...
 data = self.queue.getvalue()
 data = data.decode("utf-8")
 # ... and reencode it into the target encoding
 data = self.encoder.encode(data)
 # write to the target stream
 self.stream.write(data)
 # empty queue
 self.queue.truncate(0)
 def writerows(self, rows):
 for row in rows:
 self.writerow(row)
EmailList('input.csv')
Jamal
35.2k13 gold badges134 silver badges238 bronze badges
asked Oct 7, 2013 at 23:38
\$\endgroup\$
1
  • \$\begingroup\$ I like how you did not try to implement CSV parsing yourself. \$\endgroup\$ Commented Oct 8, 2013 at 2:52

1 Answer 1

2
\$\begingroup\$

Two things that pop out for me immediately:

  • You named a parameter "file", which is a built-in function in python.
  • In one of your except blocks, you incorrectly use a dictionary.

See below:

converted['city': '']
converted['state': '']
converted['country': '']
converted['postal_code': '']

Note, what that will do is attempt to use the slice operator on the dictionary. I just tried it now and it fails with a TypeError exception. What you want the code to look like is this:

converted['city'] = ''
converted['state'] = ''
converted['country'] = ''
converted['postal_code'] = ''

Another thing I've noticed. Dealing with dictionaries, you're not quite sure how to handle getting "optional" parameters from them. Have a look at the refactored code I wrote for your json dict handling:

city = json_result['address'].get('city', '')
state = json_result['address'].get('state', '')
cc = json_result['address'].get('country_code', '')
postal_code = json_result['address'].get('postcode', '')

The get method on dictionary objects takes an optional second parameter that specifies a default value to return if the key is not found.

answered Nov 19, 2013 at 20:50
\$\endgroup\$
4
  • \$\begingroup\$ I don’t think file is a keyword in Python. It’s still a bad variable name, but not as bad as, say, list. \$\endgroup\$ Commented Apr 9, 2014 at 10:37
  • \$\begingroup\$ @alexwlchan Have a look here for the built-ins: docs.python.org/2/library/functions.html#file \$\endgroup\$ Commented Apr 9, 2014 at 14:11
  • \$\begingroup\$ Ah, sorry, I was taking "reserved keyword" a bit too literally, and thought you meant on of the keyword keywords. \$\endgroup\$ Commented Apr 9, 2014 at 14:12
  • \$\begingroup\$ @alexwlchan Np, and thanks... I've edited the answer to clarify a little better. \$\endgroup\$ Commented Apr 9, 2014 at 14:30

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.