I've written a script using the Django ORM to store 'configuration settings' and interacting with the Google Drive API and another API. The script creates a base project folder and subfolders in Google Drive for a list of records, and posts links back to the source database. I'm a beginner in Python, so I'm not sure if I'm doing things the right way, or if it's completely terrible.
I think there are some areas that could be cleaned up -- for example, how I am creating the 'client' and 'service' objects repeatedly inside specific methods. I wasn't sure if it would work to create the objects and pass them between the methods though. I think I could also use the Google 'batch' and 'partial response' methods to optimize HTTP requests.
Also, I need to get this thing on a periodic task or crontab. I was interested in trying celery, so I initially set up the below jobs, and they were creating the clients, but I don't think it's working anymore in Celery.
I removed some of the script and commented areas to make it more understandable.
# Celery tasks
@periodic_task(run_every=crontab(hour="*", minute="*", day_of_week="*"))
def sync_folders():
configs = Configuration.objects.all()
for config in configs:
user = config.user
client = create_client(user, config)
try:
create_document_folders(config, client)
except Exception as inst:
pass
@periodic_task(run_every=crontab(hour="*", minute="5", day_of_week="*"))
def sync_files():
configs = Configuration.objects.all()
for config in configs:
user = config.user
client = create_client(user, config)
try:
get_updated_files(config, client)
except Exception as inst:
pass
# Rest of post is the script initiated by celery tasks above.
def create_client(user, config):
# Create the QuickBase Client object
s = config
if s is not None:
if s.qb_realm is not None:
if s.qb_realm != 'www':
baseurl = 'https://' + s.qb_realm + '.quickbase.com'
else:
baseurl = 'https://www.quickbase.com'
else:
baseurl = 'https://www.quickbase.com'
client = quickbase.Client(QUICKBASE_USER, QUICKBASE_PASSWORD, base_url=baseurl)
return client
def get_jobs_wo_folders(config, client):
if client is not None:
query = "{'" + config.root_folder_id + "'.EX.''}"
clist = [config.root_folder_id, config.root_folder_name_fid, config.root_folder_user, '3']
records = client.do_query(query, columns=clist, database=config.qb_root_dbid)
return records
def retrieve_specific_files(service, param):
# Search and retrieve a list of file resources passing a params object
result = []
page_token = None
while True:
try:
if page_token:
param['pageToken'] = page_token
files = service.files().list(**param).execute()
result.extend(files['items'])
page_token = files.get('nextPageToken')
if not page_token:
break
except errors.HttpError, error:
print 'An error occurred: %s' % error
break
return result
def get_csv(list):
si = cStringIO.StringIO()
cw = csv.writer(si)
cw.writerows(list)
return si.getvalue().strip('\r\n')
# Google helper functions
def refresh_access_token(user, config):
instance = UserSocialAuth.objects.get(user=user, provider='google-oauth2')
instance.refresh_token()
# Configuration model has a field 'last_refresh_time' that is auto-now date/time
config.save()
return instance.extra_data['access_token']
def get_access_token(config):
try:
access_token = UserSocialAuth.objects.get(user=config.user, provider='google-oauth2').extra_data['access_token']
return access_token
except Exception as inst:
pprint(inst)
def create_drive_service(config):
"""
Creates a drive service using the 'Configuration' object's
related user. Users initial signup is done with django-socialauth;
the access token and refresh token is saved with django-socialauth &
refresh token handling is done with django-socialauth.
"""
c = config
user = config.user
instance = UserSocialAuth.objects.get(user=user, provider='google-oauth2')
refreshed_at = c.token_last_refresh
now = datetime.now()
expires_in = instance.extra_data['expires']
token_age = (now - refreshed_at).seconds
if token_age > (expires_in - 120):
access_token = refresh_access_token(user, config)
else:
access_token = instance.extra_data['access_token']
try:
credentials = AccessTokenCredentials(access_token, 'Python-urllib2/2.7')
http = httplib2.Http()
http = credentials.authorize(http)
return build('drive', 'v2', http=http)
except Exception as e:
pprint(e)
# Are these next lines pointless? What should I do if the tokens consistently fail?
access_token = refresh_access_token(user)
http = httplib2.Http()
http = credentials.authorize(http)
return build('drive', 'v2', http=http)
def insert_permission(service, file_id, value, perm_type, role):
new_permission = {
'value': value,
'type': perm_type,
'role': role,
}
try:
return service.permissions().insert(
fileId=file_id, body=new_permission, sendNotificationEmails=False).execute()
except errors.HttpError, error:
print 'An error occured: %s' % error
return None
def insert_folder(config, title, parent_idNone, writer=None, mime_type='application/vnd.google-apps.folder'):
service = create_drive_service(config)
owner_permission = {
'role': 'owner',
'type': 'user',
'value': 'me',
}
body = {
'title': title,
'mimeType': mime_type,
'userPermission': owner_permission,
'fields': 'items, items/parents',
}
if parent_id:
body['parents'] = [{'id': parent_id}]
try:
folder = service.files().insert(
body=body,
).execute()
insert_permission(service, folder['id'], writer, 'user', 'writer')
return folder
except errors.HttpError, error:
print 'An error occured: %s' % error
return None
def create_document_folders(config, client):
"""
Gets records that do not have Google drive folders,
and loops through the jobs creating the necessary
base project folder and subfolders.
"""
s = config
records = get_jobs_wo_folders(config, client)
"""
A bunch of logic goes below this to loop through the list of records,
create the new folders, and append the new folder metadata to a the 'rows' list
which is later converted to records_csv
"""
if rows:
records_csv = get_csv(rows)
# Create the records in QuickBase
response = client.import_from_csv(records_csv, clist=folders_clist, database=s.folders_dbid)
1 Answer 1
# Celery tasks
@periodic_task(run_every=crontab(hour="*", minute="*", day_of_week="*"))
def sync_folders():
configs = Configuration.objects.all()
for config in configs:
user = config.user
client = create_client(user, config)
I'd do: create_client(config.user, config)
try:
create_document_folders(config, client)
except Exception as inst:
pass
Don't do this. You catch an exception and ignore it. You have no clue what went wrong. You should at the very least print out the exception, or put it in a log, or something so you can figure out what what goes wrong.
@periodic_task(run_every=crontab(hour="*", minute="5", day_of_week="*"))
def sync_files():
configs = Configuration.objects.all()
for config in configs:
user = config.user
client = create_client(user, config)
try:
get_updated_files(config, client)
except Exception as inst:
pass
Pretty the same function again. Refactor them and pass get_updated_files
or create_document_folder
as a parameter to the new function.
# Rest of post is the script initiated by celery tasks above.
def create_client(user, config):
# Create the QuickBase Client object
s = config
Why? Just use config. The only advantage to s is that it's harder to read.
if s is not None:
Do you really want to support config being None?
if s.qb_realm is not None:
if s.qb_realm != 'www':
baseurl = 'https://' + s.qb_realm + '.quickbase.com'
Generally its better to use string formatting than concatenation.
else:
baseurl = 'https://www.quickbase.com'
Why is this a special case? it just does the same thing that the above case would do.
else:
baseurl = 'https://www.quickbase.com'
client = quickbase.Client(QUICKBASE_USER, QUICKBASE_PASSWORD, base_url=baseurl)
return client
Combine those two lines
def get_jobs_wo_folders(config, client):
if client is not None:
Having a function silently fail to do its job given bad input is a bad idea. You should throw an exception. I wouldn't even bother to check if client were None, and just let it fail when it tries to use it.
query = "{'" + config.root_folder_id + "'.EX.''}"
clist = [config.root_folder_id, config.root_folder_name_fid, config.root_folder_user, '3']
records = client.do_query(query, columns=clist, database=config.qb_root_dbid)
return records
def retrieve_specific_files(service, param):
# Search and retrieve a list of file resources passing a params object
result = []
page_token = None
while True:
try:
if page_token:
Use is not None
to check for None, just because a few other random things end up being false and its more explicit this way
param['pageToken'] = page_token
files = service.files().list(**param).execute()
result.extend(files['items'])
page_token = files.get('nextPageToken')
if not page_token:
break
except errors.HttpError, error:
print 'An error occurred: %s' % error
break
Do you really want to try and continue on if there is an error? Shouldn't you just give up this attempt?
return result
def get_csv(list):
avoid list as a variable name since its a builtin python type
si = cStringIO.StringIO()
cw = csv.writer(si)
cw.writerows(list)
return si.getvalue().strip('\r\n')
# Google helper functions
def refresh_access_token(user, config):
instance = UserSocialAuth.objects.get(user=user, provider='google-oauth2')
instance.refresh_token()
# Configuration model has a field 'last_refresh_time' that is auto-now date/time
config.save()
return instance.extra_data['access_token']
def get_access_token(config):
try:
access_token = UserSocialAuth.objects.get(user=config.user, provider='google-oauth2').extra_data['access_token']
You just did something very similiar, this suggests that you should extract a common function
return access_token
except Exception as inst:
pprint(inst)
At least here you print the exception.
def create_drive_service(config):
"""
Creates a drive service using the 'Configuration' object's
related user. Users initial signup is done with django-socialauth;
the access token and refresh token is saved with django-socialauth &
refresh token handling is done with django-socialauth.
"""
c = config
user = config.user
Why do you sometimes use config.user
and other times pass the user as a parameter?
instance = UserSocialAuth.objects.get(user=user, provider='google-oauth2')
avoid generic names like instance
refreshed_at = c.token_last_refresh
now = datetime.now()
expires_in = instance.extra_data['expires']
token_age = (now - refreshed_at).seconds
if token_age > (expires_in - 120):
access_token = refresh_access_token(user, config)
But you've already got the instand, so this does extra work
else:
access_token = instance.extra_data['access_token']
try:
credentials = AccessTokenCredentials(access_token, 'Python-urllib2/2.7')
http = httplib2.Http()
http = credentials.authorize(http)
return build('drive', 'v2', http=http)
except Exception as e:
pprint(e)
# Are these next lines pointless? What should I do if the tokens consistently fail?
access_token = refresh_access_token(user)
You shouldn't need to do this as you've just refreshed the token up there.
http = httplib2.Http()
http = credentials.authorize(http)
return build('drive', 'v2', http=http)
def insert_permission(service, file_id, value, perm_type, role):
new_permission = {
'value': value,
'type': perm_type,
'role': role,
}
try:
return service.permissions().insert(
fileId=file_id, body=new_permission, sendNotificationEmails=False).execute()
except errors.HttpError, error:
print 'An error occured: %s' % error
return None
def insert_folder(config, title, parent_idNone, writer=None, mime_type='application/vnd.google-apps.folder'):
service = create_drive_service(config)
owner_permission = {
'role': 'owner',
'type': 'user',
'value': 'me',
}
body = {
'title': title,
'mimeType': mime_type,
'userPermission': owner_permission,
'fields': 'items, items/parents',
}
if parent_id:
body['parents'] = [{'id': parent_id}]
try:
folder = service.files().insert(
body=body,
).execute()
insert_permission(service, folder['id'], writer, 'user', 'writer')
return folder
except errors.HttpError, error:
print 'An error occured: %s' % error
return None
def create_document_folders(config, client):
"""
Gets records that do not have Google drive folders,
and loops through the jobs creating the necessary
base project folder and subfolders.
"""
s = config
records = get_jobs_wo_folders(config, client)
"""
A bunch of logic goes below this to loop through the list of records,
create the new folders, and append the new folder metadata to a the 'rows' list
which is later converted to records_csv
"""
if rows:
records_csv = get_csv(rows)
# Create the records in QuickBase
response = client.import_from_csv(records_csv, clist=folders_clist, database=s.folders_dbid)
-
\$\begingroup\$ @Wintson Ewert - You are the man. I had to move on to other work earlier, but that really helped me think about the script differently. I was passing user sometimes because I wasn't sure if config.user was working when I was debugging parts of the script. I only had one config in my database, and it had a User, so I guess I should assume config.user will always work. It looks like at the very least, I should do some reading on how to throw exceptions properly, and strip some of the redundancy from the script. Thanks! \$\endgroup\$kevins– kevins2013年02月12日 09:56:15 +00:00Commented Feb 12, 2013 at 9:56
Explore related questions
See similar questions with these tags.