I'm writing an app with Flask that includes a feature to upload large files to S3, and I made a class to handle this.
I have some targets in writing code:
Code must be easy to understand and maintain. (Yes, everyone wants this, so do I)
Logs should be as detailed as possible, especially when working with S3. I want detailed logs when something goes wrong.
File type (mime) must be detected and written to file object on S3.
Large files will be uploaded in multiple parts.
Basically, it's working for my case but I want to hear your advice/comments about the way I'm doing, especially in some points: logging, exception handling, docstring, function/variables naming, everything you see it's not pythonic way.
Edit1: I have one accepted answer, but others are welcome :)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import os

import boto3 as boto
import botocore
import magic
from flask import abort
class S3(object):
    """Upload, probe and download objects in a fixed S3 bucket.

    Thin wrapper around boto3 with detailed logging.  Small files go
    through :meth:`upload` (single PUT); large files go through
    :meth:`upload_multipart`.  The MIME type is detected from the file
    header and stored on the S3 object.
    """

    def __init__(self):
        # NOTE(review): bucket name is hard-coded; consider making it
        # a constructor parameter or reading it from app config.
        self.bucket = 'my_bucket'
        self.conn = boto.resource('s3')
        self.client = boto.client('s3')
        self.logger = logging.getLogger(
            __name__ + '.' + self.__class__.__name__)

    @staticmethod
    def mime(fheader):
        """Detect the MIME type by inspecting the file's leading bytes.

        fheader: first bytes of the file (1024 bytes is plenty for libmagic).
        Returns the MIME type string, e.g. 'image/png'.
        """
        return magic.from_buffer(fheader, mime=True)

    def upload(self, path, req):
        """Upload a file to S3 in a single PUT.  Use for small files only.

        path: object key on S3.
        req: file-like object holding the data (e.g. request.files['file']).
        Returns True on success, False on failure.
        """
        fheader = req.read(1024)
        mime = self.mime(fheader)
        # mime() consumed the first 1024 bytes from the stream, so they
        # must be prepended back to form the full body.
        body = fheader + req.read()
        disposition = 'attachment; filename="{}"'.format(
            os.path.basename(path))
        try:
            self.logger.info('Trying to upload %s', path)
            resp = self.conn.Object(self.bucket, path).put(
                Body=body,
                ContentType=mime,
                ContentDisposition=disposition
            )
            if resp['ResponseMetadata']['HTTPStatusCode'] == 200:
                self.logger.info('%s uploaded to S3', path)
                return True
            self.logger.error('Failed to upload %s to S3. Detail: \n%s ',
                              path, resp)
            return False
        except botocore.exceptions.ClientError:
            self.logger.critical('S3 connection error', exc_info=True)
            # Bug fix: the original fell off the end here and returned
            # None; return False for a consistent boolean contract.
            return False

    def upload_multipart(self, path, req, psize=1024 * 1024 * 5):
        """Upload a file to S3 as a multipart upload.

        path: object key on S3.
        req: file-like object holding the data.
        psize: size of each part in bytes.  Defaults to 5 MB, the minimum
               S3 accepts for every part except the last.
        Returns True on success, False on failure.  On failure the
        multipart upload is aborted so no orphaned parts keep accruing
        storage charges on S3.
        """
        # Only the first 1024 bytes are needed for mime().
        fheader = req.read(1024)
        mime = self.mime(fheader)
        disposition = 'attachment; filename="{}"'.format(
            os.path.basename(path))
        # Bug fix: mpu must exist before the try block, otherwise the
        # except clause raises NameError when create_multipart_upload
        # itself is what failed.
        mpu = None
        try:
            self.logger.debug('Initializing multipart upload for %s', path)
            mpu = self.client.create_multipart_upload(
                Bucket=self.bucket,
                Key=path,
                ContentType=mime,
                ContentDisposition=disposition
            )
            self.logger.debug('Initialization of %s success with info: %s',
                              path, mpu)
            part = 0
            part_info = {'Parts': []}
            self.logger.debug('Start uploading parts to %s', path)
            while True:
                body = req.read(psize)
                if part == 0:
                    # Prepend the header bytes consumed by mime().  Doing
                    # this BEFORE the emptiness check also fixes files
                    # smaller than 1024 bytes, which the original dropped
                    # (read() returned b'' so no part was ever uploaded).
                    body = fheader + body
                if not body:
                    break
                part += 1
                self.logger.debug('Uploading part no %s of %s', part, path)
                resp = self.client.upload_part(
                    Bucket=self.bucket,
                    Body=body,
                    Key=path,
                    PartNumber=part,
                    UploadId=mpu['UploadId']
                )
                self.logger.debug('Part %s of %s uploaded.', part, path)
                part_info['Parts'].append({
                    'ETag': resp['ETag'],
                    'PartNumber': part
                })
                self.logger.debug('Part %s of %s info: %s',
                                  part, path, part_info)
            self.logger.info(
                'Multipart upload %s finished. Start completing...', path)
            self.client.complete_multipart_upload(
                Bucket=self.bucket,
                Key=path,
                MultipartUpload=part_info,
                UploadId=mpu['UploadId']
            )
            self.logger.info('Multipart upload completed!')
            return True
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); any failure still aborts the
        # upload and is logged with its traceback.
        except Exception:
            self.logger.error('Failed to upload file %s', path,
                              exc_info=True)
            if mpu:
                self.logger.info('Aborting the upload of %s...', path)
                self.client.abort_multipart_upload(
                    Bucket=self.bucket,
                    Key=path,
                    UploadId=mpu['UploadId'])
                self.logger.info('Upload of %s aborted!', path)
            return False

    def exists(self, path):
        """Check whether an object exists via a HEAD request.

        path: object key to check.
        Returns the object's ResponseMetadata dict if it exists; aborts
        the current Flask request with HTTP 404 if it does not.
        Raises: any non-404 ClientError is re-raised (the original
        silently returned None, hiding errors such as 403 Forbidden).
        """
        try:
            resp = self.client.head_object(Bucket=self.bucket, Key=path)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == '404':
                abort(404)
            raise
        return resp['ResponseMetadata']

    def download(self, path):
        """Return the full content of an object from the bucket.

        This method should be used for development only.
        path: object key to download.
        """
        if self.exists(path):
            self.logger.info('%s downloaded from S3', path)
            # TODO: avoid reading all in once
            return self.conn.Object(self.bucket, path).get()['Body'].read()

    def info(self, path):
        """Return selected metadata of an object as a dict.

        path: object key to inspect.
        Returns {'content_length': ..., 'content_type': ...} when the
        object exists; exists() aborts with 404 when it does not.
        """
        info = dict()
        resp = self.exists(path)
        if resp:
            headers = resp['HTTPHeaders']
            info['content_length'] = headers['content-length']
            info['content_type'] = headers['content-type']
            self.logger.info('Retrieved info of %s from S3.', path)
        return info
-
\$\begingroup\$ Welcome to Code Review! I hope you get some great answers. \$\endgroup\$Phrancis– Phrancis2016年08月20日 06:44:03 +00:00Commented Aug 20, 2016 at 6:44
1 Answer 1
Kind of minor, but you wrote:
while True:
body = req.read(psize)
if body:
...
else:
break
which causes the entirety of ...
to be indented. If you slightly restructure to:
while True:
body = req.read(psize)
if not body: break
...
then the ...
block can be dedented a level, making it easier to read.
-
\$\begingroup\$ Thanks for reviewing my code. Your comment makes sense; it's easier to read. \$\endgroup\$cuongnv23– cuongnv232016年08月21日 06:19:28 +00:00Commented Aug 21, 2016 at 6:19
Explore related questions
See similar questions with these tags.