2
\$\begingroup\$

I'm writing an app by Flask with a feature to upload large file to S3 and made a class to handle this.

I have some targets in writing code:

  • Code must be easy to understand and maintain. (Yes, everyone wants this, so do I)

  • Logs should be as detail as possible, especially when working with S3. I want to have detail logs when something go wrong.

  • File type (mime) must be detected and written to file object on S3.

  • Large file will be uploaded by multi parts.

Basically, it's working for my case but I want to hear your advice/comments about the way I'm doing, especially in some points: logging, exception handling, docstring, function/variables naming, everything you see it's not pythonic way.

Edit1: have one aceepted answer but others are welcome :)

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import os
import logging
import magic
import boto3 as boto
import botocore
from Flask import abort
class S3(object):
 '''
 Handle request and write to S3
 '''
 def __init__(self):
 self.bucket = 'my_bucket'
 self.conn = boto.resource('s3')
 self.client = boto.client('s3')
 self.logger = logging.getLogger(__name__ + '.' + self.__class__.__name__)
 @staticmethod
 def mime(fheader):
 '''
 Detect mime type by reading file header.
 fheader: first bytes to read
 '''
 return magic.from_buffer(fheader, mime=True)
 def upload(self, path, req):
 '''
 Directly upload file to s3. Use this for small file size
 path: object path on s3
 req: request object contains file data. (req = request.files['file'])
 '''
 fheader = req.read(1024)
 mime = self.mime(fheader)
 body = fheader + req.read()
 disposition = 'attachment; filename="{}"'.format(os.path.basename(path))
 try:
 self.logger.info('Trying to upload {}'.format(path))
 resp = self.conn.Object(self.bucket, path).put(
 Body=body,
 ContentType=mime,
 ContentDisposition=disposition
 )
 if resp['ResponseMetadata']['HTTPStatusCode'] == 200:
 self.logger.info('{} uploaded to S3'.format(path))
 return True
 else:
 self.logger.error('Failed to upload {} to S3. Detail: \n{} '.format(path, resp))
 return False
 except botocore.exceptions.ClientError:
 self.logger.critical('S3 connection error', exc_info=True)
 def upload_multipart(self, path, req, psize=1024*1024*5):
 '''
 Upload multipart to s3
 path: object path on s3
 req: request object contains file data.
 psize: size of each part. Default is 5MB.
 '''
 # only need first 1024 bytes for mime()
 fheader = req.read(1024)
 mime = self.mime(fheader)
 disposition = 'attachment; filename="{}"'.format(os.path.basename(path))
 try:
 # initialize multipart upload
 self.logger.debug('Initializing multipart upload for {}'.format(path))
 mpu = self.client.create_multipart_upload(Bucket=self.bucket,
 Key=path,
 ContentType=mime,
 ContentDisposition=disposition
 )
 self.logger.debug('Initialization of {} success with info: {}'.format(path, mpu))
 part = 0
 part_info = {
 'Parts': [
 ]
 }
 self.logger.debug('Start uploading parts to {}'.format(path))
 while True:
 body = req.read(psize)
 if body:
 part += 1
 if part == 1:
 body = fheader + body
 self.logger.debug('Uploading part no {} of {}'.format(part, path))
 resp = self.client.upload_part(Bucket=self.bucket,
 Body=body,
 Key=path,
 PartNumber=part,
 UploadId=mpu['UploadId']
 )
 self.logger.debug('Part {} of {} uploaded.'.format(part, path))
 part_info['Parts'].append(
 {
 'ETag': resp['ETag'],
 'PartNumber': part
 }
 )
 self.logger.debug('Part {} of {} info: {}'.format(part, path, part_info))
 else:
 break
 self.logger.info('Multipart upload {} finished. Start completing...'.format(path))
 # complete the multipart upload
 self.client.complete_multipart_upload(Bucket=self.bucket,
 Key=path,
 MultipartUpload=part_info,
 UploadId=mpu['UploadId']
 )
 self.logger.info('Multipart upload completed!')
 return True
 except:
 self.logger.error('Failed to upload file {}'.format(path), exc_info=True)
 if mpu:
 self.logger.info('Aborting the upload of {}...'.format(path))
 self.client.abort_multipart_upload(
 Bucket=self.bucket,
 Key=path,
 UploadId=mpu['UploadId'])
 self.logger.info('Upload of {} aborted!'.format(path))
 def exists(self, path):
 '''
 Send a HEAD request to see if object exists.
 If object exists, return its Metadata. Otherwise return HTTP 404 code
 path: object path to check existence
 '''
 try:
 resp = self.client.head_object(Bucket=self.bucket,
 Key=path)
 return resp['ResponseMetadata']
 except botocore.exceptions.ClientError as e:
 if e.response['Error']['Code'] == '404':
 abort(404)
 def download(self, path):
 '''
 Download an object from bucket.
 This method shoud be used for development only.
 path: object path to download
 '''
 if self.exists(path):
 self.logger.info('{} downloaded from S3'.format(path))
 # TODO: avoid reading all in once
 return self.conn.Object(self.bucket, path).get()['Body'].read()
 def info(self, path):
 '''
 Get metadata of object and return as a dict
 path: object path to get metadata
 '''
 info = dict()
 resp = self.exists(path)
 if resp:
 headers = resp['HTTPHeaders']
 info['content_length'] = headers['content-length']
 info['content_type'] = headers['content-type']
 self.logger.info('Retrieved info of {} from S3.'.format(path))
 return info
asked Aug 20, 2016 at 5:59
\$\endgroup\$
1
  • \$\begingroup\$ Welcome to Code Review! I hope you get some great answers. \$\endgroup\$ Commented Aug 20, 2016 at 6:44

1 Answer 1

1
\$\begingroup\$

Kind of minor, but you wrote:

while True:
 body = req.read(psize)
 if body:
 ...
 else:
 break

which causes the entirety of ... to be indented. If you slightly restructure to:

while True:
 body = req.read(psize)
 if not body: break
 ...

then the ... block can be dedented a level, making it easier to read.

answered Aug 21, 2016 at 5:53
\$\endgroup\$
1
  • \$\begingroup\$ Thanks for reviewing my code. Your comment makes sense, it's more easier to read. \$\endgroup\$ Commented Aug 21, 2016 at 6:19

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.