I'm writing an app with Flask that includes a feature to upload large files to S3, and I made a class to handle this.
I have some targets in writing code:
Code must be easy to understand and maintain. (Yes, everyone wants this, so do I)
Logs should be as detailed as possible, especially when working with S3. I want detailed logs when something goes wrong.
File type (mime) must be detected and written to file object on S3.
Large files will be uploaded in multiple parts.
Basically, it's working for my case but I want to hear your advice/comments about the way I'm doing, especially in some points: logging, exception handling, docstring, function/variables naming, everything you see it's not pythonic way.
Edit1: I have one accepted answer, but others are welcome :)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import os

import boto3 as boto
import botocore
import magic
from flask import abort
class S3(object):
    """Upload, probe and download objects in a fixed S3 bucket.

    Thin wrapper around boto3 with detailed logging.  Small files go
    through :meth:`upload` (single PUT); large files go through
    :meth:`upload_multipart`.  The MIME type is detected from the file
    header and stored on the S3 object.
    """

    def __init__(self):
        # NOTE(review): bucket name is hard-coded; consider making it
        # a constructor parameter or reading it from app config.
        self.bucket = 'my_bucket'
        self.conn = boto.resource('s3')
        self.client = boto.client('s3')
        self.logger = logging.getLogger(
            __name__ + '.' + self.__class__.__name__)

    @staticmethod
    def mime(fheader):
        """Detect the MIME type by inspecting the file's leading bytes.

        fheader: first bytes of the file (1024 bytes is plenty for libmagic).
        Returns the MIME type string, e.g. 'image/png'.
        """
        return magic.from_buffer(fheader, mime=True)

    def upload(self, path, req):
        """Upload a file to S3 in a single PUT.  Use for small files only.

        path: object key on S3.
        req: file-like object holding the data (e.g. request.files['file']).
        Returns True on success, False on failure.
        """
        fheader = req.read(1024)
        mime = self.mime(fheader)
        # mime() consumed the first 1024 bytes from the stream, so they
        # must be prepended back to form the full body.
        body = fheader + req.read()
        disposition = 'attachment; filename="{}"'.format(
            os.path.basename(path))
        try:
            self.logger.info('Trying to upload %s', path)
            resp = self.conn.Object(self.bucket, path).put(
                Body=body,
                ContentType=mime,
                ContentDisposition=disposition
            )
            if resp['ResponseMetadata']['HTTPStatusCode'] == 200:
                self.logger.info('%s uploaded to S3', path)
                return True
            self.logger.error('Failed to upload %s to S3. Detail: \n%s ',
                              path, resp)
            return False
        except botocore.exceptions.ClientError:
            self.logger.critical('S3 connection error', exc_info=True)
            # Bug fix: the original fell off the end here and returned
            # None; return False for a consistent boolean contract.
            return False

    def upload_multipart(self, path, req, psize=1024 * 1024 * 5):
        """Upload a file to S3 as a multipart upload.

        path: object key on S3.
        req: file-like object holding the data.
        psize: size of each part in bytes.  Defaults to 5 MB, the minimum
               S3 accepts for every part except the last.
        Returns True on success, False on failure.  On failure the
        multipart upload is aborted so no orphaned parts keep accruing
        storage charges on S3.
        """
        # Only the first 1024 bytes are needed for mime().
        fheader = req.read(1024)
        mime = self.mime(fheader)
        disposition = 'attachment; filename="{}"'.format(
            os.path.basename(path))
        # Bug fix: mpu must exist before the try block, otherwise the
        # except clause raises NameError when create_multipart_upload
        # itself is what failed.
        mpu = None
        try:
            self.logger.debug('Initializing multipart upload for %s', path)
            mpu = self.client.create_multipart_upload(
                Bucket=self.bucket,
                Key=path,
                ContentType=mime,
                ContentDisposition=disposition
            )
            self.logger.debug('Initialization of %s success with info: %s',
                              path, mpu)
            part = 0
            part_info = {'Parts': []}
            self.logger.debug('Start uploading parts to %s', path)
            while True:
                body = req.read(psize)
                if part == 0:
                    # Prepend the header bytes consumed by mime().  Doing
                    # this BEFORE the emptiness check also fixes files
                    # smaller than 1024 bytes, which the original dropped
                    # (read() returned b'' so no part was ever uploaded).
                    body = fheader + body
                if not body:
                    break
                part += 1
                self.logger.debug('Uploading part no %s of %s', part, path)
                resp = self.client.upload_part(
                    Bucket=self.bucket,
                    Body=body,
                    Key=path,
                    PartNumber=part,
                    UploadId=mpu['UploadId']
                )
                self.logger.debug('Part %s of %s uploaded.', part, path)
                part_info['Parts'].append({
                    'ETag': resp['ETag'],
                    'PartNumber': part
                })
                self.logger.debug('Part %s of %s info: %s',
                                  part, path, part_info)
            self.logger.info(
                'Multipart upload %s finished. Start completing...', path)
            self.client.complete_multipart_upload(
                Bucket=self.bucket,
                Key=path,
                MultipartUpload=part_info,
                UploadId=mpu['UploadId']
            )
            self.logger.info('Multipart upload completed!')
            return True
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); any failure still aborts the
        # upload and is logged with its traceback.
        except Exception:
            self.logger.error('Failed to upload file %s', path,
                              exc_info=True)
            if mpu:
                self.logger.info('Aborting the upload of %s...', path)
                self.client.abort_multipart_upload(
                    Bucket=self.bucket,
                    Key=path,
                    UploadId=mpu['UploadId'])
                self.logger.info('Upload of %s aborted!', path)
            return False

    def exists(self, path):
        """Check whether an object exists via a HEAD request.

        path: object key to check.
        Returns the object's ResponseMetadata dict if it exists; aborts
        the current Flask request with HTTP 404 if it does not.
        Raises: any non-404 ClientError is re-raised (the original
        silently returned None, hiding errors such as 403 Forbidden).
        """
        try:
            resp = self.client.head_object(Bucket=self.bucket, Key=path)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == '404':
                abort(404)
            raise
        return resp['ResponseMetadata']

    def download(self, path):
        """Return the full content of an object from the bucket.

        This method should be used for development only.
        path: object key to download.
        """
        if self.exists(path):
            self.logger.info('%s downloaded from S3', path)
            # TODO: avoid reading all in once
            return self.conn.Object(self.bucket, path).get()['Body'].read()

    def info(self, path):
        """Return selected metadata of an object as a dict.

        path: object key to inspect.
        Returns {'content_length': ..., 'content_type': ...} when the
        object exists; exists() aborts with 404 when it does not.
        """
        info = dict()
        resp = self.exists(path)
        if resp:
            headers = resp['HTTPHeaders']
            info['content_length'] = headers['content-length']
            info['content_type'] = headers['content-type']
            self.logger.info('Retrieved info of %s from S3.', path)
        return info
-
\$\begingroup\$ Welcome to Code Review! I hope you get some great answers. \$\endgroup\$Phrancis– Phrancis2016年08月20日 06:44:03 +00:00Commented Aug 20, 2016 at 6:44
1 Answer 1
Kind of minor, but you wrote:
while True:
body = req.read(psize)
if body:
...
else:
break
which causes the entirety of ...
to be indented. If you slightly restructure to:
while True:
body = req.read(psize)
if not body: break
...
then the ...
block can be dedented a level, making it easier to read.
-
\$\begingroup\$ Thanks for reviewing my code. Your comment makes sense; it's easier to read. \$\endgroup\$cuongnv23– cuongnv232016年08月21日 06:19:28 +00:00Commented Aug 21, 2016 at 6:19
Explore related questions
See similar questions with these tags.