I have a class for making datastore inserts with Google App Engine and WTForms. Now I want to refactor it into smaller units but I'm not sure how to do it. Can you help me find what needs to be done? The code consists of code for taking a form (AdForm) and its parameters to make a datastore insert. It also checks for inapproriate content in a way I'd like to improve.
class AdLister(BaseRequestHandler,
blobstore_handlers.BlobstoreUploadHandler):
csrf_protect = False
def post(self):
ad = Ad()
if users.get_current_user():
ad.user = users.get_current_user()
if self.current_user is not None:
try:
ad.usr = self.current_user
except Exception, e:
logging.info('exception %s' % str(e))
if self.request.get('type'):
ad.type = self.request.get('type')
if self.request.get('address'):
ad.address = self.request.get('address')
if self.request.get('rooms'):
ad.number_of_rooms = int(self.request.get('rooms'))
if self.request.get('size'):
ad.size = float(self.request.get('size'))
if self.request.get('regdate'):
ad.regdate = int(self.request.get('regdate'))
if self.request.get('mileage'):
ad.mileage = int(self.request.get('mileage'))
ad.category = self.request.get('category_group')
form = AdForm(self.request.params)
if form.validate():
title = to_unicode_or_bust(form.title.data)
#unicode(form.title.data, 'utf-8')
ad.title = title
self.session['title'] = ad.title
name = to_unicode_or_bust(form.name.data) #, 'utf-8')
ad.name = name
self.session['name'] = ad.name
ad.email = form.email.data
self.session['email'] = ad.email
ad.phoneview = form.phoneview.data
self.session['phoneview'] = ad.phoneview
try:
if form.phonenumber.data:
ad.phonenumber = form.phonenumber.data
self.session['phonenumber'] = ad.phonenumber
except:
pass
text = to_unicode_or_bust(form.text.data) # , 'utf8')
titletest = to_unicode_or_bust(form.title.data)
if 'penis' in text:
self.response.out.write('REMOVED')
return
if 'Black Money' in text:
self.response.out.write('REMOVED')
return
if 'black money' in text:
self.response.out.write('REMOVED')
return
if 'BLACK MONEY' in titletest:
self.response.out.write('REMOVED')
return
if 'BLACK DOLARS' in titletest:
self.response.out.write('REMOVED')
return
if 'Penis' in text:
self.response.out.write('REMOVED')
return
if 'penis' in title:
self.response.out.write('REMOVED')
return
if 'Penis' in title:
self.response.out.write('REMOVED')
return
if 'escort' in text:
self.response.out.write('REMOVED')
return
if 'escorts' in text:
self.response.out.write('REMOVED')
return
if 'escort' in title:
self.response.out.write('REMOVED')
return
if 'escorts' in title:
self.response.out.write('REMOVED')
return
ad.text = text
self.session['text'] = ad.text
ad.price = form.price.data.replace(' ', '').replace(',00',
'').replace('.00', '')
try:
if form.price.data:
ad.integer_price = form.price.data.replace(' ', ''
).replace(',00', '').replace('.00', '')
except:
pass
self.session['price'] = ad.price
ad.url = self.request.host
self.session['url'] = self.request.host
ad.place = self.request.get('place')
self.session['place'] = ad.place
ad.postaladress = self.request.get('place')
self.session['postaladress'] = ad.postaladress
ad.put()
self.session['ad_id'] = ad.key().id()
else:
self.render('createnewad.html', {
'user': self.current_user,
'session': self.auth.get_user_by_session(),
'request': self.request,
'form': form,
'name': to_unicode_or_bust(form.name.data) #.encode('utf-8')
})
return
if self.request.get('currency'):
ad.currency = self.request.get('currency')
self.session['currency'] = ad.currency
if self.request.get('cg'):
ad.category = self.request.get('cg')
self.session['category'] = ad.category
if self.request.get('company_ad') == '1':
ad.company_ad = True
self.session['company_ad'] = 'True'
ad.put()
ad.url = self.request.host
for upload in self.get_uploads():
try:
img = Image(reference=ad)
img.primary_image = upload.key()
image_url = images.get_serving_url(str(upload.key()), size=640)
img.put()
ad.hasimages = True
ad.image_url = images.get_serving_url(str(upload.key()), size=640)
ad.put()
ad.blobs.append(upload.key())
ad.put()
except Exception, e:
logging.error('There was an exception:%s' % str(e))
pass
ad.published = False
if self.request.get('area'):
city = \
montaomodel.City.get_by_id(long(self.request.get('area'
)))
region = montaomodel.Region.get(city.region.key())
ad.cities.append(city.key())
ad.regions.append(region.key())
ad.city = unicode(city.name)
ad.region = unicode(region.name)
ad.put()
if self.current_user:
ad.userID = str(self.current_user.auth_ids[0])
ad.put()
ad.usr = self.current_user.key.to_old_key()
ad.put()
image = ad.matched_images.get()
image_url = None
if image:
if image.primary_image:
try:
image_url = \
images.get_serving_url(str(image.primary_image.key()),
size=640)
except Exception, e:
image_url = '/images/' + str(image.key().id()) \
+ '_small.jpg'
else:
image_url = '/images/' + str(image.key().id()) \
+ '_small.jpg'
imv = []
for i in ad.matched_images:
if i.primary_image:
try:
i1 = \
images.get_serving_url(str(i.primary_image.key()))
imv.append(i1)
except Exception, e:
i1 = '/images/' + str(image.key().id()) \
+ '_small.jpg'
imv.append(i1)
if ad.price: # and doesn't contain separators
try:
price = \
i18n.I18n(self.request).format_decimal(int(ad.price))
except Exception, e:
price = ad.price
else:
price = ad.price
self.render('preview.html', {
'user': self.current_user,
'session': self.auth.get_user_by_session(),
'request': self.request,
'ad': ad,
'image_url': image_url,
'imv': imv,
'len': len(imv),
'form': PreviewAdForm(),
'price': price,
})
2 Answers 2
You've got a massive repetition:
if 'penis' in text:
self.response.out.write('REMOVED')
return
if 'Black Money' in text:
self.response.out.write('REMOVED')
return
if 'black money' in text:
self.response.out.write('REMOVED')
return
if 'BLACK MONEY' in titletest:
self.response.out.write('REMOVED')
return
if 'BLACK DOLARS' in titletest:
self.response.out.write('REMOVED')
return
if 'Penis' in text:
self.response.out.write('REMOVED')
return
if 'penis' in title:
self.response.out.write('REMOVED')
return
if 'Penis' in title:
self.response.out.write('REMOVED')
return
if 'escort' in text:
self.response.out.write('REMOVED')
return
if 'escorts' in text:
self.response.out.write('REMOVED')
return
if 'escort' in title:
self.response.out.write('REMOVED')
return
if 'escorts' in title:
self.response.out.write('REMOVED')
return
You can use a list comprehension to shorten this very much:
BAD_WORDS = ['penis', 'black money', 'escort']
if any([w in text or w.upper() in text or w.capitalize() in text
for w in BAD_WORDS]):
self.response.out.write('REMOVED')
return
Never use bare except:
, always tell the expected exception.
-
\$\begingroup\$ Thank you. I'm still learning python and the way you show is much more concise and better. \$\endgroup\$Niklas Rosencrantz– Niklas Rosencrantz2015年05月11日 13:12:31 +00:00Commented May 11, 2015 at 13:12
-
3\$\begingroup\$
any[w in text
doesn't look right to me. Shouldn't there be parentheses? \$\endgroup\$Kevin– Kevin2015年05月11日 13:24:19 +00:00Commented May 11, 2015 at 13:24 -
3\$\begingroup\$ Please take a bit more care over the code snippets, or this happens: stackoverflow.com/q/30168806/3001761 \$\endgroup\$jonrsharpe– jonrsharpe2015年05月11日 13:24:51 +00:00Commented May 11, 2015 at 13:24
-
1\$\begingroup\$ Wouldn't
w in text.lower()
be more concise and cover all the cases? \$\endgroup\$Morwenn– Morwenn2015年05月11日 15:55:45 +00:00Commented May 11, 2015 at 15:55
Style
You have commented out bits of code. Use version control and don't keep that stuff around (unless you have a very good reason; I don't see one here).
In some cases the formatting is off. The general advice is to follow
PEP8, that's very easy to accomplish if you run the corresponding tools,
i.e. pep8
or so.
Code
First of all: You should split your functionality into smaller chunks. At the moment you have one massive method which does way too many things.
I'd suggest something along the lines of one method for parsing
input, initialising the Ad
object, validating input, importing
uploaded files, rendering output, etc. That way you can later actually
see what the control flow is, because at the moment you have return
s
mixed into your logic and it's quite confusing to follow.
For filtering certain keywords I'd actually recommend constructing a
regular expression for all words. That way you can use the features of
the engine to specify patterns more easily than with that loop with
lower
etc. It might be more efficient as well since you can
precompile the regex and run it only once on the whole text. Then
again, if you're still learning maybe just keep what you currently have
and take a look at it later. See also
e.g. this SO post.
import re
BAD_WORDS = ['penis', 'black money', 'escort']
bad_words_regex = re.compile("|".join(BAD_WORDS), re.IGNORECASE)
...
if bad_words_regex.search("some text with BLaCK moNey in it"):
...
Build helper functions. If you see a pattern twice (or three times, whatever your threshold is going to be), then extract it into a function, method, class, whatever. But don't repeat yourself.
Explore related questions
See similar questions with these tags.