1
\$\begingroup\$

I have a class for making datastore inserts with Google App Engine and WTForms. Now I want to refactor it into smaller units but I'm not sure how to do it. Can you help me find what needs to be done? The code consists of code for taking a form (AdForm) and its parameters to make a datastore insert. It also checks for inapproriate content in a way I'd like to improve.

class AdLister(BaseRequestHandler,
 blobstore_handlers.BlobstoreUploadHandler):
 csrf_protect = False
 def post(self):
 ad = Ad()
 if users.get_current_user():
 ad.user = users.get_current_user()
 if self.current_user is not None:
 try:
 ad.usr = self.current_user
 except Exception, e:
 logging.info('exception %s' % str(e))
 if self.request.get('type'):
 ad.type = self.request.get('type')
 if self.request.get('address'):
 ad.address = self.request.get('address')
 if self.request.get('rooms'):
 ad.number_of_rooms = int(self.request.get('rooms'))
 if self.request.get('size'):
 ad.size = float(self.request.get('size'))
 if self.request.get('regdate'):
 ad.regdate = int(self.request.get('regdate'))
 if self.request.get('mileage'):
 ad.mileage = int(self.request.get('mileage'))
 ad.category = self.request.get('category_group')
 form = AdForm(self.request.params)
 if form.validate():
 title = to_unicode_or_bust(form.title.data)
 #unicode(form.title.data, 'utf-8')
 ad.title = title
 self.session['title'] = ad.title
 name = to_unicode_or_bust(form.name.data) #, 'utf-8')
 ad.name = name
 self.session['name'] = ad.name
 ad.email = form.email.data
 self.session['email'] = ad.email
 ad.phoneview = form.phoneview.data
 self.session['phoneview'] = ad.phoneview
 try:
 if form.phonenumber.data:
 ad.phonenumber = form.phonenumber.data
 self.session['phonenumber'] = ad.phonenumber
 except:
 pass
 text = to_unicode_or_bust(form.text.data) # , 'utf8')
 titletest = to_unicode_or_bust(form.title.data)
 if 'penis' in text:
 self.response.out.write('REMOVED')
 return
 if 'Black Money' in text:
 self.response.out.write('REMOVED')
 return
 if 'black money' in text:
 self.response.out.write('REMOVED')
 return
 if 'BLACK MONEY' in titletest:
 self.response.out.write('REMOVED')
 return
 if 'BLACK DOLARS' in titletest:
 self.response.out.write('REMOVED')
 return
 if 'Penis' in text:
 self.response.out.write('REMOVED')
 return
 if 'penis' in title:
 self.response.out.write('REMOVED')
 return
 if 'Penis' in title:
 self.response.out.write('REMOVED')
 return
 if 'escort' in text:
 self.response.out.write('REMOVED')
 return
 if 'escorts' in text:
 self.response.out.write('REMOVED')
 return
 if 'escort' in title:
 self.response.out.write('REMOVED')
 return
 if 'escorts' in title:
 self.response.out.write('REMOVED')
 return
 ad.text = text
 self.session['text'] = ad.text
 ad.price = form.price.data.replace(' ', '').replace(',00',
 '').replace('.00', '')
 try:
 if form.price.data:
 ad.integer_price = form.price.data.replace(' ', ''
 ).replace(',00', '').replace('.00', '')
 except:
 pass
 self.session['price'] = ad.price
 ad.url = self.request.host
 self.session['url'] = self.request.host
 ad.place = self.request.get('place')
 self.session['place'] = ad.place
 ad.postaladress = self.request.get('place')
 self.session['postaladress'] = ad.postaladress
 ad.put()
 self.session['ad_id'] = ad.key().id()
 else:
 self.render('createnewad.html', {
 'user': self.current_user,
 'session': self.auth.get_user_by_session(),
 'request': self.request,
 'form': form,
 'name': to_unicode_or_bust(form.name.data) #.encode('utf-8')
 })
 return
 if self.request.get('currency'):
 ad.currency = self.request.get('currency')
 self.session['currency'] = ad.currency
 if self.request.get('cg'):
 ad.category = self.request.get('cg')
 self.session['category'] = ad.category
 if self.request.get('company_ad') == '1':
 ad.company_ad = True
 self.session['company_ad'] = 'True'
 ad.put()
 ad.url = self.request.host
 for upload in self.get_uploads():
 try:
 img = Image(reference=ad)
 img.primary_image = upload.key()
 image_url = images.get_serving_url(str(upload.key()), size=640)
 img.put()
 ad.hasimages = True
 ad.image_url = images.get_serving_url(str(upload.key()), size=640)
 ad.put()
 ad.blobs.append(upload.key())
 ad.put()
 except Exception, e:
 logging.error('There was an exception:%s' % str(e))
 pass
 ad.published = False
 if self.request.get('area'):
 city = \
 montaomodel.City.get_by_id(long(self.request.get('area'
 )))
 region = montaomodel.Region.get(city.region.key())
 ad.cities.append(city.key())
 ad.regions.append(region.key())
 ad.city = unicode(city.name)
 ad.region = unicode(region.name)
 ad.put()
 if self.current_user:
 ad.userID = str(self.current_user.auth_ids[0])
 ad.put()
 ad.usr = self.current_user.key.to_old_key()
 ad.put()
 image = ad.matched_images.get()
 image_url = None
 if image:
 if image.primary_image:
 try:
 image_url = \
 images.get_serving_url(str(image.primary_image.key()),
 size=640)
 except Exception, e:
 image_url = '/images/' + str(image.key().id()) \
 + '_small.jpg'
 else:
 image_url = '/images/' + str(image.key().id()) \
 + '_small.jpg'
 imv = []
 for i in ad.matched_images:
 if i.primary_image:
 try:
 i1 = \
 images.get_serving_url(str(i.primary_image.key()))
 imv.append(i1)
 except Exception, e:
 i1 = '/images/' + str(image.key().id()) \
 + '_small.jpg'
 imv.append(i1)
 if ad.price: # and doesn't contain separators
 try:
 price = \
 i18n.I18n(self.request).format_decimal(int(ad.price))
 except Exception, e:
 price = ad.price
 else:
 price = ad.price
 self.render('preview.html', {
 'user': self.current_user,
 'session': self.auth.get_user_by_session(),
 'request': self.request,
 'ad': ad,
 'image_url': image_url,
 'imv': imv,
 'len': len(imv),
 'form': PreviewAdForm(),
 'price': price,
 })
Jamal
35.2k13 gold badges134 silver badges238 bronze badges
asked May 11, 2015 at 10:52
\$\endgroup\$

2 Answers 2

4
\$\begingroup\$

You've got a massive repetition:

if 'penis' in text:
 self.response.out.write('REMOVED')
 return
if 'Black Money' in text:
 self.response.out.write('REMOVED')
 return
if 'black money' in text:
 self.response.out.write('REMOVED')
 return
if 'BLACK MONEY' in titletest:
 self.response.out.write('REMOVED')
 return
if 'BLACK DOLARS' in titletest:
 self.response.out.write('REMOVED')
 return
if 'Penis' in text:
 self.response.out.write('REMOVED')
 return
if 'penis' in title:
 self.response.out.write('REMOVED')
 return
if 'Penis' in title:
 self.response.out.write('REMOVED')
 return
if 'escort' in text:
 self.response.out.write('REMOVED')
 return
if 'escorts' in text:
 self.response.out.write('REMOVED')
 return
if 'escort' in title:
 self.response.out.write('REMOVED')
 return
if 'escorts' in title:
 self.response.out.write('REMOVED')
 return

You can use a list comprehension to shorten this very much:

BAD_WORDS = ['penis', 'black money', 'escort']
if any([w in text or w.upper() in text or w.capitalize() in text
 for w in BAD_WORDS]):
 self.response.out.write('REMOVED')
 return

Never use bare except:, always tell the expected exception.

answered May 11, 2015 at 11:52
\$\endgroup\$
4
  • \$\begingroup\$ Thank you. I'm still learning python and the way you show is much more concise and better. \$\endgroup\$ Commented May 11, 2015 at 13:12
  • 3
    \$\begingroup\$ any[w in text doesn't look right to me. Shouldn't there be parentheses? \$\endgroup\$ Commented May 11, 2015 at 13:24
  • 3
    \$\begingroup\$ Please take a bit more care over the code snippets, or this happens: stackoverflow.com/q/30168806/3001761 \$\endgroup\$ Commented May 11, 2015 at 13:24
  • 1
    \$\begingroup\$ Wouldn't w in text.lower() be more concise and cover all the cases? \$\endgroup\$ Commented May 11, 2015 at 15:55
3
\$\begingroup\$

Style

You have commented out bits of code. Use version control and don't keep that stuff around (unless you have a very good reason; I don't see one here).

In some cases the formatting is off. The general advice is to follow PEP8, that's very easy to accomplish if you run the corresponding tools, i.e. pep8 or so.

Code

First of all: You should split your functionality into smaller chunks. At the moment you have one massive method which does way too many things.

I'd suggest something along the lines of one method for parsing input, initialising the Ad object, validating input, importing uploaded files, rendering output, etc. That way you can later actually see what the control flow is, because at the moment you have returns mixed into your logic and it's quite confusing to follow.

For filtering certain keywords I'd actually recommend constructing a regular expression for all words. That way you can use the features of the engine to specify patterns more easily than with that loop with lower etc. It might be more efficient as well since you can precompile the regex and run it only once on the whole text. Then again, if you're still learning maybe just keep what you currently have and take a look at it later. See also e.g. this SO post.

import re
BAD_WORDS = ['penis', 'black money', 'escort']
bad_words_regex = re.compile("|".join(BAD_WORDS), re.IGNORECASE)
...
if bad_words_regex.search("some text with BLaCK moNey in it"):
 ...

Build helper functions. If you see a pattern twice (or three times, whatever your threshold is going to be), then extract it into a function, method, class, whatever. But don't repeat yourself.

answered May 11, 2015 at 14:25
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.