2
\$\begingroup\$

Can someone please review my code? I'm saving a hotel rate to a database. I want to save the rate twice, one field will update with every new rate, and one field will contain the rate that was found for that date the first time it was captured.

The code looks very clunky and I know it can be improved.

def save_results(rates, session, hotel, govt):
 for item in rates:
 rate = Rate(**item)
 try:
 # check if already in database
 q = session.query(Rate).filter(Rate.hotel==hotel['object'], Rate.arrive==rate.arrive).first()
 # update inital_rate if that field is empty
 if q:
 if 'govt_rate' in item and q.govt_rate_initial is None:
 q.govt_rate_initial = rate.govt_rate
 elif 'commercial_rate' in item and q.commercial_rate_initial is None:
 q.commercial_rate_initial = rate.commercial_rate
 if q and govt is True:
 q.updated = datetime.utcnow()
 q.govt_rate = rate.govt_rate
 elif q and govt is False:
 q.updated = datetime.utcnow()
 q.commercial_rate = rate.commercial_rate
 else:
 if govt is True:
 rate.govt_rate_initial = rate.govt_rate
 elif govt is False:
 rate.commercial_rate_initial = rate.commercial_rate
 hotel['object'].rates.append(rate)
 session.commit()
 except:
 session.rollback()
 raise

Full code is below for reference. I would appreciate comments on any other portion as well!

# models
class Location(Base):
 __tablename__ = 'locations'
 id = Column(Integer, primary_key=True)
 city = Column(String(50), nullable=False, unique=True)
 per_diem_rate = Column(Numeric(6, 2))
 hotels = relationship('Hotel', back_populates='location')
class Hotel(Base):
 __tablename__ = 'hotels'
 id = Column(Integer, primary_key=True)
 name = Column(String(100), nullable=False, unique=True)
 phone_number = Column(String(20))
 parking_fee = Column(String(10))
 location_id = Column(Integer, ForeignKey('locations.id'), nullable=False)
 location = relationship('Location', back_populates='hotels')
 rates = relationship('Rate', back_populates='hotel', order_by='Rate.arrive', lazy='joined')
class Rate(Base):
 __tablename__ = 'rates'
 id = Column(Integer, primary_key=True)
 govt_rate = Column(Numeric(6, 2))
 govt_rate_initial = Column(Numeric(6, 2))
 commercial_rate = Column(Numeric(6, 2))
 commercial_rate_initial = Column(Numeric(6, 2))
 arrive = Column(Date, nullable=False)
 govt_link = Column(String(500))
 commercial_link = Column(String(500))
 updated = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
 hotel_id = Column(Integer, ForeignKey('hotels.id'), nullable=False)
 hotel = relationship('Hotel', back_populates='rates')
def scrape_marriott(HOTELS_TO_SCRAPE):
 # create db session
 session = create_db_session()
 good = 0
 bad = 0
 # loop through list of hotels to scrape
 for item in HOTELS_TO_SCRAPE:
 try:
 # get or create a hotel linked to a location
 location = get_or_create(session, Location, city=item['city'])
 hotel = get_or_create(session, Hotel, name=item['name'], location=location)
 # create a hotel dictionary to pass to the other functions
 hotel = {'property_code': item['property_code'], 'object': hotel}
 # govt rates
 # get rates dictionary
 rates = get_rates(hotel, govt=True)
 # save to database
 save_results(rates, session, hotel, govt=True)
 time.sleep(randint(20, 30))
 # commercial rates
 # get rates dictionary
 rates = get_rates(hotel, govt=False)
 # save to database
 save_results(rates, session, hotel, govt=False)
 # log result and increase 'good process' counter
 print(item['name'] + ' processed successfully')
 good += 1
 # wait between 30 and 60 seconds before next loop
 time.sleep(randint(30, 60))
 except (AttributeError, TypeError, ConnectionError) as e:
 # log exception
 print('Error occured for ' + item['name'] + '. ' + e)
 email_message('Error occured for ' + item['name'] + '. ' + e)
 bad += 1
 continue
 print('{} processed, {} failed'.format(good, bad))
 email_message('{} processed, {} failed'.format(good, bad))
 session.close()
def get_rates(hotel, govt):
 dates = build_dates()
 rates = []
 # get rates for this month and next month
 for d in dates:
 soup = get_soup(d['arrive'], d['depart'], hotel, govt)
 rates += parse_rates(soup, govt)
 time.sleep(randint(2, 5))
 # remove duplicates
 filtered = []
 for i in range(0, len(rates)):
 if rates[i] not in rates[i + 1:]:
 filtered.append(rates[i])
 rates = filtered
 return rates
def get_soup(arrive, depart, hotel, govt):
 if govt is True:
 rateCode = 'GOV'
 else:
 rateCode = 'none'
 browser = RoboBrowser(parser='html.parser')
 browser.open('http://www.urlremoved?propertyCode=' + hotel['property_code'])
 time.sleep(1)
 form = browser.get_form(action='/reservation/availabilitySearch.mi?isSearch=false')
 form['fromDate'].value = arrive
 form['toDate'].value = depart
 form['flexibleDateSearch'] = 'true'
 form['clusterCode'] = rateCode
 # submit form
 browser.submit_form(form)
 return browser
def parse_rates(soup, govt):
 # get calendar links
 table = soup.find('table')
 urls = table.find_all('a', class_='t-no-decor')
 rates = []
 # loop through urls and parse each query string
 for item in urls:
 if len(item["class"]) == 1:
 # strip newlines and tabs
 raw_url = item['href'].replace('\n', '').replace('\t', '').replace(' ', '')
 parsed_url = urlparse(raw_url)
 query = parse_qs(parsed_url.query)
 # convert date to datetime format
 res_date = query['fromDate'][0]
 res_date = datetime.strptime(res_date, '%m/%d/%y')
 if govt == True:
 # append data to rates list
 rates.append({
 'arrive': res_date,
 'govt_rate': query['rate'][0],
 'govt_link': 'https://marriott.com' + urlunparse(parsed_url)
 })
 elif govt == False:
 # append data to rates list
 rates.append({
 'arrive': res_date,
 'commercial_rate': query['rate'][0],
 'commercial_link': 'https://marriott.com' + urlunparse(parsed_url)
 })
 return rates
def save_results(rates, session, hotel, govt):
 for item in rates:
 rate = Rate(**item)
 try:
 # check if already in database
 q = session.query(Rate).filter(Rate.hotel==hotel['object'], Rate.arrive==rate.arrive).first()
 # update inital_rate if that field is empty
 if q:
 if 'govt_rate' in item and q.govt_rate_initial is None:
 q.govt_rate_initial = rate.govt_rate
 elif 'commercial_rate' in item and q.commercial_rate_initial is None:
 q.commercial_rate_initial = rate.commercial_rate
 if q and govt is True:
 q.updated = datetime.utcnow()
 q.govt_rate = rate.govt_rate
 elif q and govt is False:
 q.updated = datetime.utcnow()
 q.commercial_rate = rate.commercial_rate
 else:
 if govt is True:
 rate.govt_rate_initial = rate.govt_rate
 elif govt is False:
 rate.commercial_rate_initial = rate.commercial_rate
 hotel['object'].rates.append(rate)
 session.commit()
 except:
 session.rollback()
 raise
asked Dec 13, 2016 at 0:10
\$\endgroup\$

1 Answer 1

1
\$\begingroup\$

Without changing the rest of the data structure, your try clause can be shortened:

 try:
 q = session.query(Rate).filter(Rate.hotel==hotel['object'], Rate.arrive==rate.arrive).first()
 if govt is True:
 sector = "govt"
 else:
 sector = "commercial"
 if q:
 if 'govt_rate' in item:
 sector = "govt"
 elif 'commercial_rate' in item:
 sector = "commercial"
 if q[sector + "_rate_initial"] is None:
 q[sector + "_rate_initial"] = rate[sector + "rate"]
 else:
 rate[sector + "_rate_initial"] = rate[sector + "_rate"]
 hotel['object'].rates.append(rate)

(This assumes you want the govt argument to save_results to be over-ridden by existing data in the field in cases where they don't match.)

Ideally, your fields should have a level for "sector". So instead of string concatenation as I've done, you would have:

q[sector].rate_initial = rate[sector].rate
answered Dec 13, 2016 at 1:04
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.