I wrote a small aggregator app that aggregates values from a JSON HTTP POST request and outputs the aggregated values. The aggregator function is somewhat large, but the output appears correct.
I start it with gunicorn:
$ gunicorn --workers 2 --threads 4 aggregator:api
I test it with curl :
$ curl -X POST -d @aggregator.json http://localhost:8000/aggregate
It outputs the aggregated values.
import falcon
import json
from datetime import datetime
from dateutil import tz
import requests
import time
# Time zone the incoming readings are interpreted in before conversion.
from_zone = tz.gettz('UTC')
# Time zone the hourly timestamps are converted to for the output.
to_zone = tz.gettz('Europe/Stockholm')
class DataService:
    """Falcon resource that receives a JSON document on POST and prints it."""

    def on_post(self, req, resp):
        """Decode the JSON request body and write it to standard output.

        Printing is a placeholder; the data could be written to persistent
        storage here instead.
        """
        raw_body = req.stream.read()
        payload = json.loads(raw_body.decode('utf-8'))
        print(payload)
class AggregatorService:
    """Falcon resource that rolls hourly readings up into daily and monthly sums.

    The POST body is JSON with a ``user`` value and an ``hours`` list whose
    entries are ``[unix_timestamp, wh, th]``; ``wh`` and ``th`` may be null.
    The aggregated document (``user``, ``hours``, ``days``, ``months``) is
    posted as JSON to the ``/store`` endpoint.
    """

    def zero_if_none(self, value):
        """Return ``int(value)``, or 0 when *value* is None (no reading)."""
        return 0 if value is None else int(value)

    def get_day_start(self, hours):
        """Return the day of month of the first reading, e.g. 1 for 2014-12-01."""
        return self.get_day(hours[0])

    def get_month_start(self, hours):
        """Return the month number of the first reading, e.g. 2 for February."""
        return self.get_month(hours[0])

    def get_day(self, hour):
        """Return the day of month of a reading, e.g. 5 for 2015-01-05.

        The timestamp is interpreted in the server's local time zone.
        """
        return datetime.fromtimestamp(int(hour[0])).day

    def get_month(self, hour):
        """Return the month number (1-12) of a reading, in server local time."""
        return datetime.fromtimestamp(int(hour[0])).month

    def _shift_timestamp(self, epoch, source_zone, target_zone):
        """Return *epoch* moved by the offset between the two time zones.

        The result is a "wall clock" timestamp: read as *source_zone* time it
        shows the *target_zone* wall-clock time of the original instant. It is
        computed from the zones' UTC offsets, so it does not depend on the
        server's own local time zone.
        """
        source_time = datetime.fromtimestamp(epoch, tz=source_zone)
        target_time = source_time.astimezone(target_zone)
        shift = target_time.utcoffset() - source_time.utcoffset()
        return epoch + int(shift.total_seconds())

    def _aggregate(self, hours, source_zone, target_zone):
        """Build the hourly, daily and monthly lists for *hours*.

        Returns a ``(jhours, jdays, jmonths)`` tuple of lists whose entries
        are ``[timestamp, wh, th]``. Each day and month entry carries the
        shifted timestamp of the last reading that belongs to that period.
        An empty *hours* list yields three empty lists.
        """
        jhours, jdays, jmonths = [], [], []
        day_key = month_key = None
        day_wh = day_th = 0
        month_wh = month_th = 0
        last_stamp = None  # shifted timestamp of the previous reading

        for hour in hours:
            epoch = int(hour[0])
            # NOTE(review): calendar boundaries use the server's local time
            # zone, as the original code did -- confirm whether target_zone
            # was intended instead.
            local = datetime.fromtimestamp(epoch)
            # Include the year (and month) in the keys so that e.g.
            # 2014-12-01 and 2015-12-01 are not merged into one period.
            this_month = (local.year, local.month)
            this_day = (local.year, local.month, local.day)

            tstamp = self._shift_timestamp(epoch, source_zone, target_zone)
            # consumption is 0 if there is no value
            wh = self.zero_if_none(hour[1])
            th = self.zero_if_none(hour[2])
            jhours.append([tstamp, wh, th])

            if last_stamp is not None:
                if this_day != day_key:
                    # the previous day is complete: emit it and start anew
                    jdays.append([last_stamp, day_wh, day_th])
                    day_wh = day_th = 0
                if this_month != month_key:
                    # the previous month is complete: emit it and start anew
                    jmonths.append([last_stamp, month_wh, month_th])
                    month_wh = month_th = 0

            day_key, month_key = this_day, this_month
            day_wh += wh
            day_th += th
            month_wh += wh
            month_th += th
            last_stamp = tstamp

        if last_stamp is not None:
            # flush the still-open day and month
            jdays.append([last_stamp, day_wh, day_th])
            jmonths.append([last_stamp, month_wh, month_th])
        return jhours, jdays, jmonths

    def on_post(self, req, resp):
        """Aggregate the posted readings and forward the result to ``/store``.

        Raises ``KeyError`` when the body lacks ``hours`` or ``user``.
        """
        data = json.loads(req.stream.read().decode('utf-8'))
        jhours, jdays, jmonths = self._aggregate(
            data['hours'], from_zone, to_zone)
        jdict = {
            'user': data['user'],
            'hours': jhours,
            'days': jdays,
            'months': jmonths,
        }
        requests.post('http://localhost:8000/store', json=jdict)
# Application object; gunicorn is started against it as ``aggregator:api``.
api = falcon.API()
# POST /aggregate receives raw hourly readings and aggregates them.
api.add_route('/aggregate', AggregatorService())
# POST /store receives the aggregated document sent by AggregatorService.
api.add_route('/store', DataService())
aggregator.json
{
"user": 42,
"hours": [
[
1417392000,
1,
null
],
[
1417395600,
2,
3
],
[
1417647600,
3,
2
],
[
1417651200,
null,
6
],
[
1422835200,
6,
2
],
[
1423094400,
4,
3
]
]
}
1 Answer 1
Store dissimilar data as a dict, not a list
In each `hour` in `hours` of your JSON, you have a list that stores:
- a date in Unix format
- two integers that can also be null
It would be clearer and more standard if, instead of a list, you used a dict. As in:
"hours": [
{
(your_label_1): 1417392000,
(your_label_2): 1,
(your_label_3): null
},
This would aid greatly in getting across what the code is doing.
(I still don't know exactly what the second and third values represent. Neither the `wh` nor the `th` abbreviation is really very helpful.)
Perform all time operations in Unix time
An advantage of Unix time is that dates and times can be added and subtracted without separate handlers to take care of days, hours etc.
This would dramatically shorten your `on_post` function.
So, first do all operations in Unix time format (UTC time zone), and finally convert to days/hours/minutes only when you need to display the time information to the user.
(Note: Your `wh` and `th` variables (in units of hours in your code) would need to be changed to units of seconds. This could be done either by extracting the hour value from the JSON and multiplying by 3600, or by changing the JSON format to seconds to match the Unix times.)
(Also note: If you have no control over the JSON format, then most of my points above won't exactly apply to you.)