3
\$\begingroup\$

I used this API to download some data.

The code is supposed to create a dataframe table using specific fields of an object, and it seems to work, but running slow.

I think it would run faster if I used something instead of json_nomarlize.

def create_table(data: List[dict]) -> pd.DataFrame:
 """ 
 Creates pd.DataFrame using `infocard` values, 
 calculates `income` using `["unified_source"]["step_11"][k]` values.
 Note that `[k]` keys are randomly generated. 
 Please see sample data at the end of the post.
 """
 infocard_container = []
 step_11_container = []
 for i in range(len(data)):
 infocard_container.append(json_normalize(data[i]["infocard"]))
 step_11_subcontainer = []
 try: # if document contains ["unified_source"]["step_11"] field
 for k in data[i]["unified_source"]["step_11"].keys():
 s_df = json_normalize(data[i]["unified_source"]["step_11"][k])[
 ["person", "sizeIncome"]
 ]
 income_sum = s_df.loc[s_df["person"].eq("1")].sum()["sizeIncome"]
 step_11_subcontainer.append(income_sum)
 except KeyError: # if not, NaN would be appended
 step_11_subcontainer.append(np.nan) 
 step_11_container.append(step_11_subcontainer)
 df = pd.concat(infocard_container)
 df["income"] = [sum(i) for i in step_11_container]
 assert len(data) == len(df)
 return df 

I need all data[0]["infocard"] values, and within data[0]["unified_source"]["step_11"][k] values, I need person, sizeIncome values (and source_ua_company_code if possible - it's not in the code). I'm adding all sizeIncome values if the person == 1

Sample data (data[0]):

{'guid': 'nacp_3bb5b983-edd9...',
 'infocard': {'first_name': 'NAME',
 'patronymic': 'SNAME',
 'last_name': 'SURNAME',
 'office': '"OFFICE"',
 'position': 'POSITION"',
 'source': 'NACP',
 'id': 'nacp_3bb5b983-edd9...',
 'url': 'https://declarations.com.ua/declaration/nacp_3bb5b983-edd9...',
 'document_type': 'Yearly',
 'is_corrected': False,
 'created_date': '2018-03-27T00:00:00',
 'declaration_year': 2017},
 'raw_source': {'url': 'https://public-api.nazk.gov.ua/v1/declaration/nacp_3bb5b983-edd9...'},
 'unified_source': {'step_0': {'declarationType': '1',
 'declarationYear1': '2017'},
 'step_1': {'actual_cityType': '[hidden]',
 'actual_country': '',
 'actual_postCode': '[hidden]',
 'actual_street': '[hidden]',
 'actual_streetType': '[hidden]',
 'changedName': False,
 'city': '[hidden]',
 'cityPath': '[hidden]',
 'cityType': '[hidden]',
 'city_extendedstatus': '1',
 'corruptionAffected': 'No',
 'country': '1',
 'countryPath': '',
 'district': '[hidden]',
 'eng_actualAddress': '[hidden]',
 'eng_actualPostCode': '[hidden]',
 'eng_postCode': '',
 'eng_sameRegLivingAddress': '[hidden]',
 'firstname': 'NAME',
 'housePartNum_extendedstatus': '1',
 'lastname': 'SURNAME',
 'middlename': 'SNAME',
 'postCategory': '',
 'postCategory_extendedstatus': '1',
 'postCode': '[hidden]',
 'postType': '',
 'postType_extendedstatus': '1',
 'previous_firstname': '',
 'previous_lastname': '',
 'previous_middlename': '',
 'region': '[hidden]',
 'responsiblePosition': 'Ні',
 'sameRegLivingAddress': '[hidden]',
 'street': '[hidden]',
 'streetType': '[hidden]',
 'ukr_actualAddress': '[hidden]',
 'workPlace': 'workPlace',
 'workPost': 'workPost',
 'dnt_organization_group': 'n'},
 'step_11': {'1493274700481': {'incomeSource': 'j',
 'iteration': '1493274700481',
 'objectType': 'salary',
 'otherObjectType': '',
 'person': '1',
 'rights': {'1': {'citizen': '',
 'eng_company_address': '',
 'eng_company_code': '',
 'eng_company_name': '',
 'eng_firstname': '',
 'eng_fullname': '',
 'eng_lastname': '',
 'eng_middlename': '',
 'eng_middlename_extendedstatus': '',
 'eng_postCode': '',
 'eng_postCode_extendedstatus': '',
 'otherOwnership': '',
 'ownershipType': 'property',
 'percent-ownership': '',
 'postCode': '[hidden]',
 'rightBelongs': '1',
 'rights_cityPath': '',
 'ua_apartmentsNum_extendedstatus': '',
 'ua_city': '',
 'ua_company_code': '',
 'ua_company_name': '',
 'ua_firstname': '',
 'ua_houseNum_extendedstatus': '',
 'ua_housePartNum_extendedstatus': '',
 'ua_lastname': '',
 'ua_middlename': '',
 'ua_middlename_extendedstatus': '',
 'ua_postCode': '',
 'ua_postCode_extendedstatus': '',
 'ua_street': '[hidden]',
 'ua_streetType': '[hidden]',
 'ua_street_extendedstatus': '',
 'ukr_company_address': '',
 'ukr_company_name': '',
 'ukr_firstname': '',
 'ukr_fullname': '',
 'ukr_lastname': '',
 'ukr_middlename': '',
 'ukr_middlename_extendedstatus': ''}},
 'sizeIncome': 44505.0,
 'source_citizen': '_',
 'source_eng_company_address': '',
 'source_eng_company_code': '',
 'source_eng_company_name': '',
 'source_eng_fullname': '',
 'source_ua_company_code': '000000',
 'source_ua_company_code_extendedstatus': '0',
 'source_ua_company_name': '_',
 'source_ua_firstname': '',
 'source_ua_lastname': '',
 'source_ua_middlename': '',
 'source_ua_sameRegLivingAddress': '',
 'source_ukr_company_address': '',
 'source_ukr_company_name': '',
 'source_ukr_fullname': '',
 'dnt_sizeIncome_hidden': False,
 'dnt_objectType_encoded': 'salarymain',
 'dnt_is_foreign': False},
 '1493274779231': {'incomeSource': '1',
 'iteration': '1493274779231',
 'objectType': 'business',
 'otherObjectType': '',
 'person': '1',
 'rights': {'1': {'citizen': '',
 'eng_company_address': '',
 'eng_company_code': '',
 'eng_company_name': '',
 'eng_firstname': '',
 'eng_fullname': '',
 'eng_lastname': '',
 'eng_middlename': '',
 'eng_middlename_extendedstatus': '',
 'eng_postCode': '',
 'eng_postCode_extendedstatus': '',
 'otherOwnership': '',
 'ownershipType': 'property',
 'percent-ownership': '',
 'postCode': '[hidden]',
 'rightBelongs': '1',
 'rights_cityPath': '',
 'ua_apartmentsNum_extendedstatus': '',
 'ua_city': '',
 'ua_company_code': '',
 'ua_company_name': '',
 'ua_firstname': '',
 'ua_houseNum_extendedstatus': '',
 'ua_housePartNum_extendedstatus': '',
 'ua_lastname': '',
 'ua_middlename': '',
 'ua_middlename_extendedstatus': '',
 'ua_postCode': '',
 'ua_postCode_extendedstatus': '',
 'ua_street': '[hidden]',
 'ua_streetType': '[hidden]',
 'ua_street_extendedstatus': '',
 'ukr_company_address': '',
 'ukr_company_name': '',
 'ukr_firstname': '',
 'ukr_fullname': '',
 'ukr_lastname': '',
 'ukr_middlename': '',
 'ukr_middlename_extendedstatus': ''}},
 'sizeIncome': 19100.0,
 'source_citizen': '',
 'source_eng_company_address': '',
 'source_eng_company_code': '',
 'source_eng_company_name': '',
 'source_eng_fullname': '',
 'source_ua_company_code': '',
 'source_ua_company_name': '',
 'source_ua_firstname': '',
 'source_ua_lastname': '',
 'source_ua_middlename': '',
 'source_ua_sameRegLivingAddress': '',
 'source_ukr_company_address': '',
 'source_ukr_company_name': '',
 'source_ukr_fullname': '',
 'dnt_sizeIncome_hidden': False,
 'dnt_objectType_encoded': 'business',
 'dnt_is_foreign': False},
 '1493275433175': {'incomeSource': 'j',
 'iteration': '1493275433175',
 'objectType': 'pension',
 'otherObjectType': '',
 'person': '1',
 'rights': {'1': {'citizen': '',
 'eng_company_address': '',
 'eng_company_code': '',
 'eng_company_name': '',
 'eng_firstname': '',
 'eng_fullname': '',
 'eng_lastname': '',
 'eng_middlename': '',
 'eng_middlename_extendedstatus': '',
 'eng_postCode': '',
 'eng_postCode_extendedstatus': '',
 'otherOwnership': '',
 'ownershipType': 'property',
 'percent-ownership': '',
 'postCode': '[hidden]',
 'rightBelongs': '1',
 'rights_cityPath': '',
 'ua_apartmentsNum_extendedstatus': '',
 'ua_city': '',
 'ua_company_code': '',
 'ua_company_name': '',
 'ua_firstname': '',
 'ua_houseNum_extendedstatus': '',
 'ua_housePartNum_extendedstatus': '',
 'ua_lastname': '',
 'ua_middlename': '',
 'ua_middlename_extendedstatus': '',
 'ua_postCode': '',
 'ua_postCode_extendedstatus': '',
 'ua_street': '[hidden]',
 'ua_streetType': '[hidden]',
 'ua_street_extendedstatus': '',
 'ukr_company_address': '',
 'ukr_company_name': '',
 'ukr_firstname': '',
 'ukr_fullname': '',
 'ukr_lastname': '',
 'ukr_middlename': '',
 'ukr_middlename_extendedstatus': ''}},
 'sizeIncome': 20075.0,
 'source_citizen': '_',
 'source_eng_company_address': '',
 'source_eng_company_code': '',
 'source_eng_company_name': '',
 'source_eng_fullname': '',
 'source_ua_company_code': '',
 'source_ua_company_code_extendedstatus': '2',
 'source_ua_company_name': '_',
 'source_ua_firstname': '',
 'source_ua_lastname': '',
 'source_ua_middlename': '',
 'source_ua_sameRegLivingAddress': '',
 'source_ukr_company_address': '',
 'source_ukr_company_name': '',
 'source_ukr_fullname': '',
 'dnt_sizeIncome_hidden': False,
 'dnt_objectType_encoded': 'pension',
 'dnt_is_foreign': False}},
 'step_3': {'1493273226784': {'city': '[hidden]',
 'cityPath': '[hidden]',
 'costAssessment': 0,
 'costAssessment_extendedstatus': '2',
 'costDate': 0,
 'costDate_extendedstatus': '2',
 'country': '1',
 'district': '[hidden]',
 'iteration': '1493273226784',
 'objectType': '_',
 'otherObjectType': '',
 'owningDate': 'date',
 'person': '1',
 'postCode': '[hidden]',
 'regNumber_extendedstatus': '1',
 'region': '[hidden]',
 'rights': {'1': {'citizen': '',
 'eng_company_address': '',
 'eng_company_code': '',
 'eng_company_name': '',
 'eng_firstname': '',
 'eng_fullname': '',
 'eng_lastname': '',
 'eng_middlename': '',
 'eng_middlename_extendedstatus': '',
 'eng_postCode': '',
 'eng_postCode_extendedstatus': '',
 'otherOwnership': '',
 'ownershipType': '_',
 'percent-ownership': '50',
 'postCode': '[hidden]',
 'rightBelongs': '1',
 'rights_cityPath': '',
 'ua_apartmentsNum_extendedstatus': '',
 'ua_city': '',
 'ua_company_code': '',
 'ua_company_name': '',
 'ua_firstname': '',
 'ua_houseNum_extendedstatus': '',
 'ua_housePartNum_extendedstatus': '',
 'ua_lastname': '',
 'ua_middlename': '',
 'ua_middlename_extendedstatus': '',
 'ua_postCode': '',
 'ua_postCode_extendedstatus': '',
 'ua_street': '[hidden]',
 'ua_streetType': '[hidden]',
 'ua_street_extendedstatus': '',
 'ukr_company_address': '',
 'ukr_company_name': '',
 'ukr_firstname': '',
 'ukr_fullname': '',
 'ukr_lastname': '',
 'ukr_middlename': '',
 'ukr_middlename_extendedstatus': '',
 'dnt_ownershipType_encoded': 'ownproperty'}},
 'totalArea': 32.3,
 'ua_cityType': '_',
 'ua_housePartNum_extendedstatus': '1',
 'ua_postCode': '_',
 'ua_street': '[hidden]',
 'ua_streetType': '[hidden]',
 'dnt_costDate_hidden': True,
 'dnt_costAssessment_hidden': True,
 'dnt_totalArea_hidden': False,
 'dnt_objectType_encoded': 'apt'}}},
 'related_entities': {'people': {'family': []},
 'documents': {'corrected': [], 'originals': []},
 'companies': {'owned': [], 'related': ['_'], 'all': ['_']}}}
asked Sep 23, 2019 at 10:39
\$\endgroup\$

1 Answer 1

2
\$\begingroup\$

Use some generators.

Separate out your logic for iterating through your data. For instance, don't form a "subcontainer" list at all since you're just summing it. Also, use values() instead of keys() for your situation.

def step_11(datum: dict)
 for v in datum["unified_source"]["step_11"].values():
 s_df = json_normalize(v)[
 ["person", "sizeIncome"]
 ]
 yield s_df.loc[s_df["person"].eq("1")].sum()["sizeIncome"]
...
df['income'] = [sum(step_11(d)) for d in data]
answered Sep 23, 2019 at 14:31
\$\endgroup\$
1
  • 1
    \$\begingroup\$ data and i will raise a NameError unless they are global. Should this be for v in datum[...].values()? \$\endgroup\$ Commented Sep 24, 2019 at 17:20

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.