Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e1cefc0

Browse files
Update DataUpdate.py
Removed usage of dask. Replaced with chunk processing
1 parent e320eb2 commit e1cefc0

File tree

1 file changed

+56
-71
lines changed

1 file changed

+56
-71
lines changed

DataUpdate.py

Lines changed: 56 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -148,34 +148,30 @@ def FAOFBS():
148148
import dask.dataframe as dd
149149
import statsmodels.api as sm
150150

151-
data = dd.read_csv(r'C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
152-
encoding="ISO-8859-1")
153-
# data=pd.concat(tp,ignore_index=True)
154-
155-
data['Code'] = data[str('Element Code')] + data[str('Item Code')]
156-
concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\Aggregation for crop type.csv')
157-
158-
data = dd.merge(data, concord_table, how="left", left_on="Item Code", right_on='Code no')
159-
160-
data['Series_Name'] = data[str('Code Name')] + data[str('Element')]
161-
series_concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\FAOSeriesConcordance.csv')
162-
163-
data.columns = list(data.columns)
164-
data = data.drop(
151+
Country_Concord = pd.read_csv('C:\\Users\Public\Pythonfiles\CountryConcordFAO.csv', encoding="ISO-8859-1")
152+
concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\Aggregation for crop type.csv')
153+
series_concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\FAOSeriesConcordance.csv')
154+
data = pd.read_csv(r'C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
155+
encoding="ISO-8859-1",chunksize=100000)
156+
157+
chunk_list=[]
158+
for chunk in data:
159+
chunk['Code'] = chunk[str('Element Code')] + chunk[str('Item Code')]
160+
chunk = pd.merge(chunk, concord_table, how="left", left_on="Item Code", right_on='Code no')
161+
chunk['Series_Name'] = chunk[str('Code Name')] + chunk[str('Element')]
162+
chunk = pd.merge(chunk, series_concord_table, how="left", left_on="Series_Name", right_on="Code in file")
163+
chunk = pd.merge(chunk, Country_Concord, how="left", left_on="Area", right_on='Area Name')
164+
chunk = chunk.dropna(how='any')
165+
chunk = chunk.dropna(how='any')
166+
chunk_list.append(chunk)
167+
168+
data=pd.concat(chunk_list)
169+
170+
data.drop(
165171
['Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element', 'Element Code', 'Code', 'Code Name', 'Item',
166172
'Code no'], axis=1)
167-
print(data.head())
168-
data = data.dropna(how='any')
169-
print(data.head())
170-
171-
data.reset_index()
172173

173-
datapanda = data.compute()
174-
# data=pd.DataFrame(data)
175-
# p= datapanda.pivot_table(index=["Area",'Year'],values=['Value'],
176-
# columns=["Series Name in Ifs"],aggfunc=[np.sum])
177-
178-
p = pd.pivot_table(datapanda, index=["Area", 'Year'], values=['Value'], columns=["Series_Name"], aggfunc=[np.sum])
174+
p = pd.pivot_table(data, index=["Area", 'Year'], values=['Value'], columns=["Series_Name"], aggfunc=[np.sum])
179175

180176
return (p)
181177

@@ -213,24 +209,34 @@ def FAOFBSFish():
213209
import dask.dataframe as dd
214210
import statsmodels.api as sm
215211

216-
data = dd.read_csv('C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
217-
encoding="ISO-8859-1")
218-
# data=pd.concat(tp,ignore_index=True)
212+
Country_Concord = pd.read_csv('C:\\Users\Public\Pythonfiles\CountryConcordFAO.csv', encoding="ISO-8859-1")
213+
data = pd.read_csv('C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
214+
encoding="ISO-8859-1",chunksize=100000)
215+
concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\AggregationforFish.csv')
216+
chunk_list = []
219217

220-
data['Code'] = data[str('Element Code')] + data[str('Item Code')]
221-
concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\AggregationforFish.csv')
218+
for chunk in data:
219+
chunk['Code'] = chunk[str('Element Code')] + chunk[str('Item Code')]
220+
chunk= pd.merge(chunk, concord_table, how="left", left_on="Code", right_on='Code in Source')
221+
chunk = pd.merge(chunk, Country_Concord, how="left", left_on="Area", right_on='Area Name')
222+
chunk = chunk.dropna(how='any')
223+
chunk_list.append(chunk)
222224

223-
data=dd.merge(data, concord_table, how="left", left_on="Code", right_on='Code in Source')
225+
data=pd.concat(chunk_list)
224226

225227
data = data.drop(
226228
['Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element', 'Element Code', 'Code', 'Item'], axis=1)
227229

228-
data = data.dropna(how='any')
229-
data.reset_index()
230+
#data = data.dropna(how='any')
231+
#print(data.Country.unique())
230232

231-
datapanda = data.compute()
232-
print(datapanda.head())
233-
p = pd.pivot_table(datapanda, index=["Area", 'Year'], values=['Value'], columns=["Variable"], aggfunc=[np.sum])
233+
#print("Dropped irrelevant columns, Na")
234+
#data.reset_index()
235+
236+
#datapanda = data.groupby(["Area","Year","Variable"]).sum().compute()
237+
#print(datapanda.head())
238+
239+
p = pd.pivot_table(data, index=["Country name in IFs", 'Year'], values=['Value'], columns=["Variable"], aggfunc=[np.sum])
234240

235241
return (p)
236242

@@ -385,7 +391,6 @@ def AQUASTATData():
385391

386392
def AQUASTATDataFile():
387393
import pandas as pd
388-
389394
p = AQUASTATData()
390395
p = p.reset_index()
391396
writer = pd.ExcelWriter('AQUASTAT.xlsx', engine='xlsxwriter')
@@ -394,49 +399,31 @@ def AQUASTATDataFile():
394399

395400

396401
def IMFGFSRevenueData():
397-
import requests
398-
import numpy as np
399-
import matplotlib.pyplot as plt
400-
import pandas as pd
401-
import csv
402-
import xlrd
403-
import matplotlib.lines as mlines
404-
import matplotlib.transforms as mtransforms
405-
import xlsxwriter
406-
import statsmodels.api as sm
407-
import dask.dataframe as dd
408-
409-
import requests
410402
import numpy as np
411-
import matplotlib.pyplot as plt
412403
import pandas as pd
413-
import csv
414-
import xlrd
415-
import matplotlib.lines as mlines
416-
import matplotlib.transforms as mtransforms
417-
import xlsxwriter
418-
import statsmodels.api as sm
419404
import dask.dataframe as dd
420-
421-
data = dd.read_csv('C:\\Users\Public\Pythonfiles\GFSRevenue.csv')
422-
423-
data['FuncSector'] = data[str('Sector Name')] + data[str('Classification Name')]
424-
425405
concord_table = pd.read_excel('C:\\Users\Public\Pythonfiles\CountryConcordanceIMF.xlsx')
406+
data = pd.read_csv('C:\\Users\Public\Pythonfiles\GFSRevenue.csv',chunksize=100000)
407+
chunk_list=[]
408+
for chunk in data:
409+
chunk['FuncSector'] = chunk[str('Sector Name')] + chunk[str('Classification Name')]
410+
chunk = chunk.merge(concord_table, on="Country Name", how='left')
411+
chunk=chunk.rename(columns={"Time Period":"Year"})
412+
chunk = chunk.loc[chunk['Unit Name'] == 'Percent of GDP']
413+
chunk.dropna(how='any')
414+
print(chunk.head())
415+
chunk_list.append(chunk)
416+
data=pd.concat(chunk_list)
426417

427-
data = data.merge(concord_table, on="Country Name", how='left')
428-
data = data.loc[data['Unit Name'] == 'Percent of GDP']
429-
print(data.head())
430418
data = data.drop(
431419
['Country Code', 'Country Name', 'Classification Code', 'Sector Code', 'Unit Code', 'Status', 'Valuation',
432420
'Bases of recording (Gross/Net)', 'Nature of data'], axis=1)
433421

434422
data = data.reset_index()
435-
data = data.compute()
436423

437-
data = data.reset_index()
424+
#data = data.reset_index()
438425

439-
p = pd.pivot_table(data, index=["Country name in IFs", "Unit Name", 'Time Period'], values=['Value'],
426+
p = pd.pivot_table(data, index=["Country name in IFs", "Unit Name", 'Year'], values=['Value'],
440427
columns=['FuncSector'], aggfunc=[np.sum])
441428

442429
return (p)
@@ -617,6 +604,4 @@ def WDIDataFile():
617604
data = WDIData()
618605
writer = pd.ExcelWriter('WDISeries.xlsx', engine='xlsxwriter')
619606
data.to_excel(writer, sheet_name='WDIData', merge_cells=False)
620-
writer.save()
621-
622-
607+
writer.save()

0 commit comments

Comments
(0)

Page converted by AltStyle (→ view original)