Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e1cefc0

Browse files
Update DataUpdate.py
Removed usage of dask. Replaced with chunk processing
1 parent e320eb2 commit e1cefc0

File tree

1 file changed

+56
-71
lines changed

1 file changed

+56
-71
lines changed

DataUpdate.py

Lines changed: 56 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -148,34 +148,30 @@ def FAOFBS():
148148
import dask.dataframe as dd
149149
import statsmodels.api as sm
150150

151-
data = dd.read_csv(r'C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
152-
encoding="ISO-8859-1")
153-
# data=pd.concat(tp,ignore_index=True)
154-
155-
data['Code'] = data[str('Element Code')] + data[str('Item Code')]
156-
concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\Aggregation for crop type.csv')
157-
158-
data = dd.merge(data, concord_table, how="left", left_on="Item Code", right_on='Code no')
159-
160-
data['Series_Name'] = data[str('Code Name')] + data[str('Element')]
161-
series_concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\FAOSeriesConcordance.csv')
162-
163-
data.columns = list(data.columns)
164-
data = data.drop(
151+
Country_Concord = pd.read_csv('C:\\Users\Public\Pythonfiles\CountryConcordFAO.csv', encoding="ISO-8859-1")
152+
concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\Aggregation for crop type.csv')
153+
series_concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\FAOSeriesConcordance.csv')
154+
data = pd.read_csv(r'C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
155+
encoding="ISO-8859-1",chunksize=100000)
156+
157+
chunk_list=[]
158+
for chunk in data:
159+
chunk['Code'] = chunk[str('Element Code')] + chunk[str('Item Code')]
160+
chunk = pd.merge(chunk, concord_table, how="left", left_on="Item Code", right_on='Code no')
161+
chunk['Series_Name'] = chunk[str('Code Name')] + chunk[str('Element')]
162+
chunk = pd.merge(chunk, series_concord_table, how="left", left_on="Series_Name", right_on="Code in file")
163+
chunk = pd.merge(chunk, Country_Concord, how="left", left_on="Area", right_on='Area Name')
164+
chunk = chunk.dropna(how='any')
165+
chunk = chunk.dropna(how='any')
166+
chunk_list.append(chunk)
167+
168+
data=pd.concat(chunk_list)
169+
170+
data.drop(
165171
['Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element', 'Element Code', 'Code', 'Code Name', 'Item',
166172
'Code no'], axis=1)
167-
print(data.head())
168-
data = data.dropna(how='any')
169-
print(data.head())
170-
171-
data.reset_index()
172173

173-
datapanda = data.compute()
174-
# data=pd.DataFrame(data)
175-
# p= datapanda.pivot_table(index=["Area",'Year'],values=['Value'],
176-
# columns=["Series Name in Ifs"],aggfunc=[np.sum])
177-
178-
p = pd.pivot_table(datapanda, index=["Area", 'Year'], values=['Value'], columns=["Series_Name"], aggfunc=[np.sum])
174+
p = pd.pivot_table(data, index=["Area", 'Year'], values=['Value'], columns=["Series_Name"], aggfunc=[np.sum])
179175

180176
return (p)
181177

@@ -213,24 +209,34 @@ def FAOFBSFish():
213209
import dask.dataframe as dd
214210
import statsmodels.api as sm
215211

216-
data = dd.read_csv('C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
217-
encoding="ISO-8859-1")
218-
# data=pd.concat(tp,ignore_index=True)
212+
Country_Concord = pd.read_csv('C:\\Users\Public\Pythonfiles\CountryConcordFAO.csv', encoding="ISO-8859-1")
213+
data = pd.read_csv('C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
214+
encoding="ISO-8859-1",chunksize=100000)
215+
concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\AggregationforFish.csv')
216+
chunk_list = []
219217

220-
data['Code'] = data[str('Element Code')] + data[str('Item Code')]
221-
concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\AggregationforFish.csv')
218+
for chunk in data:
219+
chunk['Code'] = chunk[str('Element Code')] + chunk[str('Item Code')]
220+
chunk= pd.merge(chunk, concord_table, how="left", left_on="Code", right_on='Code in Source')
221+
chunk = pd.merge(chunk, Country_Concord, how="left", left_on="Area", right_on='Area Name')
222+
chunk = chunk.dropna(how='any')
223+
chunk_list.append(chunk)
222224

223-
data=dd.merge(data, concord_table, how="left", left_on="Code", right_on='Code in Source')
225+
data=pd.concat(chunk_list)
224226

225227
data = data.drop(
226228
['Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element', 'Element Code', 'Code', 'Item'], axis=1)
227229

228-
data = data.dropna(how='any')
229-
data.reset_index()
230+
#data = data.dropna(how='any')
231+
#print(data.Country.unique())
230232

231-
datapanda = data.compute()
232-
print(datapanda.head())
233-
p = pd.pivot_table(datapanda, index=["Area", 'Year'], values=['Value'], columns=["Variable"], aggfunc=[np.sum])
233+
#print("Dropped irrelevant columns, Na")
234+
#data.reset_index()
235+
236+
#datapanda = data.groupby(["Area","Year","Variable"]).sum().compute()
237+
#print(datapanda.head())
238+
239+
p = pd.pivot_table(data, index=["Country name in IFs", 'Year'], values=['Value'], columns=["Variable"], aggfunc=[np.sum])
234240

235241
return (p)
236242

@@ -385,7 +391,6 @@ def AQUASTATData():
385391

386392
def AQUASTATDataFile():
387393
import pandas as pd
388-
389394
p = AQUASTATData()
390395
p = p.reset_index()
391396
writer = pd.ExcelWriter('AQUASTAT.xlsx', engine='xlsxwriter')
@@ -394,49 +399,31 @@ def AQUASTATDataFile():
394399

395400

396401
def IMFGFSRevenueData():
397-
import requests
398-
import numpy as np
399-
import matplotlib.pyplot as plt
400-
import pandas as pd
401-
import csv
402-
import xlrd
403-
import matplotlib.lines as mlines
404-
import matplotlib.transforms as mtransforms
405-
import xlsxwriter
406-
import statsmodels.api as sm
407-
import dask.dataframe as dd
408-
409-
import requests
410402
import numpy as np
411-
import matplotlib.pyplot as plt
412403
import pandas as pd
413-
import csv
414-
import xlrd
415-
import matplotlib.lines as mlines
416-
import matplotlib.transforms as mtransforms
417-
import xlsxwriter
418-
import statsmodels.api as sm
419404
import dask.dataframe as dd
420-
421-
data = dd.read_csv('C:\\Users\Public\Pythonfiles\GFSRevenue.csv')
422-
423-
data['FuncSector'] = data[str('Sector Name')] + data[str('Classification Name')]
424-
425405
concord_table = pd.read_excel('C:\\Users\Public\Pythonfiles\CountryConcordanceIMF.xlsx')
406+
data = pd.read_csv('C:\\Users\Public\Pythonfiles\GFSRevenue.csv',chunksize=100000)
407+
chunk_list=[]
408+
for chunk in data:
409+
chunk['FuncSector'] = chunk[str('Sector Name')] + chunk[str('Classification Name')]
410+
chunk = chunk.merge(concord_table, on="Country Name", how='left')
411+
chunk=chunk.rename(columns={"Time Period":"Year"})
412+
chunk = chunk.loc[chunk['Unit Name'] == 'Percent of GDP']
413+
chunk.dropna(how='any')
414+
print(chunk.head())
415+
chunk_list.append(chunk)
416+
data=pd.concat(chunk_list)
426417

427-
data = data.merge(concord_table, on="Country Name", how='left')
428-
data = data.loc[data['Unit Name'] == 'Percent of GDP']
429-
print(data.head())
430418
data = data.drop(
431419
['Country Code', 'Country Name', 'Classification Code', 'Sector Code', 'Unit Code', 'Status', 'Valuation',
432420
'Bases of recording (Gross/Net)', 'Nature of data'], axis=1)
433421

434422
data = data.reset_index()
435-
data = data.compute()
436423

437-
data = data.reset_index()
424+
#data = data.reset_index()
438425

439-
p = pd.pivot_table(data, index=["Country name in IFs", "Unit Name", 'Time Period'], values=['Value'],
426+
p = pd.pivot_table(data, index=["Country name in IFs", "Unit Name", 'Year'], values=['Value'],
440427
columns=['FuncSector'], aggfunc=[np.sum])
441428

442429
return (p)
@@ -617,6 +604,4 @@ def WDIDataFile():
617604
data = WDIData()
618605
writer = pd.ExcelWriter('WDISeries.xlsx', engine='xlsxwriter')
619606
data.to_excel(writer, sheet_name='WDIData', merge_cells=False)
620-
writer.save()
621-
622-
607+
writer.save()

0 commit comments

Comments
(0)

Page converted by AltStyle (→ view original)