@@ -148,34 +148,30 @@ def FAOFBS():
     import dask.dataframe as dd
     import statsmodels.api as sm
 
-    data = dd.read_csv(r'C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
-                       encoding="ISO-8859-1")
-    # data=pd.concat(tp,ignore_index=True)
-
-    data['Code'] = data[str('Element Code')] + data[str('Item Code')]
-    concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\Aggregation for crop type.csv')
-
-    data = dd.merge(data, concord_table, how="left", left_on="Item Code", right_on='Code no')
-
-    data['Series_Name'] = data[str('Code Name')] + data[str('Element')]
-    series_concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\FAOSeriesConcordance.csv')
-
-    data.columns = list(data.columns)
-    data = data.drop(
+    Country_Concord = pd.read_csv('C:\\Users\Public\Pythonfiles\CountryConcordFAO.csv', encoding="ISO-8859-1")
+    concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\Aggregation for crop type.csv')
+    series_concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\FAOSeriesConcordance.csv')
+    data = pd.read_csv(r'C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
+                       encoding="ISO-8859-1", chunksize=100000)
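+    # chunksize=100000 makes read_csv return an iterator of DataFrames,
+    # so the full FAO file never has to sit in memory at once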
+
+    chunk_list = []
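+    # Per chunk: build keys, attach the crop, series, and country concordances,
+    # then keep only fully matched rows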
+    for chunk in data:
+        chunk['Code'] = chunk['Element Code'] + chunk['Item Code']
+        chunk = pd.merge(chunk, concord_table, how="left", left_on="Item Code", right_on='Code no')
+        chunk['Series_Name'] = chunk['Code Name'] + chunk['Element']
+        chunk = pd.merge(chunk, series_concord_table, how="left", left_on="Series_Name", right_on="Code in file")
+        chunk = pd.merge(chunk, Country_Concord, how="left", left_on="Area", right_on='Area Name')
+        chunk = chunk.dropna(how='any')
+        chunk_list.append(chunk)
+
+    data = pd.concat(chunk_list)
+
+    data = data.drop(
         ['Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element', 'Element Code', 'Code', 'Code Name', 'Item',
          'Code no'], axis=1)
-    print(data.head())
-    data = data.dropna(how='any')
-    print(data.head())
-
-    data.reset_index()
 
-    datapanda = data.compute()
-    # data=pd.DataFrame(data)
-    # p= datapanda.pivot_table(index=["Area",'Year'],values=['Value'],
-    #                          columns=["Series Name in Ifs"],aggfunc=[np.sum])
-
-    p = pd.pivot_table(datapanda, index=["Area", 'Year'], values=['Value'], columns=["Series_Name"], aggfunc=[np.sum])
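+    # Wide layout: one row per (Area, Year), one column per Series_Name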
+    p = pd.pivot_table(data, index=["Area", 'Year'], values=['Value'], columns=["Series_Name"], aggfunc=[np.sum])
 
     return (p)
 
@@ -213,24 +209,34 @@ def FAOFBSFish():
     import dask.dataframe as dd
     import statsmodels.api as sm
 
-    data = dd.read_csv('C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
-                       encoding="ISO-8859-1")
-    # data=pd.concat(tp,ignore_index=True)
+    Country_Concord = pd.read_csv('C:\\Users\Public\Pythonfiles\CountryConcordFAO.csv', encoding="ISO-8859-1")
+    data = pd.read_csv('C:\\Users\Public\Pythonfiles\FoodBalanceSheets_E_All_Data_(Normalized).csv',
+                       encoding="ISO-8859-1", chunksize=100000)
+    concord_table = pd.read_csv('C:\\Users\Public\Pythonfiles\AggregationforFish.csv')
+    chunk_list = []
 
-    data['Code'] = data[str('Element Code')] + data[str('Item Code')]
-    concord_table = dd.read_csv('C:\\Users\Public\Pythonfiles\AggregationforFish.csv')
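+    # Same chunked pattern as FAOFBS: merge the fish-item and country
+    # concordances per chunk and keep only fully matched rows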
+    for chunk in data:
+        chunk['Code'] = chunk['Element Code'] + chunk['Item Code']
+        chunk = pd.merge(chunk, concord_table, how="left", left_on="Code", right_on='Code in Source')
+        chunk = pd.merge(chunk, Country_Concord, how="left", left_on="Area", right_on='Area Name')
+        chunk = chunk.dropna(how='any')
+        chunk_list.append(chunk)
 
-    data = dd.merge(data, concord_table, how="left", left_on="Code", right_on='Code in Source')
+    data = pd.concat(chunk_list)
 
     data = data.drop(
         ['Area Code', 'Item Code', 'Flag', 'Unit', 'Year Code', 'Element', 'Element Code', 'Code', 'Item'], axis=1)
 
-    data = data.dropna(how='any')
-    data.reset_index()
+    # data = data.dropna(how='any')
+    # print(data.Country.unique())
 
-    datapanda = data.compute()
-    print(datapanda.head())
-    p = pd.pivot_table(datapanda, index=["Area", 'Year'], values=['Value'], columns=["Variable"], aggfunc=[np.sum])
+    # print("Dropped irrelevant columns, Na")
+    # data.reset_index()
+
+    # datapanda = data.groupby(["Area","Year","Variable"]).sum().compute()
+    # print(datapanda.head())
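+
+    # Pivot on the IFs country name: one row per (country, year), one column per fish variable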
+    p = pd.pivot_table(data, index=["Country name in IFs", 'Year'], values=['Value'], columns=["Variable"], aggfunc=[np.sum])
 
     return (p)
 
@@ -385,7 +391,6 @@ def AQUASTATData():
 
 def AQUASTATDataFile():
     import pandas as pd
-
     p = AQUASTATData()
     p = p.reset_index()
     writer = pd.ExcelWriter('AQUASTAT.xlsx', engine='xlsxwriter')
@@ -394,49 +399,31 @@ def AQUASTATDataFile():
 
 
 def IMFGFSRevenueData():
-    import requests
-    import numpy as np
-    import matplotlib.pyplot as plt
-    import pandas as pd
-    import csv
-    import xlrd
-    import matplotlib.lines as mlines
-    import matplotlib.transforms as mtransforms
-    import xlsxwriter
-    import statsmodels.api as sm
-    import dask.dataframe as dd
-
-    import requests
     import numpy as np
-    import matplotlib.pyplot as plt
     import pandas as pd
-    import csv
-    import xlrd
-    import matplotlib.lines as mlines
-    import matplotlib.transforms as mtransforms
-    import xlsxwriter
-    import statsmodels.api as sm
     import dask.dataframe as dd
-
-    data = dd.read_csv('C:\\Users\Public\Pythonfiles\GFSRevenue.csv')
-
-    data['FuncSector'] = data[str('Sector Name')] + data[str('Classification Name')]
-
     concord_table = pd.read_excel('C:\\Users\Public\Pythonfiles\CountryConcordanceIMF.xlsx')
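+    # Stream GFSRevenue.csv in 100,000-row chunks; merge, rename, and filter per chunk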
+    data = pd.read_csv('C:\\Users\Public\Pythonfiles\GFSRevenue.csv', chunksize=100000)
+    chunk_list = []
+    for chunk in data:
+        chunk['FuncSector'] = chunk['Sector Name'] + chunk['Classification Name']
+        chunk = chunk.merge(concord_table, on="Country Name", how='left')
+        chunk = chunk.rename(columns={"Time Period": "Year"})
+        chunk = chunk.loc[chunk['Unit Name'] == 'Percent of GDP']
+        chunk = chunk.dropna(how='any')
+        chunk_list.append(chunk)
+    data = pd.concat(chunk_list)
 
-    data = data.merge(concord_table, on="Country Name", how='left')
-    data = data.loc[data['Unit Name'] == 'Percent of GDP']
-    print(data.head())
     data = data.drop(
         ['Country Code', 'Country Name', 'Classification Code', 'Sector Code', 'Unit Code', 'Status', 'Valuation',
          'Bases of recording (Gross/Net)', 'Nature of data'], axis=1)
 
     data = data.reset_index()
-    data = data.compute()
 
-    data = data.reset_index()
+    # data = data.reset_index()
 
-    p = pd.pivot_table(data, index=["Country name in IFs", "Unit Name", 'Time Period'], values=['Value'],
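+    # 'Time Period' was renamed to 'Year' in the chunk loop above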
+    p = pd.pivot_table(data, index=["Country name in IFs", "Unit Name", 'Year'], values=['Value'],
                        columns=['FuncSector'], aggfunc=[np.sum])
 
     return (p)
@@ -617,6 +604,4 @@ def WDIDataFile():
     data = WDIData()
     writer = pd.ExcelWriter('WDISeries.xlsx', engine='xlsxwriter')
     data.to_excel(writer, sheet_name='WDIData', merge_cells=False)
-    writer.save()
-
-
+    writer.save()
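+    # note: newer pandas versions replace ExcelWriter.save() with writer.close()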