@@ -207,9 +207,58 @@ df['close_matches'] = df['close_matches'].apply(lambda x: x[0] if len(x)>0 else
207
207
### json解析
208
208
209
209
``` python
210
- from pandas.io.json import json_normalize
211
- import json
210
+ import pandas as pd
211
+ from pandas import json_normalize
212
+ import json
213
+
214
# json.dumps options: serialise a dict as readable, standard JSON text
dict_data = {'name': '小明', 'age': '18', 'address': 'Beijing'}
json_data = json.dumps(
    dict_data,
    ensure_ascii=False,  # keep Chinese characters instead of \uXXXX escapes
    indent=2,            # pretty-print with a two-space indent
    sort_keys=True,      # emit keys in sorted order
)

# A dict of scalars needs an explicit index to become a one-row DataFrame
df = pd.DataFrame(dict_data, index=[0])
223
+ ```
224
+
225
+ <br />
226
+
227
+ ``` python
228
# Parse nested records level by level.
# Sample data: 'info' is a nested dict and 'info' -> 'addr' is nested again.
data = [
    {
        'id': '100001',
        'name': '小明',
        'info': {
            'addr': {'country': 'CN', 'province': 'Beijing'},
            'phone': '133***6069',
        },
    },
    {
        'id': '100002',
        'name': '小兰',
        'info': {
            'addr': {'country': 'CN', 'province': 'Shanghai'},
            'phone': '133***5050',
        },
    },
]

# Serialise to standard JSON text
data_json = json.dumps(data, ensure_ascii=False, indent=2)

# max_level limits how many nesting levels are expanded into columns
df1 = pd.json_normalize(data, max_level=0)  # 'info' stays a dict column
df2 = pd.json_normalize(data, max_level=1)  # 'info.addr' stays a dict column
df3 = pd.json_normalize(data, max_level=2)  # fully flattened

# Use the last path component as the column name.
# A name without '.' splits into a single element, so [-1] returns it as is.
df3.columns = [col.split('.')[-1] for col in df3.columns]
256
+ ```
257
+
258
+ <br />
212
259
260
+ ``` python
261
+ # 单一层级json解析为DataFrame
213
262
def _SORTDICTKEY (dictin ):
214
263
for key in dictin:
215
264
if not isinstance (dictin[key],(set ,list ,dict )):
@@ -233,13 +282,61 @@ def flatten_json(y):
233
282
flatten(y)
234
283
return out
235
284
236
- data = df[' report' ].tolist()
285
# Sample frame whose 'report' column holds one flat dict per row
df = pd.DataFrame({
    'id': ['001', '002', '003'],
    'report': [
        {'语文': 80, '数学': 85, '外语': 90},
        {'语文': 75, '数学': 80, '外语': 85},
        {'语文': 90, '数学': 85, '外语': 80},
    ],
})

# Serialise every report to a JSON string (ensure_ascii=False keeps the
# Chinese keys readable), then parse each string back and flatten it with
# flatten_json before building the result frame.
data = df['report'].apply(lambda x: json.dumps(x, ensure_ascii=False)).tolist()
a = [flatten_json(json.loads(d)) for d in data]
res = json_normalize(a)
239
295
```
240
296
241
297
<br />
242
298
299
+ ``` python
300
# Nested JSON: every record carries a list of sub-records under 'describe'
data = [
    {
        'id': '100001',
        'name': '小明',
        'describe': [
            {'subject': '语', 'score': 80},
            {'subject': '数', 'score': 85},
            {'subject': '外', 'score': 90},
        ],
    },
    {
        'id': '100002',
        'name': '小兰',
        'describe': [
            {'subject': '语', 'score': 82},
            {'subject': '数', 'score': 88},
            {'subject': '外', 'score': 92},
        ],
    },
]

# Build the json_normalize arguments from the first record:
#   agr1 -> record_path: keys whose value is a list of sub-records
#   agr2 -> meta: scalar keys, plus [parent, child] paths for dict values
agr1 = []
agr2 = []
for key, value in data[0].items():
    if isinstance(value, list):
        agr1.append(key)
    elif isinstance(value, dict):
        agr2.extend([key, child] for child in value)
    else:
        agr2.append(key)
print(agr1, agr2)

# Read only part of the nested content (the sub-records)
df = json_normalize(data, 'describe')

# Read everything: the sub-records plus the top-level fields as meta columns
df = json_normalize(data, agr1, agr2)
# A [parent, child] meta path is named 'parent.child'; keep the child part
df.columns = [col.split('.')[1] if '.' in col else col for col in df.columns]
336
+ ```
337
+
338
+ <br />
339
+
243
340
------
244
341
245
342
### 日期格式清洗
0 commit comments