Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 202ed6f

Browse files
Update 07-常用函数.md
1 parent fcba9a5 commit 202ed6f

File tree

1 file changed

+100
-3
lines changed

1 file changed

+100
-3
lines changed

‎07-常用函数.md

Lines changed: 100 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,58 @@ df['close_matches'] = df['close_matches'].apply(lambda x: x[0] if len(x)>0 else
207207
### json解析
208208

209209
```python
210-
from pandas.io.json import json_normalize
211-
import json
210+
import pandas as pd
211+
from pandas import json_normalize
212+
import json
213+
214+
# json.dumps 参数设置:将字典序列化为标准 JSON 格式的字符串
215+
dict_data = {'name':'小明', 'age':'18', 'address':'Beijing'}
216+
json_data = json.dumps(dict_data,
217+
ensure_ascii=False, # 中文
218+
indent=2, # 缩进
219+
sort_keys=True # 排序
220+
)
221+
222+
df = pd.DataFrame(dict_data, index=[0])
223+
```
224+
225+
<br/>
226+
227+
```python
228+
# 根据层级解析
229+
data = [{'id':'100001',
230+
'name': '小明',
231+
'info':{
232+
'addr':{'country':'CN',
233+
'province':'Beijing'},
234+
'phone':'133***6069'
235+
}
236+
},
237+
{'id':'100002',
238+
'name': '小兰',
239+
'info':{
240+
'addr':{'country':'CN',
241+
'province':'Shanghai'},
242+
'phone':'133***5050'
243+
}
244+
}]
245+
246+
# 转为json标准格式
247+
data_json = json.dumps(data, ensure_ascii=False, indent=2)
248+
249+
# 通过 max_level 控制展开的嵌套层级深度(0 表示不展开嵌套字典)
250+
df1 = pd.json_normalize(data, max_level=0)
251+
df2 = pd.json_normalize(data, max_level=1)
252+
df3 = pd.json_normalize(data, max_level=2)
253+
254+
# 取最后一个层级作为列名
255+
df3.columns = [i.split('.')[-1] if len(i.split('.'))>1 else i for i in df3.columns]
256+
```
257+
258+
<br/>
212259

260+
```python
261+
# 单一层级json解析为DataFrame
213262
def _SORTDICTKEY(dictin):
214263
for key in dictin:
215264
if not isinstance(dictin[key],(set,list,dict)):
@@ -233,13 +282,61 @@ def flatten_json(y):
233282
flatten(y)
234283
return out
235284

236-
data = df['report'].tolist()
285+
df = pd.DataFrame({
286+
'id': ['001','002','003'],
287+
'report': [{'语文':80, '数学':85, '外语':90},
288+
{'语文':75, '数学':80, '外语':85},
289+
{'语文':90, '数学':85, '外语':80}]
290+
})
291+
292+
data = df['report'].apply(lambda x: json.dumps(x, ensure_ascii=False)).tolist()
237293
a = [flatten_json(json.loads(d)) for d in data]
238294
res = json_normalize(a)
239295
```
240296

241297
<br/>
242298

299+
```python
300+
# 嵌套式json解析
301+
data = [{'id':'100001',
302+
'name': '小明',
303+
'describe':[{'subject':'', 'score':80},
304+
{'subject':'', 'score':85},
305+
{'subject':'', 'score':90}]
306+
},
307+
{'id':'100002',
308+
'name': '小兰',
309+
'describe':[{'subject':'', 'score':82},
310+
{'subject':'', 'score':88},
311+
{'subject':'', 'score':92}]
312+
}]
313+
314+
# 生成json_normalize所需参数:agr1 用作 record_path(值为列表的键),agr2 用作 meta(其余键)
315+
agr1=[]
316+
agr2=[]
317+
for key, value in data[0].items():
318+
if isinstance(value, list):
319+
agr1.append(key)
320+
elif isinstance(value, dict):
321+
for j in data[0][key].keys():
322+
l=[]
323+
l.append(key)
324+
l.append(j)
325+
agr2.append(l)
326+
else:
327+
agr2.append(key)
328+
print(agr1, agr2)
329+
330+
# 只读取层级嵌套中的部分内容
331+
df = json_normalize(data, 'describe')
332+
333+
# 读取全部内容:展开 record_path 中的嵌套记录,并保留 meta 中的外层字段
334+
df = json_normalize(data, agr1, agr2)
335+
df.columns = [i.split('.')[1] if len(i.split('.'))>1 else i for i in df.columns]
336+
```
337+
338+
<br/>
339+
243340
------
244341

245342
### 日期格式清洗

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /