|
4 | 4 | - [数据读取](#数据读取)
|
5 | 5 | - [读取MySQL](#读取MySQL)
|
6 | 6 | - [读取Hive](#读取Hive)
|
7 | | - - [读取Excel、csv、txt文件](#读取数据文件) |
| 7 | + - [读取Excel、csv、txt、json、zip文件](#读取数据文件) |
8 | 8 | - [数据导出](#数据导出)
|
9 | 9 | - [导出至Excel格式](#导出至Excel格式)
|
10 | 10 | - [导出至csv格式](#导出至csv格式)
|
@@ -129,6 +129,30 @@ df = pd.DataFrame({'id':[1001,1002,1003,1004,1005,1006],
|
129 | 129 | 'price':[1200,np.nan,2133,5433,np.nan,4432]},
|
130 | 130 | columns =['id','date','city','category','age','price'])
|
131 | 131 |
|
| 132 | +# 读取json文件 |
| 133 | +# 方法一:占用内存大,适用于小数据量 |
| 134 | +data1 = pd.read_json('data.json',lines=True) |
| 135 | +# 方法二:逐行解析(注意:readlines() 仍会一次性将全部行读入内存) |
| 136 | +import json |
| 137 | +with open('data.json','r') as f: |
| 138 | + lines = f.readlines() |
| 139 | +df1 = [] |
| 140 | +for i in lines: |
| 141 | + i_dict = json.loads(i) |
| 142 | + df1.append(i_dict) |
| 143 | +df2 = pd.DataFrame(df1) |
| 144 | + |
| 145 | +# 按行读取压缩包数据 |
| 146 | +import json |
| 147 | +import zipfile |
| 148 | +data = [] |
| 149 | +with zipfile.ZipFile(r'D:\data\file.zip', 'r') as z: |
| 150 | + with z.open(z.namelist()[0]) as f: |
| 151 | + for line in f: |
| 152 | + j = json.loads(line) |
| 153 | + data.append(j) |
| 154 | +df = pd.DataFrame(data) |
| 155 | + |
132 | 156 | # 解决 "invalid continuation byte" 报错问题
|
133 | 157 | '''
|
134 | 158 | UnicodeDecodeError: 'utf-8' codec can't decode byte 0xce in position 52: invalid continuation byte
|
|
0 commit comments