Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
This repository was archived by the owner on May 25, 2022. It is now read-only.

Commit 919e5fd

Browse files
committed
Init code
1 parent f4bb6a2 commit 919e5fd

File tree

4 files changed

+253
-0
lines changed

4 files changed

+253
-0
lines changed

‎projects/Baidu_POI_crawl/README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Baidu POI Crawl
2+
<!--Remove the below lines and add yours -->
3+
Crawl the points of interest (POIs) that match a keyword inside a specified rectangular area through the Baidu Maps web API.
4+
5+
### Prerequisites
6+
<!--Remove the below lines and add yours -->
7+
Modules required to be able to use the script successfully
8+
and how to install them.
9+
(Including a `requirements.txt` file will work.)
10+
11+
### How to run the script
12+
<!--Remove the below lines and add yours -->
13+
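Set `baidu_web_ak`, the bounding box (`wgs_l_x`, `wgs_l_y`, `wgs_r_x`, `wgs_r_y`) and `roi_key` at the top of `main.py`, then run `python main.py`; the results are appended to `output/<date>.txt` and a crawl log to `output/<date>.log`.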
14+
15+
### Screenshot/GIF showing the sample use of the script
16+
<!--Remove the below lines and add yours -->
17+
Add a jpeg/png/gif file here.
18+
19+
## *Author Name*
20+
<!--Remove the below lines and add yours -->
21+
[YiZhou Chen](https://github.com/geoyee)

‎projects/Baidu_POI_crawl/main.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import os
2+
import os.path as osp
3+
from util import *
4+
5+
6+
## ---------- config ----------
# Baidu web API access key (AK)
baidu_web_ak = 'your AK'
# Study area (lower-left and upper-right corners; x = longitude, y = latitude)
wgs_l_x = 105.824149
wgs_l_y = 28.524360
wgs_r_x = 111.659451
wgs_r_y = 31.730663
# Sliding-window size (the default of 0.5 works well)
kernel_x = 0.5
kernel_y = 0.5
# 1-based number of the grid cell to start crawling from
rec_index = 1
# Keyword of the points of interest to search for
roi_key = '桥'
# Output directory
output = 'output'


# Create the output directory up front: get_baidu_poi opens its result and
# log files inside it, and open() fails when the directory does not exist.
os.makedirs(output, exist_ok=True)
# Extent of the study area in the Baidu coordinate system
rec_index -= 1  # switch to the 0-based index expected by get_rectangle
l_x, l_y = wgs84_to_baidu(wgs_l_x, wgs_l_y, baidu_web_ak)
r_x, r_y = wgs84_to_baidu(wgs_r_x, wgs_r_y, baidu_web_ak)
print('左下点经纬度:', l_x, l_y)
print('右上点经纬度:', r_x, r_y)
# Number of sliding windows needed to cover the area in each direction
num_x = math.ceil((r_x - l_x) / kernel_x)
num_y = math.ceil((r_y - l_y) / kernel_y)
num_rec = num_x * num_y
print('网格数:', num_rec)
for idx in range(rec_index, num_rec):
    rec_str = get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, idx)
    print('第', (idx+1), '块网格,当前区域坐标:', rec_str)
    get_baidu_poi(roi_key, rec_str, baidu_web_ak, idx, output)
    print('当前区域完成')
    # Pause between windows before issuing the next batch of requests
    time.sleep(1)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
urllib
2+
requests
3+
json

‎projects/Baidu_POI_crawl/util.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
from urllib.request import urlopen, quote
2+
import requests
3+
import json
4+
import time
5+
import math
6+
7+
8+
# WGS84转Baidu坐标系
9+
def wgs84_to_baidu(x, y, baidu_ak):
    '''
    Convert one WGS84 coordinate to the Baidu coordinate system by calling
    the Baidu geoconv web API.

    inputs:
        x: longitude in WGS84
        y: latitude in WGS84
        baidu_ak: AK (access key) of the Baidu web API
    outputs:
        tuple: (longitude, latitude) in the Baidu coordinate system, or
            (0, 0) when the API answers with a non-zero status (the API
            message is printed in that case)
    '''
    data = str(x) + ',' + str(y)
    url = 'http://api.map.baidu.com/geoconv/v1/?coords=' + data + '&from=1&to=5&ak=' + baidu_ak
    # Use the response as a context manager so the connection is always
    # closed, even if reading or decoding raises.
    with urlopen(url) as req:
        res = req.read().decode()
    temp = json.loads(res)
    if temp['status'] != 0:
        print(temp['message'])
        return (0, 0)
    point = temp['result'][0]
    return (point['x'], point['y'])
31+
32+
33+
# 获取当前小范围区域
34+
def get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, index):
    '''
    Return the bounds of the sliding window with the given index.

    The area is covered by a grid of kernel_x by kernel_y windows numbered
    row by row, starting at the lower-left corner and moving along the
    longitude first. Windows in the last column/row are not clipped, so they
    may extend past r_x / r_y.

    inputs:
        l_x: longitude of the lower-left corner (Baidu coordinate system)
        l_y: latitude of the lower-left corner (Baidu coordinate system)
        r_x: longitude of the upper-right corner (Baidu coordinate system)
        r_y: latitude of the upper-right corner (Baidu coordinate system);
            not needed to locate a window, kept for interface compatibility
        kernel_x: window size along the longitude
        kernel_y: window size along the latitude
        index: 0-based index of the window
    outputs:
        string: "lat,lng,lat,lng" of the window's lower-left and upper-right
            corners, the order used for the API's bounds parameter
    '''
    # Only the number of columns is needed to turn the flat index into a
    # (row, column) pair.
    num_x = math.ceil((r_x - l_x) / kernel_x)
    row, col = divmod(index, num_x)
    left_x = l_x + kernel_x * col
    left_y = l_y + kernel_y * row
    right_x = left_x + kernel_x
    right_y = left_y + kernel_y
    # Latitude comes before longitude in the returned string.
    return ','.join(str(v) for v in (left_y, left_x, right_y, right_x))
55+
56+
57+
# Baidu系坐标到WGS84
58+
# Constants shared by the coordinate conversion helpers below.
pi = 3.1415926535897932384626  # π
x_pi = pi * 3000.0 / 180.0  # π scaled by 3000/180, used by bd09_to_gcj02
a = 6378245.0  # semi-major axis
ee = 0.00669342162296594323  # eccentricity squared
62+
63+
64+
def bd09_to_gcj02(bd_lon, bd_lat):
    '''
    Convert a Baidu (BD-09) coordinate to the Mars (GCJ-02) coordinate system.

    inputs:
        bd_lon: longitude in the Baidu coordinate system
        bd_lat: latitude in the Baidu coordinate system
    return: the converted coordinate as a list [longitude, latitude]
    '''
    # Remove the constant offsets, then work in polar form.
    dx = bd_lon - 0.0065
    dy = bd_lat - 0.006
    radius = math.sqrt(dx * dx + dy * dy) - 0.00002 * math.sin(dy * x_pi)
    angle = math.atan2(dy, dx) - 0.000003 * math.cos(dx * x_pi)
    return [radius * math.cos(angle), radius * math.sin(angle)]
78+
79+
80+
def gcj02_to_wgs84(lng, lat):
    '''
    Convert a Mars (GCJ-02) coordinate to WGS84.

    inputs:
        lng: longitude in the Mars coordinate system
        lat: latitude in the Mars coordinate system
    return: the converted coordinate as a list [longitude, latitude]; the
        input is returned unchanged when out_of_china reports it as outside
    '''
    if out_of_china(lng, lat):
        return [lng, lat]
    # The offset polynomials take coordinates relative to (105.0, 35.0).
    rel_lng = lng - 105.0
    rel_lat = lat - 35.0
    raw_dlat = _transformlat(rel_lng, rel_lat)
    raw_dlng = _transformlng(rel_lng, rel_lat)
    lat_rad = lat / 180.0 * pi
    sin_lat = math.sin(lat_rad)
    w = 1 - ee * sin_lat * sin_lat
    sqrt_w = math.sqrt(w)
    # Scale the raw offsets using the ellipsoid constants a and ee.
    dlat = (raw_dlat * 180.0) / ((a * (1 - ee)) / (w * sqrt_w) * pi)
    dlng = (raw_dlng * 180.0) / (a / sqrt_w * math.cos(lat_rad) * pi)
    shifted_lat = lat + dlat
    shifted_lng = lng + dlng
    # Mirror the shifted point around the input to undo the offset.
    return [lng * 2 - shifted_lng, lat * 2 - shifted_lat]
100+
101+
102+
def bd09_to_wgs84(bd_lon, bd_lat):
    '''
    Convert a Baidu (BD-09) coordinate to WGS84 by chaining bd09_to_gcj02
    and gcj02_to_wgs84.

    inputs:
        bd_lon: longitude in the Baidu coordinate system
        bd_lat: latitude in the Baidu coordinate system
    return: the converted coordinate as a list [longitude, latitude]
    '''
    gcj_point = bd09_to_gcj02(bd_lon, bd_lat)
    return gcj02_to_wgs84(*gcj_point)
105+
106+
107+
def _transformlat(lng, lat):
    '''
    Compute the raw latitude offset used by gcj02_to_wgs84.

    inputs:
        lng: longitude, passed by gcj02_to_wgs84 as (longitude - 105.0)
        lat: latitude, passed by gcj02_to_wgs84 as (latitude - 35.0)
    return: the unscaled latitude offset
    '''
    # Polynomial part followed by three sinusoidal corrections; the terms are
    # summed in the same order as written so the float result is unchanged.
    base = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
        0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    lng_wave = (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
                math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    lat_wave = (20.0 * math.sin(lat * pi) + 40.0 *
                math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    lat_long_wave = (160.0 * math.sin(lat / 12.0 * pi) + 320 *
                     math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return base + lng_wave + lat_wave + lat_long_wave
117+
118+
119+
def _transformlng(lng, lat):
    '''
    Compute the raw longitude offset used by gcj02_to_wgs84.

    inputs:
        lng: longitude, passed by gcj02_to_wgs84 as (longitude - 105.0)
        lat: latitude, passed by gcj02_to_wgs84 as (latitude - 35.0)
    return: the unscaled longitude offset
    '''
    # Polynomial part followed by three sinusoidal corrections; the terms are
    # summed in the same order as written so the float result is unchanged.
    base = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
        0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    short_wave = (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
                  math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    mid_wave = (20.0 * math.sin(lng * pi) + 40.0 *
                math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    long_wave = (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
                 math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return base + short_wave + mid_wave + long_wave
129+
130+
131+
# 判断是否在国内,不在国内不做偏移
132+
def out_of_china(lng, lat):
    '''
    Tell whether a coordinate lies outside the rectangular box used to
    approximate China.

    inputs:
        lng: longitude in the Mars coordinate system
        lat: latitude in the Mars coordinate system
    return: False when 73.66 < lng < 135.05 and 3.86 < lat < 53.55 (bounds
        exclusive), True otherwise
    '''
    inside_box = 73.66 < lng < 135.05 and 3.86 < lat < 53.55
    return not inside_box
140+
141+
142+
# 对小窗口调用百度API
143+
def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output):
    '''
    Query the Baidu place-search API for one sliding window and append every
    hit to the day's result file.

    Pages of 20 results are requested one after another until an empty page
    comes back or an error occurs. Each hit is written as one line
    "name,longitude,latitude,area,address" with the coordinates converted by
    bd09_to_wgs84. A "stop" or "except" line is appended to the day's log
    file when paging ends.

    inputs:
        roi_key: keyword of the points of interest
        rec_str: bounds of the sliding window
        baidu_ak: AK (access key) of the Baidu web API
        index: index of the sliding window (only written to the log)
        output: directory the result (.txt) and log (.log) files are
            written to; it must already exist
    '''
    now_time = time.strftime("%Y-%m-%d")
    page_num = 0
    log_path = output + '/' + now_time + ".log"
    poi_path = output + '/' + now_time + ".txt"
    # Context managers make sure both files are flushed and closed on every
    # exit path, including an interrupt from the keyboard.
    with open(log_path, 'a+', encoding='utf-8') as logfile, \
            open(poi_path, 'a+', encoding='utf-8') as poi_file:
        while True:
            try:
                URL = "http://api.map.baidu.com/place/v2/search?query=" + roi_key + \
                    "&bounds=" + rec_str + \
                    "&output=json" + \
                    "&ak=" + baidu_ak + \
                    "&scope=2" + \
                    "&page_size=20" + \
                    "&page_num=" + str(page_num)
                resp = requests.get(URL)
                res = json.loads(resp.text)
                if len(res['results']) == 0:
                    # An empty page means this window is exhausted.
                    logfile.write(time.strftime("%Y-%m-%d-%H-%M-%S") + " stop " + str(index) + " " + rec_str + " " + str(page_num) + '\n')
                    break
                for r in res['results']:
                    j_name = r['name']  # name
                    # longitude / latitude
                    j_lat = r['location']['lat']
                    j_lon = r['location']['lng']
                    j_area = r['area']  # administrative area
                    j_add = r['address']  # street address
                    j_lon, j_lat = bd09_to_wgs84(j_lon, j_lat)  # coordinate conversion
                    j_str = str(j_name) + ',' + str(j_lon) + ',' + str(j_lat) + ',' + str(j_area) + ',' + str(j_add) + '\n'
                    poi_file.write(j_str)
                page_num += 1
                time.sleep(1)
            except Exception as err:
                # Best effort: record the failure and give up on this window,
                # but let KeyboardInterrupt / SystemExit propagate.
                print("except", repr(err))
                logfile.write(time.strftime("%Y-%m-%d-%H-%M-%S") + " except " + str(index) + " " + rec_str + " " + str(page_num) + '\n')
                break

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /