diff --git a/README.md b/README.md
index b2c5dc4..6f96817 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,16 @@
 ```shell
-(
- )\ )             )           )   (                (
-(()/(   (     ( /(  ( /(  )\  (    )    (    (     )\   (    (
- /(_)))\ )   )\()))\())  (    (  (((_)  )(  ( /( )\))( ((_)  ))\  )(
-(_)) (()/(  (_))/((_)\   )\   )\ ) )\___ (()\ )(_))((_)()\ _ /((_)(()\
-| _ \ )(_))| |_ | |(_) ((_) _(_/(((/ __| ((_)((_)_ _(()((_)| |(_)) ((_)
-| _/| || || _|| ' \ / _ \| ' \))| (__ | '_|/ _` |\ V V /| |/ -_) | '_|
-|_| \_, | \__||_||_|\___/|_||_| \___||_| \__,_| \_/\_/ |_|\___| |_|
-   |__/
-—————— by yanghangfeng
+ (
+  )\ )             )           )   (                (
+ (()/(   (     ( /(  ( /(  )\  (    )    (    (     )\   (    (
+  /(_)))\ )   )\()))\())  (    (  (((_)  )(  ( /( )\))( ((_)  ))\  )(
+ (_)) (()/(  (_))/((_)\   )\   )\ ) )\___ (()\ )(_))((_)()\ _ /((_)(()\
+ | _ \ )(_))| |_ | |(_) ((_) _(_/(((/ __| ((_)((_)_ _(()((_)| |(_)) ((_)
+ | _/| || || _|| ' \ / _ \| ' \))| (__ | '_|/ _` |\ V V /| |/ -_) | '_|
+ |_| \_, | \__||_||_|\___/|_||_| \___||_| \__,_| \_/\_/ |_|\___| |_|
+    |__/
+ —————— by yanghangfeng
 ```
-# PythonCrawler: a collection of crawler projects written in Python :bug:
+# PythonCrawler: a collection of crawler projects written in Python :bug: (The code in this project is for learning crawling techniques only; users must comply with the laws of the People's Republic of China!)
@@ -30,6 +30,17 @@
 
+# IPWO global proxy resources | support for scraping, cross-border, and testing projects (free trial; strongly recommended for crawlers!!!)
+### Official site
+[👉 Visit the IPWO site](https://www.ipwo.net/?code=WSESV2ONN)
+### Product overview
+* Free trial: try it before you commit
+* 90M+ real residential IPs covering 220+ countries and regions
+* Supports rotating residential proxies and static residential (ISP) proxies
+* Fits data scraping, e-commerce, ad verification, SEO monitoring, and similar scenarios
+* Supports HTTP/HTTPS/SOCKS5 for broad compatibility
+* Clean IP pool, refreshed in real time, 99.9% connection success rate
+* Supports targeting specific countries, cities, and regions while protecting privacy
 
 # spiderFile module overview
@@ -38,7 +49,7 @@
 3. [get_photos.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/get_photos.py): **Grab every image under a given Baidu Tieba topic.**
 4. [get_web_all_img.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/get_web_all_img.py): **Crawl all images from an entire website.**
 5. [lagou_position_spider.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/lagou_position_spider.py): **Enter any keyword and grab all related job postings in one pass, saving them to a local file.**
-6. [student_img.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/student_img.py): **Exploit a URL flaw in the school's official site to fetch the ID photos of all registered students.**
+6. [student_img.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/student_img.py): **Automatically fetch your own student ID photo.**
 7. [JD_spider.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/JD_spider.py): **Bulk-crawl JD.com product ids and tags.**
 8. [ECUT_pos_html.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/ECUT_pos_html.py): **Crawl every campus-recruitment notice from the school's official site and save it as HTML, with images embedded.**
 9. [ECUT_get_grade.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/ECUT_get_grade.py): **Simulate logging in to the school site, grab grades, and compute the credit-weighted GPA.**
@@ -50,7 +61,9 @@
 15. [fuckCTF.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/fuckCTF.py): **Log in to the Hetian site via selenium and automatically change the initial password.**
 16. [one_update.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/one_update.py): **Updated crawler for the ONE literary site, adding the daily aphorism.**
 17. [get_history_weather.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/get_history_weather.py): **Grab Guangzhou weather data for the first quarter of 2019.**
-
+18. [search_useful_camera_ip_address.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/search_useful_camera_ip_address.py): **A security primer on weak camera passwords.**
+19. [get_top_sec_com.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/get_top_sec_com.py): **Use async programming to rank the A-share cybersecurity sector by market cap and save the ranking as an image.**
+20. [get_tj_accident_info.py](https://github.com/yhangf/PythonCrawler/blob/master/spiderFile/get_tj_accident_info.py): **Combine sync and async programming to fetch every accident report from the Tianjin Emergency Management Bureau.**
 ---
 # spiderAPI module overview
diff --git a/spiderFile/get_tj_accident_info.py b/spiderFile/get_tj_accident_info.py
new file mode 100644
index 0000000..b8b2237
--- /dev/null
+++ b/spiderFile/get_tj_accident_info.py
@@ -0,0 +1,77 @@
+import re
+import joblib
+import asyncio
+import aiohttp
+import requests as rq
+from bs4 import BeautifulSoup
+
+def yield_all_page_url(root_url, page=51):
+    """Build the url of every list page.
+    @param root_url: url of the section's first page
+    type root_url: str
+    @param page: number of pages to crawl
+    type page: int
+    """
+    # Follows the site's pagination scheme.
+    page_url_list = [f"{root_url}index_{i}.html" for i in range(1, page)]
+    # Prepend the first page itself.
+    page_url_list.insert(0, root_url)
+    return page_url_list
+
+async def get_info_page_url(url, session):
+    # Detail-page links are relative to the section root, e.g.
+    # "./202103/t20210301_123456.html" (assumed pattern).
+    regex = re.compile(r'<a href="\./(\d+/t\d+_\d+\.html)"')
+    async with session.get(url, headers=HEADERS) as response:
+        html = await response.text()
+    return regex.findall(html)
+
+async def get_all_info_page_url(root_url, page_url_list):
+    # Fetch every list page concurrently, then join the relative
+    # detail paths onto the section root.
+    async with aiohttp.ClientSession() as session:
+        tasks = [get_info_page_url(url, session) for url in page_url_list]
+        results = await asyncio.gather(*tasks)
+    return [root_url + path for paths in results for path in paths]
+
+def get_data(url):
+    # The detail pages carry the title in a <meta> tag (assumed tag name).
+    title_regex = re.compile('<meta name="ArticleTitle" content="(.*?)"')
+    html = rq.get(url, headers=HEADERS).content.decode("utf-8")
+    soup = BeautifulSoup(html, "html.parser")
+    title = re.search(title_regex, html)
+    content_1 = soup.find("div", class_="TRS_UEDITOR TRS_WEB")
+    content_2 = soup.find("div", class_="view TRS_UEDITOR trs_paper_default trs_word")
+    content_3 = soup.find("div", class_="view TRS_UEDITOR trs_paper_default trs_web")
+    if content_1:
+        content = content_1.text
+    elif content_2:
+        content = content_2.text
+    elif content_3:
+        content = content_3.text
+    else:
+        content = ""
+    return {"title": title.groups()[0] if title else "", "content": content}
+
+def get_all_data(all_info_page_url_list):
+    all_data = []
+    for i, url in enumerate(all_info_page_url_list):
+        all_data.append(get_data(url))
+        print(i, url, all_data[-1])
+    joblib.dump(all_data, "all_data.joblib")
+
+
+if __name__ == "__main__":
+    root_url = "http://yjgl.tj.gov.cn/ZWGK6939/SGXX3106/"
+    agent_part_1 = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+    agent_part_2 = "(KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
+    HEADERS = {"Host": "yjgl.tj.gov.cn",
+               "Connection": "keep-alive",
+               "User-Agent": agent_part_1 + agent_part_2,
+               "Referer": "http://static.bshare.cn/"}
+    page_url_list = yield_all_page_url(root_url, page=51)
+    all_info_page_url_list = asyncio.run(get_all_info_page_url(root_url, page_url_list))
+    # joblib.dump takes (value, filename) in that order.
+    joblib.dump(all_info_page_url_list, "all_info_page_url_list")
diff --git a/spiderFile/get_top_sec_com.py b/spiderFile/get_top_sec_com.py
new file mode 100644
index 0000000..f1fce0a
--- /dev/null
+++ b/spiderFile/get_top_sec_com.py
@@ -0,0 +1,95 @@
+import re
+import os
+import time
+import joblib
+import asyncio
+import aiohttp
+import requests as rq
+
+import pandas as pd
+import matplotlib.pyplot as plt
+# import nest_asyncio
+# nest_asyncio.apply()
+
+class getTopSecCom:
+    def __init__(self, top=None):
+        self.headers = {"Referer": "http://quote.eastmoney.com/",
+                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"}
+        self.bk_url = "http://71.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124034348162124675374_1612595298605&pn=1&pz=85&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f62&fs=b:BK0655&fields=f12,f14&_=1612595298611"
+        self.shares_api = "https://xueqiu.com/S/"
+        self.top = top
+        if not os.path.exists("./useful_sec_com_list"):
+            self.useful_sec_com_list = self.get_sec_com_code()
+        else:
+            with open("./useful_sec_com_list", "rb") as fp:
+                self.useful_sec_com_list = joblib.load(fp)
+
+    def get_sec_com_code(self):
+        html = rq.get(self.bk_url, headers=self.headers).content.decode("utf-8")
+        # The endpoint answers with JSONP; pull the stock array out of the payload.
+        sec_com_list = eval(re.findall(r"\[(.*?)\]", html)[0])
+        useful_sec_com_list = [[i["f12"], i["f14"]] for i in sec_com_list if "ST" not in i["f14"]]
+
+        # Codes starting with 0 or 3 are Shenzhen-listed (prefix "sz");
+        # codes starting with 6 are Shanghai-listed (prefix "sh").
+        for sec_com in useful_sec_com_list:
+            if sec_com[0][0] == "6":
+                sec_com[0] = "sh" + sec_com[0]
+            else:
+                sec_com[0] = "sz" + sec_com[0]
+        with open("useful_sec_com_list", "wb") as fp:
+            joblib.dump(useful_sec_com_list, fp)
+        return useful_sec_com_list
+
+    async def async_get_shares_details(self, sec_com, url):
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=self.headers) as response:
+                html = await response.text()
+                market_value = re.search("总市值:(.*?)亿", html)
+                if market_value:
+                    return [*sec_com, market_value.groups()[0]]
+
+    async def async_get_all_shares(self):
+        tasks = []
+        for sec_com in self.useful_sec_com_list:
+            url = self.shares_api + sec_com[0]
+            tasks.append(
+                asyncio.create_task(
+                    self.async_get_shares_details(sec_com, url)
+                )
+            )
+        done, pending = await asyncio.wait(tasks)
+        return [share.result() for share in done if share.result()]
+
+    def get_shares_details(self):
+        all_shares = []
+        for sec_com in self.useful_sec_com_list:
+            url = self.shares_api + sec_com[0]
+            response = rq.get(url, headers=self.headers).content.decode("utf-8")
+            market_value = re.search("总市值:(.*?)亿", response)
+            if market_value:
+                all_shares.append([*sec_com, market_value.groups()[0]])
+        return all_shares
+
+    def yield_picture(self, save_path):
+        # all_shares = self.get_shares_details()  # synchronous version
+        all_shares = asyncio.run(self.async_get_all_shares())  # asynchronous version
+        df = pd.DataFrame(all_shares, columns=["股票代码", "公司", "市值(亿)"])
+        df["市值(亿)"] = df["市值(亿)"].astype(float)
+        date = time.strftime("%Y年%m月%d日", time.localtime())
+        df.sort_values(by="市值(亿)", ascending=False, inplace=True)
+        df.index = range(1, df.shape[0]+1)
+
+        # Make matplotlib render the Chinese labels.
+        plt.rcParams['font.sans-serif'] = ['SimHei']
+        plt.rcParams['axes.unicode_minus'] = False
+
+        fig = plt.figure(dpi=400)
+        ax = fig.add_subplot(111, frame_on=False)
+        ax.xaxis.set_visible(False)
+        ax.yaxis.set_visible(False)
+        _ = pd.plotting.table(ax, df, loc="best", cellLoc="center")
+        ax.set_title(f"{date}A股网安版块公司市值排名", fontsize=10)
+        plt.savefig(save_path, bbox_inches="tight")
+
+if __name__ == "__main__":
+    m = getTopSecCom()
+    m.yield_picture("rank.png")
diff --git a/spiderFile/search_useful_camera_ip_address.py b/spiderFile/search_useful_camera_ip_address.py
new file mode 100644
index 0000000..652b180
--- /dev/null
+++ b/spiderFile/search_useful_camera_ip_address.py
@@ -0,0 +1,92 @@
+import re
+import tqdm
+import time
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import NoAlertPresentException, TimeoutException
+
+# Find a scanning site yourself; this code only demonstrates the logic.
+country = "IN"  # India
+city = ""
+login_url = ""
+query_url = ""
+city_url = ""
+USER_NAME = ""
+PASSWORD = ""
+
+# Headless-browser configuration.
+chrome_options = Options()
+chrome_options.add_argument("--headless")
+chrome_options.add_argument("--disable-gpu")
+chrome_options.add_argument("log-level=3")
+browser = webdriver.Chrome(chrome_options=chrome_options)
+browser.set_page_load_timeout(10)
+
+# Log in.
+browser.get(login_url)
+WebDriverWait(browser, 30).until(
+    EC.presence_of_element_located((By.XPATH, '//*[@name="login_submit"]'))
+)
+browser.find_element_by_id("username").clear()
+browser.find_element_by_id("username").send_keys(USER_NAME)
+browser.find_element_by_id("password").clear()
+browser.find_element_by_id("password").send_keys(PASSWORD)
+browser.find_element_by_name("login_submit").click()
+
+# Collect candidate camera urls; two pages by default.
+if city:
+    query_url += city_url
+
+latent_camera_url = []
+browser.get(query_url)
+WebDriverWait(browser, 30).until(
+    EC.presence_of_element_located((By.CLASS_NAME, 'button'))
+)
+html = browser.page_source
+# Assumed pattern: each search result links to a camera at http://IP:port.
+latent_camera_url += re.findall(r'<a href="(http://\d+\.\d+\.\d+\.\d+:\d+)"', html)
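---

A few sketches to go with the changes above. First, the proxy section: none of the spiders in this diff are proxy-aware, but both `requests` and `aiohttp` accept a gateway endpoint directly. A minimal sketch, assuming a hypothetical `PROXY` url; the real host, port, and credentials come from whichever provider you use (for `requests`, a `socks5://` scheme also works once `pysocks` is installed):

```python
import asyncio
import aiohttp
import requests as rq

# Hypothetical gateway endpoint; substitute your provider's
# host, port, and credentials.
PROXY = "http://USER:PASS@gate.example.com:7777"

# requests: route both schemes through the gateway.
html = rq.get("http://example.com",
              proxies={"http": PROXY, "https": PROXY}, timeout=10).text

# aiohttp: pass the proxy per request.
async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url, proxy=PROXY) as response:
            return await response.text()

print(len(html), len(asyncio.run(fetch("http://example.com"))))
```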

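Second, `get_sec_com_code` in `get_top_sec_com.py` parses the eastmoney JSONP response with `eval`, which executes whatever the server returns. A stricter sketch, under the assumption that the payload keeps its `data.diff` layout (with `f12` as code and `f14` as name, exactly as the script uses them):

```python
import re
import json

def parse_jsonp(text):
    # Strip the "jQuery...(...)" callback wrapper and parse the body as JSON.
    body = re.search(r"\((\{.*\})\)", text, re.S).group(1)
    return json.loads(body)

# Usage with the raw response text fetched from bk_url:
# data = parse_jsonp(html)
# stocks = [[d["f12"], d["f14"]]
#           for d in data["data"]["diff"] if "ST" not in d["f14"]]
```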
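Finally, `get_tj_accident_info.py` persists its results with `joblib`, so once `get_all_data` has written `all_data.joblib` the crawled records can be reloaded for offline analysis:

```python
import joblib

# Each record is {"title": ..., "content": ...}, as returned by get_data.
all_data = joblib.load("all_data.joblib")
print(len(all_data), "records")

# Example: list the first few reports whose title mentions a keyword.
for record in [d for d in all_data if "事故" in d["title"]][:5]:
    print(record["title"])
```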