44import re
55import html
66import json
7- from bs4 import BeautifulSoup
87from enum import Enum
8+ from bs4 import BeautifulSoup
99import aiohttp
1010import requests
1111from fake_useragent import UserAgent
@@ -100,7 +100,7 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
100100 api_key_result = HTMLRequests .send_website_request_getcode (True )
101101 # Make the post request and return the result if is valid
102102 search_url_with_key = HTMLRequests .SEARCH_URL + "/" + api_key_result
103- resp = requests .post (search_url_with_key , headers = headers , data = payload )
103+ resp = requests .post (search_url_with_key , headers = headers , data = payload , timeout = 60 )
104104 if resp .status_code == 200 :
105105 return resp .text
106106 return None
@@ -129,7 +129,7 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
129129 return None
130130
131131 @staticmethod
132- def __cut_game_title (game_title : str ):
132+ def __cut_game_title (page_source : str ):
133133 """
134134 Function that extract the game title from the html title of the howlongtobeat page
135135 @param game_title: The HowLongToBeat page title of the game
@@ -138,12 +138,15 @@ def __cut_game_title(game_title: str):
138138 (So, in this example: "A Way Out")
139139 """
140140
141- if game_title is None or len (game_title ) == 0 :
141+ if page_source is None or len (page_source ) == 0 :
142142 return None
143143
144- title = re .search ("<title>(.*)<\/title>" , game_title )
144+ soup = BeautifulSoup (page_source , 'html.parser' )
145+ title_tag = soup .title
146+ title_text = title_tag .string
147+ 145148 # The position of start and end of this method may change if the website change
146- cut_title = str ( html . unescape ( title . group ( 1 ) [12 :- 17 ]) )
149+ cut_title = title_text [12 :- 17 ]. strip ( )
147150 return cut_title
148151
149152 @staticmethod
@@ -183,7 +186,7 @@ def get_game_title(game_id: int):
183186 headers = HTMLRequests .get_title_request_headers ()
184187
185188 # Request and extract title
186- contents = requests .get (HTMLRequests .GAME_URL , params = params , headers = headers )
189+ contents = requests .get (HTMLRequests .GAME_URL , params = params , headers = headers , timeout = 60 )
187190 return HTMLRequests .__cut_game_title (contents .text )
188191
189192 @staticmethod
@@ -213,7 +216,7 @@ def send_website_request_getcode(parse_all_scripts: bool):
213216 """
214217 # Make the post request and return the result if is valid
215218 headers = HTMLRequests .get_title_request_headers ()
216- resp = requests .get (HTMLRequests .BASE_URL , headers = headers )
219+ resp = requests .get (HTMLRequests .BASE_URL , headers = headers , timeout = 60 )
217220 if resp .status_code == 200 and resp .text is not None :
218221 # Parse the HTML content using BeautifulSoup
219222 soup = BeautifulSoup (resp .text , 'html.parser' )
@@ -225,7 +228,7 @@ def send_website_request_getcode(parse_all_scripts: bool):
225228 matching_scripts = [script ['src' ] for script in scripts if '_app-' in script ['src' ]]
226229 for script_url in matching_scripts :
227230 script_url = HTMLRequests .BASE_URL + script_url
228- script_resp = requests .get (script_url , headers = headers )
231+ script_resp = requests .get (script_url , headers = headers , timeout = 60 )
229232 if script_resp .status_code == 200 and script_resp .text is not None :
230233 pattern = r'"/api/search/".concat\("([a-zA-Z0-9]+)"\)'
231234 matches = re .findall (pattern , script_resp .text )
0 commit comments