I'm starting a web scraping project and I need to set up a driver. I'm doing it like this:
import logging
import requests
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from RPA.FileSystem import FileSystem
class CustomSelenium:
    """Small wrapper holding a Chrome WebDriver and an RPA ``FileSystem`` helper."""

    def __init__(self) -> None:
        # The driver stays unset until set_webdriver() is called.
        self.driver = None
        self.file = FileSystem()

    def set_chrome_options(self):
        """Build and return a ``ChromeOptions`` object with the scraper's flags."""
        chrome_flags = (
            '--headless',
            '--no-sandbox',
            "--disable-extensions",
            "--disable-gpu",
            '--disable-web-security',
            "--start-maximized",
            '--remote-debugging-port=9222',
        )
        options = webdriver.ChromeOptions()
        for flag in chrome_flags:
            options.add_argument(flag)
        options.add_experimental_option("excludeSwitches", ["enable-logging"])
        return options

    def set_webdriver(self):
        """Start Chrome and store the resulting driver on ``self.driver``."""
        # Earlier wiring, kept disabled for reference:
        # options = self.set_chrome_options()
        # service = Service(ChromeDriverManager().install())
        # self.driver = webdriver.Chrome(service=service, options=options)
        options = webdriver.ChromeOptions()
        # The driver binary is resolved through webdriver_manager here, but
        # neither `service` nor `options` is handed to Chrome below.
        service = Service(ChromeDriverManager().install())
        self.driver = webdriver.Chrome()
        print("WebDriver initialized successfully.")
# Create the wrapper and start the browser immediately.
my_driver = CustomSelenium()
my_driver.set_webdriver()
It used to work fine, but now I'm getting the error:
[Previous line repeated 488 more times] RecursionError: maximum recursion depth exceeded while calling a Python object
After some tries I noticed the error didn't occur when I removed the 'service' part:
def set_webdriver(self):
    """Start Chrome with default settings and store it on ``self.driver``."""
    # Earlier wiring, kept disabled for reference:
    # options = self.set_chrome_options()
    # service = Service(ChromeDriverManager().install())
    # self.driver = webdriver.Chrome(service=service, options=options)
    options = webdriver.ChromeOptions()  # built, but not passed to Chrome below
    # service = Service(ChromeDriverManager().install())
    chrome = webdriver.Chrome()
    self.driver = chrome
    print("WebDriver initialized successfully.")
However, I'm unsure whether this changes something important, so I would appreciate some help.
My versions:
dependencies:
- python=3.10.12 # https://pyreadiness.org/3.10
- pip=23.2.1 # https://pip.pypa.io/en/stable/news
- robocorp-truststore=0.8.0 # https://pypi.org/project/robocorp-truststore/
- nodejs=16.20.2 # https://github.com/nodejs/node/blob/main/CHANGELOG.md
- pip:
- robotframework-browser==17.5.2 # https://github.com/MarketSquare/robotframework-browser/releases
edit:
I noticed the same error occurs when I do:
response = requests.get(url)
-
You don't need Service in modern Selenium versions. webdriver.Chrome() will suffice (with options if required). – jackal, commented Aug 13, 2024 at 8:15
2 Answers 2
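You don't need Service with recent Selenium versions: since Selenium 4.6 the bundled Selenium Manager locates or downloads a matching driver automatically, so webdriver_manager is no longer required. I suggest: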
from selenium import webdriver
from selenium.webdriver import ChromeOptions
class CustomSelenium:
    """Lazily create a Chrome WebDriver and shut it down cleanly.

    The options and the driver are built on first access of ``options`` and
    ``driver``. End the session with ``close()`` or by using the instance in
    a ``with`` block.
    """

    def __init__(self) -> None:
        self._driver = None   # created on first access of ``driver``
        self._options = None  # created on first access of ``options``

    def __enter__(self):
        """Return this wrapper so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Shut the browser down when the ``with`` block ends.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        self.close()

    @property
    def driver(self):
        """Return the Chrome driver, starting it with ``self.options`` if needed."""
        if self._driver is None:
            self._driver = webdriver.Chrome(options=self.options)
        return self._driver

    @property
    def options(self):
        """Return the ``ChromeOptions`` for this wrapper, building them once."""
        if self._options is None:
            self._options = ChromeOptions()
            self._options.add_argument('--headless')
            self._options.add_argument('--no-sandbox')
            self._options.add_argument("--disable-extensions")
            self._options.add_argument("--disable-gpu")
            self._options.add_argument('--disable-web-security')
            self._options.add_argument("--start-maximized")
            self._options.add_argument('--remote-debugging-port=9222')
            self._options.add_experimental_option("excludeSwitches", ["enable-logging"])
        return self._options

    def close(self) -> None:
        """End the browser session, if one was started.

        Safe to call repeatedly; a later access of ``driver`` starts a new
        session.
        """
        # Drop the cached reference first so a failing shutdown cannot leave
        # a dead driver behind for the next access of ``driver``.
        driver, self._driver = self._driver, None
        if driver is not None:
            # quit() ends the whole session (every window plus the driver
            # process); WebDriver.close() would only close the current window.
            driver.quit()
cs = CustomSelenium()
try:
    # Reading the property is what hands back the browser driver.
    driver = cs.driver
    # use the driver in some way here
finally:
    # ensure that driver is properly closed
    cs.close()
Platform:
Python 3.12.5
selenium 4.23.1
Note:
Of course, it would be much better to modify your class so that it can be used as a context manager
There are two reasons this happens. First, the recursion happens in the
service = Service(ChromeDriverManager().install())
stage, but I'm not sure why. I think it's because the driver is already going to be installed anyway.
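The second reason is that requests.get() tries to contact the given URL but does not succeed, so you have to use requests.get(url, verify=False). Be aware that verify=False disables TLS certificate verification, so treat it as a workaround rather than a fix.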
Comments
Explore related questions
See similar questions with these tags.