Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Monkey patch some browser props #3964

Answered by mdmintz
Valleymu asked this question in Q&A
Discussion options

Hi,

I've written code that monkey-patches some browser properties. The website I'm trying to scrape takes a browser fingerprint (canvas data URL, etc.). I was able to patch it with this code:

# JavaScript source that pins fingerprint-related browser properties.
#
# This is an f-string: single-brace fields ({CANVAS_HASH_JS}, {NEW_LANGUAGE},
# {NEW_LANGUAGES_JS}, {NEW_TZ_OFFSET}, {NEW_HW_THREADS}, {NEW_DEVICE_MEMORY})
# are substituted by Python from names defined outside this snippet, while
# doubled braces are literal JavaScript braces.
#
# The script replaces HTMLCanvasElement.prototype.toDataURL, several
# navigator getters and Date.prototype.getTimezoneOffset, then stashes the
# original functions on window.__origFingerprintFns__.
# NOTE(review): how/when this script is injected is not shown here; it only
# helps if it runs before the site's own fingerprinting code.
PATCH_CANVAS_JS = f"""
(() => {{
  const CHOSEN_CANVAS_HASH = {CANVAS_HASH_JS};
  const NEW_LANGUAGE = '{NEW_LANGUAGE}';
  const NEW_LANGUAGES = {NEW_LANGUAGES_JS};
  const NEW_TZ_OFFSET = {NEW_TZ_OFFSET};
  const NEW_HW_THREADS = {NEW_HW_THREADS};
  const NEW_DEVICE_MEMORY = {NEW_DEVICE_MEMORY};

  // Capture the originals up front so they can be stashed at the end.
  // (origFillText used to be referenced without ever being defined, which
  // made the stash below throw and silently never get installed.)
  const origToDataURL = HTMLCanvasElement.prototype.toDataURL;
  const origFillText = CanvasRenderingContext2D.prototype.fillText;
  const origGetTimezoneOffset = Date.prototype.getTimezoneOffset;

  // --- canvas toDataURL ---
  Object.defineProperty(HTMLCanvasElement.prototype, 'toDataURL', {{
    value: function (...args) {{
      try {{
        if (typeof CHOSEN_CANVAS_HASH === 'string' && CHOSEN_CANVAS_HASH.length) {{
          // If it’s already a data: URL, return it; else wrap as text.
          return /^data:/i.test(CHOSEN_CANVAS_HASH)
            ? CHOSEN_CANVAS_HASH
            : 'data:text/plain;charset=utf-8,' + encodeURIComponent(CHOSEN_CANVAS_HASH);
        }}
      }} catch (_) {{}}
      // No usable override value: fall back to the real implementation.
      return origToDataURL.apply(this, args);
    }},
    configurable: false,
    writable: false,
  }});

  // Prefer redefining the getter on Navigator.prototype; fall back to the
  // navigator instance when the prototype property is missing or locked.
  const overrideNavigatorProp = (prop, getter) => {{
    try {{
      const proto = Navigator && Navigator.prototype;
      const desc = proto ? Object.getOwnPropertyDescriptor(proto, prop) : null;
      if (desc && desc.configurable) {{
        Object.defineProperty(proto, prop, {{ get: getter, configurable: false }});
        return;
      }}
    }} catch (_) {{}}
    try {{
      Object.defineProperty(navigator, prop, {{ get: getter, configurable: false }});
    }} catch (_) {{}}
  }};

  // --- language / languages ---
  overrideNavigatorProp('language', () => NEW_LANGUAGE);
  overrideNavigatorProp('languages', () => NEW_LANGUAGES);
  // --- hardwareConcurrency ---
  overrideNavigatorProp('hardwareConcurrency', () => NEW_HW_THREADS);
  // --- deviceMemory ---
  overrideNavigatorProp('deviceMemory', () => NEW_DEVICE_MEMORY);

  // --- platform ---
  overrideNavigatorProp('platform', () => 'Win32');
  // --- userAgent ---
  overrideNavigatorProp('userAgent', () => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36');

  // --- timezoneOffset ---
  Object.defineProperty(Date.prototype, 'getTimezoneOffset', {{
    value: function () {{ return NEW_TZ_OFFSET; }},
    writable: false,
    configurable: false,
  }});

  // (Optional) stash originals, non-enumerable
  try {{
    Object.defineProperty(window, '__origFingerprintFns__', {{
      value: Object.freeze({{
        fillText: origFillText,
        toDataURL: origToDataURL,
        getTimezoneOffset: origGetTimezoneOffset,
      }}),
      writable: false,
      configurable: false,
      enumerable: false
    }});
  }} catch (_) {{}}
}})();
"""
 with SB(
     uc=True,
     incognito=True,
     locale_code="en",
     user_data_dir=user_data_dir,
     proxy=proxy,
 ) as sb:
     # The Network domain must be enabled before the UA override is sent.
     sb.driver.execute_cdp_cmd("Network.enable", {})

     # User-Agent string reported to sites.
     spoofed_ua = " ".join(
         [
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
             "AppleWebKit/537.36 (KHTML, like Gecko)",
             "Chrome/138.0.0.0 Safari/537.36",
         ]
     )
     # UA Client Hints (UA-CH) metadata sent alongside the UA string.
     client_hints = {
         "brands": [
             {"brand": "Chromium", "version": "138"},
             {"brand": "Google Chrome", "version": "138"},
             {"brand": "Not)A;Brand", "version": "8"},
         ],
         "platform": "Windows",  # → sec-ch-ua-platform: "Windows"
         "platformVersion": "10.0.0",
         "architecture": "x86",
         "model": "",  # not sent on desktop
         "mobile": False,  # → sec-ch-ua-mobile: ?0
         "bitness": "64",
     }
     # Override UA + UA-CH
     sb.driver.execute_cdp_cmd(
         "Network.setUserAgentOverride",
         {"userAgent": spoofed_ua, "userAgentMetadata": client_hints},
     )

     sb.open(url)
     do_some_magic()

It works fine when run from my IDE, but I get detected when running in a Docker container. The Docker container is not detected when I use CDP Mode, but in CDP Mode I can't spoof all of those properties, because the code above relies on the driver. Is there a CDP Mode way of monkey-patching those properties before the website is loaded?

You must be logged in to vote

There are fingerprints that you can change in CDP Mode, such as timezone and geolocation. Example:
https://github.com/seleniumbase/SeleniumBase/blob/master/examples/cdp_mode/raw_timezone_sb.py

Docker leaves hardcoded fingerprints that can't be changed, so I wouldn't use that if you need stealth.

Replies: 2 comments 2 replies

Comment options

There are fingerprints that you can change in CDP Mode, such as timezone and geolocation. Example:
https://github.com/seleniumbase/SeleniumBase/blob/master/examples/cdp_mode/raw_timezone_sb.py

Docker leaves hardcoded fingerprints that can't be changed, so I wouldn't use that if you need stealth.

You must be logged in to vote
0 replies
Answer selected by mdmintz
Comment options

My problem is the canvas fingerprint, which is sent as part of the request payload. I'm trying to override the headers and the payload in a request-paused event handler, but I can't make it work: the script appears to stall right after the log line print("New body preview:", preview):

# Value sent as the "ui" field of the replacement POST body built in
# on_request_paused; generate_fingerprint() is defined outside this snippet.
FINGERPRINT = generate_fingerprint()
def _to_header_entries(hdict):
 return [mycdp.fetch.HeaderEntry(name=k, value=v) for k, v in hdict.items()]
async def on_request_paused(evt: mycdp.fetch.RequestPaused, tab):
    """Rewrite the headers (and, for matching POSTs, the body) of a paused request.

    Every paused request is resumed with the spoofed User-Agent / UA-CH
    headers. POST requests whose URL contains ``"someText"`` additionally get
    their body replaced with ``{"ui": FINGERPRINT}``.

    Args:
        evt: The ``Fetch.requestPaused`` event describing the paused request.
        tab: The tab the event came from; used to send the continue command.

    If building or sending the modified request raises, the request is
    resumed unmodified so that it is not left paused, and the error is printed.
    """
    import base64  # local import keeps the handler self-contained

    req = evt.request

    # Headers forced onto ALL requests.
    # NOTE(review): the GREASE brand here ("Not A(Brand") differs from the
    # "Not)A;Brand" used with Network.setUserAgentOverride elsewhere on this
    # page - confirm which one matches the spoofed Chrome version.
    overrides = {
        "User-Agent": UA,
        "sec-ch-ua": '"Chromium";v="138", "Google Chrome";v="138", "Not A(Brand";v="8"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-ch-ua-arch": '"x86"',
        "sec-ch-ua-bitness": '"64"',
        "sec-ch-ua-full-version": '"138.0.0.0"',
    }
    # Header names are case-insensitive, so drop any existing spelling of an
    # overridden header instead of sending it twice. Content-Length is dropped
    # as well so that Chromium computes it for the (possibly new) body.
    dropped = {name.lower() for name in overrides} | {"content-length"}
    h = {
        k: v for k, v in (req.headers or {}).items() if k.lower() not in dropped
    }
    h.update(overrides)

    post_data = None
    try:
        if req.method == "POST" and "someText" in req.url:
            body = json.dumps({"ui": FINGERPRINT})
            print("Intercepted POST to", req.url)
            preview = (body[:140] + "...") if len(body) > 140 else body
            print("New body preview:", preview)
            # Fetch.continueRequest expects postData as a base64 string.
            # Sending raw JSON makes the command fail, and because feed_cdp
            # does not wait for a reply the request would stay paused.
            post_data = base64.b64encode(body.encode("utf-8")).decode("ascii")
        tab.feed_cdp(
            mycdp.fetch.continue_request(
                request_id=evt.request_id,
                post_data=post_data,
                headers=_to_header_entries(h),
            )
        )
    except Exception as e:
        # Best effort: never leave the request paused, even if the rewrite failed.
        try:
            tab.feed_cdp(mycdp.fetch.continue_request(request_id=evt.request_id))
        finally:
            print("continue_request failed:", repr(e))
 # Switch the session into CDP Mode, starting from a blank page.
 sb.activate_cdp_mode("about:blank")
 # Route Fetch.requestPaused events to on_request_paused.
 sb.cdp.add_handler(mycdp.fetch.RequestPaused, on_request_paused)
 # Keep the CDP page and event loop; cdp_setup() below sends raw commands
 # through `tab`, and `loop` is what runs that coroutine.
 tab = sb.cdp.page
 loop = sb.cdp.get_event_loop()
 async def cdp_setup():
     """Send the CDP commands that turn on target discovery and request interception.

     Commands are sent one at a time, in order, through the enclosing
     scope's ``tab``.
     """
     # Pause every URL at the request stage (before it is sent).
     intercept_all = mycdp.fetch.RequestPattern(
         url_pattern="*",
         request_stage=mycdp.fetch.RequestStage.REQUEST,
     )
     setup_commands = (
         mycdp.target.set_discover_targets(discover=True),
         mycdp.target.set_auto_attach(
             auto_attach=True,
             wait_for_debugger_on_start=False,
             flatten=True,
         ),
         mycdp.network.enable(),
         mycdp.network.set_bypass_service_worker(bypass=True),
         mycdp.fetch.enable(patterns=[intercept_all]),
     )
     for command in setup_commands:
         await tab.send(command)
 # Run the async CDP setup to completion before navigating, then open the
 # target URL in CDP Mode.
 loop.run_until_complete(cdp_setup())
 sb.cdp.open(url)
You must be logged in to vote
2 replies
Comment options

You can try working with the existing examples that use mycdp.fetch.RequestPaused...
https://github.com/search?q=repo%3Aseleniumbase%2FSeleniumBase%20mycdp.fetch.RequestPaused&type=code

...and the ones that use mycdp.fetch.continue_request:
https://github.com/search?q=repo%3Aseleniumbase%2FSeleniumBase+mycdp.fetch.continue_request&type=code

I'm no CDP expert, but SeleniumBase does provide the tools for using CDP if you understand how it works and how to formulate the proper CDP commands/actions for what you're trying to accomplish, assuming that those actions are allowed by CDP.

Comment options

thanks, will try to dig into cdp

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Category
Q&A
Labels
None yet
2 participants

AltStyle によって変換されたページ (->オリジナル) /