-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
-
Hi,
I've written code that monkey-patches some browser properties. The website I'm trying to scrape takes a browser fingerprint using the canvas data URL, etc. I was able to patch it with this code:
# JavaScript payload (built as a Python f-string, so literal JS braces are
# doubled) that overrides fingerprint-related browser APIs in the page:
#   * HTMLCanvasElement.prototype.toDataURL returns CHOSEN_CANVAS_HASH when it
#     is a non-empty string (wrapped as a text data: URL if it is not already
#     a data: URL); otherwise it falls back to the original implementation.
#   * navigator.language / languages / hardwareConcurrency / deviceMemory /
#     platform / userAgent are replaced by getters returning fixed values.
#   * Date.prototype.getTimezoneOffset always returns NEW_TZ_OFFSET.
# The original toDataURL and getTimezoneOffset functions are stashed on
# window.__origFingerprintFns__ (frozen, non-enumerable).
# NOTE: the stash previously referenced an undefined `origFillText`; the
# resulting ReferenceError was swallowed by the try/catch, so the stash was
# never created. It now stores the `origToDataURL` that is actually saved.
PATCH_CANVAS_JS = f"""
(() => {{
const CHOSEN_CANVAS_HASH = {CANVAS_HASH_JS};
const NEW_LANGUAGE = '{NEW_LANGUAGE}';
const NEW_LANGUAGES = {NEW_LANGUAGES_JS};
const NEW_TZ_OFFSET = {NEW_TZ_OFFSET};
const NEW_HW_THREADS = {NEW_HW_THREADS};
const NEW_DEVICE_MEMORY = {NEW_DEVICE_MEMORY};
const origToDataURL = HTMLCanvasElement.prototype.toDataURL;
Object.defineProperty(HTMLCanvasElement.prototype, 'toDataURL', {{
value: function (...args) {{
try {{
if (typeof CHOSEN_CANVAS_HASH === 'string' && CHOSEN_CANVAS_HASH.length) {{
// If it’s already a data: URL, return it; else wrap as text.
return /^data:/i.test(CHOSEN_CANVAS_HASH)
? CHOSEN_CANVAS_HASH
: 'data:text/plain;charset=utf-8,' + encodeURIComponent(CHOSEN_CANVAS_HASH);
}}
}} catch (_) {{}}
return origToDataURL.apply(this, args);
}},
configurable: false,
writable: false,
}});
const overrideNavigatorProp = (prop, getter) => {{
try {{
const proto = Navigator && Navigator.prototype;
const desc = proto ? Object.getOwnPropertyDescriptor(proto, prop) : null;
if (desc && desc.configurable) {{
Object.defineProperty(proto, prop, {{ get: getter, configurable: false }});
return;
}}
}} catch (_) {{}}
try {{
Object.defineProperty(navigator, prop, {{ get: getter, configurable: false }});
}} catch (_) {{}}
}};
// --- language / languages ---
overrideNavigatorProp('language', () => NEW_LANGUAGE);
overrideNavigatorProp('languages', () => NEW_LANGUAGES);
// --- hardwareConcurrency ---
overrideNavigatorProp('hardwareConcurrency', () => NEW_HW_THREADS);
// --- deviceMemory ---
overrideNavigatorProp('deviceMemory', () => NEW_DEVICE_MEMORY);
// --- platform ---
overrideNavigatorProp('platform', () => 'Win32');
// --- userAgent ---
overrideNavigatorProp('userAgent', () => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36');
// --- timezoneOffset ---
const origGetTimezoneOffset = Date.prototype.getTimezoneOffset;
Object.defineProperty(Date.prototype, 'getTimezoneOffset', {{
value: function () {{ return NEW_TZ_OFFSET; }},
writable: false,
configurable: false,
}});
// (Optional) stash originals, non-enumerable
try {{
Object.defineProperty(window, '__origFingerprintFns__', {{
value: Object.freeze({{
toDataURL: origToDataURL,
getTimezoneOffset: origGetTimezoneOffset,
}}),
writable: false,
configurable: false,
enumerable: false
}});
}} catch (_) {{}}
}})();
"""
# User-Agent string reported to servers (same value the JS patch exposes).
spoofed_user_agent = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/138.0.0.0 Safari/537.36"
)
# User-Agent Client Hints metadata passed alongside the UA override.
spoofed_ua_metadata = {
    "brands": [
        {"brand": "Chromium", "version": "138"},
        {"brand": "Google Chrome", "version": "138"},
        {"brand": "Not)A;Brand", "version": "8"},
    ],
    "platform": "Windows",  # → sec-ch-ua-platform: "Windows"
    "platformVersion": "10.0.0",
    "architecture": "x86",
    "model": "",  # not sent on desktop
    "mobile": False,  # → sec-ch-ua-mobile: ?0
    "bitness": "64",
}

with SB(
    uc=True,
    incognito=True,
    locale_code="en",
    user_data_dir=user_data_dir,
    proxy=proxy,
) as sb:
    run_cdp = sb.driver.execute_cdp_cmd
    # The Network domain is enabled first, then UA + UA-CH are overridden.
    run_cdp("Network.enable", {})
    run_cdp(
        "Network.setUserAgentOverride",
        {
            "userAgent": spoofed_user_agent,
            "userAgentMetadata": spoofed_ua_metadata,
        },
    )
    sb.open(url)
    do_some_magic()
It works fine when run from my IDE, but I get detected when running in a Docker container. The Docker container works when I use CDP Mode (it isn't detected), but I can't spoof all of those properties in CDP Mode, because the patching code uses the driver. Is there a CDP Mode way of monkey-patching those properties before the website is loaded?
Beta Was this translation helpful? Give feedback.
All reactions
There are fingerprints that you can change in CDP Mode, such as timezone and geolocation. Example:
https://github.com/seleniumbase/SeleniumBase/blob/master/examples/cdp_mode/raw_timezone_sb.py
Docker leaves hardcoded fingerprints that can't be changed, so I wouldn't use that if you need stealth.
Replies: 2 comments 2 replies
-
There are fingerprints that you can change in CDP Mode, such as timezone and geolocation. Example:
https://github.com/seleniumbase/SeleniumBase/blob/master/examples/cdp_mode/raw_timezone_sb.py
Docker leaves hardcoded fingerprints that can't be changed, so I wouldn't use that if you need stealth.
Beta Was this translation helpful? Give feedback.
All reactions
-
My problem is the canvas fingerprint, which is sent in the request payload. What I'm trying to do is override the headers and payload in a request-paused event handler, but I can't make it work; the script appears to hang (go stale) after the log line print("New body preview:", preview):
# Fingerprint value generated once here and reused as the "ui" field of the
# replacement POST body built in on_request_paused.
FINGERPRINT = generate_fingerprint()
def _to_header_entries(hdict):
    """Convert a name -> value header mapping into a list of HeaderEntry objects.

    One ``mycdp.fetch.HeaderEntry`` is produced per item, in the mapping's
    iteration order.
    """
    entries = []
    for header_name, header_value in hdict.items():
        entries.append(
            mycdp.fetch.HeaderEntry(name=header_name, value=header_value)
        )
    return entries
async def on_request_paused(evt: mycdp.fetch.RequestPaused, tab):
    """Resume a paused request with spoofed headers and, for matching POSTs, a new body.

    Every paused request is continued with the User-Agent / client-hint
    headers overridden. POST requests whose URL contains "someText" also get
    their body replaced by ``{"ui": FINGERPRINT}`` serialized as JSON.

    Args:
        evt: The ``Fetch.requestPaused`` event describing the intercepted request.
        tab: The tab the event came from; commands are queued via ``tab.feed_cdp``.

    If building or queuing the modified continuation raises, the request is
    continued unmodified so the page is not left waiting on it.
    """
    import base64  # local import: only needed to encode the replacement body

    req = evt.request

    # Headers applied to ALL requests.
    overrides = {
        "User-Agent": UA,
        "sec-ch-ua": '"Chromium";v="138", "Google Chrome";v="138", "Not A(Brand";v="8"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-ch-ua-arch": '"x86"',
        "sec-ch-ua-bitness": '"64"',
        "sec-ch-ua-full-version": '"138.0.0.0"',
    }
    # Header names are case-insensitive, so drop any existing spelling of an
    # overridden header (avoids sending both "user-agent" and "User-Agent").
    # Content-Length is dropped as well to let Chromium compute it.
    dropped = {name.lower() for name in overrides} | {"content-length"}
    h = {
        name: value
        for name, value in (req.headers or {}).items()
        if name.lower() not in dropped
    }
    h.update(overrides)

    post_data = None
    try:
        if req.method == "POST" and "someText" in req.url:
            body = json.dumps({"ui": FINGERPRINT})
            print("Intercepted POST to", req.url)
            preview = (body[:140] + "...") if len(body) > 140 else body
            print("New body preview:", preview)
            # CDP requires Fetch.continueRequest's postData to be base64
            # encoded. Sending the raw JSON text makes the command fail, which
            # leaves the request paused and the page hanging.
            post_data = base64.b64encode(body.encode("utf-8")).decode("ascii")
        tab.feed_cdp(
            mycdp.fetch.continue_request(
                request_id=evt.request_id,
                post_data=post_data,
                headers=_to_header_entries(h),
            )
        )
    except Exception as e:
        # Best-effort fallback: never leave the request paused.
        try:
            tab.feed_cdp(mycdp.fetch.continue_request(request_id=evt.request_id))
        finally:
            print("continue_request failed:", repr(e))
# Switch to CDP mode on a blank page and register the request-paused handler
# before any interception is enabled.
sb.activate_cdp_mode("about:blank")
sb.cdp.add_handler(mycdp.fetch.RequestPaused, on_request_paused)
tab = sb.cdp.page
loop = sb.cdp.get_event_loop()


async def cdp_setup():
    """Send the CDP commands that enable target discovery and request interception.

    Commands are sent one at a time, in this order: target discovery,
    auto-attach, Network.enable, service-worker bypass, Fetch.enable.
    """
    await tab.send(mycdp.target.set_discover_targets(discover=True))

    auto_attach_cmd = mycdp.target.set_auto_attach(
        auto_attach=True,
        wait_for_debugger_on_start=False,
        flatten=True,
    )
    await tab.send(auto_attach_cmd)

    await tab.send(mycdp.network.enable())
    await tab.send(mycdp.network.set_bypass_service_worker(bypass=True))

    # Pause every URL at the request stage so on_request_paused sees it.
    every_request = mycdp.fetch.RequestPattern(
        url_pattern="*",
        request_stage=mycdp.fetch.RequestStage.REQUEST,
    )
    await tab.send(mycdp.fetch.enable(patterns=[every_request]))


loop.run_until_complete(cdp_setup())
sb.cdp.open(url)
Beta Was this translation helpful? Give feedback.
All reactions
-
You can try working with the existing examples that use mycdp.fetch.RequestPaused
...
https://github.com/search?q=repo%3Aseleniumbase%2FSeleniumBase%20mycdp.fetch.RequestPaused&type=code
...and the ones that use mycdp.fetch.continue_request
:
https://github.com/search?q=repo%3Aseleniumbase%2FSeleniumBase+mycdp.fetch.continue_request&type=code
I'm no CDP expert, but SeleniumBase does provide the tools for using CDP if you understand how it works and how to formulate the proper CDP commands/actions for what you're trying to accomplish, assuming that those actions are allowed by CDP.
Beta Was this translation helpful? Give feedback.
All reactions
-
thanks, will try to dig into cdp
Beta Was this translation helpful? Give feedback.