0

I'm trying to fetch video URLs from a TikTok user page for research purposes, using Playwright. It works fine with `headless=False`, but when I set `headless=True`, it triggers a captcha verification. Any ideas on how to fix this?

import time
import random
from playwright.sync_api import sync_playwright
def fetch_tiktok_video_urls(profile_url, max_videos):
    """Collect up to ``max_videos`` unique video links from a profile page.

    Opens ``profile_url`` in a persistent Chromium context (profile data is
    kept in ``./user_data``), repeatedly scrolls the page while calling
    ``simulate_human_browsing``, and gathers the ``href`` of every anchor
    whose URL contains ``/video/``.

    Args:
        profile_url: URL of the profile page to open.
        max_videos: Maximum number of video URLs to return.

    Returns:
        A list of unique video URLs in the order they were first seen, at
        most ``max_videos`` long. If an exception occurs it is printed and
        whatever was collected so far is returned.
    """
    # Give up after this many consecutive scroll rounds that reveal no new
    # link; otherwise a profile with fewer than ``max_videos`` videos would
    # keep the loop spinning forever.
    max_stalled_rounds = 5

    video_urls = []
    seen = set()  # O(1) duplicate check; ``video_urls`` keeps the order
    try:
        with sync_playwright() as p:
            # launch_persistent_context returns a browser *context*.
            context = p.chromium.launch_persistent_context(
                user_data_dir="./user_data", headless=False
            )
            try:
                page = context.new_page()
                page.goto(profile_url)
                page.wait_for_load_state('load')
                simulate_human_browsing(page)

                stalled_rounds = 0
                while (
                    len(video_urls) < max_videos
                    and stalled_rounds < max_stalled_rounds
                ):
                    page.mouse.wheel(0, random.randint(200, 500))
                    simulate_human_browsing(page)

                    video_elements = page.locator("a[href*='/video/']")
                    links = video_elements.evaluate_all(
                        "elements => elements.map(e => e.href)"
                    )

                    found_new = False
                    for link in links:
                        if link in seen:
                            continue
                        seen.add(link)
                        video_urls.append(link)
                        found_new = True
                        if len(video_urls) >= max_videos:
                            break

                    stalled_rounds = 0 if found_new else stalled_rounds + 1
            finally:
                # Close the context even when navigation or scraping fails.
                context.close()
    except Exception as e:
        # Best effort: report the failure and fall through to return the
        # links gathered before it happened.
        print(f"Error: {e}")
    return video_urls[:max_videos]
def simulate_human_browsing(page):
    """Scroll and move the pointer on ``page`` with randomized pacing.

    Runs two phases in order: 3-8 downward wheel scrolls of 200-500 each,
    sleeping 1.5-3.5 s after every scroll, then 2-5 pointer moves to random
    coordinates within 0-800 by 0-600, sleeping 0.5-1.5 s after every move.

    Args:
        page: Object exposing ``mouse.wheel`` and ``mouse.move``.
    """
    # Phase 1: wheel scrolling.
    for _scroll in range(random.randint(3, 8)):
        delta_y = random.randint(200, 500)
        page.mouse.wheel(0, delta_y)
        pause = random.uniform(1.5, 3.5)
        time.sleep(pause)

    # Phase 2: pointer movement.
    move_count = random.randint(2, 5)
    for _move in range(move_count):
        target_x = random.randint(0, 800)
        target_y = random.randint(0, 600)
        step_count = random.randint(5, 15)
        page.mouse.move(target_x, target_y, steps=step_count)
        pause = random.uniform(0.5, 1.5)
        time.sleep(pause)
if __name__ == "__main__":
    # Script entry point: fetch up to 30 video links from the sample profile
    # and print them.
    profile = "https://www.tiktok.com/@nail_videos2024"
    fetched = fetch_tiktok_video_urls(profile, max_videos=30)
    print("Fetched URLs:", fetched)

I tried setting a custom user agent like this:

# Headless launch attempt: the user agent is passed as a Chromium
# command-line switch alongside --no-sandbox.
chromium_args = [
    "--no-sandbox",
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
]
browser = p.chromium.launch_persistent_context(
    user_data_dir="./user_data",
    headless=True,
    args=chromium_args,
)
asked Dec 22, 2024 at 5:21
3
  • 2
    setting brand: 'Google Chrome' works for me in headless mode for these scenarios. Commented Dec 27, 2024 at 17:48
  • Thank you @HemChe I will check using your solution. Commented Jan 10, 2025 at 15:28
  • Related, possibly a dupe: Headless doesn't work using Playwright and BeautifulSoup 4 Commented Sep 11, 2025 at 20:27

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.