SCRAPE-AND-DOWNLOAD.pyw
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import webbrowser
import os
import re  # needed by WebScraperGUI.scrape for heading/file-extension matching
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import yt_dlp
import subprocess
from PIL import Image, ImageTk
import io
import threading
import shutil
import time  # used to pace the download polling loop
# ------------------------------
from tkinter import scrolledtext
stop_download_flag = False
#================ADD-IMAGE-ICON=================
import sys
def resource_path(relative_path):
    """Get the absolute path to a resource; works both normally and under PyInstaller."""
    if getattr(sys, '_MEIPASS', False):
        return os.path.join(sys._MEIPASS, relative_path)
    return os.path.join(os.path.abspath("."), relative_path)
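# Note: PyInstaller one-file builds unpack bundled data files into a temporary
# directory and expose its path as sys._MEIPASS; resource_path() falls back to
# the script's own directory when running unfrozen.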
# Use this function to load files:
#splash_image = resource_path("splash-1.png")
icon_path = resource_path("D.ico")
#================ADD-IMAGE-ICON=================
# Register browsers with full paths (Windows default install locations)
chrome_path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
firefox_path = r"C:\Program Files\Mozilla Firefox\firefox.exe"
if os.path.exists(chrome_path):
    webbrowser.register("chrome", None, webbrowser.BackgroundBrowser(chrome_path))
if os.path.exists(firefox_path):
    webbrowser.register("firefox", None, webbrowser.BackgroundBrowser(firefox_path))
# === Main Window ===
window = tk.Tk()
window.title("NAJEEB SHAH KHAN SCRAPE WEB & Image Search Tool & Media Downloader")
window.geometry("965x700")
#window.configure(bg="#2c3e50")
if os.path.exists(icon_path):  # skip the icon instead of crashing if D.ico is missing
    window.iconbitmap(icon_path)
notebook = ttk.Notebook(window)
tab1 = ttk.Frame(notebook)
tab2 = ttk.Frame(notebook)
notebook.add(tab1, text="Image Search Tool")
notebook.add(tab2, text="Media Downloader")
notebook.pack(expand=True, fill="both")
# ====================
# === Tab 1 Content ===
# ====================
dark_mode_var = tk.BooleanVar()
keyword_var = tk.StringVar()
site_var = tk.StringVar()
extra_format_var = tk.StringVar()
query_preview_var = tk.StringVar()
browser_var = tk.StringVar(value="default")
format_vars = {
    "jpg": tk.BooleanVar(value=True),
    "png": tk.BooleanVar(value=True),
    "gif": tk.BooleanVar(),
    "bmp": tk.BooleanVar(),
    "webp": tk.BooleanVar(),
}
def update_query_preview():
    """Rebuild the Google dork query from the current form fields."""
    selected_formats = [f for f, var in format_vars.items() if var.get()]
    custom_format = extra_format_var.get().strip()
    keyword = keyword_var.get().strip()
    site = site_var.get().strip()
    all_formats = selected_formats.copy()
    if custom_format:
        all_formats.append(custom_format)
    filetype_str = ' | '.join(all_formats) if all_formats else "jpg | png"
    query = 'intitle:"index of"'
    if keyword:
        query += f' ({keyword})'
    query += f' ({filetype_str})'
    if site:
        query += f' site:{site}'
    query_preview_var.set(query)
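# Example of the query this builds (keyword "wallpaper", jpg/png checked,
# site filter ".edu"):
#   intitle:"index of" (wallpaper) (jpg | png) site:.edu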
def perform_search():
    query = query_preview_var.get()
    if not query:
        result_text.delete("1.0", tk.END)
        result_text.insert(tk.END, "⚠️ Query is empty.")
        return
    url = f"https://www.google.com/search?q={query.replace(' ', '+')}"
    result_text.delete("1.0", tk.END)
    result_text.insert(tk.END, f"🔍 Google Search URL:\n{url}")
    browser = browser_var.get()
    try:
        if browser == "chrome":
            webbrowser.get("chrome").open(url)
        elif browser == "firefox":
            webbrowser.get("firefox").open(url)
        else:
            webbrowser.open(url)
    except webbrowser.Error:
        result_text.insert(tk.END, f"\n⚠️ Failed to open {browser}, using default browser instead.")
        webbrowser.open(url)
def toggle_dark_mode():
    dark = dark_mode_var.get()
    bg = "#1e1e1e" if dark else "#ffffff"
    fg = "#ffffff" if dark else "#000000"
    widgets = [tab1, format_frame, keyword_label, keyword_entry,
               site_label, site_entry, extra_label, extra_entry,
               preview_label, preview_entry, search_button, dark_mode_check,
               browser_label, result_label, result_text]
    for widget in widgets:
        try:
            widget.config(bg=bg, fg=fg)
        except tk.TclError:
            pass  # ttk widgets do not accept bg/fg; skip them
    keyword_entry.config(insertbackground=fg)
    site_entry.config(insertbackground=fg)
    extra_entry.config(insertbackground=fg)
    result_text.config(insertbackground=fg)
# Tab 1 Layout
tk.Label(tab1, text="Select Image Formats:", bg="#ffffff").pack(anchor="w", padx=10, pady=5)
format_frame = tk.Frame(tab1, bg="#ffffff")
format_frame.pack(anchor="w", padx=20)
for fmt, var in format_vars.items():
    cb = tk.Checkbutton(format_frame, text=fmt, variable=var, bg="#ffffff", command=update_query_preview)
    cb.pack(side="left", padx=5)
extra_label = tk.Label(tab1, text="Type any extra format or word (e.g. tif, raw):", bg="#ffffff")
extra_label.pack(anchor="w", padx=10, pady=5)
extra_entry = tk.Entry(tab1, textvariable=extra_format_var, width=60, bg="#ffffff", fg="#000000")
extra_entry.pack(padx=10)
extra_entry.bind("<KeyRelease>", lambda e: update_query_preview())
keyword_label = tk.Label(tab1, text="Enter Keywords (e.g. wallpaper | backgrounds):", bg="#ffffff")
keyword_label.pack(anchor="w", padx=10, pady=5)
keyword_entry = tk.Entry(tab1, textvariable=keyword_var, width=60, bg="#ffffff", fg="#000000")
keyword_entry.pack(padx=10)
keyword_entry.bind("<KeyRelease>", lambda e: update_query_preview())
site_label = tk.Label(tab1, text="Optional Site Filter (e.g. .edu, example.com):", bg="#ffffff")
site_label.pack(anchor="w", padx=10, pady=5)
site_entry = tk.Entry(tab1, textvariable=site_var, width=60, bg="#ffffff", fg="#000000")
site_entry.pack(padx=10)
site_entry.bind("<KeyRelease>", lambda e: update_query_preview())
preview_label = tk.Label(tab1, text="🔎 Search Query Preview:", bg="#ffffff", font=("Arial", 10, "bold"))
preview_label.pack(anchor="w", padx=10, pady=5)
preview_entry = tk.Entry(tab1, textvariable=query_preview_var, width=80, state="readonly", bg="#eeeeee")
preview_entry.pack(padx=10, pady=5)
browser_label = tk.Label(tab1, text="Select Browser:", bg="#ffffff")
browser_label.pack(anchor="w", padx=10, pady=5)
browser_frame = tk.Frame(tab1, bg="#ffffff")
browser_frame.pack(anchor="w", padx=20)
tk.Radiobutton(browser_frame, text="Default", variable=browser_var, value="default", bg="#ffffff", command=update_query_preview).pack(side="left", padx=10)
tk.Radiobutton(browser_frame, text="Chrome", variable=browser_var, value="chrome", bg="#ffffff", command=update_query_preview).pack(side="left", padx=10)
tk.Radiobutton(browser_frame, text="Firefox", variable=browser_var, value="firefox", bg="#ffffff", command=update_query_preview).pack(side="left", padx=10)
search_button = tk.Button(tab1, text="Search on Google", command=perform_search)
search_button.pack(pady=10)
dark_mode_check = tk.Checkbutton(tab1, text="Dark Mode", variable=dark_mode_var, command=toggle_dark_mode, bg="#ffffff")
dark_mode_check.pack()
result_label = tk.Label(tab1, text="Generated Google Search URL:", bg="#ffffff")
result_label.pack(anchor="w", padx=10, pady=5)
result_text = tk.Text(tab1, height=4, width=80, wrap="word", bg="#f8f8f8")
result_text.pack(padx=10, pady=5)
update_query_preview()
# ====================
# === Tab 2 Content ===
# ====================
media_urls = []
special_sites = ['youtube.com', 'youtu.be', 'facebook.com', 'fb.watch', 'tiktok.com', 'instagram.com']
image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.ico']
video_exts = ['.mp4', '.webm', '.ogg', '.mov', '.avi', '.mkv', '.flv', '.3gp', '.wmv', '.m3u', '.m3u8']
def is_special_site(url):
    return any(domain in url for domain in special_sites)
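# Caveat: the substring test above also matches look-alike hosts such as
# "youtube.com.evil.example"; parsing the hostname with urlparse() and
# comparing domains would be stricter.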
def browse_url_file():
    file_path = filedialog.askopenfilename(title="Open URL File", filetypes=[("Text files", "*.txt")])
    if file_path:
        with open(file_path, 'r') as f:
            for line in f:
                url = line.strip()
                if url and url not in media_urls:
                    media_urls.append(url)
                    result_box.insert(tk.END, url + "\n")
def save_urls_to_file():
    file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text files", "*.txt")])
    if file_path:
        with open(file_path, 'w') as f:
            f.write(result_box.get("1.0", tk.END).strip())
        messagebox.showinfo("Saved", f"URLs saved to {file_path}")
def scrape_normal_site(url):
    found_urls = set()
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return found_urls
        soup = BeautifulSoup(response.text, 'html.parser')
        for tag in soup.find_all(['img', 'video', 'source', 'a']):
            src = tag.get('src') or tag.get('href')
            if src:
                full_url = urljoin(url, src)
                parsed = urlparse(full_url)
                ext = os.path.splitext(parsed.path)[1].lower()
                if ext in image_exts + video_exts:
                    found_urls.add(full_url)
    except Exception:
        pass
    return found_urls
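# Note: filtering by file extension misses media served through query-string
# URLs (e.g. /image?id=123) and anything injected by JavaScript after page
# load, since only the static HTML is parsed here.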
def process_url():
    url = url_entry.get().strip()
    if not url:
        messagebox.showwarning("Input Error", "Please enter a valid URL.")
        return
    media_urls.clear()
    result_box.delete("1.0", tk.END)
    try:
        if is_special_site(url):
            ydl_opts = {
                'quiet': True,
                'skip_download': True,
                'force_generic_extractor': False
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
            if 'entries' in info:
                for entry in info['entries']:
                    media_urls.append(entry['webpage_url'])
                    result_box.insert(tk.END, entry['webpage_url'] + "\n")
            else:
                media_urls.append(info['webpage_url'])
                result_box.insert(tk.END, info['webpage_url'] + "\n")
        else:
            scraped = scrape_normal_site(url)
            media_urls.extend(scraped)
            for media_url in scraped:
                result_box.insert(tk.END, media_url + "\n")
        if not media_urls:
            messagebox.showinfo("Info", "No media URLs found.")
        else:
            messagebox.showinfo("Success", f"{len(media_urls)} media URL(s) found!")
    except Exception as e:
        messagebox.showerror("Error", str(e))
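# Note: for playlists, extract_info() resolves full metadata for every entry,
# which can be slow; yt-dlp's 'extract_flat': 'in_playlist' option would
# return entry URLs without resolving each one (left as-is here).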
def download_media(url, save_path):
    try:
        if is_special_site(url):
            ytdlp_path = shutil.which("yt-dlp") or r"C:\Windows\yt-dlp.exe"
            command = [
                ytdlp_path,
                "-f", "best",
                "--no-playlist",
                "--extractor-args", "youtube:player_client=web",
                "-o", save_path,
                url
            ]
            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            if result.returncode != 0:
                raise Exception(result.stderr.strip())
        else:
            response = requests.get(url, stream=True, timeout=10)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
    except Exception as e:
        messagebox.showerror("Download Error", f"Failed to download:\n{url}\n{str(e)}")
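# Note: this blocking variant backs the "Download All URLs" button; its
# special-site branch shells out to the external yt-dlp executable rather
# than the imported yt_dlp module, so yt-dlp.exe must be on PATH (or at the
# hard-coded fallback location).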
def download_selected_line():
    try:
        line_index = result_box.index(tk.INSERT).split(".")[0]
        selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
        if not selected_url:
            raise Exception("No line selected.")
        folder = filedialog.askdirectory(title="Select Folder to Save File")
        if not folder:
            return
        parsed = urlparse(selected_url)
        filename = os.path.basename(parsed.path)
        if not filename:
            filename = "downloaded_file"
        save_path = os.path.join(folder, filename)
        threading.Thread(target=threaded_download, args=(selected_url, save_path), daemon=True).start()
    except Exception as e:
        messagebox.showerror("Error", str(e))
def download_selected():
    selected_urls = result_box.get("1.0", tk.END).strip().splitlines()
    if not selected_urls:
        messagebox.showwarning("Selection Error", "No URLs to download.")
        return
    selected = filedialog.askdirectory(title="Select Folder to Save Files")
    if not selected:
        return
    for url in selected_urls:
        parsed = urlparse(url)
        filename = os.path.basename(parsed.path)
        if not filename:
            filename = "downloaded_file.mp4"
        save_path = os.path.join(selected, filename)
        download_media(url, save_path)
    messagebox.showinfo("Download Complete", f"Downloaded {len(selected_urls)} media files.")
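# Caveat: download_selected() runs every download sequentially on the Tk main
# thread, so the window freezes until the whole batch finishes; wrapping the
# loop in a threading.Thread (as download_selected_line does) would keep the
# UI responsive.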
def threaded_download(url, save_path):
    global stop_download_flag
    stop_download_flag = False
    try:
        if is_special_site(url):
            ytdlp_path = shutil.which("yt-dlp") or r"C:\Windows\yt-dlp.exe"
            command = [
                ytdlp_path,
                "-f", "mp4",
                "--no-part",  # Saves directly as .mp4
                "--downloader", "ffmpeg",
                "--downloader-args", "ffmpeg_i:-movflags +faststart",
                "-o", save_path,
                url
            ]
            # Discard output: leaving stdout=PIPE unread can deadlock the child
            # once the pipe buffer fills.
            proc = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            while proc.poll() is None:
                if stop_download_flag:
                    proc.kill()
                    break
                time.sleep(0.2)  # avoid spinning the CPU while polling
        else:
            response = requests.get(url, stream=True, timeout=10)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(1024 * 1024):  # 1 MB chunks
                        if stop_download_flag:
                            break
                        if chunk:
                            f.write(chunk)
        if stop_download_flag:
            fix_partial_video(save_path)  # Try to repair the truncated file
            messagebox.showinfo("Download Stopped", f"Download was stopped by user.\nSaved: {save_path}")
        else:
            messagebox.showinfo("Download Complete", f"Downloaded successfully to:\n{save_path}")
    except Exception as e:
        messagebox.showerror("Download Error", str(e))
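# Caveat: Tkinter is not thread-safe; the messagebox calls above run in a
# worker thread, which usually works on Windows but is not guaranteed.
# Scheduling them on the main loop with window.after(0, ...) would be the
# safer pattern.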
def stop_download():
    global stop_download_flag
    stop_download_flag = True
def fix_partial_video(input_path):
    try:
        if not os.path.exists(input_path) or not input_path.lower().endswith(".mp4"):
            return
        output_path = input_path.replace(".mp4", "_fixed.mp4")
        ffmpeg_path = shutil.which("ffmpeg") or r"C:\Program Files\ffmpeg\bin\ffmpeg.exe"
        # Try a quick remux first
        command = [
            ffmpeg_path,
            "-y",
            "-i", input_path,
            "-c", "copy",
            "-movflags", "+faststart",
            output_path
        ]
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        # Fall back to a re-encode if the remux fails or produces a tiny file
        if result.returncode != 0 or not os.path.exists(output_path) or os.path.getsize(output_path) < 1024 * 1024:
            print("[INFO] Remux failed or file too small, retrying with re-encode...")
            command = [
                ffmpeg_path,
                "-y",
                "-i", input_path,
                "-preset", "ultrafast",
                output_path
            ]
            subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        # Replace the original file with the fixed one
        if os.path.exists(output_path):
            os.remove(input_path)
            os.rename(output_path, input_path)
    except Exception as e:
        print(f"[FFmpeg Fix Error] {e}")
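# Note: the first ffmpeg pass remuxes with "-c copy" (no re-encode), which is
# fast but fails if the truncated stream is unreadable; the fallback omits
# "-c copy", so ffmpeg re-encodes with its default codecs -- slower, but more
# tolerant of damaged input.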
def scrape_all_links(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        links = []
        for tag in soup.find_all('a', href=True):
            href = tag['href']
            full_url = urljoin(url, href)
            parsed_url = urlparse(full_url)
            if parsed_url.scheme in ['http', 'https']:
                links.append(full_url)
        return links
    except requests.exceptions.RequestException as e:
        messagebox.showerror("Network Error", f"Failed to scrape links: {e}")
        return []
    except Exception as e:
        messagebox.showerror("Error", f"An unexpected error occurred: {e}")
        return []
def scrape_all_button():
    url = url_entry.get().strip()
    if not url:
        messagebox.showwarning("Input Error", "Please enter a valid URL.")
        return
    result_box.delete("1.0", tk.END)
    media_urls.clear()
    all_links = scrape_all_links(url)
    media_urls.extend(all_links)
    for link in all_links:
        result_box.insert(tk.END, link + "\n")
    messagebox.showinfo("Done", f"{len(all_links)} total link(s) scraped.")
def open_in_vlc():
    line_index = result_box.index(tk.INSERT).split(".")[0]
    selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
    if not selected_url:
        messagebox.showwarning("No Selection", "Select a valid media URL.")
        return
    # Prefer VLC from PATH, then fall back to the default install location
    vlc_path = shutil.which("vlc") or r"C:\Program Files\VideoLAN\VLC\vlc.exe"
    if not os.path.exists(vlc_path):
        messagebox.showerror("VLC Error", "VLC is not installed or not found in PATH.")
        return
    try:
        subprocess.Popen([vlc_path, selected_url])
    except Exception as e:
        messagebox.showerror("VLC Error", f"Could not open VLC: {e}")
def preview_image_popup():
    try:
        line_index = result_box.index(tk.INSERT).split(".")[0]
        selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
        if not selected_url.lower().endswith(tuple(image_exts)):
            messagebox.showerror("Preview Error", "Selected link is not an image.")
            return
        response = requests.get(selected_url, timeout=10)
        if response.status_code != 200:
            messagebox.showerror("Preview Error", "Failed to load image.")
            return
        image = Image.open(io.BytesIO(response.content))
        popup = tk.Toplevel(window)
        popup.title("Image Preview")
        popup.geometry("600x600")
        img_resized = image.resize((500, 500), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10
        img_tk = ImageTk.PhotoImage(img_resized)
        label = tk.Label(popup, image=img_tk)
        label.image = img_tk  # keep a reference so the image is not garbage-collected
        label.pack()
    except Exception as e:
        messagebox.showerror("Preview Error", str(e))
def load_m3u_file():
    file_path = filedialog.askopenfilename(title="Open M3U File", filetypes=[("M3U/M3U8 Files", "*.m3u *.m3u8")])
    if file_path:
        result_box.delete("1.0", tk.END)
        media_urls.clear()
        with open(file_path, 'r', encoding="utf-8", errors="ignore") as f:
            for line in f:
                url = line.strip()
                if url and url.startswith("http"):
                    media_urls.append(url)
                    result_box.insert(tk.END, url + "\n")
        messagebox.showinfo("Loaded", f"{len(media_urls)} media URLs loaded from playlist.")
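# Note: only lines that start with "http" are kept, so #EXTINF metadata
# (channel names, logos, groups) is discarded; the playlist loads as a bare
# URL list.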
def load_online_m3u():
    url = url_entry.get().strip()
    if not url.lower().endswith((".m3u", ".m3u8")):
        messagebox.showwarning("URL Error", "Please enter a valid .m3u or .m3u8 URL.")
        return
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            raise Exception("Unable to fetch playlist.")
        result_box.delete("1.0", tk.END)
        media_urls.clear()
        for line in response.text.splitlines():
            line = line.strip()
            if line and line.startswith("http"):
                media_urls.append(line)
                result_box.insert(tk.END, line + "\n")
        messagebox.showinfo("Online M3U Loaded", f"{len(media_urls)} stream(s) loaded.")
    except Exception as e:
        messagebox.showerror("Error", str(e))
def scrape_xtream_m3u_url():
    url = url_entry.get().strip()
    if not url or "get.php" not in url:
        messagebox.showwarning("Input Error", "Please enter a valid Xtream M3U URL.")
        return
    try:
        headers = {
            "User-Agent": "VLC/3.0.18 LibVLC/3.0.18"
        }
        response = requests.get(url, headers=headers, timeout=15)
        if response.status_code == 404:
            raise Exception("404 Not Found — the playlist URL might be wrong or expired.")
        if response.status_code != 200:
            raise Exception(f"Failed to fetch playlist. Status code: {response.status_code}")
        content = response.text
        if "#EXTM3U" not in content:
            raise Exception("Invalid playlist. No M3U content found.")
        result_box.delete("1.0", tk.END)
        media_urls.clear()
        for line in content.splitlines():
            if line.startswith("http"):
                media_urls.append(line)
                result_box.insert(tk.END, line + "\n")
        if media_urls:
            messagebox.showinfo("Success", f"Scraped {len(media_urls)} stream URLs from Xtream playlist.")
        else:
            messagebox.showwarning("No URLs", "Playlist loaded, but no stream URLs found.")
    except Exception as e:
        messagebox.showerror("Error", str(e))
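# Note: Xtream-style playlists are typically served as
#   http://host:port/get.php?username=USER&password=PASS&type=m3u_plus
# (assumed format); the VLC User-Agent header is sent presumably because some
# panels refuse requests from unrecognized clients.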
def search_urls():
    query = search_entry.get().strip().lower()
    if not query:
        return
    result_box.tag_remove("highlight", "1.0", tk.END)
    lines = result_box.get("1.0", tk.END).splitlines()
    for i, line in enumerate(lines, 1):
        if query in line.lower():
            result_box.tag_add("highlight", f"{i}.0", f"{i}.end")
    result_box.tag_config("highlight", background="yellow", foreground="black")
def save_as_m3u():
    """Save the contents of the result box as an M3U/M3U8 playlist file."""
    file_path = filedialog.asksaveasfilename(
        defaultextension=".m3u",
        filetypes=[("Text File", "*.txt"), ("M3U Playlist", "*.m3u"), ("M3U8 Playlist", "*.m3u8")]
    )
    if file_path:
        try:
            with open(file_path, 'w', encoding="utf-8") as f:
                # Write content from the result box to the file
                f.write(result_box.get("1.0", tk.END).strip())
            messagebox.showinfo("Saved", f"Playlist saved to:\n{file_path}")
        except Exception as e:
            messagebox.showerror("Save Error", f"Failed to save playlist:\n{str(e)}")
def clear_url_field():
    """Clear the URL entry field."""
    url_entry.delete(0, tk.END)
def clear_result_box():
    """Clear the result box and reset the media URLs list."""
    result_box.delete("1.0", tk.END)
    media_urls.clear()
def clear_search():
    """Clear the search entry field and remove highlights from the result box."""
    search_entry.delete(0, tk.END)
    result_box.tag_remove("highlight", "1.0", tk.END)
def scrape_directory_media(url):
    """
    Scrape media URLs from subdirectories of the given URL.
    :param url: The base URL to start scraping from.
    """
    global media_urls
    result_box.delete("1.0", tk.END)
    media_urls.clear()
    def extract_directories(soup, base_url):
        """
        Extract directory links from the page.
        :param soup: BeautifulSoup object of the page.
        :param base_url: Base URL to resolve relative paths.
        :return: List of directory URLs.
        """
        directories = []
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            if href.endswith("/") and not href.startswith("#"):  # Subdirectory link
                full_href = urljoin(base_url, href)
                if full_href != base_url:  # Avoid infinite loops
                    directories.append(full_href)
        return directories
    def extract_media_urls(soup, base_url):
        """
        Extract media URLs from the page.
        :param soup: BeautifulSoup object of the page.
        :param base_url: Base URL to resolve relative paths.
        :return: Set of media URLs.
        """
        media_links = set()
        for tag in soup.find_all(['img', 'video', 'source', 'a']):
            src = tag.get('src') or tag.get('href')
            if src:
                full_url = urljoin(base_url, src)
                parsed = urlparse(full_url)
                ext = os.path.splitext(parsed.path)[1].lower()
                if ext in image_exts + video_exts:
                    media_links.add(full_url)
        return media_links
    try:
        # Fetch the base URL content
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            messagebox.showerror("Error", f"Failed to fetch {url} (Status Code: {response.status_code})")
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        # Step 1: Extract all subdirectories
        directories = extract_directories(soup, url)
        # Step 2: Scrape media URLs from each subdirectory
        found_media = False
        for directory in directories:
            try:
                dir_response = requests.get(directory, timeout=10)
                if dir_response.status_code == 200:
                    dir_soup = BeautifulSoup(dir_response.text, 'html.parser')
                    media_links = extract_media_urls(dir_soup, directory)
                    if media_links:
                        found_media = True
                        for media_url in media_links:
                            if media_url not in media_urls:
                                media_urls.append(media_url)
                                result_box.insert(tk.END, media_url + "\n")
            except Exception as e:
                print(f"Error scraping directory {directory}: {e}")
        if not found_media:
            messagebox.showinfo("Info", "No media URLs found in subdirectories.")
        else:
            messagebox.showinfo("Success", f"{len(media_urls)} media URL(s) found!")
    except Exception as e:
        messagebox.showerror("Error", str(e))
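# Note: this scraper descends exactly one level: it collects media from each
# direct subdirectory of the starting page, does not recurse further, and
# skips media on the starting page itself.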
# Tab 2 Layout
tk.Label(tab2, text="Enter URL to Scrape Media:").pack(pady=5)
search_frame = tk.Frame(tab2)
search_frame.pack(pady=5)
search_entry = tk.Entry(search_frame, width=40)
search_entry.pack(side=tk.LEFT, padx=5)
tk.Button(search_frame, text="Search", command=search_urls, bg="lightblue").pack(side=tk.LEFT, padx=5)
url_entry = tk.Entry(search_frame, width=100)
url_entry.pack(pady=5)
frame_buttons = tk.Frame(tab2)
frame_buttons.pack(pady=5)
tk.Button(frame_buttons, text="Scrape Media", command=process_url, bg="lightgreen", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_buttons, text="Browse URL File", command=browse_url_file, bg="lightyellow", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_buttons, text="Download All URLs", command=download_selected, bg="lightblue", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_buttons, text="Download Selected URL", command=download_selected_line, bg="orange", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_buttons, text="Save URLs to File", command=save_urls_to_file, bg="lightgray", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_buttons, text="Stop Download", command=stop_download, bg="red", width=20).pack(side=tk.LEFT, padx=5)
frame_button = tk.Frame(tab2)
frame_button.pack(pady=5)
tk.Button(frame_button, text="Scrape All Links", command=scrape_all_button, bg="#e0c3fc", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_button, text="Open in VLC", command=open_in_vlc, bg="#c1f0c1", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_button, text="Preview Image", command=preview_image_popup, bg="#f0c1c1", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_button, text="Load Online M3U", command=load_online_m3u, bg="#c9f2ff", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_button, text="Scrape Xtream M3U", command=scrape_xtream_m3u_url, bg="#fff0b3", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_button, text="Load M3U File", command=load_m3u_file, bg="#d0f0fd", width=20).pack(side=tk.LEFT, padx=5)
result_frame = tk.Frame(tab2)
result_frame.pack(pady=5)
scrollbar = tk.Scrollbar(result_frame)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
result_box = tk.Text(result_frame, height=30, width=124, yscrollcommand=scrollbar.set)
result_box.pack(side=tk.LEFT, fill=tk.BOTH)
scrollbar.config(command=result_box.yview)
frame_clear = tk.Frame(tab2)
frame_clear.pack(pady=5)
tk.Button(frame_clear, text="Save Result", command=save_as_m3u, bg="#a7ffcc", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_clear, text="Clear Search", command=clear_search, bg="lightgray").pack(side=tk.LEFT, padx=2)
tk.Button(frame_clear, text="Clear URL Field", command=clear_url_field, bg="#ffd580", width=20).pack(side=tk.LEFT, padx=5)
tk.Button(frame_clear, text="Clear Result Field", command=clear_result_box, bg="#ffb3b3", width=20).pack(side=tk.LEFT, padx=5)
# Button for scraping subdirectories
tk.Button(frame_clear, text="Scrape Subdirectories", command=lambda: scrape_directory_media(url_entry.get().strip()), bg="#ffcccb", width=20).pack(side=tk.LEFT, padx=5)
# ====================
# === Tab 3 Content ===
# ====================
tab3 = ttk.Frame(notebook)
notebook.add(tab3, text="Web Scraper")
class WebScraperGUI:
    def __init__(self, root):
        self.root = root
        # Configure the style for ttk.Frame
        self.style = ttk.Style()
        self.style.configure("Background.TFrame", background="#336699")  # Define a custom style
        self.root.config(style="Background.TFrame")  # Apply the style to the root frame
        # URL Entry
        self.url_label = ttk.Label(root, text="Enter URL:")
        self.url_label.grid(column=0, row=0, sticky=tk.W, padx=10, pady=5)
        self.url_entry = ttk.Entry(root, width=120)
        self.url_entry.grid(column=1, row=0, columnspan=4, sticky=tk.W, padx=10, pady=5)
        # Options
        self.options_label = ttk.Label(root, text="Select Options:")
        self.options_label.grid(column=0, row=1, sticky=tk.W, padx=10, pady=5)
        # Checkboxes
        self.check_var_html = tk.BooleanVar()
        self.check_var_heading = tk.BooleanVar()
        self.check_var_paragraph = tk.BooleanVar()
        self.check_var_css = tk.BooleanVar()
        self.check_var_table = tk.BooleanVar()
        self.check_var_links = tk.BooleanVar()
        self.check_var_files = tk.BooleanVar()
        self.html_check = ttk.Checkbutton(root, text="Full HTML", variable=self.check_var_html)
        self.html_check.grid(column=1, row=1, sticky=tk.W, padx=10, pady=5)
        self.heading_check = ttk.Checkbutton(root, text="Headings", variable=self.check_var_heading)
        self.heading_check.grid(column=2, row=1, sticky=tk.W, padx=10, pady=5)
        self.paragraph_check = ttk.Checkbutton(root, text="Paragraphs", variable=self.check_var_paragraph)
        self.paragraph_check.grid(column=3, row=1, sticky=tk.W, padx=10, pady=5)
        self.css_check = ttk.Checkbutton(root, text="CSS", variable=self.check_var_css)
        self.css_check.grid(column=4, row=1, sticky=tk.W, padx=10, pady=5)
        self.table_check = ttk.Checkbutton(root, text="Tables", variable=self.check_var_table)
        self.table_check.grid(column=1, row=2, sticky=tk.W, padx=10, pady=5)
        self.links_check = ttk.Checkbutton(root, text="Links", variable=self.check_var_links)
        self.links_check.grid(column=2, row=2, sticky=tk.W, padx=10, pady=5)
        self.files_check = ttk.Checkbutton(root, text="Files", variable=self.check_var_files)
        self.files_check.grid(column=3, row=2, sticky=tk.W, padx=10, pady=5)
        # Result Text Field
        self.result_label = ttk.Label(root, text="Scraped Content of Websites:")
        self.result_label.grid(column=0, row=4, sticky=tk.W, padx=10, pady=5)
        #self.result_text = scrolledtext.ScrolledText(root, width=110, height=33, wrap=tk.WORD)
        self.result_text = scrolledtext.ScrolledText(root, width=116, height=33, wrap=tk.WORD, bg="#f0f0f0")
        self.result_text.grid(column=0, row=5, columnspan=5)
        # Scrape Button
        self.scrape_button = ttk.Button(root, text="SCRAPE", command=self.scrape)
        self.scrape_button.grid(column=4, row=4, columnspan=8, pady=10)
        # Save Result Button
        self.save_result_button = ttk.Button(root, text="Save Result", command=self.save_result, style='Red.TButton')
        self.save_result_button.grid(column=2, row=4, columnspan=8, pady=10)
        # Define style for the "Save Result" button
        self.style.configure('Red.TButton', foreground='red')
    def scrape(self):
        url = self.url_entry.get()
        if not url:
            messagebox.showwarning("Input Error", "Please enter a valid URL.")
            return
        options = {
            'html': self.check_var_html.get(),
            'heading': self.check_var_heading.get(),
            'paragraph': self.check_var_paragraph.get(),
            'css': self.check_var_css.get(),
            'table': self.check_var_table.get(),
            'links': self.check_var_links.get(),
            'files': self.check_var_files.get()
        }
        try:
            response = requests.get(url, timeout=15)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            result = ""
            if options['html']:
                result += str(soup) + '\n\n'
            if options['heading']:
                headings = soup.find_all(re.compile('^h[1-6]$'))
                result += "Headings:\n"
                for heading in headings:
                    result += heading.text.strip() + '\n'
                result += '\n'
            if options['paragraph']:
                paragraphs = soup.find_all('p')
                result += "Paragraphs:\n"
                for paragraph in paragraphs:
                    result += paragraph.text.strip() + '\n'
                result += '\n'
            if options['css']:
                css_links = [link['href'] for link in soup.find_all('link', rel='stylesheet')]
                result += "CSS Links:\n"
                for css_link in css_links:
                    full_url = urljoin(url, css_link)
                    result += full_url + '\n'
                result += '\n'
            if options['table']:
                tables = soup.find_all('table')
                result += "Tables:\n"
                for table in tables:
                    result += str(table) + '\n'
                result += '\n'
            if options['links']:
                links = soup.find_all('a', href=True)
                result += "Links:\n"
                for link in links:
                    if link['href'].startswith('http'):
                        result += f"Text: {link.text.strip()}, URL: {link['href']}\n"
                    else:
                        full_url = urljoin(url, link['href'])
                        result += f"Text: {link.text.strip()}, URL: {full_url}\n"
                result += '\n'
            if options['files']:
                file_links = [link['href'] for link in soup.find_all('a', href=True) if re.search(r'\.[^.]+$', link['href'])]
                result += "File Links:\n"
                for file_link in file_links:
                    full_url = urljoin(url, file_link)
                    result += full_url + '\n'
                result += '\n'
            self.result_text.delete(1.0, tk.END)
            self.result_text.insert(tk.END, result)
        except requests.exceptions.RequestException as e:
            messagebox.showerror("Network Error", f"Failed to fetch URL: {e}")
        except Exception as e:
            messagebox.showerror("Error", f"An unexpected error occurred: {e}")
    def save_result(self):
        result_text = self.result_text.get(1.0, tk.END)
        if not result_text.strip():
            messagebox.showwarning("Empty Result", "No content to save.")
            return
        file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text files", "*.txt")])
        if file_path:
            try:
                with open(file_path, "w", encoding="utf-8") as file:
                    file.write(result_text)
                messagebox.showinfo("Success", f"Result saved to {file_path}")
            except Exception as e:
                messagebox.showerror("Save Error", f"Failed to save file: {e}")
# Initialize WebScraperGUI in Tab 3
web_scraper_gui = WebScraperGUI(tab3)
# Run
window.mainloop()