SHARE
    TWEET
    Najeebsk

    EXTRACTOR-LINKS.pyw

    May 2nd, 2025
    490
    0
    Never
    Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
    Python 7.88 KB | None | 0 0
    import io  # Required for image preview
    import os
    import re
    import tkinter as tk
    from tkinter import messagebox, filedialog
    from tkinter import scrolledtext
    from tkinter import INSERT
    from urllib.parse import urljoin, urlparse

    import requests
    from bs4 import BeautifulSoup
    from PIL import Image, ImageTk
    # File extensions that the image preview accepts. The preview lower-cases
    # the selected link before comparing, so entries are kept lower-case.
    image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
    def extract_links(url, exclude_words=None):
        """Fetch *url* and return the absolute targets of its ``<a href>`` tags.

        Args:
            url: Address of the page to download.
            exclude_words: Optional iterable of substrings; any link containing
                one of them is left out of the result.

        Returns:
            A list of absolute link strings in document order, or an empty list
            when the request fails (the error is printed, not raised).
        """
        try:
            # A timeout keeps an unresponsive server from blocking the caller
            # forever; timeouts are RequestException subclasses and are handled
            # below like any other fetch failure.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching the URL {url}: {e}")
            return []

        # Resolve relative hrefs against the final location after redirects.
        base_url = response.url
        soup = BeautifulSoup(response.text, 'html.parser')

        links = []
        for anchor in soup.find_all('a', href=True):
            full_link = urljoin(base_url, anchor['href'])
            if exclude_words and any(word in full_link for word in exclude_words):
                continue
            links.append(full_link)
        return links
    def extract_username(url):
        """Return the user name embedded in the path of *url*, if any.

        The path segment following ``/user/`` or ``/profile/`` is preferred;
        otherwise the last non-empty path segment is used. Whole segments are
        returned, so names containing ``-`` or ``.`` are not truncated.

        Args:
            url: The URL to inspect.

        Returns:
            The extracted name, or ``None`` when the URL path has no non-empty
            segment.
        """
        path = urlparse(url).path

        # Explicit markers take precedence over the generic "last segment" rule.
        for pattern in (r'/user/([^/]+)', r'/profile/([^/]+)'):
            match = re.search(pattern, path)
            if match:
                return match.group(1)

        # Fall back to the last non-empty segment (this also covers paths that
        # end with a trailing slash).
        for part in reversed(path.split('/')):
            if part:
                return part
        return None
    def clean_and_log_links(links, filename, mode='a'):
        """Write *links* to *filename*, one per line, after cleaning them.

        Cleaning removes a trailing ``,user`` suffix from a link.

        Args:
            links: Iterable of link strings.
            filename: Path of the file to write.
            mode: Mode passed to ``open`` (``'a'`` appends, ``'w'`` overwrites).
        """
        with open(filename, mode) as f:
            f.writelines(re.sub(r',user$', '', link) + "\n" for link in links)
    def overwrite_links_file(filename):
        """Delete *filename* if it exists so a new run starts without old content.

        A missing file is not an error; nothing is printed in that case.
        """
        try:
            # Attempt the removal directly instead of checking first, so a file
            # that disappears between check and removal cannot raise.
            os.remove(filename)
        except FileNotFoundError:
            return
        print(f"Cleared existing content in {filename}")
    def extract_and_log_links():
        """Extract links from every URL in the input box and log them.

        Reads whitespace-separated URLs from ``text_input`` and one exclusion
        word per line from ``exclude_input``, writes the cleaned links to
        ``links.txt`` (replacing any previous content), reports the total in a
        message box and refreshes the log view.
        """
        urls = text_input.get("1.0", "end-1c").split()
        if not urls:
            messagebox.showwarning("Input Error", "Please enter at least one URL.")
            return

        exclude_lines = exclude_input.get("1.0", "end-1c").splitlines()
        exclude_words = [word.strip() for word in exclude_lines if word.strip()]

        overwrite_links_file('links.txt')
        # Create an empty links.txt up front so display_log() always finds a
        # file, even when no URL yields any links.
        with open('links.txt', 'w'):
            pass

        total_links = 0
        for url in urls:
            # Only the part before the query string is fetched.
            base_url = url.split('?')[0]
            print(f"Extracting links from: {base_url}")
            links = extract_links(base_url, exclude_words)
            if not links:
                messagebox.showerror("Error", f"No links found or an error occurred for {url}")
                continue
            total_links += len(links)
            clean_and_log_links(links, 'links.txt', 'a')

        messagebox.showinfo("Done", f"Total links extracted and cleaned: {total_links}")
        display_log()
    def display_log():
        """Replace the contents of the log widget with the text of ``links.txt``.

        If ``links.txt`` does not exist the log is simply cleared.
        """
        try:
            with open('links.txt', 'r') as file:
                log_content = file.read()
        except FileNotFoundError:
            # Nothing has been extracted yet; show an empty log instead of
            # raising inside the Tk callback.
            log_content = ""
        log_text.delete("1.0", tk.END)
        log_text.insert(tk.INSERT, log_content)
    def save_as_settings():
        """Save the URL list and exclusion words to a user-chosen text file.

        The file starts with a ``URLs:`` header followed by the URLs; when any
        exclusion words are present an ``Exclude Words:`` header and the words
        follow. Cancelling the file dialog does nothing.
        """
        urls = text_input.get("1.0", "end-1c").strip()
        exclude_words = exclude_input.get("1.0", "end-1c").strip()

        file_path = filedialog.asksaveasfilename(
            defaultextension=".txt",
            filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
        )
        if not file_path:
            return

        try:
            with open(file_path, "w") as file:
                file.write(f"URLs:\n{urls}\n")
                if exclude_words:
                    file.write(f"Exclude Words:\n{exclude_words}\n")
        except OSError as e:
            # Report write failures to the user instead of letting the
            # exception escape the Tk callback.
            messagebox.showerror("Save Error", f"Could not save settings: {e}")
            return

        messagebox.showinfo("Settings Saved", f"Your settings have been saved to {file_path}.")
    def load_settings():
        """Load URLs and exclusion words from a settings file into the inputs.

        The expected layout is the one written by ``save_as_settings``: a header
        line, the URLs, then optionally an ``Exclude Words:`` line followed by
        the exclusion words. Cancelling the file dialog does nothing.
        """
        file_path = filedialog.askopenfilename(
            defaultextension=".txt",
            filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
        )
        if not file_path:
            # Dialog was cancelled; this is not an error.
            return
        if not os.path.exists(file_path):
            messagebox.showwarning("Load Error", "The selected file does not exist.")
            return

        with open(file_path, "r") as file:
            content = file.read().splitlines()

        if len(content) < 2:
            messagebox.showwarning("Load Error", "The selected file does not contain any settings.")
            return

        # content[0] is the header line; the URLs follow it.
        if "Exclude Words:" in content:
            marker = content.index("Exclude Words:")
            urls = "\n".join(content[1:marker]).strip()
            exclude_words = "\n".join(content[marker + 1:]).strip()
        else:
            urls = "\n".join(content[1:]).strip()
            exclude_words = ""

        text_input.delete("1.0", tk.END)
        exclude_input.delete("1.0", tk.END)
        text_input.insert(tk.END, urls)
        exclude_input.insert(tk.END, exclude_words)
        messagebox.showinfo("Settings Loaded", "Your settings have been loaded successfully.")
    def preview_image_popup():
        """Show the image linked on the log line under the cursor in a popup.

        The text of the line holding the insertion cursor in ``log_text`` is
        taken as the URL. It must end with one of ``image_exts``
        (case-insensitive). Any failure is reported in an error message box
        instead of being raised.
        """
        try:
            line_index = log_text.index(tk.INSERT).split(".")[0]
            selected_url = log_text.get(f"{line_index}.0", f"{line_index}.end").strip()
            if not selected_url.lower().endswith(tuple(image_exts)):
                raise ValueError("Selected link is not an image.")

            response = requests.get(selected_url, timeout=10)
            # Fail on HTTP errors here rather than handing an error page to PIL.
            response.raise_for_status()
            image = Image.open(io.BytesIO(response.content))

            popup = tk.Toplevel(root)
            popup.title("Image Preview")
            popup.geometry("600x600")

            img_resized = image.resize((500, 500), Image.LANCZOS)
            img_tk = ImageTk.PhotoImage(img_resized)
            label = tk.Label(popup, image=img_tk)
            label.image = img_tk  # Keep a reference to prevent garbage collection
            label.pack()
        except Exception as e:
            messagebox.showerror("Preview Error", str(e))
    # ---- Main window -------------------------------------------------------
    root = tk.Tk()
    root.title("Najeeb Shah Khan Link Extractor")
    root.configure(bg="#2c3e50")

    frame = tk.Frame(root, padx=10, pady=10)
    frame.pack(padx=10, pady=10)

    # URL input
    lbl = tk.Label(frame, text="Enter URLs (one per line):")
    lbl.pack(anchor="w")
    text_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
    text_input.pack(pady=5)

    # Exclusion-word input
    exclude_lbl = tk.Label(frame, text="Enter words to exclude (one per line) [Optional]:")
    exclude_lbl.pack(anchor="w")
    exclude_input = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=4)
    exclude_input.pack(pady=5)

    # Context menus: one Copy/Paste menu per input widget
    context_menu = tk.Menu(root, tearoff=0)
    context_menu.add_command(label="Copy", command=lambda: text_input.event_generate("<<Copy>>"))
    context_menu.add_command(label="Paste", command=lambda: text_input.event_generate("<<Paste>>"))

    context_menu_exclude = tk.Menu(root, tearoff=0)
    context_menu_exclude.add_command(label="Copy", command=lambda: exclude_input.event_generate("<<Copy>>"))
    context_menu_exclude.add_command(label="Paste", command=lambda: exclude_input.event_generate("<<Paste>>"))

    # Bindings for context menus (right mouse button)
    text_input.bind("<Button-3>", lambda e: context_menu.tk_popup(e.x_root, e.y_root))
    exclude_input.bind("<Button-3>", lambda e: context_menu_exclude.tk_popup(e.x_root, e.y_root))

    # Buttons
    button_frame = tk.Frame(root, bg="#2c3e50")
    button_frame.pack(pady=5)
    tk.Button(
        button_frame, text="Extract Links", command=extract_and_log_links,
        bg="#3498db", fg="white", font=("Arial", 12, "bold"), width=12,
    ).pack(side=tk.LEFT, padx=5)
    tk.Button(
        button_frame, text="Save Settings As", command=save_as_settings,
        bg="#27ae60", fg="white", font=("Arial", 12, "bold"), width=12,
    ).pack(side=tk.LEFT, padx=5)
    tk.Button(
        button_frame, text="Load Settings", command=load_settings,
        bg="#e74c3c", fg="white", font=("Arial", 12, "bold"), width=12,
    ).pack(side=tk.LEFT, padx=5)
    tk.Button(
        button_frame, text="Preview Image", command=preview_image_popup,
        bg="#f0c1c1", fg="white", font=("Arial", 12, "bold"), width=12,
    ).pack(side=tk.LEFT, padx=5)

    # Log output
    log_label = tk.Label(frame, text="Log Output:")
    log_label.pack(anchor="w")
    log_text = scrolledtext.ScrolledText(frame, wrap=tk.WORD, width=120, height=10)
    log_text.pack(pady=5)

    root.mainloop()
    Advertisement
    Add Comment
    Please, Sign In to add comment
    Public Pastes
    We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
    Not a member of Pastebin yet?
    Sign Up, it unlocks many cool features!

    AltStyle によって変換されたページ (->オリジナル) /