Commit bc1d944 (1 parent: e6fee97)

images in the examples working

File tree: 3 files changed, +42 -14 lines


.gitignore

Lines changed: 4 additions & 0 deletions

@@ -33,3 +33,7 @@ env/
 
 # Project specific
 problems/
+leetcode_last_solution.py
+debug_soup.html
+debug_content_html.txt
+git_setup_commands.txt

leetcode_scraper.py

Lines changed: 37 additions & 13 deletions

@@ -97,9 +97,20 @@ def _process_problem_data(self, question):
         }
 
         # Process content with BeautifulSoup to extract description, examples, and constraints
+        import os
         content_html = question.get('content', '')
+        debug_dir = os.path.dirname(os.path.abspath(__file__))
+        debug_content_path = os.path.join(debug_dir, 'debug_content_html.txt')
+        debug_soup_path = os.path.join(debug_dir, 'debug_soup.html')
+        # Write content_html to a debug file for inspection
+        with open(debug_content_path, 'w', encoding='utf-8') as f:
+            f.write(content_html)
+        print(f"[DEBUG] Wrote content_html to {debug_content_path}")
         soup = BeautifulSoup(content_html, 'html.parser')
-
+        # Write soup prettified HTML to a debug file for inspection
+        with open(debug_soup_path, 'w', encoding='utf-8') as f:
+            f.write(soup.prettify())
+        print(f"[DEBUG] Wrote soup HTML to {debug_soup_path}")
         # Get description (text before the first <strong>Example</strong>)
         description = []
         current_element = soup.find()
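
The two debug dumps written here (debug_content_html.txt and debug_soup.html) land next to the script itself, which is why both files are added to .gitignore above.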
@@ -113,14 +124,23 @@ def _process_problem_data(self, question):
 
         problem_data['description'] = '\n'.join([d for d in description if d])
 
-        # Extract examples
+        # Extract examples and attach the closest preceding image to each
         examples = []
         example_blocks = soup.find_all('pre')
         for i, example in enumerate(example_blocks, 1):
-            examples.append({
+            example_dict = {
                 'example_num': i,
-                'example_text': example.get_text().strip()
-            })
+                'example_text': example.get_text().strip(),
+                'images': []
+            }
+            # Find the closest preceding <img> tag before this <pre>
+            prev = example.previous_element
+            while prev:
+                if getattr(prev, 'name', None) == 'img' and prev.has_attr('src'):
+                    example_dict['images'].append(prev['src'])
+                    break
+                prev = prev.previous_element
+            examples.append(example_dict)
         problem_data['examples'] = examples
 
         # Extract constraints
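
The hunk above walks backwards from each <pre> block via previous_element and keeps the first <img src> it encounters. A minimal standalone sketch of that technique, using an invented HTML snippet (the markup below is an assumption for illustration, not LeetCode's actual content):

from bs4 import BeautifulSoup

# Invented markup for illustration only; real problem content differs.
html = """
<p>Given head, determine if the linked list has a cycle in it.</p>
<img src="https://example.com/circularlinkedlist.png" alt="cycle diagram">
<p><strong>Example 1:</strong></p>
<pre>Input: head = [3,2,0,-4], pos = 1
Output: true</pre>
"""

soup = BeautifulSoup(html, 'html.parser')

for i, pre in enumerate(soup.find_all('pre'), 1):
    images = []
    # Walk backwards in document order until the nearest <img> with a src.
    prev = pre.previous_element
    while prev:
        if getattr(prev, 'name', None) == 'img' and prev.has_attr('src'):
            images.append(prev['src'])
            break
        prev = prev.previous_element
    print(f"Example {i} images: {images}")
    # Prints: Example 1 images: ['https://example.com/circularlinkedlist.png']

Note that previous_element visits every node (tags and strings) in reverse document order, so the break after the first match is what limits each example to its single closest image.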
@@ -212,18 +232,22 @@ def scrape_problem_list(self, limit=10):
 
         return problem_list
 
-if __name__ == "__main__":
-    scraper = LeetCodeScraper()
+# if __name__ == "__main__":
+#     scraper = LeetCodeScraper()
 
     # Option 1: Scrape a specific problem
     # problem_data = scraper.scrape_problem("two-sum")
     # print(json.dumps(problem_data, indent=2))
 
+if __name__ == "__main__":
+    scraper = LeetCodeScraper()
+    problem_data = scraper.scrape_problem("linked-list-cycle")
+    print(json.dumps(problem_data, indent=2))
     # Option 2: Scrape multiple problems from the list
-    problem_list = scraper.scrape_problem_list(limit=5)
+    # problem_list = scraper.scrape_problem_list(limit=5)
 
-    # Add a delay between requests to avoid being blocked
-    for problem in problem_list:
-        print(f"Scraping problem: {problem['title']} ({problem['slug']})")
-        scraper.scrape_problem(problem['slug'])
-        time.sleep(2)  # Wait 2 seconds between requests
+    # # Add a delay between requests to avoid being blocked
+    # for problem in problem_list:
+    #     print(f"Scraping problem: {problem['title']} ({problem['slug']})")
+    #     scraper.scrape_problem(problem['slug'])
+    #     time.sleep(2)  # Wait 2 seconds between requests

requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -1,3 +1,3 @@
-requests==2.28.2
+requests>=2.25.1
 beautifulsoup4==4.12.2
 lxml==4.9.2
