Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a114904

Browse files
Checkpoint: Verified for amazon
1 parent 8afa5dd commit a114904

File tree

6 files changed

+5082
-7
lines changed

6 files changed

+5082
-7
lines changed

‎amazon/amazon-30-days/problems.csv

Lines changed: 205 additions & 0 deletions
Large diffs are not rendered by default.

‎amazon/amazon-all/problems.csv

Lines changed: 1855 additions & 0 deletions
Large diffs are not rendered by default.

‎amazon/amazon-more-than-six-months/problems.csv

Lines changed: 1674 additions & 0 deletions
Large diffs are not rendered by default.

‎amazon/amazon-six-months/problems.csv

Lines changed: 800 additions & 0 deletions
Large diffs are not rendered by default.

‎amazon/amazon-three-months/problems.csv

Lines changed: 478 additions & 0 deletions
Large diffs are not rendered by default.

‎src/main/java/Scraper.java

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import io.github.bonigarcia.wdm.WebDriverManager;
2+
import lombok.SneakyThrows;
23
import lombok.extern.slf4j.Slf4j;
34
import model.ProblemStatement;
45
import org.jsoup.Jsoup;
@@ -8,10 +9,16 @@
89
import org.openqa.selenium.*;
910
import org.openqa.selenium.edge.EdgeDriver;
1011

12+
import java.io.FileWriter;
1113
import java.io.IOException;
14+
import java.nio.file.Files;
15+
import java.nio.file.Path;
16+
import java.nio.file.Paths;
1217
import java.time.Duration;
1318
import java.util.ArrayList;
1419
import java.util.List;
20+
import java.util.regex.Matcher;
21+
import java.util.regex.Pattern;
1522

1623
@Slf4j
1724
public class Scraper {
@@ -42,13 +49,15 @@ public void setup() throws InterruptedException, IOException {
4249
// companyURLs.add(link);
4350
// }
4451
// for (String companyURL : companyURLs) {
45-
visitCompanies("https://leetcode.com/company/amazon/?favoriteSlug=amazon-all", driver);
52+
for (String recency : new String[]{"amazon-30-days", "amazon-three-months", "amazon-six-months", "amazon-more-than-six-months", "amazon-all" }) {
53+
visitCompanies(String.format("https://leetcode.com/company/amazon/?favoriteSlug=%s", recency), driver, recency);
54+
}
4655
// }
4756
}
4857

49-
private void visitCompanies(String companyURL, WebDriver driver) throws InterruptedException {
50-
String companyName = companyURL.substring(companyURL.lastIndexOf("/") + 1);
51-
log.info("Visiting {}", companyName);
58+
private void visitCompanies(String companyURL, WebDriver driver, String recency) throws InterruptedException {
59+
String companyName = extractCompanyNameWithRegex(companyURL);
60+
log.info("Visiting {} with recency {}", companyName, recency);
5261
driver.get(companyURL);
5362
Thread.sleep(QUESTIONS_PAGE_WAIT_MILLIS);
5463
loadAllProblems(driver);
@@ -60,6 +69,18 @@ private void visitCompanies(String companyURL, WebDriver driver) throws Interrup
6069
for (ProblemStatement problem : problems) {
6170
log.info("{}", problem);
6271
}
72+
exportToCSV(problems, companyName, recency);
73+
}
74+
75+
private String extractCompanyNameWithRegex(String companyURL) {
76+
Pattern pattern = Pattern.compile("/company/([^/?]+)");
77+
Matcher matcher = pattern.matcher(companyURL);
78+
79+
if (matcher.find()) {
80+
return matcher.group(1);
81+
}
82+
83+
throw new IllegalArgumentException("Invalid company URL format: " + companyURL);
6384
}
6485

6586
public static void loadAllProblems(WebDriver driver) {
@@ -104,15 +125,12 @@ public static void loadAllProblems(WebDriver driver) {
104125

105126
private static boolean performScroll(WebDriver driver, JavascriptExecutor js) {
106127
try {
107-
// Find the specific element to scroll inside
108128
WebElement scrollElement = driver.findElement(By.xpath("/html/body/div[1]/div[1]/div[4]/div/div[2]"));
109129

110-
// Scroll down by 5000 pixels inside the element
111130
js.executeScript("arguments[0].scrollTop = arguments[0].scrollHeight;", scrollElement);
112131
Thread.sleep(3000);
113132

114133
return true;
115-
116134
} catch (Exception e) {
117135
log.error("Error during scrolling: ", e);
118136
return false;
@@ -203,4 +221,49 @@ public static List<ProblemStatement> extractProblems(Document doc) {
203221

204222
return problems;
205223
}
224+
225+
@SneakyThrows
226+
private void exportToCSV(List<ProblemStatement> problems, String companyName, String recency) {
227+
// Create directory structure: companyName/recency/
228+
Path outputDir = Paths.get(companyName, recency);
229+
if (!Files.exists(outputDir)) {
230+
Files.createDirectories(outputDir);
231+
}
232+
233+
Path filePath = outputDir.resolve("problems.csv");
234+
235+
try (FileWriter writer = new FileWriter(filePath.toFile())) {
236+
writer.append("ID,URL,Title,Difficulty,Acceptance %,Frequency %\n");
237+
238+
for (ProblemStatement problem : problems) {
239+
writer.append(escapeCsvValue(problem.id()))
240+
.append(',')
241+
.append(escapeCsvValue("https://leetcode.com" + problem.url()))
242+
.append(',')
243+
.append(escapeCsvValue(problem.title()))
244+
.append(',')
245+
.append(escapeCsvValue(problem.difficulty()))
246+
.append(',')
247+
.append(escapeCsvValue(problem.acceptancePercentage()))
248+
.append(',')
249+
.append(escapeCsvValue(problem.frequencyPercentage()))
250+
.append('\n');
251+
}
252+
}
253+
254+
log.info("CSV file created: {}", filePath.toAbsolutePath());
255+
}
256+
257+
private String escapeCsvValue(String value) {
258+
if (value == null) {
259+
return "";
260+
}
261+
262+
// If value contains comma, double quote, or newline, wrap in quotes and escape internal quotes
263+
if (value.contains(",") || value.contains("\"") || value.contains("\n") || value.contains("\r")) {
264+
return "\"" + value.replace("\"", "\"\"") + "\"";
265+
}
266+
267+
return value;
268+
}
206269
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /