
Commit 12eca83

add error handling and retries

1 parent 79b813e commit 12eca83

File tree

3 files changed: +57, -18 lines

operators/contest_problems_ops.py

Lines changed: 38 additions & 14 deletions

@@ -1,19 +1,32 @@
+import logging
 import pandas as pd
 import requests
+from airflow.exceptions import AirflowFailException
 from collections import defaultdict

 from utils.queries import contest_problems_query, problem_tags_query
-from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME
+from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME, MAX_ATTEMPTS


 def etl_contest_problems(num_pages):
     """Extracts, transforms, and loads contests' problems in all pages"""
     responses = []
     for i in range(num_pages):
-        # Get response for each page
-        response = requests.post(URL, json=contest_problems_query(i + 1)).json()["data"]["pastContests"]["data"]
-        for contest in response:
-            responses.extend(parse_contest_problems(contest))  # Transform response data to optimized format
+        attempt = 0
+        while attempt < MAX_ATTEMPTS:
+            try:
+                # Get response for each page
+                response = requests.post(URL, json=contest_problems_query(i + 1)).json()["data"]["pastContests"]["data"]
+                for contest in response:
+                    responses.extend(parse_contest_problems(contest))  # Transform response data to optimized format
+                break  # Successful operation
+            except Exception as e:
+                logger = logging.getLogger("airflow.task")
+                logger.error(e)
+                logger.error(f"Attempt {attempt + 1} failed at page: {i + 1}")
+                attempt += 1
+        else:
+            raise AirflowFailException
     # output_path = f"{OUTPUT_PATH}/raw/contest_problems.csv"
     output_path = f"s3://{BUCKET_NAME}/raw/contest_problems.csv"
     pd.DataFrame(responses).to_csv(output_path, index=False)  # Load the data to the destination storage

@@ -38,15 +51,26 @@ def etl_problem_tags(task_instance):
     counter = defaultdict(int)  # Count the number of each topic tag showing up
     responses = []
     for problem in df["problem"]:
-        # Get data for each problem
-        response = requests.post(URL, json=problem_tags_query(problem)).json()["data"]["question"]
-        tags = parse_problem_tags(response)  # Transform data to get the list of tags
-        responses.append({
-            "problem": problem,
-            "tags": tags
-        })
-        for tag in tags:
-            counter[tag] += 1
+        attempt = 0
+        while attempt < MAX_ATTEMPTS:
+            try:
+                # Get data for each problem
+                response = requests.post(URL, json=problem_tags_query(problem)).json()["data"]["question"]
+                tags = parse_problem_tags(response)  # Transform data to get the list of tags
+                responses.append({
+                    "problem": problem,
+                    "tags": tags
+                })
+                for tag in tags:
+                    counter[tag] += 1
+                break  # Successful operation
+            except Exception as e:
+                logger = logging.getLogger("airflow.task")
+                logger.error(e)
+                logger.error(f"Attempt {attempt + 1} failed at problem: {problem}")
+                attempt += 1
+        else:
+            raise AirflowFailException

     # Load tags data to the destination storage
     # output_path = f"{OUTPUT_PATH}/raw/problem_tags.csv"
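
The same while/try/except/else retry loop is now duplicated in etl_contest_problems, etl_problem_tags, and extract_contest_ranking. A minimal sketch of how it could be factored into one shared helper; the name post_with_retries and its label parameter are assumptions for illustration, not part of this commit:

import logging

import requests
from airflow.exceptions import AirflowFailException

from utils.constants import URL, MAX_ATTEMPTS


def post_with_retries(payload, label, max_attempts=MAX_ATTEMPTS):
    """POST payload to URL, retrying up to max_attempts times.

    Returns the decoded JSON body on success; raises AirflowFailException
    once every attempt has failed, mirroring the inline loops above.
    """
    logger = logging.getLogger("airflow.task")
    for attempt in range(max_attempts):
        try:
            return requests.post(URL, json=payload).json()
        except Exception as e:
            logger.error(e)
            logger.error(f"Attempt {attempt + 1} failed at {label}")
    raise AirflowFailException(f"All {max_attempts} attempts failed at {label}")

Each loop body would then shrink to a single call, e.g. data = post_with_retries(contest_problems_query(i + 1), f"page {i + 1}")["data"]["pastContests"]["data"]. One behavioral difference worth noting: in the committed code the parsing work sits inside the try block, so a parse error also triggers a retry, whereas this helper retries only the request itself.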

operators/contest_ranking_ops.py

Lines changed: 17 additions & 4 deletions

@@ -1,18 +1,31 @@
 import ast
+import logging
 import pandas as pd
 import requests
+from airflow.exceptions import AirflowFailException

 from utils.queries import contest_ranking_query
-from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME
+from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME, MAX_ATTEMPTS


 def extract_contest_ranking(num_pages):
     """Extracts raw data in all pages"""
     responses = []
     for i in range(num_pages):
-        # Get response for each page
-        response = requests.post(URL, json=contest_ranking_query(i + 1)).json()["data"]["globalRanking"]["rankingNodes"]
-        responses.extend(response)
+        attempt = 0
+        while attempt < MAX_ATTEMPTS:
+            try:
+                # Get response for each page
+                response = requests.post(URL, json=contest_ranking_query(i + 1)).json()["data"]["globalRanking"]["rankingNodes"]
+                responses.extend(response)
+                break
+            except Exception as e:
+                logger = logging.getLogger("airflow.task")
+                logger.error(e)
+                logger.error(f"Attempt {attempt + 1} failed at page: {i + 1}")
+                attempt += 1
+        else:
+            raise AirflowFailException
     # output_path = f"{OUTPUT_PATH}/raw/sample_contest_ranking.csv"  # Local file path for sample output data
     output_path = f"s3://{BUCKET_NAME}/raw/contest_ranking.csv"  # Amazon S3 storage path
     pd.DataFrame(responses).to_csv(output_path, index=False)
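
One possible refinement, not part of this commit: each retry fires immediately, so a rate-limited request is likely to fail again on the very next attempt. A hedged sketch of the same loop with exponential backoff between attempts (the helper name with_backoff and the 2s/4s delay schedule are assumptions):

import logging
import time

from airflow.exceptions import AirflowFailException


def with_backoff(fn, max_attempts=3, base_delay=2.0):
    """Call fn(), pausing base_delay * 2**attempt seconds between failures."""
    logger = logging.getLogger("airflow.task")
    for attempt in range(max_attempts):
        try:
            return fn()
        except Exception as e:
            logger.error(f"Attempt {attempt + 1} failed: {e}")
            if attempt + 1 < max_attempts:
                time.sleep(base_delay * 2 ** attempt)  # 2s, then 4s before the final try
    raise AirflowFailException(f"All {max_attempts} attempts failed")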

utils/constants.py

Lines changed: 2 additions & 0 deletions

@@ -3,3 +3,5 @@
 OUTPUT_PATH = "~/leetcode-contest-analytics/sample_data"

 BUCKET_NAME = "leetcode-contest-analytics"
+
+MAX_ATTEMPTS = 3
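
The retry loops above rely on Python's while/else semantics: the else block runs only when the loop condition becomes false without hitting a break, i.e. only after all MAX_ATTEMPTS attempts have failed. A standalone illustration of the control flow (flaky and its failure rate are made up for the demo):

import random

MAX_ATTEMPTS = 3


def flaky():
    """Simulated request that fails ~70% of the time."""
    if random.random() < 0.7:
        raise RuntimeError("simulated transient failure")
    return "ok"


attempt = 0
while attempt < MAX_ATTEMPTS:
    try:
        print(f"attempt {attempt + 1} succeeded: {flaky()}")
        break  # skips the else clause below
    except RuntimeError as e:
        print(f"attempt {attempt + 1} failed: {e}")
        attempt += 1
else:
    # Runs only when the while condition went false without a break,
    # i.e. every attempt raised.
    print("all attempts failed")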

