
Commit 12eca83

add error handling and retries

1 parent 79b813e commit 12eca83

File tree

3 files changed: +57, -18 lines

operators/contest_problems_ops.py

Lines changed: 38 additions & 14 deletions

@@ -1,19 +1,32 @@
+import logging
 import pandas as pd
 import requests
+from airflow.exceptions import AirflowFailException
 from collections import defaultdict

 from utils.queries import contest_problems_query, problem_tags_query
-from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME
+from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME, MAX_ATTEMPTS


 def etl_contest_problems(num_pages):
     """Extracts, transforms, and loads contests' problems in all pages"""
     responses = []
     for i in range(num_pages):
-        # Get response for each page
-        response = requests.post(URL, json=contest_problems_query(i + 1)).json()["data"]["pastContests"]["data"]
-        for contest in response:
-            responses.extend(parse_contest_problems(contest))  # Transform response data to optimized format
+        attempt = 0
+        while attempt < MAX_ATTEMPTS:
+            try:
+                # Get response for each page
+                response = requests.post(URL, json=contest_problems_query(i + 1)).json()["data"]["pastContests"]["data"]
+                for contest in response:
+                    responses.extend(parse_contest_problems(contest))  # Transform response data to optimized format
+                break  # Successful operation
+            except Exception as e:
+                logger = logging.getLogger("airflow.task")
+                logger.error(e)
+                logger.error(f"Attempt {attempt + 1} failed at page: {i + 1}")
+                attempt += 1
+        else:
+            raise AirflowFailException
     # output_path = f"{OUTPUT_PATH}/raw/contest_problems.csv"
     output_path = f"s3://{BUCKET_NAME}/raw/contest_problems.csv"
     pd.DataFrame(responses).to_csv(output_path, index=False)  # Load the data to the destination storage

@@ -38,15 +51,26 @@ def etl_problem_tags(task_instance):
     counter = defaultdict(int)  # Count the number of each topic tag showing up
     responses = []
     for problem in df["problem"]:
-        # Get data for each problem
-        response = requests.post(URL, json=problem_tags_query(problem)).json()["data"]["question"]
-        tags = parse_problem_tags(response)  # Transform data to get the list of tags
-        responses.append({
-            "problem": problem,
-            "tags": tags
-        })
-        for tag in tags:
-            counter[tag] += 1
+        attempt = 0
+        while attempt < MAX_ATTEMPTS:
+            try:
+                # Get data for each problem
+                response = requests.post(URL, json=problem_tags_query(problem)).json()["data"]["question"]
+                tags = parse_problem_tags(response)  # Transform data to get the list of tags
+                responses.append({
+                    "problem": problem,
+                    "tags": tags
+                })
+                for tag in tags:
+                    counter[tag] += 1
+                break  # Successful operation
+            except Exception as e:
+                logger = logging.getLogger("airflow.task")
+                logger.error(e)
+                logger.error(f"Attempt {attempt + 1} failed at problem: {problem}")
+                attempt += 1
+        else:
+            raise AirflowFailException

     # Load tags data to the destination storage
     # output_path = f"{OUTPUT_PATH}/raw/problem_tags.csv"
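
The same while/try/except/else retry loop is now duplicated in etl_contest_problems, etl_problem_tags, and extract_contest_ranking. A minimal sketch of how it could be factored into one shared helper; the name post_with_retries and its label parameter are assumptions for illustration, not part of this commit:

import logging

import requests
from airflow.exceptions import AirflowFailException

from utils.constants import URL, MAX_ATTEMPTS


def post_with_retries(payload, label, max_attempts=MAX_ATTEMPTS):
    """POST payload to URL, retrying up to max_attempts times.

    Returns the decoded JSON body on success; raises AirflowFailException
    once every attempt has failed, mirroring the inline loops above.
    """
    logger = logging.getLogger("airflow.task")
    for attempt in range(max_attempts):
        try:
            return requests.post(URL, json=payload).json()
        except Exception as e:
            logger.error(e)
            logger.error(f"Attempt {attempt + 1} failed at {label}")
    raise AirflowFailException(f"All {max_attempts} attempts failed at {label}")

Each loop body would then shrink to a single call, e.g. data = post_with_retries(contest_problems_query(i + 1), f"page {i + 1}")["data"]["pastContests"]["data"]. One behavioral difference worth noting: in the committed code the parsing work sits inside the try block, so a parse error also triggers a retry, whereas this helper retries only the request itself.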

operators/contest_ranking_ops.py

Lines changed: 17 additions & 4 deletions

@@ -1,18 +1,31 @@
 import ast
+import logging
 import pandas as pd
 import requests
+from airflow.exceptions import AirflowFailException

 from utils.queries import contest_ranking_query
-from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME
+from utils.constants import URL, OUTPUT_PATH, BUCKET_NAME, MAX_ATTEMPTS


 def extract_contest_ranking(num_pages):
     """Extracts raw data in all pages"""
     responses = []
     for i in range(num_pages):
-        # Get response for each page
-        response = requests.post(URL, json=contest_ranking_query(i + 1)).json()["data"]["globalRanking"]["rankingNodes"]
-        responses.extend(response)
+        attempt = 0
+        while attempt < MAX_ATTEMPTS:
+            try:
+                # Get response for each page
+                response = requests.post(URL, json=contest_ranking_query(i + 1)).json()["data"]["globalRanking"]["rankingNodes"]
+                responses.extend(response)
+                break
+            except Exception as e:
+                logger = logging.getLogger("airflow.task")
+                logger.error(e)
+                logger.error(f"Attempt {attempt + 1} failed at page: {i + 1}")
+                attempt += 1
+        else:
+            raise AirflowFailException
     # output_path = f"{OUTPUT_PATH}/raw/sample_contest_ranking.csv"  # Local file path for sample output data
     output_path = f"s3://{BUCKET_NAME}/raw/contest_ranking.csv"  # Amazon S3 storage path
     pd.DataFrame(responses).to_csv(output_path, index=False)
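
One possible refinement, not part of this commit: each retry fires immediately, so a rate-limited request is likely to fail again on the very next attempt. A hedged sketch of the same loop with exponential backoff between attempts (the helper name with_backoff and the 2s/4s delay schedule are assumptions):

import logging
import time

from airflow.exceptions import AirflowFailException


def with_backoff(fn, max_attempts=3, base_delay=2.0):
    """Call fn(), pausing base_delay * 2**attempt seconds between failures."""
    logger = logging.getLogger("airflow.task")
    for attempt in range(max_attempts):
        try:
            return fn()
        except Exception as e:
            logger.error(f"Attempt {attempt + 1} failed: {e}")
            if attempt + 1 < max_attempts:
                time.sleep(base_delay * 2 ** attempt)  # 2s, then 4s before the final try
    raise AirflowFailException(f"All {max_attempts} attempts failed")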

utils/constants.py

Lines changed: 2 additions & 0 deletions

@@ -3,3 +3,5 @@
 OUTPUT_PATH = "~/leetcode-contest-analytics/sample_data"

 BUCKET_NAME = "leetcode-contest-analytics"
+
+MAX_ATTEMPTS = 3
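
The retry loops above rely on Python's while/else semantics: the else block runs only when the loop condition becomes false without hitting a break, i.e. only after all MAX_ATTEMPTS attempts have failed. A standalone illustration of the control flow (flaky and its failure rate are made up for the demo):

import random

MAX_ATTEMPTS = 3


def flaky():
    """Simulated request that fails ~70% of the time."""
    if random.random() < 0.7:
        raise RuntimeError("simulated transient failure")
    return "ok"


attempt = 0
while attempt < MAX_ATTEMPTS:
    try:
        print(f"attempt {attempt + 1} succeeded: {flaky()}")
        break  # skips the else clause below
    except RuntimeError as e:
        print(f"attempt {attempt + 1} failed: {e}")
        attempt += 1
else:
    # Runs only when the while condition went false without a break,
    # i.e. every attempt raised.
    print("all attempts failed")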

