Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 58bca84

Browse files
set up and test run transform for contest ranking pipeline
1 parent 778823a commit 58bca84

File tree

2 files changed

+133
-3
lines changed

2 files changed

+133
-3
lines changed

‎operators/contest_ranking_ops.py‎

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import ast
12
import pandas as pd
23
import requests
34

@@ -12,6 +13,34 @@ def extract_contest_ranking(num_pages):
1213
# Get response for each page
1314
response = requests.post(URL, json=contest_ranking_query(i + 1)).json()["data"]["globalRanking"]["rankingNodes"]
1415
responses.extend(response)
15-
file_path = f"{OUTPUT_PATH}/raw/sample_contest_ranking.csv" # Local file path for sample output data
16-
pd.DataFrame(responses).to_csv(file_path, index=False)
17-
return file_path
16+
output_path = f"{OUTPUT_PATH}/raw/sample_contest_ranking.csv" # Local file path for sample output data
17+
pd.DataFrame(responses).to_csv(output_path, index=False)
18+
return output_path
19+
20+
21+
def transform_contest_ranking(task_instance):
    """Clean the raw contest-ranking CSV and write the processed CSV.

    Reads the raw file whose path is pulled from XCom (task id
    ``extract_contest_ranking``), derives per-user contest statistics and
    flattened user fields, drops the raw nested columns, renames the rating
    columns and writes the result under ``{OUTPUT_PATH}/processed``.

    Args:
        task_instance: Object exposing ``xcom_pull(task_ids=...)``
            (presumably an Airflow TaskInstance -- confirm against the DAG).

    Returns:
        str: Path of the processed CSV file that was written.
    """
    input_path = task_instance.xcom_pull(task_ids="extract_contest_ranking")
    df = pd.read_csv(input_path)  # Read the raw data

    # Process the ranking column
    # The cell text originates from an external API, so parse it as a literal
    # (like the user column below) instead of executing it with eval().
    df["ranking"] = df["ranking"].apply(ast.literal_eval)  # String of list to list
    df["contest_count"] = df["ranking"].apply(len)  # Number of contests attended
    # Avg placement across all contests; NaN when the user has no placements,
    # so an empty list cannot raise ZeroDivisionError.
    df["avg_ranking"] = df["ranking"].apply(
        lambda ranks: round(sum(ranks) / len(ranks), 2) if ranks else float("nan")
    )

    # Process the user column
    df["user"] = df["user"].apply(ast.literal_eval)  # String of dictionary to dictionary
    df["username"] = df["user"].apply(lambda user: user["username"])
    # Normalise an empty-string country to None so the fillna() below also
    # labels it "Unknown" instead of leaving a blank cell in the output.
    df["country"] = df["user"].apply(
        lambda user: user["profile"]["countryName"] or None
    )

    df.loc[df["dataRegion"] == "CN", "country"] = "China"  # Fill country for CN users
    df["country"] = df["country"].fillna("Unknown")
    df = df.drop(columns=["ranking", "user", "dataRegion"])  # Unnecessary columns
    df = df.rename(
        columns={  # Rename columns for readability
            "currentRating": "rating",
            "currentGlobalRanking": "global_ranking",
        }
    )

    output_path = f"{OUTPUT_PATH}/processed/sample_contest_ranking.csv"
    df.to_csv(output_path, index=False)
    return output_path
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
rating,global_ranking,contest_count,avg_ranking,username,country
2+
3702.788,1,26,78.73,fjzzq2002,Unknown
3+
3686.191,2,51,11.67,neal_wu,United States
4+
3644.841,3,84,114.94,Yawn_Sean,Unknown
5+
3611.476,4,107,675.59,ahmed007boss,China
6+
3599.473,5,146,32.77,liming-v,China
7+
3589.337,6,100,80.8,numb3r5,Australia
8+
3559.136681812592,7,81,26.32,getnaukri,China
9+
3539.254,8,131,80.24,Allink,China
10+
3513.227,9,96,42.96,jonathanirvings,Singapore
11+
3506.396,10,88,188.4,PurpleCrayon,Unknown
12+
3499.5468133734294,11,61,143.89,hankray,China
13+
3491.947685601129,12,287,28.04,zhoupeiyun,China
14+
3490.399929174775,13,435,28.76,uwi,Japan
15+
3483.996,14,58,371.31,dnialh,Unknown
16+
3471.487289120698,15,254,119.02,Carefreejs,China
17+
3465.9395323734766,16,30,94.23,HazemAllaham,China
18+
3453.15,17,65,100.89,fmota,Brazil
19+
3443.203,18,40,44.05,pandaforever,Unknown
20+
3425.997,19,52,68.1,xiaowuc1,United States
21+
3420.064807037784,20,241,54.24,cbesalke,China
22+
3416.8420075237727,21,285,39.85,LayCurse,Japan
23+
3406.361,22,39,13.08,Nahin_Imtiaz,China
24+
3401.816,23,82,279.73,qeetcode,United States
25+
3388.898,24,73,300.1,Shandse40,China
26+
3365.111,25,24,60.33,wwwwodddd,Hong Kong
27+
3357.485,26,121,164.1,TheWander,China
28+
3353.933,27,60,162.93,_kevinyang,Canada
29+
3344.044,28,183,70.73,emthrm,Japan
30+
3343.379,29,250,145.03,MisterSoandSo,China
31+
3328.122844128604,30,128,89.97,nek23,China
32+
3324.107,31,30,243.57,dong_liu,Unknown
33+
3323.774090494792,32,178,240.57,Kude,Japan
34+
3322.758173751024,33,233,110.02,hank55663,Taiwan
35+
3322.255,34,101,148.41,penguinhacker,Unknown
36+
3321.355,35,201,331.43,zeusmx,China
37+
3320.449,36,28,60.75,thenymphsofdelphi,Vietnam
38+
3313.144,37,108,1196.36,usephysics,United States
39+
3312.286,38,31,28.0,tmwilliamlin168,Taiwan
40+
3299.177,39,63,153.84,physics0523,Japan
41+
3299.08342635828,40,95,208.75,user8991p,China
42+
3288.252878338263,41,321,327.45,DylanSmith,United States
43+
3274.685,42,27,75.52,satyajeetramnit2708,China
44+
3271.938,43,100,138.13,bucker,China
45+
3266.122379466448,44,32,89.59,mohfadhil,China
46+
3265.907,45,249,38.24,natsugiri,Japan
47+
3259.713798853988,46,42,47.1,waakaaka,United States
48+
3253.248,47,56,103.54,manohar.morthala,China
49+
3246.403,48,62,531.66,tabr,Japan
50+
3242.2504358995225,49,180,216.44,yeongmoj94,China
51+
3237.564,50,90,45.64,hitonanode,Japan
52+
3225.4561630692333,51,33,98.82,user9068ZB,China
53+
3224.907,52,29,110.38,bucketpotato,United States
54+
3222.663,53,139,282.5,Aging,China
55+
3217.72,54,78,106.04,rfpermen,Indonesia
56+
3212.0372236719954,55,126,99.12,AntonRaichuk,Ukraine
57+
3208.421651097493,56,115,511.95,ethanrao,Canada
58+
3205.7186512673547,57,69,228.14,xborui,China
59+
3204.438388180618,58,66,83.42,xy12138,China
60+
3200.465,59,46,73.67,kirika-comp,Japan
61+
3199.485,60,19,52.26,0suoicsnocbus,United States
62+
3198.6053360039214,61,203,59.3,lympanda,
63+
3194.901,62,32,122.59,karate,China
64+
3193.409,63,97,51.64,dreamoon,Taiwan
65+
3192.7024219753944,64,65,202.6,queerAsfolk,China
66+
3190.817,65,25,168.36,plasmatic,Canada
67+
3179.936,66,32,54.41,bilyhurington,United States
68+
3175.1857548074604,67,127,111.23,sheepforever,Unknown
69+
3172.89,68,31,82.55,skpeng,China
70+
3167.005,69,18,121.44,tarek,China
71+
3163.808,70,83,73.92,Poojash18,China
72+
3162.558726573141,71,55,93.24,arignote,Unknown
73+
3161.791128608204,72,76,364.78,green_pig,Unknown
74+
3160.527,73,123,431.28,MichaelPengDeveloper,China
75+
3158.803,74,79,158.65,flashmt,Vietnam
76+
3158.2717621388656,75,186,90.09,hunt67241,China
77+
3152.792,76,45,146.6,gdkat,China
78+
3147.695,77,127,129.2,klion26,China
79+
3141.094,78,9,24.44,pathetic-dog,India
80+
3139.6825661078765,79,247,120.54,Ma_Lin,Unknown
81+
3132.771,80,101,198.26,lachy_136,China
82+
3130.3701251085067,81,157,83.43,cmagnemi,China
83+
3129.972,82,23,132.65,mayur0055,China
84+
3127.495,83,52,353.65,vince1114,China
85+
3126.75,84,27,76.22,smax,Unknown
86+
3125.032938054419,85,28,147.71,kaikaikaikaikai,Kazakhstan
87+
3124.026,86,148,298.89,369488685,China
88+
3120.757576705368,87,343,49.34,doooo,China
89+
3115.105815712118,88,152,285.92,stany,China
90+
3114.46,89,16,81.88,shyamjha12,China
91+
3113.218,90,8,17.38,moransky,Hong Kong
92+
3112.070798859508,91,78,184.59,sqqqqqq,China
93+
3109.803,92,64,233.69,friedall,Hong Kong
94+
3107.668,93,31,65.26,theone7,China
95+
3105.171408426144,94,339,171.35,tiankonguse001,China
96+
3104.0517255618247,95,175,1528.51,danielchandg,United States
97+
3103.591,96,139,1072.99,R3mix,United States
98+
3100.278439309714,97,194,92.49,skywalkert,China
99+
3093.692,98,231,134.0,cai_lw,United States
100+
3093.6476224904977,99,329,153.83,kmjp,Unknown
101+
3091.155,100,30,90.57,youtube_aryanc403,India

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /