Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d8410c9

Browse files
committed
Minimize network access
This implements a caching mechanism with a 15-minute expiry for upstream commits. It adds rate limit detection and exponential back-off for GitHub API calls, prioritizing git ls-remote over web scraping to reduce network overhead. Change-Id: I40e3bab73a0351ebcbeecd56d41ca570b54d415b
1 parent b979dab commit d8410c9

File tree

1 file changed

+187
-47
lines changed

1 file changed

+187
-47
lines changed

‎scripts/check-repo.sh‎

Lines changed: 187 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,33 @@
11
#!/usr/bin/env bash
22

3+
# Parse command line arguments
4+
FORCE_REFRESH=false
5+
QUIET_MODE=false
6+
while [[ $# -gt 0 ]]; do
7+
case "$1" in
8+
--force-refresh|-f)
9+
FORCE_REFRESH=true
10+
shift
11+
;;
12+
--quiet|-q)
13+
QUIET_MODE=true
14+
shift
15+
;;
16+
--help|-h)
17+
echo "Usage: $0 [--force-refresh|-f] [--quiet|-q] [--help|-h]"
18+
echo " --force-refresh, -f Force refresh of cached data"
19+
echo " --quiet, -q Suppress progress and informational output"
20+
echo " --help, -h Show this help message"
21+
exit 0
22+
;;
23+
*)
24+
echo "Unknown option: $1"
25+
echo "Use --help for usage information"
26+
exit 1
27+
;;
28+
esac
29+
done
30+
331
# Ensure that the common script exists and is readable, then verify it has no
432
# syntax errors and defines the required function.
533
common_script="$(dirname "$0")/common.sh"
@@ -12,6 +40,22 @@ set_colors
1240

1341
check_github_actions
1442

43+
# Override progress function if in quiet mode
44+
if [ "$QUIET_MODE" = true ]; then
45+
progress() {
46+
# Do nothing in quiet mode
47+
:
48+
}
49+
fi
50+
51+
# Cache configuration
52+
CACHE_DIR="$HOME/.cache/lab0-c"
53+
CACHE_FILE="$CACHE_DIR/upstream_commit"
54+
CACHE_EXPIRY=900 # Cache for 15 minutes (in seconds)
55+
56+
# Create cache directory if it doesn't exist
57+
mkdir -p "$CACHE_DIR"
58+
1559
TOTAL_STEPS=6
1660
CURRENT_STEP=0
1761

@@ -46,14 +90,65 @@ fi
4690
((CURRENT_STEP++))
4791
progress "$CURRENT_STEP" "$TOTAL_STEPS"
4892

49-
# Generate a random integer in [0..999].
50-
random_ms=$((RANDOM % 1000))
93+
# Check if cache exists and is still valid
94+
use_cache=false
95+
if [ "$FORCE_REFRESH" = true ]; then
96+
if [ "$QUIET_MODE" = false ]; then
97+
printf "\r%80s\r" " "
98+
echo "Force refresh requested. Clearing cache..."
99+
fi
100+
rm -f "$CACHE_FILE" "$RATE_LIMIT_FILE"
101+
elif [ -f "$CACHE_FILE" ]; then
102+
cache_age=$(($(date +%s) - $(stat -f %m "$CACHE_FILE" 2>/dev/null || stat -c %Y "$CACHE_FILE" 2>/dev/null || echo 0)))
103+
if [ "$cache_age" -lt "$CACHE_EXPIRY" ]; then
104+
upstream_hash=$(cat "$CACHE_FILE")
105+
if [ -n "$upstream_hash" ]; then
106+
use_cache=true
107+
if [ "$QUIET_MODE" = false ]; then
108+
printf "\r%80s\r" " "
109+
echo "Using cached upstream commit (${cache_age}s old, expires in $((CACHE_EXPIRY - cache_age))s)"
110+
fi
111+
fi
112+
else
113+
if [ "$QUIET_MODE" = false ]; then
114+
printf "\r%80s\r" " "
115+
echo "Cache expired (${cache_age}s old). Refreshing..."
116+
fi
117+
fi
118+
fi
119+
120+
# Only sleep and fetch if not using cache
121+
if [ "$use_cache" = false ]; then
122+
# Generate a random integer in [0..999].
123+
random_ms=$((RANDOM % 1000))
124+
125+
# Add exponential backoff if we've been rate limited recently
126+
RATE_LIMIT_FILE="$CACHE_DIR/rate_limited"
127+
if [ -f "$RATE_LIMIT_FILE" ]; then
128+
last_limited=$(($(date +%s) - $(stat -f %m "$RATE_LIMIT_FILE" 2>/dev/null || stat -c %Y "$RATE_LIMIT_FILE" 2>/dev/null || echo 0)))
129+
if [ "$last_limited" -lt 300 ]; then # If rate limited in last 5 minutes
130+
random_ms=$((random_ms + 2000)) # Add 2 seconds
131+
if [ "$QUIET_MODE" = false ]; then
132+
printf "\r%80s\r" " "
133+
echo "Rate limit detected. Adding delay..."
134+
fi
135+
fi
136+
fi
137+
138+
# Convert that to a decimal of the form 0.xxx so that 'sleep' interprets it as seconds.
139+
# e.g., if random_ms is 5, we convert that to 0.005 (i.e. 5 ms).
140+
# Use printf for portability (bc might not be installed)
141+
sleep_time="0.$(printf "%03d" "$((random_ms % 1000))")"
51142

52-
# Convert that to a decimal of the form 0.xxx so that 'sleep' interprets it as seconds.
53-
# e.g., if random_ms is 5, we convert that to 0.005 (i.e. 5 ms).
54-
sleep_time="0.$(printf "%03d" "$random_ms")"
143+
# For delays > 1 second, handle separately
144+
if [ "$random_ms" -ge 1000 ]; then
145+
sleep_seconds=$((random_ms / 1000))
146+
sleep_ms=$((random_ms % 1000))
147+
sleep_time="${sleep_seconds}.$(printf "%03d" "$sleep_ms")"
148+
fi
55149

56-
sleep "$sleep_time"
150+
sleep "$sleep_time"
151+
fi
57152

58153
# 2. Fetch latest commit from GitHub
59154
((CURRENT_STEP++))
@@ -62,53 +157,95 @@ progress "$CURRENT_STEP" "$TOTAL_STEPS"
62157
REPO_OWNER=$(git config -l | grep -w remote.origin.url | sed -E 's%^.*github.com[/:]([^/]+)/lab0-c.*%\1%')
63158
REPO_NAME="lab0-c"
64159

65-
repo_html=$(curl -s "https://github.com/${REPO_OWNER}/${REPO_NAME}")
66-
67-
# Extract the default branch name from data-default-branch="..."
68-
DEFAULT_BRANCH=$(echo "$repo_html" | sed -nE "s#.*${REPO_OWNER}/${REPO_NAME}/blob/([^/]+)/LICENSE.*#\1#p" | head -n 1)
160+
# Only fetch from network if not using cache
161+
if [ "$use_cache" = false ]; then
162+
# First try using git ls-remote (much faster and less likely to be rate limited)
163+
if [ "$QUIET_MODE" = false ]; then
164+
printf "\r%80s\r" " "
165+
echo "Checking upstream repository..."
166+
fi
167+
upstream_hash=$(git ls-remote --heads origin master 2>/dev/null | cut -f1)
69168

70-
if [ "$DEFAULT_BRANCH" != "master" ]; then
71-
echo "$DEFAULT_BRANCH"
72-
throw "The default branch for $REPO_OWNER/$REPO_NAME is not 'master'."
73-
fi
169+
# If git ls-remote fails or returns empty, fall back to web scraping
170+
if [ -z "$upstream_hash" ]; then
171+
if [ "$QUIET_MODE" = false ]; then
172+
printf "\r%80s\r" " "
173+
echo "git ls-remote failed. Falling back to web scraping..."
174+
fi
74175

75-
# Construct the URL to the commits page for the default branch
76-
COMMITS_URL="https://github.com/${REPO_OWNER}/${REPO_NAME}/commits/${DEFAULT_BRANCH}"
77-
78-
temp_file=$(mktemp)
79-
curl -sSL -o "$temp_file" "$COMMITS_URL"
80-
81-
# general grep pattern that finds commit links
82-
upstream_hash=$(
83-
sed -nE 's/.*href="[^"]*\/commit\/([0-9a-f]{40}).*/\1/p' "$temp_file" | head -n 1
84-
)
85-
86-
rm -f "$temp_file"
87-
88-
# If HTML parsing fails, fallback to using GitHub REST API
89-
if [ -z "$upstream_hash" ]; then
90-
API_URL="https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/commits"
91-
92-
# Try to use cached GitHub credentials from GitHub CLI
93-
# https://docs.github.com/en/get-started/git-basics/caching-your-github-credentials-in-git
94-
if command -v gh >/dev/null 2>&1; then
95-
TOKEN=$(gh auth token 2>/dev/null)
96-
if [ -n "$TOKEN" ]; then
97-
response=$(curl -sSL -H "Authorization: token $TOKEN" "$API_URL")
176+
# Add User-Agent header to avoid being blocked
177+
USER_AGENT="Mozilla/5.0 (compatible; lab0-c-checker/1.0)"
178+
179+
# Try with rate limit detection
180+
repo_html=$(curl -s -w "\n%{http_code}" -H "User-Agent: $USER_AGENT" "https://github.com/${REPO_OWNER}/${REPO_NAME}")
181+
http_code=$(echo "$repo_html" | tail -n 1)
182+
repo_html=$(echo "$repo_html" | sed '$d')
183+
184+
# Check for rate limiting (HTTP 429 or 403)
185+
if [ "$http_code" = "429" ] || [ "$http_code" = "403" ]; then
186+
touch "$RATE_LIMIT_FILE"
187+
if [ "$QUIET_MODE" = false ]; then
188+
printf "\r%80s\r" " "
189+
echo "GitHub rate limit detected (HTTP $http_code). Using fallback..."
190+
fi
191+
192+
# Try to use last known good commit from git log
193+
upstream_hash=$(git ls-remote origin master 2>/dev/null | cut -f1)
194+
if [ -z "$upstream_hash" ]; then
195+
throw "Rate limited by GitHub and no fallback available. Please try again later."
196+
fi
197+
else
198+
# Extract the default branch name from data-default-branch="..."
199+
DEFAULT_BRANCH=$(echo "$repo_html" | sed -nE "s#.*${REPO_OWNER}/${REPO_NAME}/blob/([^/]+)/LICENSE.*#\1#p" | head -n 1)
200+
201+
if [ "$DEFAULT_BRANCH" != "master" ]; then
202+
echo "$DEFAULT_BRANCH"
203+
throw "The default branch for $REPO_OWNER/$REPO_NAME is not 'master'."
204+
fi
205+
206+
# Construct the URL to the commits page for the default branch
207+
COMMITS_URL="https://github.com/${REPO_OWNER}/${REPO_NAME}/commits/${DEFAULT_BRANCH}"
208+
209+
temp_file=$(mktemp)
210+
curl -sSL -H "User-Agent: $USER_AGENT" -o "$temp_file" "$COMMITS_URL"
211+
212+
# general grep pattern that finds commit links
213+
upstream_hash=$(
214+
sed -nE 's/.*href="[^"]*\/commit\/([0-9a-f]{40}).*/\1/p' "$temp_file" | head -n 1
215+
)
216+
217+
rm -f "$temp_file"
218+
219+
# If HTML parsing fails, fallback to using GitHub REST API
220+
if [ -z "$upstream_hash" ]; then
221+
API_URL="https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/commits"
222+
223+
# Try to use cached GitHub credentials from GitHub CLI
224+
# https://docs.github.com/en/get-started/git-basics/caching-your-github-credentials-in-git
225+
if command -v gh >/dev/null 2>&1; then
226+
TOKEN=$(gh auth token 2>/dev/null)
227+
if [ -n "$TOKEN" ]; then
228+
response=$(curl -sSL -H "Authorization: token $TOKEN" -H "User-Agent: $USER_AGENT" "$API_URL")
229+
fi
230+
fi
231+
232+
# If response is empty (i.e. token not available or failed), use unauthenticated request.
233+
if [ -z "$response" ]; then
234+
response=$(curl -sSL -H "User-Agent: $USER_AGENT" "$API_URL")
235+
fi
236+
237+
# Extract the latest commit SHA from the JSON response
238+
upstream_hash=$(echo "$response" | grep -m 1 '"sha":' | sed -E 's/.*"sha": "([^"]+)".*/\1/')
239+
fi
98240
fi
99241
fi
100242

101-
# If response is empty (i.e. token not available or failed), use unauthenticated request.
102-
if [ -z "$response" ]; then
103-
response=$(curl -sSL "$API_URL")
243+
if [ -z "$upstream_hash" ]; then
244+
throw "Failed to retrieve upstream commit hash from GitHub.\n"
104245
fi
105246

106-
# Extract the latest commit SHA from the JSON response
107-
upstream_hash=$(echo "$response" | grep -m 1 '"sha":' | sed -E 's/.*"sha": "([^"]+)".*/\1/')
108-
fi
109-
110-
if [ -z "$upstream_hash" ]; then
111-
throw "Failed to retrieve upstream commit hash from GitHub.\n"
247+
# Cache the result
248+
echo "$upstream_hash" > "$CACHE_FILE"
112249
fi
113250

114251
# 3. Check local repository awareness
@@ -167,6 +304,9 @@ if [ $failed -ne 0 ]; then
167304
exit 1
168305
fi
169306

170-
echo "Fingerprint: $(make_random_string 24 "$REPO_OWNER")"
307+
if [ "$QUIET_MODE" = false ]; then
308+
printf "\r%80s\r" " "
309+
echo "Fingerprint: $(make_random_string 24 "$REPO_OWNER")"
310+
fi
171311

172312
exit 0

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /