Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d228c47

Browse files
idk
1 parent 4561fc1 commit d228c47

File tree

2 files changed

+88
-69
lines changed

2 files changed

+88
-69
lines changed

‎src/gitingest/clone.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99

1010
from gitingest.config import DEFAULT_TIMEOUT
1111
from gitingest.utils.git_utils import (
12-
_add_token_to_url,
1312
check_repo_exists,
1413
checkout_partial_clone,
1514
create_git_repo,
1615
ensure_git_installed,
16+
git_auth_context,
1717
is_github_host,
1818
resolve_commit,
1919
)
@@ -86,12 +86,7 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
8686
commit = await resolve_commit(config, token=token)
8787
logger.debug("Resolved commit", extra={"commit": commit})
8888

89-
# Prepare URL with authentication if needed
90-
clone_url = url
91-
if token and is_github_host(url):
92-
clone_url = _add_token_to_url(url, token)
93-
94-
# Clone the repository using GitPython
89+
# Clone the repository using GitPython with proper authentication
9590
logger.info("Executing git clone operation", extra={"url": "<redacted>", "local_path": local_path})
9691
try:
9792
clone_kwargs = {
@@ -100,17 +95,20 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
10095
"depth": 1,
10196
}
10297

103-
if partial_clone:
104-
# GitPython doesn't directly support --filter and --sparse in clone
105-
# We'll need to use git.Git() for the initial clone with these options
106-
git_cmd = git.Git()
107-
cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
98+
with git_auth_context(url, token) as (git_cmd, auth_url):
10899
if partial_clone:
100+
# For partial clones, use git.Git() with filter and sparse options
101+
cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
109102
cmd_args.extend(["--filter=blob:none", "--sparse"])
110-
cmd_args.extend([clone_url, local_path])
111-
git_cmd.clone(*cmd_args)
112-
else:
113-
git.Repo.clone_from(clone_url, local_path, **clone_kwargs)
103+
cmd_args.extend([auth_url, local_path])
104+
git_cmd.clone(*cmd_args)
105+
elif token and is_github_host(url):
106+
# For authenticated GitHub repos, use git_cmd with auth URL
107+
cmd_args = ["--single-branch", "--no-checkout", "--depth=1", auth_url, local_path]
108+
git_cmd.clone(*cmd_args)
109+
else:
110+
# For non-authenticated repos, use the standard GitPython method
111+
git.Repo.clone_from(url, local_path, **clone_kwargs)
114112

115113
logger.info("Git clone completed successfully")
116114
except git.GitCommandError as exc:

‎src/gitingest/utils/git_utils.py

Lines changed: 74 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import os
88
import re
99
import sys
10+
from contextlib import contextmanager
1011
from pathlib import Path
11-
from typing import TYPE_CHECKING, Final, Iterable
12+
from typing import TYPE_CHECKING, Final, Generator, Iterable
1213
from urllib.parse import urlparse, urlunparse
1314

1415
import git
@@ -217,13 +218,6 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
217218

218219
# Use GitPython to get remote references
219220
try:
220-
git_cmd = git.Git()
221-
222-
# Prepare authentication if needed
223-
if token and is_github_host(url):
224-
auth_url = _add_token_to_url(url, token)
225-
url = auth_url
226-
227221
fetch_tags = ref_type == "tags"
228222
to_fetch = "tags" if fetch_tags else "heads"
229223

@@ -233,8 +227,11 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
233227
cmd_args.append("--refs") # Filter out peeled tag objects
234228
cmd_args.append(url)
235229

236-
# Run the command using git_cmd.ls_remote() method
237-
output = git_cmd.ls_remote(*cmd_args)
230+
# Run the command with proper authentication
231+
with git_auth_context(url, token) as (git_cmd, auth_url):
232+
# Replace the URL in cmd_args with the authenticated URL
233+
cmd_args[-1] = auth_url # URL is the last argument
234+
output = git_cmd.ls_remote(*cmd_args)
238235

239236
# Parse output
240237
return [
@@ -318,6 +315,70 @@ def create_git_auth_header(token: str, url: str = "https://github.com") -> str:
318315
return f"http.https://{hostname}/.extraheader=Authorization: Basic {basic}"
319316

320317

318+
def create_authenticated_url(url: str, token: str | None = None) -> str:
319+
"""Create an authenticated URL for Git operations.
320+
321+
This is the safest approach for multi-user environments - no global state.
322+
323+
Parameters
324+
----------
325+
url : str
326+
The repository URL.
327+
token : str | None
328+
GitHub personal access token (PAT) for accessing private repositories.
329+
330+
Returns
331+
-------
332+
str
333+
The URL with authentication embedded (for GitHub) or original URL.
334+
335+
"""
336+
if not (token and is_github_host(url)):
337+
return url
338+
339+
parsed = urlparse(url)
340+
# Embed the token in the URL userinfo as the password, with "x-oauth-basic" as the username (GitHub accepts this)
341+
netloc = f"x-oauth-basic:{token}@{parsed.hostname}"
342+
if parsed.port:
343+
netloc += f":{parsed.port}"
344+
345+
return urlunparse(
346+
(
347+
parsed.scheme,
348+
netloc,
349+
parsed.path,
350+
parsed.params,
351+
parsed.query,
352+
parsed.fragment,
353+
),
354+
)
355+
356+
357+
@contextmanager
358+
def git_auth_context(url: str, token: str | None = None) -> Generator[tuple[git.Git, str]]:
359+
"""Context manager that provides Git command and authenticated URL.
360+
361+
Returns both a Git command object and the authenticated URL to use.
362+
This avoids any global state contamination between users.
363+
364+
Parameters
365+
----------
366+
url : str
367+
The repository URL to check if authentication is needed.
368+
token : str | None
369+
GitHub personal access token (PAT) for accessing private repositories.
370+
371+
Yields
372+
------
373+
tuple[git.Git, str]
374+
Tuple of (Git command object, authenticated URL to use).
375+
376+
"""
377+
git_cmd = git.Git()
378+
auth_url = create_authenticated_url(url, token)
379+
yield git_cmd, auth_url
380+
381+
321382
def validate_github_token(token: str) -> None:
322383
"""Validate the format of a GitHub Personal Access Token.
323384
@@ -419,15 +480,9 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None)
419480
420481
"""
421482
try:
422-
git_cmd = git.Git()
423-
424-
# Prepare authentication if needed
425-
auth_url = url
426-
if token and is_github_host(url):
427-
auth_url = _add_token_to_url(url, token)
428-
429-
# Execute ls-remote command
430-
output = git_cmd.ls_remote(auth_url, pattern)
483+
# Execute ls-remote command with proper authentication
484+
with git_auth_context(url, token) as (git_cmd, auth_url):
485+
output = git_cmd.ls_remote(auth_url, pattern)
431486
lines = output.splitlines()
432487

433488
sha = _pick_commit_sha(lines)
@@ -475,37 +530,3 @@ def _pick_commit_sha(lines: Iterable[str]) -> str | None:
475530
first_non_peeled = sha
476531

477532
return first_non_peeled # branch or lightweight tag (or None)
478-
479-
480-
def _add_token_to_url(url: str, token: str) -> str:
481-
"""Add authentication token to GitHub URL.
482-
483-
Parameters
484-
----------
485-
url : str
486-
The original GitHub URL.
487-
token : str
488-
The GitHub token to add.
489-
490-
Returns
491-
-------
492-
str
493-
The URL with embedded authentication.
494-
495-
"""
496-
parsed = urlparse(url)
497-
# Add token as username in URL (GitHub supports this)
498-
netloc = f"x-oauth-basic:{token}@{parsed.hostname}"
499-
if parsed.port:
500-
netloc += f":{parsed.port}"
501-
502-
return urlunparse(
503-
(
504-
parsed.scheme,
505-
netloc,
506-
parsed.path,
507-
parsed.params,
508-
parsed.query,
509-
parsed.fragment,
510-
),
511-
)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /