Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 932bfef

Browse files
ix-56h and NicolasIRAGNE authored
fix: remove logarithm conversion from the backend and correctly process max file size in kb (#464)
Co-authored-by: Nicolas Iragne <nicoragne@hotmail.fr>
1 parent 05b44d9 commit 932bfef

File tree

9 files changed

+58
-47
lines changed

9 files changed

+58
-47
lines changed

‎src/gitingest/schemas/ingestion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
4141
tag : str | None
4242
The tag of the repository.
4343
max_file_size : int
44-
The maximum file size to ingest (default: 10 MB).
44+
The maximum file size to ingest in bytes (default: 10 MB).
4545
ignore_patterns : set[str]
4646
The patterns to ignore (default: ``set()``).
4747
include_patterns : set[str] | None

‎src/server/models.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@
33
from __future__ import annotations
44

55
from enum import Enum
6-
from typing import Union
6+
from typing import TYPE_CHECKING, Union
77

88
from pydantic import BaseModel, Field, field_validator
99

1010
from gitingest.utils.compat_func import removesuffix
11+
from server.server_config import MAX_FILE_SIZE_KB
1112

1213
# needed for type checking (pydantic)
13-
from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import)
14+
if TYPE_CHECKING:
15+
from server.form_types import IntForm, OptStrForm, StrForm
1416

1517

1618
class PatternType(str, Enum):
@@ -39,7 +41,7 @@ class IngestRequest(BaseModel):
3941
"""
4042

4143
input_text: str = Field(..., description="Git repository URL or slug to ingest")
42-
max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)")
44+
max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB")
4345
pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering")
4446
pattern: str = Field(default="", description="Glob/regex pattern for file filtering")
4547
token: str | None = Field(default=None, description="GitHub PAT for private repositories")

‎src/server/query_processor.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType
1414
from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3
1515
from server.server_config import MAX_DISPLAY_SIZE
16-
from server.server_utils import Colors, log_slider_to_size
16+
from server.server_utils import Colors
1717

1818

1919
async def process_query(
2020
input_text: str,
21-
slider_position: int,
21+
max_file_size: int,
2222
pattern_type: PatternType,
2323
pattern: str,
2424
token: str | None = None,
@@ -32,8 +32,8 @@ async def process_query(
3232
----------
3333
input_text : str
3434
Input text provided by the user, typically a Git repository URL or slug.
35-
slider_position : int
36-
Position of the slider, representing the maximum file size in the query.
35+
max_file_size : int
36+
Max file size in KB to be included in the digest.
3737
pattern_type : PatternType
3838
Type of pattern to use (either "include" or "exclude")
3939
pattern : str
@@ -55,8 +55,6 @@ async def process_query(
5555
if token:
5656
validate_github_token(token)
5757

58-
max_file_size = log_slider_to_size(slider_position)
59-
6058
try:
6159
query = await parse_remote_repo(input_text, token=token)
6260
except Exception as exc:
@@ -65,7 +63,7 @@ async def process_query(
6563
return IngestErrorResponse(error=str(exc))
6664

6765
query.url = cast("str", query.url)
68-
query.max_file_size = max_file_size
66+
query.max_file_size = max_file_size*1024# Convert to bytes since we currently use KB in higher levels
6967
query.ignore_patterns, query.include_patterns = process_patterns(
7068
exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None,
7169
include_patterns=pattern if pattern_type == PatternType.INCLUDE else None,
@@ -142,7 +140,7 @@ async def process_query(
142140
digest_url=digest_url,
143141
tree=tree,
144142
content=content,
145-
default_max_file_size=slider_position,
143+
default_max_file_size=max_file_size,
146144
pattern_type=pattern_type,
147145
pattern=pattern,
148146
)

‎src/server/routers/ingest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from server.models import IngestRequest
1212
from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion
1313
from server.s3_utils import is_s3_enabled
14-
from server.server_config import MAX_DISPLAY_SIZE
14+
from server.server_config import DEFAULT_FILE_SIZE_KB
1515
from server.server_utils import limiter
1616

1717
ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"])
@@ -58,7 +58,7 @@ async def api_ingest_get(
5858
request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
5959
user: str,
6060
repository: str,
61-
max_file_size: int = MAX_DISPLAY_SIZE,
61+
max_file_size: int = DEFAULT_FILE_SIZE_KB,
6262
pattern_type: str = "exclude",
6363
pattern: str = "",
6464
token: str = "",
@@ -74,7 +74,7 @@ async def api_ingest_get(
7474
- **repository** (`str`): GitHub repository name
7575
7676
**Query Parameters**
77-
- **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB)
77+
- **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB)
7878
- **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude")
7979
- **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "")
8080
- **token** (`str`, optional): GitHub personal access token for private repositories (default: "")

‎src/server/routers_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ async def _perform_ingestion(
3333

3434
result = await process_query(
3535
input_text=input_text,
36-
slider_position=max_file_size,
36+
max_file_size=max_file_size,
3737
pattern_type=pattern_type,
3838
pattern=pattern,
3939
token=token,

‎src/server/server_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour)
1111

1212
# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
13-
MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB
14-
MAX_SLIDER_POSITION: int = 500# Maximum slider position
13+
DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 MB
14+
MAX_FILE_SIZE_KB: int = 100*1024# 100 MB
1515

1616
EXAMPLE_REPOS: list[dict[str, str]] = [
1717
{"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"},

‎src/server/server_utils.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Utility functions for the server."""
22

33
import asyncio
4-
import math
54
import shutil
65
import time
76
from contextlib import asynccontextmanager, suppress
@@ -15,7 +14,7 @@
1514
from slowapi.util import get_remote_address
1615

1716
from gitingest.config import TMP_BASE_PATH
18-
from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION
17+
from server.server_config import DELETE_REPO_AFTER
1918

2019
# Initialize a rate limiter
2120
limiter = Limiter(key_func=get_remote_address)
@@ -161,24 +160,6 @@ def _append_line(path: Path, line: str) -> None:
161160
fp.write(f"{line}\n")
162161

163162

164-
def log_slider_to_size(position: int) -> int:
165-
"""Convert a slider position to a file size in bytes using a logarithmic scale.
166-
167-
Parameters
168-
----------
169-
position : int
170-
Slider position ranging from 0 to 500.
171-
172-
Returns
173-
-------
174-
int
175-
File size in bytes corresponding to the slider position.
176-
177-
"""
178-
maxv = math.log(MAX_FILE_SIZE_KB)
179-
return round(math.exp(maxv * pow(position / MAX_SLIDER_POSITION, 1.5))) * 1024
180-
181-
182163
## Color printing utility
183164
class Colors:
184165
"""ANSI color codes."""

‎src/server/templates/components/git_form.jinja

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@
7676
</label>
7777
<input type="range"
7878
id="file_size"
79-
name="max_file_size"
80-
min="0"
79+
min="1"
8180
max="500"
8281
required
8382
value="{{ default_max_file_size }}"
8483
class="w-full h-3 bg-[#FAFAFA] bg-no-repeat bg-[length:50%_100%] bg-[#ebdbb7] appearance-none border-[3px] border-gray-900 rounded-sm focus:outline-none bg-gradient-to-r from-[#FE4A60] to-[#FE4A60] [&::-webkit-slider-thumb]:w-5 [&::-webkit-slider-thumb]:h-7 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:bg-white [&::-webkit-slider-thumb]:rounded-sm [&::-webkit-slider-thumb]:cursor-pointer [&::-webkit-slider-thumb]:border-solid [&::-webkit-slider-thumb]:border-[3px] [&::-webkit-slider-thumb]:border-gray-900 [&::-webkit-slider-thumb]:shadow-[3px_3px_0_#000]">
84+
<input type="hidden" id="max_file_size_kb" name="max_file_size" value="">
8585
</div>
8686
<!-- PAT checkbox with PAT field below -->
8787
<div class="flex flex-col items-start w-full sm:col-span-2 lg:col-span-1 lg:row-span-2 lg:pt-3.5">

‎src/static/js/utils.js

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,13 @@ function collectFormData(form) {
126126
const json_data = {};
127127
const inputText = form.querySelector('[name="input_text"]');
128128
const token = form.querySelector('[name="token"]');
129-
const slider = document.getElementById('file_size');
129+
const hiddenInput = document.getElementById('max_file_size_kb');
130130
const patternType = document.getElementById('pattern_type');
131131
const pattern = document.getElementById('pattern');
132132

133133
if (inputText) {json_data.input_text = inputText.value;}
134134
if (token) {json_data.token = token.value;}
135-
if (slider) {json_data.max_file_size = slider.value;}
135+
if (hiddenInput) {json_data.max_file_size = hiddenInput.value;}
136136
if (patternType) {json_data.pattern_type = patternType.value;}
137137
if (pattern) {json_data.pattern = pattern.value;}
138138

@@ -206,6 +206,14 @@ function handleSubmit(event, showLoadingSpinner = false) {
206206

207207
if (!form) {return;}
208208

209+
// Ensure hidden input is updated before collecting form data
210+
const slider = document.getElementById('file_size');
211+
const hiddenInput = document.getElementById('max_file_size_kb');
212+
213+
if (slider && hiddenInput) {
214+
hiddenInput.value = logSliderToSize(slider.value);
215+
}
216+
209217
if (showLoadingSpinner) {
210218
showLoading();
211219
}
@@ -226,12 +234,32 @@ function handleSubmit(event, showLoadingSpinner = false) {
226234
headers: { 'Content-Type': 'application/json' },
227235
body: JSON.stringify(json_data)
228236
})
229-
.then((response) => response.json())
230-
.then( (data) => {
231-
// Hide loading overlay
237+
.then(async (response) => {
238+
let data;
239+
240+
try {
241+
data = await response.json();
242+
} catch {
243+
data = {};
244+
}
232245
setButtonLoadingState(submitButton, false);
233246

234-
// Handle error
247+
if (!response.ok) {
248+
// Show all error details if present
249+
if (Array.isArray(data.detail)) {
250+
const details = data.detail.map((d) => `<li>${d.msg || JSON.stringify(d)}</li>`).join('');
251+
252+
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'><b>Error(s):</b><ul>${details}</ul></div>`);
253+
254+
return;
255+
}
256+
// Other errors
257+
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error || JSON.stringify(data) || 'An error occurred.'}</div>`);
258+
259+
return;
260+
}
261+
262+
// Handle error in data
235263
if (data.error) {
236264
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error}</div>`);
237265

@@ -327,14 +355,16 @@ function logSliderToSize(position) {
327355
function initializeSlider() {
328356
const slider = document.getElementById('file_size');
329357
const sizeValue = document.getElementById('size_value');
358+
const hiddenInput = document.getElementById('max_file_size_kb');
330359

331-
if (!slider || !sizeValue) {return;}
360+
if (!slider || !sizeValue||!hiddenInput) {return;}
332361

333362
function updateSlider() {
334363
const value = logSliderToSize(slider.value);
335364

336365
sizeValue.textContent = formatSize(value);
337366
slider.style.backgroundSize = `${(slider.value / slider.max) * 100}% 100%`;
367+
hiddenInput.value = value; // Set hidden input to KB value
338368
}
339369

340370
// Update on slider change

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /