Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

refactor: Refactor output formatting #467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
NicolasIRAGNE wants to merge 17 commits into main
base: main
Choose a base branch
Loading
from refactor/formatter
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
ab6eb5f
test
NicolasIRAGNE Jul 24, 2025
b82624c
wip before jinja
NicolasIRAGNE Jul 25, 2025
0765f0e
idk
NicolasIRAGNE Jul 25, 2025
78695f9
idk
NicolasIRAGNE Jul 25, 2025
e1f687c
idk
NicolasIRAGNE Jul 27, 2025
0e6c5bf
commit avant la catastrophe
NicolasIRAGNE Jul 27, 2025
6d2941b
commit post catastrophe
NicolasIRAGNE Jul 27, 2025
fcb7e01
wip
NicolasIRAGNE Jul 28, 2025
8e7070d
feat: add GitRepository Type
NicolasIRAGNE Jul 28, 2025
8a46848
fix: clean up Jinja templates for better readability
NicolasIRAGNE Jul 28, 2025
3abac0b
feat: add unit tests for output formatting and enhance file system sc...
NicolasIRAGNE Jul 28, 2025
1f3f3af
feat: extract formatter templates to files and rename Context to Cont...
NicolasIRAGNE Jul 31, 2025
9a717c7
feat: move contextv1 to its own file
NicolasIRAGNE Jul 31, 2025
175d5c2
fix: resolve all pre-commit hook issues and lint warnings
NicolasIRAGNE Jul 31, 2025
6b680c7
typo
NicolasIRAGNE Jul 31, 2025
f4580c0
typo
NicolasIRAGNE Jul 31, 2025
9a4e6f6
wip
NicolasIRAGNE Aug 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ repos:
rev: v1.36.4
hooks:
- id: djlint-reformat-jinja
exclude: ^src/gitingest/format/

- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.45.0
Expand Down
40 changes: 19 additions & 21 deletions src/gitingest/entrypoint.py
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from gitingest.clone import clone_repo
from gitingest.config import MAX_FILE_SIZE
from gitingest.ingestion import ingest_query
from gitingest.output_formatter import DefaultFormatter
from gitingest.query_parser import parse_local_dir_path, parse_remote_repo
from gitingest.utils.auth import resolve_token
from gitingest.utils.compat_func import removesuffix
Expand Down Expand Up @@ -44,12 +45,13 @@ async def ingest_async(
include_submodules: bool = False,
token: str | None = None,
output: str | None = None,
) -> tuple[str, str, str]:
) -> str:
"""Ingest a source and process its contents.

This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
and processes its files according to the specified query parameters. It returns a summary, a tree-like
structure of the files, and the content of the files. The results can optionally be written to an output file.
and processes its files according to the specified query parameters. It returns a single digest string.

The digest is produced by formatting the ContextV1 object returned by ``ingest_query`` with the DefaultFormatter class.

Parameters
----------
Expand Down Expand Up @@ -79,11 +81,8 @@ async def ingest_async(

Returns
-------
tuple[str, str, str]
A tuple containing:
- A summary string of the analyzed repository or directory.
- A tree-like string representation of the file structure.
- The content of the files in the repository or directory.
str
The full digest string.

"""
logger.info("Starting ingestion process", extra={"source": source})
Expand Down Expand Up @@ -138,14 +137,15 @@ async def ingest_async(
_apply_gitignores(query)

logger.info("Processing files and generating output")
summary, tree, content = ingest_query(query)

if output:
logger.debug("Writing output to file", extra={"output_path": output})
await _write_output(tree, content=content, target=output)

context = ingest_query(query)
formatter = DefaultFormatter()
digest = formatter.format(context, context.query)
await _write_output(digest, content=None, target=output)
logger.info("Ingestion completed successfully")
return summary, tree, content
return digest


def ingest(
Expand All @@ -160,12 +160,13 @@ def ingest(
include_submodules: bool = False,
token: str | None = None,
output: str | None = None,
) -> tuple[str, str, str]:
) -> str:
"""Provide a synchronous wrapper around ``ingest_async``.

This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
and processes its files according to the specified query parameters. It returns a summary, a tree-like
structure of the files, and the content of the files. The results can optionally be written to an output file.
and processes its files according to the specified query parameters. It returns a single digest string.

The digest is produced by ``ingest_async``, which formats a ContextV1 object with the DefaultFormatter class.

Parameters
----------
Expand Down Expand Up @@ -195,11 +196,8 @@ def ingest(

Returns
-------
tuple[str, str, str]
A tuple containing:
- A summary string of the analyzed repository or directory.
- A tree-like string representation of the file structure.
- The content of the files in the repository or directory.
str
The full digest string.

See Also
--------
Expand All @@ -208,7 +206,7 @@ def ingest(
"""
return asyncio.run(
ingest_async(
source=source,
source,
max_file_size=max_file_size,
include_patterns=include_patterns,
exclude_patterns=exclude_patterns,
Expand Down
4 changes: 4 additions & 0 deletions src/gitingest/format/DebugFormatter/Source.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ SEPARATOR }}
DEBUG: {{ class_name }}
Fields: {{ fields_str }}
{{ SEPARATOR }}
11 changes: 11 additions & 0 deletions src/gitingest/format/DefaultFormatter/ContextV1.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Generated using https://gitingest.com/{{ source.query.user_name }}/{{ source.query.repo_name }}{{ source.query.subpath }}

Sources used:
{%- for src in source %}
- {{ src.name }}: {{ src.__class__.__name__ }}
{% endfor %}

{%- for src in source.sources %}
{{ formatter.format(src, source.query) }}
{%- endfor %}
# End of https://gitingest.com/{{ source.query.user_name }}/{{ source.query.repo_name }}{{ source.query.subpath }}
7 changes: 7 additions & 0 deletions src/gitingest/format/DefaultFormatter/FileSystemDirectory.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{%- if source.depth == 0 %}{{ source.name }}:
{{ source.tree }}

{% endif -%}
{%- for child in source.children -%}
{{ formatter.format(child, query) }}
{%- endfor -%}
4 changes: 4 additions & 0 deletions src/gitingest/format/DefaultFormatter/FileSystemFile.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ SEPARATOR }}
{{ source.name }}
{{ SEPARATOR }}
{{ source.content }}
3 changes: 3 additions & 0 deletions src/gitingest/format/DefaultFormatter/FileSystemSymlink.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{{ SEPARATOR }}
{{ source.name }}{% if source.target %} -> {{ source.target }}{% endif %}
{{ SEPARATOR }}
7 changes: 7 additions & 0 deletions src/gitingest/format/DefaultFormatter/GitRepository.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{%- if source.depth == 0 %}🔗 Git Repository: {{ source.name }}
{{ source.tree }}

{% endif -%}
{%- for child in source.children -%}
{{ formatter.format(child, query) }}
{%- endfor -%}
5 changes: 5 additions & 0 deletions src/gitingest/format/SummaryFormatter/ContextV1.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Repository: {{ source.query.user_name }}/{{ source.query.repo_name }}
Commit: {{ source.query.commit }}
Files analyzed: {{ source.file_count }}

Estimated tokens: {{ source.token_count }}
2 changes: 2 additions & 0 deletions src/gitingest/format/SummaryFormatter/FileSystemDirectory.j2
View file Open in desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Directory structure:
{{ source.tree }}
90 changes: 52 additions & 38 deletions src/gitingest/ingestion.py
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from typing import TYPE_CHECKING

from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
from gitingest.output_formatter import format_node
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats
from gitingest.schemas import ContextV1, FileSystemNode, FileSystemStats
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink, GitRepository
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
from gitingest.utils.logging_config import get_logger

Expand All @@ -18,12 +18,18 @@
logger = get_logger(__name__)


def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
def _is_git_repository(path: Path) -> bool:
"""Check if a directory contains a .git folder."""
return (path / ".git").exists()


def ingest_query(query: IngestionQuery) -> ContextV1:
"""Run the ingestion process for a parsed query.

This is the main entry point for analyzing a codebase directory or single file. It processes the query
parameters, reads the file or directory content, and generates a summary, directory structure, and file content,
along with token estimations.
This is the main entry point for analyzing a codebase directory or single file.

It processes the query parameters, reads the file or directory content, and returns
a ContextV1 object that can generate the final output digest on demand.

Parameters
----------
Expand All @@ -32,8 +38,10 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:

Returns
-------
tuple[str, str, str]
A tuple containing the summary, directory structure, and file contents.
ContextV1
A ContextV1 object representing the ingested file system nodes.
Use ``DefaultFormatter().format(context, context.query)`` to render the digest
(directory structure and file contents).

Raises
------
Expand Down Expand Up @@ -70,11 +78,8 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:

relative_path = path.relative_to(query.local_path)

file_node = FileSystemNode(
file_node = FileSystemFile(
name=path.name,
type=FileSystemNodeType.FILE,
size=path.stat().st_size,
file_count=1,
path_str=str(relative_path),
path=path,
)
Expand All @@ -91,16 +96,21 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
"file_size": file_node.size,
},
)
return format_node(file_node, query=query)

logger.info("Processing directory", extra={"directory_path": str(path)})
return ContextV1(sources=[file_node], query=query)

root_node = FileSystemNode(
name=path.name,
type=FileSystemNodeType.DIRECTORY,
path_str=str(path.relative_to(query.local_path)),
path=path,
)
# Check if this is a git repository and create appropriate node type
if _is_git_repository(path):
root_node = GitRepository(
name=path.name,
path_str=str(path.relative_to(query.local_path)),
path=path,
)
else:
root_node = FileSystemDirectory(
name=path.name,
path_str=str(path.relative_to(query.local_path)),
path=path,
)

stats = FileSystemStats()

Expand All @@ -117,10 +127,10 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
},
)

return format_node(root_node, query=query)
return ContextV1(sources=[root_node], query=query)


def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None: # noqa: C901
"""Process a file or directory item within a directory.

This function handles each file or directory item, checking if it should be included or excluded based on the
Expand Down Expand Up @@ -161,13 +171,21 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
continue
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
elif sub_path.is_dir():
child_directory_node = FileSystemNode(
name=sub_path.name,
type=FileSystemNodeType.DIRECTORY,
path_str=str(sub_path.relative_to(query.local_path)),
path=sub_path,
depth=node.depth + 1,
)
# Check if this subdirectory is a git repository
if _is_git_repository(sub_path):
child_directory_node = GitRepository(
name=sub_path.name,
path_str=str(sub_path.relative_to(query.local_path)),
path=sub_path,
depth=node.depth + 1,
)
else:
child_directory_node = FileSystemDirectory(
name=sub_path.name,
path_str=str(sub_path.relative_to(query.local_path)),
path=sub_path,
depth=node.depth + 1,
)

_process_node(node=child_directory_node, query=query, stats=stats)

Expand Down Expand Up @@ -201,9 +219,8 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
The base path of the repository or directory being processed.

"""
child = FileSystemNode(
child = FileSystemSymlink(
name=path.name,
type=FileSystemNodeType.SYMLINK,
path_str=str(path.relative_to(local_path)),
path=path,
depth=parent_node.depth + 1,
Expand All @@ -213,7 +230,7 @@ def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemS
parent_node.file_count += 1


def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:
def _process_file(path: Path, parent_node: FileSystemDirectory, stats: FileSystemStats, local_path: Path) -> None:
"""Process a file in the file system.

This function checks the file's size, increments the statistics, and reads its content.
Expand All @@ -223,7 +240,7 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
----------
path : Path
The full path of the file.
parent_node : FileSystemNode
parent_node : FileSystemDirectory
The parent directory node that accumulates the results.
stats : FileSystemStats
Statistics tracking object for the total file count and size.
Expand Down Expand Up @@ -258,11 +275,8 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat
stats.total_files += 1
stats.total_size += file_size

child = FileSystemNode(
child = FileSystemFile(
name=path.name,
type=FileSystemNodeType.FILE,
size=file_size,
file_count=1,
path_str=str(path.relative_to(local_path)),
path=path,
depth=parent_node.depth + 1,
Expand Down
Loading
Loading

AltStyle によって変換されたページ (->オリジナル) /