Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ccb8f9e

Browse files
feat: add GitRepository Type
1 parent 7b95c0f commit ccb8f9e

File tree

4 files changed

+124
-62
lines changed

4 files changed

+124
-62
lines changed

‎src/gitingest/ingestion.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,19 @@
66
from typing import TYPE_CHECKING
77

88
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
9-
from gitingest.output_formatter import DefaultFormatter, DebugFormatter, SummaryFormatter
10-
from gitingest.schemas import FileSystemNode, FileSystemStats, Context
11-
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink
9+
from gitingest.schemas import Context, FileSystemNode, FileSystemStats
10+
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink, GitRepository
1211
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1312

1413
if TYPE_CHECKING:
1514
from gitingest.schemas import IngestionQuery
1615

1716

17+
def _is_git_repository(path: Path) -> bool:
18+
"""Check if a directory contains a .git folder."""
19+
return (path / ".git").exists()
20+
21+
1822
def ingest_query(query: IngestionQuery) -> Context:
1923
"""Run the ingestion process for a parsed query.
2024
@@ -64,17 +68,19 @@ def ingest_query(query: IngestionQuery) -> Context:
6468

6569
return Context([file_node], query)
6670

67-
# root_node = FileSystemNode(
68-
# name=path.name,
69-
# type=FileSystemNodeType.DIRECTORY,
70-
# path_str=str(path.relative_to(query.local_path)),
71-
# path=path,
72-
# )
73-
root_node = FileSystemDirectory(
74-
name=path.name,
75-
path_str=str(path.relative_to(query.local_path)),
76-
path=path,
77-
)
71+
# Check if this is a git repository and create appropriate node type
72+
if _is_git_repository(path):
73+
root_node = GitRepository(
74+
name=path.name,
75+
path_str=str(path.relative_to(query.local_path)),
76+
path=path,
77+
)
78+
else:
79+
root_node = FileSystemDirectory(
80+
name=path.name,
81+
path_str=str(path.relative_to(query.local_path)),
82+
path=path,
83+
)
7884

7985
stats = FileSystemStats()
8086

@@ -117,12 +123,21 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
117123
continue
118124
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
119125
elif sub_path.is_dir():
120-
child_directory_node = FileSystemDirectory(
121-
name=sub_path.name,
122-
path_str=str(sub_path.relative_to(query.local_path)),
123-
path=sub_path,
124-
depth=node.depth + 1,
125-
)
126+
# Check if this subdirectory is a git repository
127+
if _is_git_repository(sub_path):
128+
child_directory_node = GitRepository(
129+
name=sub_path.name,
130+
path_str=str(sub_path.relative_to(query.local_path)),
131+
path=sub_path,
132+
depth=node.depth + 1,
133+
)
134+
else:
135+
child_directory_node = FileSystemDirectory(
136+
name=sub_path.name,
137+
path_str=str(sub_path.relative_to(query.local_path)),
138+
path=sub_path,
139+
depth=node.depth + 1,
140+
)
126141

127142
_process_node(node=child_directory_node, query=query, stats=stats)
128143

@@ -197,7 +212,6 @@ def _process_file(path: Path, parent_node: FileSystemDirectory, stats: FileSyste
197212
stats.total_files += 1
198213
stats.total_size += file_size
199214

200-
201215
child = FileSystemFile(
202216
name=path.name,
203217
path_str=str(path.relative_to(local_path)),

‎src/gitingest/output_formatter.py

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@
22

33
from __future__ import annotations
44

5+
from functools import singledispatchmethod
56
from typing import TYPE_CHECKING
67

78
import tiktoken
9+
from jinja2 import BaseLoader, Environment
810

9-
from gitingest.schemas import FileSystemNode
11+
from gitingest.schemas import FileSystemDirectory, FileSystemFile, FileSystemNode, FileSystemSymlink, Source
12+
from gitingest.schemas.filesystem import SEPARATOR, Context, FileSystemNodeType, GitRepository
1013
from gitingest.utils.compat_func import readlink
11-
from functools import singledispatchmethod
12-
from gitingest.schemas import Source, FileSystemFile, FileSystemDirectory, FileSystemSymlink
13-
from gitingest.schemas.filesystem import SEPARATOR, Context, FileSystemNodeType
14-
from jinja2 import Environment, BaseLoader
1514

1615
if TYPE_CHECKING:
1716
from gitingest.schemas import IngestionQuery
@@ -24,6 +23,7 @@
2423

2524
# Backward compatibility
2625

26+
2727
def _create_summary_prefix(query: IngestionQuery, *, single_file: bool = False) -> str:
2828
"""Create a prefix string for summarizing a repository or local directory.
2929
@@ -167,6 +167,7 @@ def _format_token_count(text: str) -> str | None:
167167

168168
return str(total_tokens)
169169

170+
170171
class DefaultFormatter:
171172
def __init__(self):
172173
self.separator = SEPARATOR
@@ -178,8 +179,7 @@ def format(self, node: Source, query):
178179

179180
@format.register
180181
def _(self, node: FileSystemFile, query):
181-
template = \
182-
"""
182+
template = """
183183
{{ SEPARATOR }}
184184
{{ node.name }}
185185
{{ SEPARATOR }}
@@ -191,8 +191,7 @@ def _(self, node: FileSystemFile, query):
191191

192192
@format.register
193193
def _(self, node: FileSystemDirectory, query):
194-
template = \
195-
"""
194+
template = """
196195
{% if node.depth == 0 %}
197196
{{ node.name }}:
198197
{{ node.tree }}
@@ -206,9 +205,23 @@ def _(self, node: FileSystemDirectory, query):
206205
return dir_template.render(node=node, query=query, formatter=self)
207206

208207
@format.register
209-
def _(self, node: FileSystemSymlink, query):
210-
template = \
208+
def _(self, node: GitRepository, query):
209+
template = """
210+
{% if node.depth == 0 %}
211+
🔗 Git Repository: {{ node.name }}
212+
{{ node.tree }}
213+
214+
{% endif %}
215+
{% for child in node.children %}
216+
{{ formatter.format(child, query) }}
217+
{% endfor %}
211218
"""
219+
git_template = self.env.from_string(template)
220+
return git_template.render(node=node, query=query, formatter=self)
221+
222+
@format.register
223+
def _(self, node: FileSystemSymlink, query):
224+
template = """
212225
{{ SEPARATOR }}
213226
{{ node.name }}{% if node.target %} -> {{ node.target }}{% endif %}
214227
{{ SEPARATOR }}
@@ -219,8 +232,7 @@ def _(self, node: FileSystemSymlink, query):
219232
@format.register
220233
def _(self, context: Context, query):
221234
"""Format a Context by formatting all its sources."""
222-
template = \
223-
"""
235+
template = """
224236
# Generated using https://gitingest.com/{{ context.query.user_name }}/{{ context.query.repo_name }}
225237
Sources used:
226238
{% for source in context.sources %}
@@ -252,20 +264,19 @@ def format(self, node: Source, query):
252264

253265
# Try to get dataclass fields first
254266
try:
255-
if hasattr(node, '__dataclass_fields__') and hasattr(node.__dataclass_fields__, 'keys'):
267+
if hasattr(node, "__dataclass_fields__") and hasattr(node.__dataclass_fields__, "keys"):
256268
field_names.extend(node.__dataclass_fields__.keys())
257269
else:
258270
raise AttributeError # Fall through to backup method
259271
except (AttributeError, TypeError):
260272
# Fall back to getting all non-private attributes
261-
field_names = [attr for attr in dir(node)
262-
if not attr.startswith('_')
263-
and not callable(getattr(node, attr, None))]
273+
field_names = [
274+
attr for attr in dir(node) if not attr.startswith("_") and not callable(getattr(node, attr, None))
275+
]
264276

265277
# Format the debug output
266278
fields_str = ", ".join(field_names)
267-
template = \
268-
"""
279+
template = """
269280
{{ SEPARATOR }}
270281
DEBUG: {{ class_name }}
271282
Fields: {{ fields_str }}
@@ -275,7 +286,7 @@ def format(self, node: Source, query):
275286
return debug_template.render(
276287
SEPARATOR=SEPARATOR,
277288
class_name=class_name,
278-
fields_str=fields_str
289+
fields_str=fields_str,
279290
)
280291

281292

@@ -291,20 +302,17 @@ def summary(self, node: Source, query):
291302

292303
@summary.register
293304
def _(self, node: FileSystemDirectory, query):
294-
template = \
295-
"""
305+
template = """
296306
Directory structure:
297307
{{ node.tree }}
298308
"""
299309
summary_template = self.env.from_string(template)
300310
return summary_template.render(node=node, query=query)
301311

302-
303312
@summary.register
304313
def _(self, context: Context, query):
305-
template = \
306-
"""
314+
template = """
307315
{{ context.summary }}
308316
"""
309317
summary_template = self.env.from_string(template)
310-
return summary_template.render(context=context, query=query)
318+
return summary_template.render(context=context, query=query)

‎src/gitingest/schemas/__init__.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,26 @@
11
"""Module containing the schemas for the Gitingest package."""
22

33
from gitingest.schemas.cloning import CloneConfig
4-
from gitingest.schemas.filesystem import FileSystemNode, FileSystemFile, FileSystemDirectory, FileSystemSymlink, FileSystemStats, Context, Source
4+
from gitingest.schemas.filesystem import (
5+
Context,
6+
FileSystemDirectory,
7+
FileSystemFile,
8+
FileSystemNode,
9+
FileSystemStats,
10+
FileSystemSymlink,
11+
GitRepository,
12+
Source,
13+
)
514
from gitingest.schemas.ingestion import IngestionQuery
615

7-
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemFile", "FileSystemDirectory", "FileSystemSymlink", "FileSystemStats", "IngestionQuery", "Context"]
16+
__all__ = [
17+
"CloneConfig",
18+
"Context",
19+
"FileSystemDirectory",
20+
"FileSystemFile",
21+
"FileSystemNode",
22+
"FileSystemStats",
23+
"FileSystemSymlink",
24+
"GitRepository",
25+
"IngestionQuery",
26+
]

‎src/gitingest/schemas/filesystem.py

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,19 @@
22

33
from __future__ import annotations
44

5-
import os
5+
from abc import ABC
66
from dataclasses import dataclass, field
77
from enum import Enum, auto
88
from typing import TYPE_CHECKING
9-
from abc import ABC
10-
from functools import singledispatchmethod
11-
12-
from gitingest.utils.compat_func import readlink
13-
from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk
14-
from gitingest.utils.notebook import process_notebook
159

1610
if TYPE_CHECKING:
1711
from pathlib import Path
12+
1813
from gitingest.schemas import IngestionQuery
19-
from gitingest.output_formatter import Formatter
2014

2115
SEPARATOR = "=" * 48 # Tiktoken, the tokenizer openai uses, counts 2 tokens if we have more than 48
2216

17+
2318
class FileSystemNodeType(Enum):
2419
"""Enum representing the type of a file system node (directory or file)."""
2520

@@ -35,10 +30,11 @@ class FileSystemStats:
3530
total_files: int = 0
3631
total_size: int = 0
3732

33+
3834
@dataclass
3935
class Source(ABC):
4036
"""Abstract base class for all sources (files, directories, etc)."""
41-
pass
37+
4238

4339
@dataclass
4440
class FileSystemNode(Source):
@@ -52,13 +48,14 @@ class FileSystemNode(Source):
5248
def tree(self):
5349
return self.name
5450

51+
5552
@dataclass
5653
class FileSystemFile(FileSystemNode):
5754
@property
5855
def content(self):
5956
# read the file
6057
try:
61-
with open(self.path, "r") as f:
58+
with open(self.path) as f:
6259
return f.read()
6360
except Exception as e:
6461
return f"Error reading content of {self.name}: {e}"
@@ -70,28 +67,30 @@ def render_tree(self, prefix="", is_last=True):
7067

7168
@dataclass
7269
class FileSystemDirectory(FileSystemNode):
73-
children: list['FileSystemNode'] = field(default_factory=list)
70+
children: list[FileSystemNode] = field(default_factory=list)
7471
file_count: int = 0
7572
dir_count: int = 0
7673
type: FileSystemNodeType = FileSystemNodeType.DIRECTORY
7774

7875
def sort_children(self) -> None:
7976
"""Sort the children nodes of a directory according to a specific order."""
77+
8078
def _sort_key(child: FileSystemNode) -> tuple[int, str]:
8179
name = child.name.lower()
82-
if hasattr(child, 'type') and getattr(child, 'type', None) == FileSystemNodeType.FILE:
80+
if hasattr(child, "type") and getattr(child, "type", None) == FileSystemNodeType.FILE:
8381
if name == "readme" or name.startswith("readme."):
8482
return (0, name)
8583
return (1 if not name.startswith(".") else 2, name)
8684
return (3 if not name.startswith(".") else 4, name)
85+
8786
self.children.sort(key=_sort_key)
8887

8988
def render_tree(self, prefix="", is_last=True):
9089
lines = []
9190
current_prefix = "└── " if is_last else "├── "
9291
display_name = self.name + "/"
9392
lines.append(f"{prefix}{current_prefix}{display_name}")
94-
if hasattr(self, 'children') and self.children:
93+
if hasattr(self, "children") and self.children:
9594
new_prefix = prefix + (" " if is_last else "│ ")
9695
for i, child in enumerate(self.children):
9796
is_last_child = i == len(self.children) - 1
@@ -102,6 +101,27 @@ def render_tree(self, prefix="", is_last=True):
102101
def tree(self):
103102
return "\n".join(self.render_tree())
104103

104+
105+
@dataclass
106+
class GitRepository(FileSystemDirectory):
107+
"""A directory that contains a .git folder, representing a Git repository."""
108+
109+
git_info: dict = field(default_factory=dict) # Store git metadata like branch, commit, etc.
110+
111+
def render_tree(self, prefix="", is_last=True):
112+
lines = []
113+
current_prefix = "└── " if is_last else "├── "
114+
# Mark as git repo in the tree
115+
display_name = f"{self.name}/ (git repository)"
116+
lines.append(f"{prefix}{current_prefix}{display_name}")
117+
if hasattr(self, "children") and self.children:
118+
new_prefix = prefix + (" " if is_last else "│ ")
119+
for i, child in enumerate(self.children):
120+
is_last_child = i == len(self.children) - 1
121+
lines.extend(child.render_tree(prefix=new_prefix, is_last=is_last_child))
122+
return lines
123+
124+
105125
@dataclass
106126
class FileSystemSymlink(FileSystemNode):
107127
target: str = ""
@@ -122,6 +142,7 @@ class Context(Source):
122142
The list of source objects to format.
123143
query : IngestionQuery
124144
The query context.
145+
125146
"""
126147

127148
def __init__(self, sources: list[Source], query: IngestionQuery):

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /