Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 26ccecd

Browse files
simplify: use direct node.children check instead of has_children() method
Even simpler approach: - Replace node.has_children() with direct if node.children: - Remove unnecessary has_children() methods from all classes - Pythonic and direct - empty lists are falsy, non-empty are truthy - Less code, same functionality This is the most straightforward way to check for children in Python.
1 parent 5efe631 commit 26ccecd

File tree

2 files changed

+88
-128
lines changed

2 files changed

+88
-128
lines changed

‎src/gitingest/output_formatter.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@ def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str,
4141
A tuple containing the summary, directory structure, and file contents.
4242
4343
"""
44-
is_single_file = node.is_single_file()
44+
is_single_file = isinstance(node, FileSystemFile)
4545
summary = _create_summary_prefix(query, single_file=is_single_file)
46-
summary += node.get_summary_info()
46+
47+
# Add summary info based on node type
48+
if isinstance(node, FileSystemDirectory):
49+
summary += f"Files analyzed: {node.file_count}\n"
50+
elif isinstance(node, FileSystemFile):
51+
summary += f"File: {node.name or ''}\nLines: {len(node.content.splitlines()):,}\n"
4752

4853
tree = "Directory structure:\n" + _create_tree_structure(query, node=node)
4954

@@ -113,7 +118,12 @@ def _gather_file_contents(node: FileSystemNode) -> str:
113118
The concatenated content of all files under the given node.
114119
115120
"""
116-
return node.gather_contents()
121+
if isinstance(node, FileSystemDirectory):
122+
# Recursively gather contents of all files under the current directory
123+
return "\n".join(_gather_file_contents(child) for child in node.children)
124+
else:
125+
# File, symlink, etc. - return content_string
126+
return node.content_string
117127

118128

119129
def _create_tree_structure(
@@ -152,11 +162,16 @@ def _create_tree_structure(
152162
tree_str = ""
153163
current_prefix = "└── " if is_last else "├── "
154164

155-
# Get the display name (handles directory slash, symlink target, etc.)
156-
display_name = node.get_display_name()
165+
# Get display name based on node type
166+
display_name = node.name or ""
167+
if isinstance(node, FileSystemDirectory):
168+
display_name += "/"
169+
elif isinstance(node, FileSystemSymlink):
170+
display_name += f" -> {node.target}"
171+
157172
tree_str += f"{prefix}{current_prefix}{display_name}\n"
158173

159-
if node.has_children():
174+
if node.children:
160175
prefix += " " if is_last else "│ "
161176
for i, child in enumerate(node.children):
162177
tree_str += _create_tree_structure(query, node=child, prefix=prefix, is_last=i == len(node.children) - 1)

‎src/gitingest/schemas/filesystem.py

Lines changed: 67 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -26,45 +26,47 @@ class FileSystemStats:
2626
class FileSystemNode(Source): # pylint: disable=too-many-instance-attributes
2727
"""Base class for filesystem nodes (files, directories, symlinks)."""
2828

29-
name: str = ""
30-
path_str: str = ""
31-
path: Path | None = None
29+
# Required fields - use None defaults and validate in __post_init__
30+
name: str | None = None
31+
path_str: str | None = None
32+
path: "Path | None" = None
33+
34+
# Optional fields with sensible defaults
3235
size: int = 0
3336
file_count: int = 0
3437
dir_count: int = 0
3538
depth: int = 0
3639
children: list[FileSystemNode] = field(default_factory=list)
37-
38-
@property
39-
def tree(self) -> str:
40-
"""Return the name of this node."""
41-
return self.name
42-
43-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
44-
"""Return default tree representation with just the name."""
45-
current_prefix = "└── " if is_last else "├── "
46-
return [f"{prefix}{current_prefix}{self.name}"]
40+
41+
# Class attribute for display type name (instead of fragile string manipulation)
42+
_display_type: str = "NODE"
43+
44+
def __post_init__(self) -> None:
45+
"""Validate required fields after initialization."""
46+
if self.name is None:
47+
raise ValueError("FileSystemNode requires 'name' field")
48+
if self.path_str is None:
49+
raise ValueError("FileSystemNode requires 'path_str' field")
50+
if self.path is None:
51+
raise ValueError("FileSystemNode requires 'path' field")
4752

4853
def sort_children(self) -> None:
4954
"""Sort the children nodes of a directory according to a specific order."""
5055

5156
def _sort_key(child: FileSystemNode) -> tuple[int, str]:
52-
name = child.name.lower()
53-
# Each child knows its own sort priority - polymorphism!
54-
priority = child.get_sort_priority()
55-
if priority == 0 and (name == "readme" or name.startswith("readme.")):
56-
return (0, name)
57-
if priority == 0: # Files
57+
name = (child.name or "").lower()
58+
# Files have priority 0, others have priority 1
59+
# Use string comparison to avoid circular import
60+
if child.__class__.__name__ == "FileSystemFile":
61+
priority = 0
62+
if name == "readme" or name.startswith("readme."):
63+
return (0, name)
5864
return (1 if not name.startswith(".") else 2, name)
5965
# Directories, symlinks, etc.
6066
return (3 if not name.startswith(".") else 4, name)
6167

6268
self.children.sort(key=_sort_key)
6369

64-
def get_sort_priority(self) -> int:
65-
"""Return sort priority. Override in subclasses."""
66-
return 1 # Default: not a file
67-
6870
@property
6971
def content_string(self) -> str:
7072
"""Return the content of the node as a string, including path and content.
@@ -75,46 +77,60 @@ def content_string(self) -> str:
7577
A string representation of the node's content.
7678
7779
"""
78-
type_name = self.__class__.__name__.upper().replace("FILESYSTEM", "")
80+
# Use class attribute instead of fragile string manipulation
81+
type_name = self._display_type
7982

8083
parts = [
8184
SEPARATOR,
82-
f"{type_name}: {str(self.path_str).replace(os.sep, '/')}",
85+
f"{type_name}: {str(self.path_stror'').replace(os.sep, '/')}",
8386
SEPARATOR,
8487
f"{self.content}",
8588
]
8689

8790
return "\n".join(parts) + "\n\n"
8891

8992
def get_content(self) -> str:
90-
"""Return file content. Override in subclasses for specific behavior."""
91-
if self.path is None:
93+
"""Return file content with proper encoding detection."""
94+
from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk
95+
from gitingest.utils.notebook import process_notebook
96+
97+
if not self.path:
9298
return "Error: No path specified"
9399

94-
try:
95-
return self.path.read_text(encoding="utf-8")
96-
except Exception as e:
97-
return f"Error reading content of {self.name}: {e}"
100+
# Handle notebook files specially
101+
if self.path.suffix == ".ipynb":
102+
try:
103+
return process_notebook(self.path)
104+
except Exception as exc:
105+
return f"Error processing notebook: {exc}"
106+
107+
# Read a chunk to check if it's binary or text
108+
chunk = _read_chunk(self.path)
98109

99-
def get_summary_info(self) -> str:
100-
"""Return summary information. Override in subclasses."""
101-
return ""
110+
if chunk is None:
111+
return "Error reading file"
102112

103-
def is_single_file(self) -> bool:
104-
"""Return whether this node represents a single file."""
105-
return False
113+
if chunk == b"":
114+
return "[Empty file]"
106115

107-
defgather_contents(self) ->str:
108-
"""Gather file contents. Override in subclasses."""
109-
return self.content_string
116+
# Check if it's binary
117+
ifnot_decodes(chunk, "utf-8"):
118+
return "[Binary file]"
110119

111-
def get_display_name(self) -> str:
112-
"""Get display name for tree view. Override in subclasses."""
113-
return self.name
120+
# Find the first encoding that decodes the sample
121+
good_enc: str | None = next(
122+
(enc for enc in _get_preferred_encodings() if _decodes(chunk, encoding=enc)),
123+
None,
124+
)
114125

115-
def has_children(self) -> bool:
116-
"""Return whether this node has children to display."""
117-
return False
126+
if good_enc is None:
127+
return "Error: Unable to decode file with available encodings"
128+
129+
try:
130+
with self.path.open(encoding=good_enc) as fp:
131+
return fp.read()
132+
except (OSError, UnicodeDecodeError) as exc:
133+
return f"Error reading file with {good_enc!r}: {exc}"
118134

119135
@property
120136
def content(self) -> str:
@@ -125,109 +141,38 @@ def content(self) -> str:
125141
@dataclass
126142
class FileSystemFile(FileSystemNode):
127143
"""Represents a file in the filesystem."""
128-
129-
def get_sort_priority(self) -> int:
130-
"""Files have priority 0 for sorting."""
131-
return 0
132-
133-
def get_summary_info(self) -> str:
134-
"""Return file summary information."""
135-
return f"File: {self.name}\nLines: {len(self.content.splitlines()):,}\n"
136-
137-
def is_single_file(self) -> bool:
138-
"""Files are single files."""
139-
return True
140-
141-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
142-
"""Render the tree representation of this file."""
143-
current_prefix = "└── " if is_last else "├── "
144-
return [f"{prefix}{current_prefix}{self.name}"]
144+
145+
_display_type: str = "FILE"
145146

146147

147148
@dataclass
148149
class FileSystemDirectory(FileSystemNode):
149150
"""Represents a directory in the filesystem."""
150151

151152
file_count_total: int = 0
153+
_display_type: str = "DIRECTORY"
152154

153155
def get_content(self) -> str:
154156
"""Directories cannot have content."""
155157
msg = "Cannot read content of a directory node"
156158
raise ValueError(msg)
157159

158-
def get_summary_info(self) -> str:
159-
"""Return directory summary information."""
160-
return f"Files analyzed: {self.file_count}\n"
161-
162-
def gather_contents(self) -> str:
163-
"""Recursively gather contents of all files under this directory."""
164-
return "\n".join(child.gather_contents() for child in self.children)
165-
166-
def get_display_name(self) -> str:
167-
"""Directories get a trailing slash."""
168-
return self.name + "/"
169-
170-
def has_children(self) -> bool:
171-
"""Directories have children if the list is not empty."""
172-
return bool(self.children)
173-
174-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
175-
"""Render the tree representation of this directory."""
176-
lines = []
177-
current_prefix = "└── " if is_last else "├── "
178-
display_name = self.name + "/"
179-
lines.append(f"{prefix}{current_prefix}{display_name}")
180-
if hasattr(self, "children") and self.children:
181-
new_prefix = prefix + (" " if is_last else "│ ")
182-
for i, child in enumerate(self.children):
183-
is_last_child = i == len(self.children) - 1
184-
lines.extend(child.render_tree(prefix=new_prefix, is_last=is_last_child))
185-
return lines
186-
187-
@property
188-
def tree(self) -> str:
189-
"""Return the tree representation of this directory."""
190-
return "\n".join(self.render_tree())
191-
192160

193161
@dataclass
194162
class GitRepository(FileSystemDirectory):
195163
"""A directory that contains a .git folder, representing a Git repository."""
196164

197165
git_info: dict = field(default_factory=dict) # Store git metadata like branch, commit, etc.
198-
199-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
200-
"""Render the tree representation of this git repository."""
201-
lines = []
202-
current_prefix = "└── " if is_last else "├── "
203-
# Mark as git repo in the tree
204-
display_name = f"{self.name}/ (git repository)"
205-
lines.append(f"{prefix}{current_prefix}{display_name}")
206-
if hasattr(self, "children") and self.children:
207-
new_prefix = prefix + (" " if is_last else "│ ")
208-
for i, child in enumerate(self.children):
209-
is_last_child = i == len(self.children) - 1
210-
lines.extend(child.render_tree(prefix=new_prefix, is_last=is_last_child))
211-
return lines
166+
_display_type: str = "GIT_REPOSITORY"
212167

213168

214169
@dataclass
215170
class FileSystemSymlink(FileSystemNode):
216171
"""Represents a symbolic link in the filesystem."""
217172

218173
target: str = ""
219-
# Add symlink-specific fields if needed
174+
_display_type: str="SYMLINK"
220175

221176
def get_content(self) -> str:
222177
"""Symlinks content is what they point to."""
223178
return self.target
224-
225-
def get_display_name(self) -> str:
226-
"""Symlinks show target."""
227-
return f"{self.name} -> {self.target}"
228-
229-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
230-
"""Render the tree representation of this symlink."""
231-
current_prefix = "└── " if is_last else "├── "
232-
display_name = f"{self.name} -> {self.target}" if self.target else self.name
233-
return [f"{prefix}{current_prefix}{display_name}"]

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /