-
Notifications
You must be signed in to change notification settings - Fork 257
-
It would be handy to be able to filter packages based on whether or not they use the heap.
This could be done by either adding a tag field to clib.json
on the maintainer side or by adding tags to https://github.com/clibs/clib/wiki/Packages
Would it also be possible to scan the source code for calls to 'malloc()'? Certainly not a guarantee... but it would allow for adding a 'heap required' tag
Beta Was this translation helpful? Give feedback.
All reactions
Replies: 2 comments
-
I like that idea!
Beta Was this translation helpful? Give feedback.
All reactions
-
In case anyone is interested, I've written a source scanner to look for keywords like malloc, goto etc...
https://briankhuu.com/blog/2025/01/26/clibs-source-scanner-for-keyword-suggestion/
Maybe we could integrate something like this as a tag suggester.
tcc -run ./source_keyword_suggester.c 2>&- << HEREDOC
#include <stdlib.h>
int main() {
    int *ptr = malloc(100);
    free(ptr);
    goto end;
end:
    return 0;
}
HEREDOC
Which would output:
["heap used", "malloc", "free", "goto used"]
This is because internally it sees:
include
stdlib.h
int
main()
int
ptr
malloc()
free()
ptr
goto
end
end
return
So we could try extending it further to see if a source is using certain includes like stdlib.h
C Source
/*
    Clibs Source Scanner For Keyword Suggestion
    Author: Brian Khuu (2025)

    This idea is to provide a mechanism for scanning a source code and giving
    some keyword suggestions based on the properties of the source code.
    In this case, I would like to mark a source code as using dynamic memory
    or not.

    This won't be perfect, but it may help encourage library writers to use
    keywords if it's already provided.

    Usage: the source to inspect is read from stdin. The suggested tags are
    written to stdout as a bracketed, comma separated list of quoted strings.
    Every token the scanner reports is echoed to stderr, one per line.

    # MIT License

    Copyright (c) 2025 Brian Khuu

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.
*/

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

// Maximum number of characters kept per token; longer tokens are truncated.
#define TOKEN_MAX 256

// Character-at-a-time scanner state. Zero-initialise before first use.
typedef struct SourceTokenScanner {
    int prev;                    // Previous significant character, 0 when there is none
    int quote_char;              // Opening quote while inside a literal, 0 otherwise
    bool in_single_line_comment; // Inside a comment that runs to the end of the line
    bool in_multi_line_comment;  // Inside a block comment
    bool token_ready;            // The previous call reported a token
    bool token_is_function;      // The reported token was directly followed by '('
    char token[TOKEN_MAX + 1];   // NUL terminated token text
    size_t token_size;           // Number of characters currently in token
} SourceTokenScanner;

// Flags recording which indicator tokens were seen in the scanned source.
typedef struct SourceTagger {
    bool uses_malloc;
    bool uses_free;
    bool uses_realloc;
    bool uses_goto;
} SourceTagger;

// Discard whatever has been accumulated in the token buffer.
static void reset_token(SourceTokenScanner *context)
{
    context->token[0] = '\0';
    context->token_size = 0;
}

// True for characters that may be part of a token: letters, digits, '_' and '.'
// ('.' is accepted so that header names such as stdlib.h stay in one piece).
static bool is_token_char(char ch)
{
    return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') ||
           ch == '_' || ch == '.';
}

/*
    Feed one character of source text to the scanner.

    Returns true when `ch` terminated a token. The token text is then available
    in context->token until the next call, and context->token_is_function tells
    whether the terminating character was '('.

    Text inside comments, string literals and character literals is skipped.
    Tokens of one or two characters, and tokens starting with a digit or '.',
    are dropped. This is a heuristic, not a C lexer: an identifier only counts
    as a function when '(' follows it immediately, with no whitespace between.
*/
bool sourceTokenScanner(SourceTokenScanner *context, const char ch)
{
    // The token reported by the previous call has been consumed by now.
    if (context->token_ready) {
        reset_token(context);
        context->token_ready = false;
    }

    // Comment state is checked before quotes so that an apostrophe or a
    // double quote inside a comment does not open a bogus literal.
    if (context->in_single_line_comment) {
        if (ch == '\n') {
            context->in_single_line_comment = false;
            // Forget the '/' that opened the comment; otherwise a following
            // line starting with '*' would look like a block comment opener.
            context->prev = '\0';
        }
        return false;
    }

    if (context->in_multi_line_comment) {
        if (context->prev == '*' && ch == '/') {
            context->in_multi_line_comment = false;
            context->prev = '\0';
        } else {
            context->prev = ch;
        }
        return false;
    }

    if (context->quote_char) {
        if (context->prev == '\\') {
            // This character is escaped. Clearing prev means the second
            // backslash of an escaped backslash does not escape what follows.
            context->prev = '\0';
        } else if (ch == context->quote_char) {
            context->quote_char = '\0';
            context->prev = '\0';
        } else {
            context->prev = ch;
        }
        return false;
    }

    // Not inside a comment or literal: look for the start of one.
    if (ch == '\'' || ch == '"') {
        context->quote_char = ch;
        context->prev = '\0';
        reset_token(context);
        return false;
    }

    if (context->prev == '/' && ch == '/') {
        context->in_single_line_comment = true;
        context->prev = '\0';
        reset_token(context);
        return false;
    }

    if (context->prev == '/' && ch == '*') {
        context->in_multi_line_comment = true;
        // Clearing prev stops the opener's '*' from pairing with an
        // immediately following '/' to close the comment.
        context->prev = '\0';
        reset_token(context);
        return false;
    }

    context->prev = ch;

    // Accumulate token characters; anything beyond TOKEN_MAX is dropped.
    if (is_token_char(ch)) {
        if (context->token_size < TOKEN_MAX) {
            context->token[context->token_size++] = ch;
            context->token[context->token_size] = '\0';
        }
        return false;
    }

    // A delimiter with nothing accumulated: nothing to report.
    if (context->token_size == 0) {
        return false;
    }

    // Exclude short tokens or invalid starting characters.
    // Dev Note: Shortest function name is 3 characters long (e.g. sin, cos, tan, log etc...)
    const char first = context->token[0];
    if (context->token_size <= 2 || ('0' <= first && first <= '9') || first == '.') {
        reset_token(context);
        return false;
    }

    // Heuristic Token Found
    context->token_is_function = (ch == '(');
    context->token_ready = true;
    return true;
}

/*
    Append `tag` to `tags` unless an equal string is already stored or the
    array is full.

    tags      - array with room for max_tags entries
    tag_count - number of entries currently stored; incremented on insert
    max_tags  - capacity of tags
    tag       - string to add; the pointer is stored, the text is not copied
*/
void add_tags(char **tags, size_t *tag_count, size_t max_tags, char *tag)
{
    // Check if tag already added (only the filled slots need to be inspected)
    for (size_t i = 0; i < *tag_count && i < max_tags; i++) {
        if (tags[i] != NULL && strcmp(tags[i], tag) == 0) {
            return;
        }
    }

    // Add tag
    if (*tag_count < max_tags) {
        tags[*tag_count] = tag;
        *tag_count = *tag_count + 1;
    }
}

// Scan stdin for indicator tokens, then print the tag suggestions to stdout.
int main(void)
{
    // int, not char: getchar() returns EOF as an out-of-band int value.
    int ch;
    SourceTokenScanner sourceTokenScannerState = {0};
    SourceTagger sourceTagger = {0};

    /* Scan Source For Indicator Tokens */
    while ((ch = getchar()) != EOF) {
        if (!sourceTokenScanner(&sourceTokenScannerState, (char)ch)) {
            continue;
        }

        const char *token = sourceTokenScannerState.token;
        if (sourceTokenScannerState.token_is_function) {
            // Function
            if (strcmp(token, "malloc") == 0) {
                sourceTagger.uses_malloc = true;
            } else if (strcmp(token, "free") == 0) {
                sourceTagger.uses_free = true;
            } else if (strcmp(token, "realloc") == 0) {
                sourceTagger.uses_realloc = true;
            }
        } else {
            if (strcmp(token, "goto") == 0) {
                sourceTagger.uses_goto = true;
            }
        }

        // Echo every reported token to stderr for inspection.
        fprintf(stderr, "%s%s\n", token, sourceTokenScannerState.token_is_function ? "()" : "");
    }

    /* Generate Tag Suggestions */
    char *tags[100] = {NULL};
    const size_t max_tags = sizeof(tags) / sizeof(tags[0]);
    size_t tag_count = 0;

    if (!sourceTagger.uses_malloc && !sourceTagger.uses_free && !sourceTagger.uses_realloc) {
        add_tags(tags, &tag_count, max_tags, "no heap");
        add_tags(tags, &tag_count, max_tags, "heapless");
        add_tags(tags, &tag_count, max_tags, "no malloc");
    }

    if (sourceTagger.uses_malloc) {
        add_tags(tags, &tag_count, max_tags, "heap used");
        add_tags(tags, &tag_count, max_tags, "malloc");
    }

    if (sourceTagger.uses_free) {
        add_tags(tags, &tag_count, max_tags, "heap used");
        add_tags(tags, &tag_count, max_tags, "free");
    }

    if (sourceTagger.uses_realloc) {
        add_tags(tags, &tag_count, max_tags, "heap used");
        add_tags(tags, &tag_count, max_tags, "realloc");
    }

    if (sourceTagger.uses_goto) {
        add_tags(tags, &tag_count, max_tags, "goto used");
    }

    /* Print Tag Suggestions */
    printf("[");
    for (size_t i = 0; i < tag_count; i++) {
        if (i > 0) {
            printf(", ");
        }
        printf("\"%s\"", tags[i]);
    }
    printf("]");

    return 0;
}
Beta Was this translation helpful? Give feedback.