On Linux there is the cat
command, which outputs files concatenated, but on Windows no such command exists. As a result, I decided to attempt to recreate a simple version of it, with the added challenge that I could not use any part of the C runtime library.
#include <windows.h>
/* global variables */
/* Handle used for standard output; NULL until assigned at startup. */
HANDLE stdout = NULL;
/* Handle used for standard input; NULL until assigned at startup. */
HANDLE stdin = NULL;
/* Buffer for data read from standard input; NULL until allocated. */
char *input_buffer = NULL;
/* Console read-control settings: nLength is the structure's own size and the
   wake-up mask has the bit numbered '\n' (bit 10) set. */
CONSOLE_READCONSOLE_CONTROL crc = { .nLength = sizeof(crc), .dwCtrlWakeupMask = 1 << '\n' };
/* Buffer for file data on its way to standard output; NULL until allocated. */
char *output_buffer = NULL;
/* Size of output_buffer in bytes. */
DWORD output_capacity = 0;
/*
 * ANSI counterpart of CommandLineToArgvW (Windows only ships the wide version).
 *
 * Splits lpWideCmdLine with CommandLineToArgvW and converts every argument to
 * the ANSI code page (CP_ACP).
 *
 * lpWideCmdLine: wide-character command line to split.
 * pNumArgs:      receives the number of arguments on success; may be NULL.
 *
 * Returns a single LocalAlloc'ed block holding the pointer table followed by
 * the converted strings, so the caller releases everything with one
 * LocalFree() on the returned pointer. Returns NULL on failure, in which case
 * *pNumArgs is left untouched.
 */
LPSTR *CommandLineToArgvA(LPWSTR lpWideCmdLine, INT *pNumArgs)
{
    int numArgs = 0;
    LPWSTR *args = CommandLineToArgvW(lpWideCmdLine, &numArgs);
    if (args == NULL)
        return NULL;

    /* First pass: pointer table plus the bytes every converted string needs. */
    int tableSize = numArgs * (int)sizeof(LPSTR);
    int storage = tableSize;
    for (int i = 0; i < numArgs; ++i) {
        /* WideCharToMultiByte reports failure by returning 0; with a -1 input
           length even an empty string yields 1 (its terminator). */
        int needed = WideCharToMultiByte(CP_ACP, 0, args[i], -1, NULL, 0, NULL, NULL);
        if (needed == 0) {
            LocalFree(args);
            return NULL;
        }
        storage += needed;
    }

    LPSTR *result = (LPSTR *)LocalAlloc(LMEM_FIXED, storage);
    if (result == NULL) {
        LocalFree(args);
        return NULL;
    }

    /* Second pass: convert into the string area that follows the pointer table. */
    LPSTR buffer = (LPSTR)result + tableSize;
    int bufLen = storage - tableSize;
    for (int i = 0; i < numArgs; ++i) {
        int written = WideCharToMultiByte(CP_ACP, 0, args[i], -1, buffer, bufLen, NULL, NULL);
        if (written == 0) {
            LocalFree(result);
            LocalFree(args);
            return NULL;
        }
        result[i] = buffer;
        buffer += written;
        bufLen -= written;
    }

    LocalFree(args);
    if (pNumArgs != NULL)
        *pNumArgs = numArgs;
    return result;
}
/*
 * Copies len bytes from src to dest; the regions must not overlap.
 *
 * dest: destination buffer with room for at least len bytes.
 * src:  source buffer with at least len readable bytes.
 * len:  number of bytes to copy; 0 copies nothing.
 *
 * Does nothing when either pointer is NULL.
 *
 * The copy is done one byte at a time: reading and writing the buffers through
 * a `long *` is not valid for arbitrarily aligned char data, and `long` is not
 * 4 bytes wide on every platform, so a "4 bytes at once" loop built on it can
 * write past the end of dest.
 */
static void lmemcpy(char *dest, const char *src, DWORD len)
{
    if (dest == NULL || src == NULL)
        return;
    while (len--)
        *dest++ = *src++;
}
static void catstdin(void)
{
DWORD chars_read = 0;
ReadConsoleA(stdin, input_buffer, 2048, &chars_read, &crc);
WriteConsoleA(stdout, input_buffer, chars_read, NULL, NULL);
}
static void catfile(char *filepath)
{
HANDLE filehandle = CreateFileA(filepath, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (filehandle == INVALID_HANDLE_VALUE) {
WriteConsoleA(stdout, "Error could not open file: ", 27, NULL, NULL);
WriteConsoleA(stdout, filepath, lstrlenA(filepath), NULL, NULL);
ExitProcess(GetLastError());
}
DWORD filelength = GetFileSize(filehandle, NULL);
if (filelength > output_capacity) { /* see if we need to allocate more memory */
char *new_buffer = HeapAlloc(GetProcessHeap(), 0, filelength * 2); /* copy the data from the old memory to the new memory */
lmemcpy(new_buffer, output_buffer, output_capacity);
HeapFree(GetProcessHeap(), 0, output_buffer); /* free old memory */
output_capacity = filelength * 2;
output_buffer = new_buffer;
}
ReadFile(filehandle, output_buffer, filelength, NULL, NULL);
WriteConsoleA(stdout, output_buffer, filelength, NULL, NULL);
CloseHandle(filehandle); /* close file */
}
/*
 * Process entry point (no C runtime is linked, so there is no main()).
 *
 * Sets up the global handles and buffers, splits the command line, and then
 * either echoes standard input (no arguments) or processes each argument in
 * order: "-" means standard input, anything else is treated as a file path.
 * Exits with 0 after all arguments were processed, or with a non-zero code if
 * start-up fails.
 */
void __cdecl mainCRTStartup(void)
{
    /* setup global variables */
    stdout = GetStdHandle(STD_OUTPUT_HANDLE);
    stdin = GetStdHandle(STD_INPUT_HANDLE);
    input_buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 2048);
    output_buffer = HeapAlloc(GetProcessHeap(), 0, 2048);
    if (input_buffer == NULL || output_buffer == NULL)
        ExitProcess(ERROR_NOT_ENOUGH_MEMORY);
    output_capacity = 2048;

    /* get argc and argv; keep the pointer returned by CommandLineToArgvA,
       because that is the address LocalFree has to be given later */
    int argc = 0;
    char **argv_base = CommandLineToArgvA(GetCommandLineW(), &argc);
    if (argv_base == NULL || argc < 1) {
        DWORD code = GetLastError();
        ExitProcess(code != 0 ? code : 1); /* never report success here */
    }
    char **argv = argv_base + 1; /* the first arg is always the program name */
    argc--;

    if (argc == 0) {
        /* catstdin() returns no status, so end of input cannot be detected
           here; this loop runs until the process is terminated. */
        for (;;)
            catstdin();
    }

    for (int i = 0; i < argc; ++i) {
        if (!lstrcmpA(argv[i], "-"))
            catstdin();
        else
            catfile(argv[i]);
    }

    /* free memory */
    HeapFree(GetProcessHeap(), 0, input_buffer);
    HeapFree(GetProcessHeap(), 0, output_buffer);
    LocalFree(argv_base);

    /* exit */
    ExitProcess(0);
}
```
1 Answer 1
Avoid converting command line arguments to ASCII
There is no good reason to convert the command line arguments to ASCII. All the functions you use that take pointers to ASCII strings also have variants that handle wide strings, for example lstrcmpW()
and CreateFileW()
. This way, you can get rid of CommandLineToArgvA()
.
Use stderr
to report errors
Consider that it is not unlikely that the user of your cat
implementation redirects standard output to another file. If there is an error, instead of printing it to the console, you are writing the error message to that file instead. Just add stderr = GetStdHandle(STD_ERROR_HANDLE)
, and use that for the error messages.
Avoid allocating a buffer as large as each input file
Disk space is typically at least an order of magnitude larger than RAM. If you want to cat a file larger than the amount of free RAM available, your program will fail. It is better to allocate a buffer with a fixed size of say 64 KiB, and use multiple calls to ReadFile()
if necessary to read the input as chunks of up to 64 KiB. On one hand, it means more overhead from multiple calls to ReadFile()
, on the other hand you will likely stay within the L2 cache of your CPU. In any case, I expect performance will not be changed dramatically by this, but now your program handles arbitrarily sized files.
This will also simplify your code: you no longer have to get the file size and resize the buffer if necessary. Instead, just read until you reach the end of the file.
Use a loop to read from stdin
until you reach EOF
If you specify -
as an argument, you read only up to 2048 bytes from stdin
before continuing to the next command line argument. And if you don't specify any arguments at all, you have an infinite loop that reads from stdin
, even if there is nothing to read anymore.
Keep in mind that stdin
might also have been redirected, in which case it actually reads from a file or from the output of another program.
Use the same buffer for stdin
as for files
There's no need to have two separate buffers, as you only handle either a file or stdin
at a time. Just ensure it is large enough.
Handle read and write errors
Things can go wrong. If there is an error reading a file or writing to stdout
, you should print an error message to stderr
and then immediately exit with a non-zero exit code. This will notify the user of errors. Also, if your cat
implementation is used in a batch script, the non-zero exit code will allow that script to detect the error, instead of blindly continuing with invalid data.
type
command does what you want, one file at a time.