Simple Windows implementation of the cat command

Question 1

On Linux there is the cat command which outputs files concatenated but on Windows there exists no such command. As a result I decided to attempt to recreate a simple version of it but with challenge which was that I could not use any part of the C runtime library.

#include <windows.h>
/* global variables */
HANDLE stdout = NULL;
HANDLE stdin = NULL;
char *input_buffer = NULL;
CONSOLE_READCONSOLE_CONTROL crc = { .nLength = sizeof(crc), .dwCtrlWakeupMask = 1 << '\n' };
char *output_buffer = NULL;
DWORD output_capacity = 0;
/* There is only CommandLineToArgvW so a version for ascii is needed */
LPSTR *CommandLineToArgvA(LPWSTR lpWideCmdLine, INT *pNumArgs)
{
 int retval;
 int numArgs;
 LPWSTR *args;
 args = CommandLineToArgvW(lpWideCmdLine, &numArgs);
 if (args == NULL)
 return NULL;
 int storage = numArgs * sizeof(LPSTR);
 for (int i = 0; i < numArgs; ++i) {
 BOOL lpUsedDefaultChar = FALSE;
 retval = WideCharToMultiByte(CP_ACP, 0, args[i], -1, NULL, 0, NULL, &lpUsedDefaultChar);
 if (!SUCCEEDED(retval)) {
 LocalFree(args);
 return NULL;
 }
 storage += retval;
 }
 LPSTR *result = (LPSTR *)LocalAlloc(LMEM_FIXED, storage);
 if (result == NULL) {
 LocalFree(args);
 return NULL;
 }
 int bufLen = storage - numArgs * sizeof(LPSTR);
 LPSTR buffer = ((LPSTR)result) + numArgs * sizeof(LPSTR);
 for (int i = 0; i < numArgs; ++i) {
 BOOL lpUsedDefaultChar = FALSE;
 retval = WideCharToMultiByte(CP_ACP, 0, args[i], -1, buffer, bufLen, NULL, &lpUsedDefaultChar);
 if (!SUCCEEDED(retval)) {
 LocalFree(result);
 LocalFree(args);
 return NULL;
 }
 result[i] = buffer;
 buffer += retval;
 bufLen -= retval;
 }
 LocalFree(args);
 *pNumArgs = numArgs;
 return result;
}
static void lmemcpy(char *dest, const char *src, DWORD len)
{
 /* copy 4 bytes at once */
 for (; len > 3; len -= 4, dest += 4, src += 4)
 *(long *)dest = *(long *)src;
 while (len--)
 *dest++ = *src++;
}
static void catstdin(void)
{
 DWORD chars_read = 0;
 ReadConsoleA(stdin, input_buffer, 2048, &chars_read, &crc);
 WriteConsoleA(stdout, input_buffer, chars_read, NULL, NULL);
}
static void catfile(char *filepath)
{
 HANDLE filehandle = CreateFileA(filepath, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
 if (filehandle == INVALID_HANDLE_VALUE) {
 WriteConsoleA(stdout, "Error could not open file: ", 27, NULL, NULL);
 WriteConsoleA(stdout, filepath, lstrlenA(filepath), NULL, NULL);
 ExitProcess(GetLastError());
 }
 DWORD filelength = GetFileSize(filehandle, NULL);
 if (filelength > output_capacity) { /* see if we need to allocate more memory */
 char *new_buffer = HeapAlloc(GetProcessHeap(), 0, filelength * 2); /* copy the data from the old memory to the new memory */
 lmemcpy(new_buffer, output_buffer, output_capacity);
 HeapFree(GetProcessHeap(), 0, output_buffer); /* free old memory */
 output_capacity = filelength * 2;
 output_buffer = new_buffer;
 }
 ReadFile(filehandle, output_buffer, filelength, NULL, NULL);
 WriteConsoleA(stdout, output_buffer, filelength, NULL, NULL);
 CloseHandle(filehandle); /* close file */
}
void __cdecl mainCRTStartup(void)
{
 /* setup global variables */
 stdout = GetStdHandle(STD_OUTPUT_HANDLE);
 stdin = GetStdHandle(STD_INPUT_HANDLE);
 input_buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 2048);
 output_buffer = HeapAlloc(GetProcessHeap(), 0, 2048);
 output_capacity = 2048;
 /* get argc and argv */
 int argc;
 char **argv = CommandLineToArgvA(GetCommandLineW(), &argc) + 1;
 argc--; /* the first arg is always the program name */
 switch (argc) {
 case 0:
 for (;;) catstdin();
 break;
 default:
 for (int i = 0; i < argc; ++i) {
 if (!lstrcmpA(argv[i], "-"))
 catstdin();
 else
 catfile(argv[i]);
 }
 }
 /* free memory */
 HeapFree(GetProcessHeap(), 0, input_buffer);
 HeapFree(GetProcessHeap(), 0, output_buffer);
 LocalFree(argv);
 /* exit */
 ExitProcess(0);
}
```

Question 2

the type command does what you want, one file at a time.

Question 3

Avoid converting command line arguments to ASCII

There is no good reason to convert the command line arguments to ASCII. All the functions you use that take pointers to ASCII strings also have variants that handle wide strings, for example lstrcmpW() and CreateFileW(). This way, you can get rid of CommandLineToArgvA().

Use `stderr` to report errors

Consider that it is not unlikely that the user of your cat implementation redirects standard output to another file. If there is an error, instead of printing it to the console, you are writing the error message to that file instead. Just add stderr = GetStdHandle(STD_ERROR_HANDLE), and use that for the error messages.

Avoid allocating a buffer as large as each input file

Disk space is typically at least an order of magnitude larger than RAM. If you want to cat a file larger than the amount of free RAM available, your program will fail. It is better to allocate a buffer with a fixed size of say 64 KiB, and use multiple calls to ReadFile() if necessary to read the input as chunks of up to 64 KiB. On one hand, it means more overhead from multiple calls to ReadFile(), on the other hand you will likely stay within the L2 cache of your CPU. In any case, I expect performance will not be changed dramatically by this, but now your program handles arbitrarily sized files.

This will also simplify your code: you no longer have to get the file size and resize the buffer if necessary. Instead, just read until you reach the end of the file.

Use a loop to read from `stdin` until your reach EOF

If you specify - as an argument, you read only up to 2048 bytes from stdin before continuing to the next command line argument. And if you don't specify any arguments at all, you have an infinite loop that reads from stdin, even if there is nothing to read anymore.

Keep in mind that stdin might also have been redirected, and will actually read from a file, or reads the output from another program.

Use the same buffer for `stdin` as for files

There's no need to have two separate buffers, as you only handle either a file or stdin at a time. Just ensure it is large enough.

Handle read and write errors

Things can go wrong. If there is an error reading a file or writing to stdout, you should print an error message to stderr and then immediately exit with a non-zero exit code. This will notify the user of errors. Also, if your cat implementation is used in a batch script, the non-zero exit code will allow that script to detect the error, instead of blindly continuing with invalid data.

G. Sliepen G. Sliepen 68.7k3 gold badges74 silver badges179 bronze badges · Accepted Answer · 2020-08-22 21:22:39Z

Avoid converting command line arguments to ASCII

There is no good reason to convert the command line arguments to ASCII. All the functions you use that take pointers to ASCII strings also have variants that handle wide strings, for example lstrcmpW() and CreateFileW(). This way, you can get rid of CommandLineToArgvA().

Use `stderr` to report errors

Consider that it is not unlikely that the user of your cat implementation redirects standard output to another file. If there is an error, instead of printing it to the console, you are writing the error message to that file instead. Just add stderr = GetStdHandle(STD_ERROR_HANDLE), and use that for the error messages.

Avoid allocating a buffer as large as each input file

Disk space is typically at least an order of magnitude larger than RAM. If you want to cat a file larger than the amount of free RAM available, your program will fail. It is better to allocate a buffer with a fixed size of say 64 KiB, and use multiple calls to ReadFile() if necessary to read the input as chunks of up to 64 KiB. On one hand, it means more overhead from multiple calls to ReadFile(), on the other hand you will likely stay within the L2 cache of your CPU. In any case, I expect performance will not be changed dramatically by this, but now your program handles arbitrarily sized files.

This will also simplify your code: you no longer have to get the file size and resize the buffer if necessary. Instead, just read until you reach the end of the file.

Use a loop to read from `stdin` until your reach EOF

If you specify - as an argument, you read only up to 2048 bytes from stdin before continuing to the next command line argument. And if you don't specify any arguments at all, you have an infinite loop that reads from stdin, even if there is nothing to read anymore.

Keep in mind that stdin might also have been redirected, and will actually read from a file, or reads the output from another program.

Use the same buffer for `stdin` as for files

There's no need to have two separate buffers, as you only handle either a file or stdin at a time. Just ensure it is large enough.

Handle read and write errors

Things can go wrong. If there is an error reading a file or writing to stdout, you should print an error message to stderr and then immediately exit with a non-zero exit code. This will notify the user of errors. Also, if your cat implementation is used in a batch script, the non-zero exit code will allow that script to detect the error, instead of blindly continuing with invalid data.

Stack Exchange Network

Simple Windows implementation of the cat command

1 Answer 1

Avoid converting command line arguments to ASCII

Use `stderr` to report errors

Avoid allocating a buffer as large as each input file

Use a loop to read from `stdin` until your reach EOF

Use the same buffer for `stdin` as for files

Handle read and write errors

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

Simple Windows implementation of the cat command

1 Answer 1

Avoid converting command line arguments to ASCII

Use stderr to report errors

Avoid allocating a buffer as large as each input file

Use a loop to read from stdin until your reach EOF

Use the same buffer for stdin as for files

Handle read and write errors

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related

Hot Network Questions

Use `stderr` to report errors

Use a loop to read from `stdin` until your reach EOF

Use the same buffer for `stdin` as for files