I'm a newbie and I wrote code that gets a WAV file and exports a RAW (only PCM data) to *.raw file. But I am confused about the structure. The code is written as one procedure (LISP style) and there are no functions. Can I get some advice how to reorganize the code?
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "wave.h"
byte buf[5]; // 4 char string buffer
char filename[200]; //
FILE *fp;
FILE *pcm_data;
int main(int argc, char *argv[])
{
strcat(filename, argv[1]);
struct riff_chunk riff = {"RIFF", 0, "WAVE"};
struct fmt_chunk fmt = {"fmt "};
struct data_chunk data = {"data"};
if ((fp = fopen(argv[1], "rb"))==NULL) {
printf("Can't open the file. Exit.\n");
return 1;
}
// Reading RIFF section
if (fread(&riff.id, sizeof(byte), 12, fp)!= 12) {
printf("Can't read RIFF chunk or EOF is met. Exit.\n");
return 1;
} else {
memccpy(buf, riff.id, '0円', 4);
if (strcmp(buf, "RIFF")!=0) {
printf("File format is not RIFF. Exit.\n");
return 1;
}
memccpy(buf, riff.type, '0円', 4);
if (strcmp(buf, "WAVE")!=0) {
printf("File format is not WAVE. Exit.\n");
return 1;
}
};
// Reading fmt.id and fmt.size
if (fread(&fmt, sizeof(byte), 8, fp)!=8) {
printf("Can't read fmt chunk or EOF is met. Exit.\n");
return 1;
} else {
memccpy(buf, fmt.id, '0円', 4);
if (strcmp(buf, "fmt ")!=0) {
printf("File have no fmt chunk. Exit.\n");
return 1;
}
}
// Reading fmt Sample Format Info
if (fread(&fmt.compression, sizeof(byte), fmt.size, fp) != fmt.size) {
printf("Can't read Sample Format Info in fmt chunk or EOF is met. Exit.\n");
return 1;
}
printf("Compression: %d\n", fmt.compression);
printf("Channels: %d\n", fmt.chanels);
printf("Sample Rate: %d\n", fmt.sample_rate);
printf("Bit Rate: %d\n", fmt.bit_per_sample);
// Reading data/some chunk
if (fread(&data, sizeof(byte), 8, fp)!=8) {
printf("Error of reading data chunk. Exit.\n");
return 1;
} else {
while (memccpy(buf, data.id, '0円', 4), strcmp(buf, "data")!=0) {
fseek(fp, data.size, 1); // перемещаем указатель файла на конец чанка (его размер)
fread(&data, sizeof(byte), 8, fp);
}
}
// Reading PCM
byte *dump = (byte*)malloc(data.size);
if (dump == NULL) {
printf("Allocation memory error");
return 1;
}
if (fmt.compression == 1) {
fmt.number_of_blocks = data.size / fmt.block_align;
if ((fread(dump, fmt.block_align, fmt.number_of_blocks, fp))!=fmt.number_of_blocks) {
printf("Readin PCM data error.\n");
return 1;
} else {
strcat(filename, ".raw");
if ((pcm_data = fopen(filename, "wb"))==NULL) {
printf("Can't open the PCM file for write. Exit.\n");
return 1;
}
if(fwrite(dump, fmt.block_align, fmt.number_of_blocks, pcm_data)!=fmt.number_of_blocks) {
printf("Can't write PCM file. Exit.\n");
return 1;
}
printf("------------\nDone. PCM data writing in PCM file. Exit.\n");
}
} else {
printf("Compression type is not PCM. Exit.\n");
return 1;
}
free(dump);
fclose(fp);
fclose(pcm_data);
return 0;
}
wave.h
typedef char byte; // 1 byte \ 8 bit
typedef short int word; // 2 byte \ 16 bit
typedef unsigned int dword; // 4 byte \ 32 bit
struct riff_chunk
{
byte id[4]; // 4b "RIFF" string >|
dword size; // 4b |-> 12 byte
byte type[4]; // 4b "WAVE" string >|
};
struct fmt_chunk
{
byte id[4]; // 4b "fmt" string
dword size; // 4b _____
word compression; // 2b
word chanels; // 2b
dword sample_rate; // 4b _____
dword byte_per_sec; // 4b
word block_align; // 2b
word bit_per_sample; // 2b _____
word extra_format_size; // 2b _____
byte* extra_format_data; // 8b _____
dword number_of_blocks; // 4b _____
};
struct data_chunk
{
byte id[4]; // 4 "data" string
dword size; // 4
};
int dump(word*, dword*);
1 Answer 1
I don't see a great need to re-organise the structure of the code, other than to eliminate the unnecessary global variables (most can be moved to function scope).
There are a few portability problems that need to be addressed. I'll start with the assumption about the width of integer types:
typedef char byte; // 1 byte \ 8 bit typedef short int word; // 2 byte \ 16 bit typedef unsigned int dword; // 4 byte \ 32 bit
The comments are assumptions that will need to be verified for every platform you build this for. There's a portable way to get what you want:
typedef uint8_t byte;
typedef uint16_t word;
typedef uint32_t dword;
Or just use the standard fixed-width types throughout - that's less confusing than inventing your own terminology.
Note that when using printf()
and family, we need to use the correct specifiers. This is wrong:
printf("Compression: %d\n", fmt.compression); printf("Channels: %d\n", fmt.chanels); printf("Sample Rate: %d\n", fmt.sample_rate); printf("Bit Rate: %d\n", fmt.bit_per_sample);
With your original definitions, we should be using %hu
for the first two of these and %u
for the last two. Using the standard fixed-width types, we get macros we can use for formatting:
printf("Compression: %" PRIu16 "\n", fmt.compression);
printf("Channels: %" PRIu16 "\n", fmt.chanels);
printf("Sample Rate: %" PRIu32 "\n", fmt.sample_rate);
printf("Bit Rate: %" PRIu32 "\n", fmt.bit_per_sample);
Here's an inefficiency:
memccpy(buf, riff.id, '0円', 4);
if (strcmp(buf, "RIFF")!=0) {
Firstly, memccpy
isn't standard C (though it is defined by POSIX), so there's a small reduction in portability. But there's no need to copy to buf
here - just compare in place using strncmp()
:
if (strncmp("RIFF", riff.id, sizeof riff.id)) {
There's a lot of hard-coded sizes. For example:
if (fread(&riff.id, sizeof(byte), 12, fp)!= 12) {
It's strange that we write sizeof (byte)
, even though byte
is just an alias for char
(and so has size of 1 char), but then use 12
where we could more meaningfully write sizeof riff
:
if (fread(&riff, 1, sizeof riff, fp) != sizeof riff) {
(I also changed &riff.id
to just &riff
to make it clear that we're writing to the whole structure, not just the id
member.)
When allocating, there's no need for (and possibly slight harm in) casting the returned void
pointer to a more specific pointer size. It's also idiomatic to use the inherent truthiness of a non-null pointer, rather that testing explicitly against NULL
:
byte *dump = malloc(data.size);
if (!dump) {
I'm glad you didn't forget error checking here, and when opening files - that's an important part of programming in C, and you've got that right. One small improvement: error messages should go to stderr
, not stdout
:
fprintf(stderr, "Memory allocation error\n");
BTW, don't forget when compiling to ensure that the structures are packed, without any padding between members.
An enhancement suggestion: allow the user to specify where to write the output (or just send it to standard out, and let them use their shell to redirect it). We'll fail if the input file is in a read-only directory at present.
-
\$\begingroup\$ Thank You so much for detailed answer. \$\endgroup\$Nikita– Nikita2019年06月11日 12:21:26 +00:00Commented Jun 11, 2019 at 12:21
-
1\$\begingroup\$ Use
strncmp()
instead ofstrcmp()
- this is how we compare sequences that might not be null terminated. The extra argument is a maximum number of characters to compare if no NUL is reached. BTW, welcome to the wonderful world of C programming. It's frustrating at times, but rewarding eventually! \$\endgroup\$Toby Speight– Toby Speight2019年06月11日 12:42:57 +00:00Commented Jun 11, 2019 at 12:42 -
1\$\begingroup\$ Well, we could explicitly cast, but it's probably better to change the type of
id
tochar[4]
, since we want to treat them as characters rather than unsigned small values. \$\endgroup\$Toby Speight– Toby Speight2019年06月11日 13:19:48 +00:00Commented Jun 11, 2019 at 13:19 -
1\$\begingroup\$
char
is the smallest addressable unit of memory. On any machine that has auint8_t
, thenchar
must be 8 bits (it's not allowed to be less than that, and if it were greater, then there would be a smaller addressable type, which isn't allowed). On systems wherechar
is wider than 8 bits,uint8_t
will be an error (rather than just silently wrong). \$\endgroup\$Toby Speight– Toby Speight2019年06月11日 13:29:23 +00:00Commented Jun 11, 2019 at 13:29 -
1\$\begingroup\$ Just to add - systems with
char
between 9 and 15 are now quite rare; some specialised platforms (e.g. certain DSPs) have 16-bitchar
, but practically all general-purpose computers have 8-bitchar
. The widths ofint
,long
, etc. are more varied. \$\endgroup\$Toby Speight– Toby Speight2019年06月11日 13:54:06 +00:00Commented Jun 11, 2019 at 13:54