Parse a WAV file and export PCM data

Question 1

I'm a newbie and I wrote code that gets a WAV file and exports a RAW (only PCM data) to *.raw file. But I am confused about the structure. The code is written as one procedure (LISP style) and there are no functions. Can I get some advice how to reorganize the code?

main.c

#include <stdio.h>
#include <stdlib.h> 
#include <string.h> 
#include "wave.h"
byte buf[5]; // 4 char string buffer
char filename[200]; //
FILE *fp;
FILE *pcm_data;
int main(int argc, char *argv[])
{
 strcat(filename, argv[1]);
 struct riff_chunk riff = {"RIFF", 0, "WAVE"};
 struct fmt_chunk fmt = {"fmt "};
 struct data_chunk data = {"data"};
 if ((fp = fopen(argv[1], "rb"))==NULL) {
 printf("Can't open the file. Exit.\n");
 return 1;
 }
 // Reading RIFF section
 if (fread(&riff.id, sizeof(byte), 12, fp)!= 12) {
 printf("Can't read RIFF chunk or EOF is met. Exit.\n");
 return 1;
 } else {
 memccpy(buf, riff.id, '0円', 4);
 if (strcmp(buf, "RIFF")!=0) {
 printf("File format is not RIFF. Exit.\n");
 return 1;
 }
 memccpy(buf, riff.type, '0円', 4);
 if (strcmp(buf, "WAVE")!=0) {
 printf("File format is not WAVE. Exit.\n");
 return 1;
 }
 };
 // Reading fmt.id and fmt.size
 if (fread(&fmt, sizeof(byte), 8, fp)!=8) {
 printf("Can't read fmt chunk or EOF is met. Exit.\n");
 return 1;
 } else {
 memccpy(buf, fmt.id, '0円', 4);
 if (strcmp(buf, "fmt ")!=0) {
 printf("File have no fmt chunk. Exit.\n");
 return 1;
 }
 }
 // Reading fmt Sample Format Info
 if (fread(&fmt.compression, sizeof(byte), fmt.size, fp) != fmt.size) {
 printf("Can't read Sample Format Info in fmt chunk or EOF is met. Exit.\n");
 return 1;
 }
 printf("Compression: %d\n", fmt.compression);
 printf("Channels: %d\n", fmt.chanels);
 printf("Sample Rate: %d\n", fmt.sample_rate);
 printf("Bit Rate: %d\n", fmt.bit_per_sample);
 // Reading data/some chunk
 if (fread(&data, sizeof(byte), 8, fp)!=8) {
 printf("Error of reading data chunk. Exit.\n");
 return 1;
 } else {
 while (memccpy(buf, data.id, '0円', 4), strcmp(buf, "data")!=0) {
 fseek(fp, data.size, 1); // перемещаем указатель файла на конец чанка (его размер)
 fread(&data, sizeof(byte), 8, fp);
 }
 }
 // Reading PCM
 byte *dump = (byte*)malloc(data.size);
 if (dump == NULL) {
 printf("Allocation memory error");
 return 1;
 }
 if (fmt.compression == 1) {
 fmt.number_of_blocks = data.size / fmt.block_align;
 if ((fread(dump, fmt.block_align, fmt.number_of_blocks, fp))!=fmt.number_of_blocks) { 
 printf("Readin PCM data error.\n");
 return 1;
 } else {
 strcat(filename, ".raw");
 if ((pcm_data = fopen(filename, "wb"))==NULL) {
 printf("Can't open the PCM file for write. Exit.\n");
 return 1;
 }
 if(fwrite(dump, fmt.block_align, fmt.number_of_blocks, pcm_data)!=fmt.number_of_blocks) {
 printf("Can't write PCM file. Exit.\n");
 return 1;
 }
 printf("------------\nDone. PCM data writing in PCM file. Exit.\n");
 }
 } else {
 printf("Compression type is not PCM. Exit.\n");
 return 1;
 }
 free(dump);
 fclose(fp);
 fclose(pcm_data);
 return 0;
}

wave.h

typedef char byte; // 1 byte \ 8 bit 
typedef short int word; // 2 byte \ 16 bit
typedef unsigned int dword; // 4 byte \ 32 bit
struct riff_chunk
{
 byte id[4]; // 4b "RIFF" string >|
 dword size; // 4b |-> 12 byte
 byte type[4]; // 4b "WAVE" string >|
};
struct fmt_chunk 
{
 byte id[4]; // 4b "fmt" string
 dword size; // 4b _____
 word compression; // 2b
 word chanels; // 2b
 dword sample_rate; // 4b _____
 dword byte_per_sec; // 4b
 word block_align; // 2b
 word bit_per_sample; // 2b _____
 word extra_format_size; // 2b _____
 byte* extra_format_data; // 8b _____
 dword number_of_blocks; // 4b _____
};
struct data_chunk
{
 byte id[4]; // 4 "data" string
 dword size; // 4
};
int dump(word*, dword*);

Question 2

I don't see a great need to re-organise the structure of the code, other than to eliminate the unnecessary global variables (most can be moved to function scope).

There are a few portability problems that need to be addressed. I'll start with the assumption about the width of integer types:

typedef char byte; // 1 byte \ 8 bit 
typedef short int word; // 2 byte \ 16 bit
typedef unsigned int dword; // 4 byte \ 32 bit

The comments are assumptions that will need to be verified for every platform you build this for. There's a portable way to get what you want:

typedef uint8_t byte;
typedef uint16_t word;
typedef uint32_t dword;

Or just use the standard fixed-width types throughout - that's less confusing than inventing your own terminology.

Note that when using printf() and family, we need to use the correct specifiers. This is wrong:

printf("Compression: %d\n", fmt.compression);
printf("Channels: %d\n", fmt.chanels);
printf("Sample Rate: %d\n", fmt.sample_rate);
printf("Bit Rate: %d\n", fmt.bit_per_sample);

With your original definitions, we should be using %hu for the first two of these and %u for the last two. Using the standard fixed-width types, we get macros we can use for formatting:

printf("Compression: %" PRIu16 "\n", fmt.compression);
printf("Channels: %" PRIu16 "\n", fmt.chanels);
printf("Sample Rate: %" PRIu32 "\n", fmt.sample_rate);
printf("Bit Rate: %" PRIu32 "\n", fmt.bit_per_sample);

Here's an inefficiency:

 memccpy(buf, riff.id, '0円', 4);
 if (strcmp(buf, "RIFF")!=0) {

Firstly, memccpy isn't standard C (though it is defined by POSIX), so there's a small reduction in portability. But there's no need to copy to buf here - just compare in place using strncmp():

 if (strncmp("RIFF", riff.id, sizeof riff.id)) {

There's a lot of hard-coded sizes. For example:

if (fread(&riff.id, sizeof(byte), 12, fp)!= 12) {

It's strange that we write sizeof (byte), even though byte is just an alias for char (and so has size of 1 char), but then use 12 where we could more meaningfully write sizeof riff:

if (fread(&riff, 1, sizeof riff, fp) != sizeof riff) {

(I also changed &riff.id to just &riff to make it clear that we're writing to the whole structure, not just the id member.)

When allocating, there's no need for (and possibly slight harm in) casting the returned void pointer to a more specific pointer size. It's also idiomatic to use the inherent truthiness of a non-null pointer, rather that testing explicitly against NULL:

byte *dump = malloc(data.size);
if (!dump) {

I'm glad you didn't forget error checking here, and when opening files - that's an important part of programming in C, and you've got that right. One small improvement: error messages should go to stderr, not stdout:

 fprintf(stderr, "Memory allocation error\n");

BTW, don't forget when compiling to ensure that the structures are packed, without any padding between members.

An enhancement suggestion: allow the user to specify where to write the output (or just send it to standard out, and let them use their shell to redirect it). We'll fail if the input file is in a read-only directory at present.

Question 3

Thank You so much for detailed answer.

Question 4

Use strncmp() instead of strcmp() - this is how we compare sequences that might not be null terminated. The extra argument is a maximum number of characters to compare if no NUL is reached. BTW, welcome to the wonderful world of C programming. It's frustrating at times, but rewarding eventually!

Question 5

Well, we could explicitly cast, but it's probably better to change the type of id to char[4], since we want to treat them as characters rather than unsigned small values.

Question 6

char is the smallest addressable unit of memory. On any machine that has a uint8_t, then char must be 8 bits (it's not allowed to be less than that, and if it were greater, then there would be a smaller addressable type, which isn't allowed). On systems where char is wider than 8 bits, uint8_t will be an error (rather than just silently wrong).

Question 7

Just to add - systems with char between 9 and 15 are now quite rare; some specialised platforms (e.g. certain DSPs) have 16-bit char, but practically all general-purpose computers have 8-bit char. The widths of int, long, etc. are more varied.

Toby Speight Toby Speight 87.1k14 gold badges104 silver badges322 bronze badges · Accepted Answer · 2019-06-11 09:04:11Z

I don't see a great need to re-organise the structure of the code, other than to eliminate the unnecessary global variables (most can be moved to function scope).

There are a few portability problems that need to be addressed. I'll start with the assumption about the width of integer types:

typedef char byte; // 1 byte \ 8 bit 
typedef short int word; // 2 byte \ 16 bit
typedef unsigned int dword; // 4 byte \ 32 bit

The comments are assumptions that will need to be verified for every platform you build this for. There's a portable way to get what you want:

typedef uint8_t byte;
typedef uint16_t word;
typedef uint32_t dword;

Or just use the standard fixed-width types throughout - that's less confusing than inventing your own terminology.

Note that when using printf() and family, we need to use the correct specifiers. This is wrong:

printf("Compression: %d\n", fmt.compression);
printf("Channels: %d\n", fmt.chanels);
printf("Sample Rate: %d\n", fmt.sample_rate);
printf("Bit Rate: %d\n", fmt.bit_per_sample);

With your original definitions, we should be using %hu for the first two of these and %u for the last two. Using the standard fixed-width types, we get macros we can use for formatting:

printf("Compression: %" PRIu16 "\n", fmt.compression);
printf("Channels: %" PRIu16 "\n", fmt.chanels);
printf("Sample Rate: %" PRIu32 "\n", fmt.sample_rate);
printf("Bit Rate: %" PRIu32 "\n", fmt.bit_per_sample);

Here's an inefficiency:

 memccpy(buf, riff.id, '0円', 4);
 if (strcmp(buf, "RIFF")!=0) {

Firstly, memccpy isn't standard C (though it is defined by POSIX), so there's a small reduction in portability. But there's no need to copy to buf here - just compare in place using strncmp():

 if (strncmp("RIFF", riff.id, sizeof riff.id)) {

There's a lot of hard-coded sizes. For example:

if (fread(&riff.id, sizeof(byte), 12, fp)!= 12) {

It's strange that we write sizeof (byte), even though byte is just an alias for char (and so has size of 1 char), but then use 12 where we could more meaningfully write sizeof riff:

if (fread(&riff, 1, sizeof riff, fp) != sizeof riff) {

(I also changed &riff.id to just &riff to make it clear that we're writing to the whole structure, not just the id member.)

When allocating, there's no need for (and possibly slight harm in) casting the returned void pointer to a more specific pointer size. It's also idiomatic to use the inherent truthiness of a non-null pointer, rather that testing explicitly against NULL:

byte *dump = malloc(data.size);
if (!dump) {

I'm glad you didn't forget error checking here, and when opening files - that's an important part of programming in C, and you've got that right. One small improvement: error messages should go to stderr, not stdout:

 fprintf(stderr, "Memory allocation error\n");

BTW, don't forget when compiling to ensure that the structures are packed, without any padding between members.

An enhancement suggestion: allow the user to specify where to write the output (or just send it to standard out, and let them use their shell to redirect it). We'll fail if the input file is in a read-only directory at present.

Use strncmp() instead of strcmp() - this is how we compare sequences that might not be null terminated. The extra argument is a maximum number of characters to compare if no NUL is reached. BTW, welcome to the wonderful world of C programming. It's frustrating at times, but rewarding eventually!
Well, we could explicitly cast, but it's probably better to change the type of id to char[4], since we want to treat them as characters rather than unsigned small values.
char is the smallest addressable unit of memory. On any machine that has a uint8_t, then char must be 8 bits (it's not allowed to be less than that, and if it were greater, then there would be a smaller addressable type, which isn't allowed). On systems where char is wider than 8 bits, uint8_t will be an error (rather than just silently wrong).
Just to add - systems with char between 9 and 15 are now quite rare; some specialised platforms (e.g. certain DSPs) have 16-bit char, but practically all general-purpose computers have 8-bit char. The widths of int, long, etc. are more varied.

Stack Exchange Network

Parse a WAV file and export PCM data

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Parse a WAV file and export PCM data

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions