0

I'm developing a voice control system using an ESP32 with an INMP441 microphone. The system records 5-second audio clips, transmits them to a Node.js server, which then uses Google Speech-to-Text API for transcription. The transcribed text is forwarded to a Java client for ChatGPT processing, and the response is sent back to the ESP32. I'm encountering a critical issue with the audio file upload. My ESP32 successfully records audio (reporting 20,480 bytes recorded), but when attempting to upload the file, it reports "Uploading file, size: 8 bytes" followed by "Upload failed, error code: -11". This suggests the file isn't being properly saved to SPIFFS or there's an issue with the HTTP client connection. I've verified my WiFi credentials are correct and can see the ESP32 connecting to the network. My server is running on 192.168.1.180:8888 and shows no errors, but it's not receiving the audio files. I need help debugging the file storage and upload process to identify whether this is a SPIFFS writing issue, a file handle problem, or a network connectivity issue.

Here is my code for the ESP32 audio transmission:

#include <Arduino.h>
#include <WiFi.h>
#include <driver/i2s.h>
#include <SPIFFS.h>
#include <HTTPClient.h>
// Wi-Fi credentials
// NOTE(review): network credentials are hard-coded in the sketch; consider keeping
// them out of any code that is shared or committed.
const char* ssid = "4B";
const char* password = "88886666@feihe";
// Base URL of the upload server; uploadFile() concatenates it with uploadEndpoint.
// NOTE(review): the host part ("xxx.180") looks redacted — it must be a reachable
// host or IP address for the upload to work; confirm before flashing.
const char* serverHost = "http://xxx.180:8888"; // Server address with port
const char* uploadEndpoint = "/uploadAudio";
// I2S pin configuration (word select, serial data in, bit clock) and the I2S port
// used by i2sInit() and recordAudio().
#define I2S_WS 4
#define I2S_SD 5
#define I2S_SCK 6
#define I2S_PORT I2S_NUM_0
// Audio parameters
#define SAMPLE_RATE 16000 // Sample rate in Hz
#define I2S_SAMPLE_BITS 16
// I2S_BUFFER_SIZE is also passed to the I2S driver as dma_buf_len by i2sInit().
// NOTE(review): the comment below says "16-bit samples", but I2S_READ_LEN (derived
// from it) is used by recordAudio() as a BYTE count for malloc() and i2s_read().
#define I2S_BUFFER_SIZE (1024*16) // Size of each I2S buffer (in 16-bit samples)
#define I2S_NUM_BUFFERS 4 // Number of buffers
#define I2S_READ_LEN (I2S_BUFFER_SIZE * I2S_NUM_BUFFERS) // Total buffer size
#define RECORD_TIME 5 // Record duration in seconds
// Calculating the size of the recorded audio:
// bytes per sample * samples per second * seconds = 2 * 16000 * 5 = 160000 bytes.
const int AUDIO_DATA_SIZE = (I2S_SAMPLE_BITS / 8) * SAMPLE_RATE * RECORD_TIME;
// Expected size of the complete WAV file (not referenced by the code shown here).
const int FLASH_RECORD_SIZE = AUDIO_DATA_SIZE + 44; // Including WAV header
// File management
// SPIFFS path of the recording, and the size in bytes of the WAV header that
// writeWavHeader() emits at the start of that file.
const char filename[] = "/audio_recording.wav";
const int headerSize = 44;
// Status flags
// recordingComplete is set by recordAudio() and checked by setup() before uploading;
// uploadComplete is set by uploadFile() after an HTTP 200 response.
volatile bool recordingComplete = false;
volatile bool uploadComplete = false;
// Function prototypes
void i2sInit();
void recordAudio();
void writeWavHeader(File &file, uint32_t dataSize);
void updateWavHeader(const char* filepath, uint32_t dataSize);
void uploadFile(const char* filepath);
void setup() {
 Serial.begin(115200);
 delay(1000);
 Serial.println("\nESP32 Voice Control System Starting...");
 // Initialize SPIFFS
 if(!SPIFFS.begin(true)){
 Serial.println("SPIFFS initialization failed!");
 while(1); // Infinite loop if SPIFFS initialization fails
 }
 Serial.println("SPIFFS initialized successfully");
 // Connect to Wi-Fi
 WiFi.begin(ssid, password);
 Serial.print("Connecting to WiFi");
 while(WiFi.status() != WL_CONNECTED){
 delay(500);
 Serial.print(".");
 }
 
 if(WiFi.status() == WL_CONNECTED) {
 Serial.println("\nWiFi connected!");
 Serial.print("IP address: ");
 Serial.println(WiFi.localIP());
 } else {
 Serial.println("\nWiFi connection failed. Continuing without network...");
 }
 // Initialize I2S
 i2sInit();
 Serial.println("I2S initialized");
 // Start the recording process
 recordAudio();
 
 // Upload the recorded file
 if(recordingComplete && WiFi.status() == WL_CONNECTED) {
 uploadFile(filename);
 }
 
 Serial.println("Operation complete. Going to sleep...");
}
// Idle loop: all of the work is done once in setup(), so each pass simply waits.
void loop() {
  const uint32_t idlePeriodMs = 1000;
  delay(idlePeriodMs);
}
// I2S initialization
void i2sInit() {
 i2s_config_t i2s_config = {
 .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
 .sample_rate = SAMPLE_RATE,
 .bits_per_sample = i2s_bits_per_sample_t(I2S_SAMPLE_BITS),
 .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
 .communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_STAND_I2S),
 .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
 .dma_buf_count = 8,
 .dma_buf_len = I2S_BUFFER_SIZE,
 .use_apll = true,
 .tx_desc_auto_clear = false,
 .fixed_mclk = 0
 };
 i2s_pin_config_t pin_config = {
 .bck_io_num = I2S_SCK,
 .ws_io_num = I2S_WS,
 .data_out_num = -1,
 .data_in_num = I2S_SD
 };
 esp_err_t err = i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);
 if (err != ESP_OK) {
 Serial.printf("Failed to install I2S driver: %d\n", err);
 return;
 }
 
 err = i2s_set_pin(I2S_PORT, &pin_config);
 if (err != ESP_OK) {
 Serial.printf("Failed to set I2S pins: %d\n", err);
 return;
 }
 
 // Clear I2S buffer
 i2s_zero_dma_buffer(I2S_PORT);
}
// Record audio function
void recordAudio() {
 // Allocate buffer for recording
 int16_t* i2sBuffer = (int16_t*) malloc(I2S_READ_LEN);
 if (!i2sBuffer) {
 Serial.println("Failed to allocate I2S buffer");
 return;
 }
 
 // Clear any old file
 if (SPIFFS.exists(filename)) {
 SPIFFS.remove(filename);
 }
 
 // Open file for writing
 File audioFile = SPIFFS.open(filename, FILE_WRITE);
 if (!audioFile) {
 Serial.println("Failed to open file for writing");
 free(i2sBuffer);
 return;
 }
 
 // Write WAV header
 writeWavHeader(audioFile, AUDIO_DATA_SIZE);
 
 // Start recording
 Serial.println("Recording started...");
 size_t bytesRead = 0;
 unsigned long totalBytesWritten = 0;
 unsigned long startTime = millis();
 
 // Record for RECORD_TIME seconds
 while(millis() - startTime < RECORD_TIME * 1000) {
 esp_err_t result = i2s_read(I2S_PORT, i2sBuffer, I2S_READ_LEN, &bytesRead, 100);
 
 if (result == ESP_OK && bytesRead > 0) {
 audioFile.write((uint8_t*)i2sBuffer, bytesRead);
 totalBytesWritten += bytesRead;
 }
 }
 
 // Close file
 audioFile.close();
 
 // Update WAV header with actual data size
 updateWavHeader(filename, totalBytesWritten);
 
 Serial.printf("Recording finished. %lu bytes recorded\n", totalBytesWritten);
 
 // Free the buffer
 free(i2sBuffer);
 
 // Set the flag
 recordingComplete = true;
}
// Function to write the WAV header
/**
 * Emits the 44-byte canonical PCM WAV header (mono, SAMPLE_RATE Hz,
 * I2S_SAMPLE_BITS bits per sample) to `file` at its current position.
 *
 * @param file      Open file to write the header into.
 * @param dataSize  Size in bytes of the PCM data that follows the header.
 */
void writeWavHeader(File &file, uint32_t dataSize) {
  unsigned char wav[headerSize];
  int cursor = 0;

  // Appends a four-character chunk identifier.
  auto putTag = [&](const char* tag) {
    for (int i = 0; i < 4; ++i) {
      wav[cursor++] = (unsigned char)tag[i];
    }
  };
  // Appends the low `width` bytes of `value`, least significant byte first.
  auto putLE = [&](uint32_t value, int width) {
    for (int i = 0; i < width; ++i) {
      wav[cursor++] = (value >> (8 * i)) & 0xFF;
    }
  };

  const uint32_t riffSize = dataSize + headerSize - 8;               // file size minus "RIFF" + size field
  const uint32_t bytesPerSecond = SAMPLE_RATE * 1 * I2S_SAMPLE_BITS/8; // rate * channels * bytes/sample
  const uint32_t blockAlign = 1 * I2S_SAMPLE_BITS/8;                  // channels * bytes/sample

  putTag("RIFF");
  putLE(riffSize, 4);          // patched again after recording
  putTag("WAVE");
  putTag("fmt ");
  putLE(16, 4);                // fmt chunk length for PCM
  putLE(1, 2);                 // audio format: PCM
  putLE(1, 2);                 // channel count: mono
  putLE(SAMPLE_RATE, 4);
  putLE(bytesPerSecond, 4);
  putLE(blockAlign, 2);
  putLE(I2S_SAMPLE_BITS, 2);
  putTag("data");
  putLE(dataSize, 4);          // patched again after recording

  file.write(wav, headerSize);
}
// Function to update the WAV header after recording
/**
 * Patches the two size fields of an existing WAV file so they match the amount
 * of audio actually recorded.
 *
 * The file is left untouched (and an error is printed) when it is too short
 * to contain a header: patching offsets 4 and 40 of an empty file would only
 * create a tiny bogus file that looks like a recording.
 *
 * @param filepath  SPIFFS path of the WAV file.
 * @param dataSize  Number of PCM bytes stored after the header.
 */
void updateWavHeader(const char* filepath, uint32_t dataSize) {
  File file = SPIFFS.open(filepath, "r+");
  if (!file) {
    Serial.println("Failed to open file for header update");
    return;
  }

  if (file.size() < (size_t)headerSize) {
    Serial.printf("WAV header missing (file is %lu bytes), skipping header update\n",
                  (unsigned long)file.size());
    file.close();
    return;
  }

  // RIFF chunk size is the total file size minus the 8-byte "RIFF"+size prefix.
  const uint32_t riffSize = dataSize + headerSize - 8;

  // WAV fields are little-endian; pack them explicitly rather than relying on
  // the in-memory layout of uint32_t.
  const uint8_t riffBytes[4] = {
    (uint8_t)(riffSize & 0xFF), (uint8_t)((riffSize >> 8) & 0xFF),
    (uint8_t)((riffSize >> 16) & 0xFF), (uint8_t)((riffSize >> 24) & 0xFF)
  };
  const uint8_t dataBytes[4] = {
    (uint8_t)(dataSize & 0xFF), (uint8_t)((dataSize >> 8) & 0xFF),
    (uint8_t)((dataSize >> 16) & 0xFF), (uint8_t)((dataSize >> 24) & 0xFF)
  };

  // Offset 4: RIFF chunk size. Offset 40: data chunk size.
  const bool patched = file.seek(4) && file.write(riffBytes, 4) == 4 &&
                       file.seek(40) && file.write(dataBytes, 4) == 4;
  if (!patched) {
    Serial.println("Failed to update WAV header");
  }

  file.close();
}
// Function to upload file to the server
void uploadFile(const char* filepath) {
 if (WiFi.status() != WL_CONNECTED) {
 Serial.println("WiFi not connected, cannot upload");
 return;
 }
 
 File file = SPIFFS.open(filepath, "r");
 if(!file) {
 Serial.println("Failed to open audio file for upload");
 return;
 }
 uint32_t fileSize = file.size();
 Serial.printf("Uploading file, size: %lu bytes\n", fileSize);
 HTTPClient client;
 String url = String(serverHost) + uploadEndpoint;
 client.begin(url);
 client.addHeader("Content-Type", "audio/wav");
 client.addHeader("Content-Length", String(fileSize));
 
 Serial.print("Uploading to: ");
 Serial.println(url);
 
 int httpCode = client.sendRequest("POST", &file, fileSize);
 file.close();
 
 if(httpCode == HTTP_CODE_OK) {
 String response = client.getString();
 Serial.println("Server response: " + response);
 uploadComplete = true;
 } else {
 Serial.printf("Upload failed, error code: %d\n", httpCode);
 }
 client.end();
}

and then the server file:

const express = require('express');
const { SpeechClient } = require('@google-cloud/speech');
const fs = require('fs');
const path = require('path');
// Ports: HTTP uploads from the ESP32, and Socket.IO traffic for the other clients
const PORT_HTTP = 8888;
const PORT_SOCKET = 3001;

// Express app (wrapped in a plain Node HTTP server) that receives the file uploads
const app = express();
const httpServer = require('http').createServer(app);

// Stand-alone Socket.IO server; allowEIO3 lets Engine.IO v3 clients connect,
// and CORS is open to every origin
const io = require('socket.io')(PORT_SOCKET, {
  allowEIO3: true,
  cors: { origin: "*" },
});

// Google Speech-to-Text client
const speech = new SpeechClient();

// Uploaded recordings are stored here; create the folder on first start
const UPLOAD_DIR = './uploads';
const uploadDirExists = fs.existsSync(UPLOAD_DIR);
if (!uploadDirExists) {
  fs.mkdirSync(UPLOAD_DIR);
}

// Parse audio/wav request bodies (up to 10 MB) into a raw Buffer on req.body
const rawAudioParser = express.raw({ type: 'audio/wav', limit: '10mb' });
app.use(rawAudioParser);
// HTTP file upload endpoint
// Accepts a raw audio/wav body, stores it under UPLOAD_DIR, transcribes it and
// broadcasts the transcript over Socket.IO as 'processText'.
// Responses: 200 + transcript on success, 400 for an unusable upload or an
// empty transcription, 500 for any unexpected processing error.
app.post('/uploadAudio', async (req, res) => {
  // A canonical PCM WAV header is 44 bytes; anything that small holds no audio.
  const WAV_HEADER_BYTES = 44;
  try {
    // express.raw() only yields a Buffer when the request's Content-Type matched
    // and a body was sent; otherwise req.body is not a Buffer. Reject that up
    // front with a 400 instead of letting the file write throw and report a 500.
    const audioData = req.body;
    if (!Buffer.isBuffer(audioData) || audioData.length <= WAV_HEADER_BYTES) {
      const received = Buffer.isBuffer(audioData) ? audioData.length : 0;
      console.warn(`Rejected upload: expected audio/wav body, got ${received} bytes`);
      res.status(400).send('Expected a non-empty audio/wav request body');
      return;
    }

    // Create a unique filename using template literals
    const fileName = `recording_${Date.now()}.wav`;
    const filePath = path.join(UPLOAD_DIR, fileName);

    // Save the audio file without blocking the event loop
    await fs.promises.writeFile(filePath, audioData);
    console.log(`Audio file saved: ${fileName}`);

    // Perform speech-to-text transcription
    const transcription = await speechToText(filePath);
    console.log('Transcription:', transcription);
    if (!transcription) {
      res.status(400).send("speech recognition failed");
      return;
    }

    // Emit transcription to connected Java client via Socket.IO
    io.emit('processText', transcription);

    res.status(200).send(transcription);
  } catch (error) {
    console.error('Error processing audio:', error);
    res.status(500).send('Audio processing failed');
  }
});
// Socket.IO connection handling: log each client's lifecycle and rebroadcast
// any 'chatgptResponse' message to every connected client.
io.on('connection', function onClientConnected(socket) {
  console.log('Client connected via Socket.IO:', socket.id);

  // Relay the reply to all connected clients (the sender included)
  function relayChatgptResponse(response) {
    console.log('Received chatgptResponse:', response);
    io.emit('chatgptResponse', response);
  }

  function logDisconnect() {
    console.log('Client disconnected:', socket.id);
  }

  socket.on('chatgptResponse', relayChatgptResponse);
  socket.on('disconnect', logDisconnect);
});
// Speech-to-text function using Google Speech API
// Reads the file at filePath, sends it base64-encoded to the recognizer as
// 16 kHz LINEAR16 en-US audio, and resolves to the top transcript of every
// result joined by newlines ('' when the API returns no results).
// Errors are logged and rethrown for the caller to handle.
async function speechToText(filePath) {
  try {
    const encodedAudio = fs.readFileSync(filePath).toString('base64');
    const request = {
      audio: { content: encodedAudio },
      config: {
        encoding: 'LINEAR16',
        sampleRateHertz: 16000,
        languageCode: 'en-US',
      },
    };

    console.log("Sending request to Google Speech API...");
    const [response] = await speech.recognize(request);
    console.log('Google Speech Response received');

    const results = response.results;
    if (!results || results.length === 0) {
      console.log('No transcription results returned');
      return '';
    }

    // Keep only the first (highest-ranked) alternative of each result
    const transcripts = [];
    for (const result of results) {
      transcripts.push(result.alternatives[0].transcript);
    }
    return transcripts.join('\n');
  } catch (error) {
    console.error('Detailed transcription error:', error);
    throw error; // Rethrow so it's caught in the upload handler
  }
}
// Start the HTTP server for file uploads and report once it is listening
httpServer.listen(PORT_HTTP, function onHttpListening() {
  console.log(`HTTP server running on port ${PORT_HTTP}`);
});
// The Socket.IO server began listening on PORT_SOCKET when it was constructed
console.log(`Socket.IO server running on port ${PORT_SOCKET}`);

I don't know where the error is. Please help me debug it; this project is due in a few days.

asked Mar 1, 2025 at 8:57
3
  • If you think the file may not be there the most basic thing you can do to debug is verify that it's there. Add code after you call recordAudio() to check if the file exists, show its size, print some of its contents as hex. Commented Mar 1, 2025 at 16:00
  • Recording finished. 172032 bytes recorded Recorded file '/audio_recording.wav' exists, size: 172076 bytes First 64 bytes of the file in hex: 52 49 46 46 24 A0 02 00 57 41 56 45 66 6D 74 20 10 00 00 00 01 00 01 00 80 3E 00 00 00 7D 00 00 02 00 10 00 64 61 74 61 00 A0 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 This is what's shown. What does it mean? Commented Mar 2, 2025 at 15:56
  • The first four bytes are the letters 'R', 'I', 'F', 'F', which your function writeWavHeader() stores in the file. So the issue isn't that the file isn't getting created or that the data is being stored incorrectly. So what does error -11 mean when you're trying to do the upload? It's easy to find out with a web search. Commented Mar 2, 2025 at 21:01

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.