I'm developing a voice control system using an ESP32 with an INMP441 microphone. The system records 5-second audio clips, transmits them to a Node.js server, which then uses Google Speech-to-Text API for transcription. The transcribed text is forwarded to a Java client for ChatGPT processing, and the response is sent back to the ESP32. I'm encountering a critical issue with the audio file upload. My ESP32 successfully records audio (reporting 20,480 bytes recorded), but when attempting to upload the file, it reports "Uploading file, size: 8 bytes" followed by "Upload failed, error code: -11". This suggests the file isn't being properly saved to SPIFFS or there's an issue with the HTTP client connection. I've verified my WiFi credentials are correct and can see the ESP32 connecting to the network. My server is running on 192.168.1.180:8888 and shows no errors, but it's not receiving the audio files. I need help debugging the file storage and upload process to identify whether this is a SPIFFS writing issue, a file handle problem, or a network connectivity issue.
Here is my code for the ESP32 audio transmission:
#include <Arduino.h>
#include <WiFi.h>
#include <driver/i2s.h>
#include <SPIFFS.h>
#include <HTTPClient.h>
// Wi-Fi credentials
// NOTE(review): credentials and server address are hard-coded; keep real
// values out of version control and public posts.
const char* ssid = "4B";
const char* password = "88886666@feihe";
const char* serverHost = "http://xxx.180:8888"; // Server address with port
const char* uploadEndpoint = "/uploadAudio";
// I2S pin configuration
#define I2S_WS 4
#define I2S_SD 5
#define I2S_SCK 6
#define I2S_PORT I2S_NUM_0
// Audio parameters
#define SAMPLE_RATE 16000 // Sample rate in Hz
#define I2S_SAMPLE_BITS 16
// Frames per DMA buffer. i2sInit() hands this to the driver as dma_buf_len,
// which the legacy I2S driver limits to roughly 1024 frames (older releases
// reject larger values, newer ones silently clamp them). The previous value
// of 1024*16 was far outside that range.
#define I2S_BUFFER_SIZE 1024
#define I2S_NUM_BUFFERS 4 // Multiplier used to size the read buffer below
// Size in BYTES of the RAM buffer that recordAudio() allocates and passes to
// i2s_read() (4096 bytes with the values above).
#define I2S_READ_LEN (I2S_BUFFER_SIZE * I2S_NUM_BUFFERS)
#define RECORD_TIME 5 // Record duration in seconds
// Nominal size of RECORD_TIME seconds of mono PCM audio, in bytes
const int AUDIO_DATA_SIZE = (I2S_SAMPLE_BITS / 8) * SAMPLE_RATE * RECORD_TIME;
const int FLASH_RECORD_SIZE = AUDIO_DATA_SIZE + 44; // Including WAV header
// File management
const char filename[] = "/audio_recording.wav";
const int headerSize = 44; // Bytes in the WAV header written by writeWavHeader()
// Status flags
volatile bool recordingComplete = false; // Set by recordAudio()
volatile bool uploadComplete = false;    // Set by uploadFile() on HTTP 200
// Function prototypes
void i2sInit();
void recordAudio();
void writeWavHeader(File &file, uint32_t dataSize);
void updateWavHeader(const char* filepath, uint32_t dataSize);
void uploadFile(const char* filepath);
void setup() {
Serial.begin(115200);
delay(1000);
Serial.println("\nESP32 Voice Control System Starting...");
// Initialize SPIFFS
if(!SPIFFS.begin(true)){
Serial.println("SPIFFS initialization failed!");
while(1); // Infinite loop if SPIFFS initialization fails
}
Serial.println("SPIFFS initialized successfully");
// Connect to Wi-Fi
WiFi.begin(ssid, password);
Serial.print("Connecting to WiFi");
while(WiFi.status() != WL_CONNECTED){
delay(500);
Serial.print(".");
}
if(WiFi.status() == WL_CONNECTED) {
Serial.println("\nWiFi connected!");
Serial.print("IP address: ");
Serial.println(WiFi.localIP());
} else {
Serial.println("\nWiFi connection failed. Continuing without network...");
}
// Initialize I2S
i2sInit();
Serial.println("I2S initialized");
// Start the recording process
recordAudio();
// Upload the recorded file
if(recordingComplete && WiFi.status() == WL_CONNECTED) {
uploadFile(filename);
}
Serial.println("Operation complete. Going to sleep...");
}
/** Idle loop: all work happens once in setup(), so this only waits. */
void loop() {
  static const uint32_t idleDelayMs = 1000;
  delay(idleDelayMs);
}
// I2S initialization
void i2sInit() {
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = SAMPLE_RATE,
.bits_per_sample = i2s_bits_per_sample_t(I2S_SAMPLE_BITS),
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_STAND_I2S),
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 8,
.dma_buf_len = I2S_BUFFER_SIZE,
.use_apll = true,
.tx_desc_auto_clear = false,
.fixed_mclk = 0
};
i2s_pin_config_t pin_config = {
.bck_io_num = I2S_SCK,
.ws_io_num = I2S_WS,
.data_out_num = -1,
.data_in_num = I2S_SD
};
esp_err_t err = i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);
if (err != ESP_OK) {
Serial.printf("Failed to install I2S driver: %d\n", err);
return;
}
err = i2s_set_pin(I2S_PORT, &pin_config);
if (err != ESP_OK) {
Serial.printf("Failed to set I2S pins: %d\n", err);
return;
}
// Clear I2S buffer
i2s_zero_dma_buffer(I2S_PORT);
}
// Record audio function
void recordAudio() {
// Allocate buffer for recording
int16_t* i2sBuffer = (int16_t*) malloc(I2S_READ_LEN);
if (!i2sBuffer) {
Serial.println("Failed to allocate I2S buffer");
return;
}
// Clear any old file
if (SPIFFS.exists(filename)) {
SPIFFS.remove(filename);
}
// Open file for writing
File audioFile = SPIFFS.open(filename, FILE_WRITE);
if (!audioFile) {
Serial.println("Failed to open file for writing");
free(i2sBuffer);
return;
}
// Write WAV header
writeWavHeader(audioFile, AUDIO_DATA_SIZE);
// Start recording
Serial.println("Recording started...");
size_t bytesRead = 0;
unsigned long totalBytesWritten = 0;
unsigned long startTime = millis();
// Record for RECORD_TIME seconds
while(millis() - startTime < RECORD_TIME * 1000) {
esp_err_t result = i2s_read(I2S_PORT, i2sBuffer, I2S_READ_LEN, &bytesRead, 100);
if (result == ESP_OK && bytesRead > 0) {
audioFile.write((uint8_t*)i2sBuffer, bytesRead);
totalBytesWritten += bytesRead;
}
}
// Close file
audioFile.close();
// Update WAV header with actual data size
updateWavHeader(filename, totalBytesWritten);
Serial.printf("Recording finished. %lu bytes recorded\n", totalBytesWritten);
// Free the buffer
free(i2sBuffer);
// Set the flag
recordingComplete = true;
}
// Function to write the WAV header
/**
 * Writes a headerSize-byte (44) PCM WAV header to `file` at its current
 * position: mono, SAMPLE_RATE Hz, I2S_SAMPLE_BITS bits per sample.
 *
 * @param file      Open file that receives the header bytes.
 * @param dataSize  Size of the audio payload in bytes; used for both the
 *                  RIFF chunk size and the data chunk size.
 */
void writeWavHeader(File &file, uint32_t dataSize) {
  uint8_t wav[headerSize];
  int cursor = 0;

  // Appends a four-character chunk tag.
  auto putTag = [&](const char* tag) {
    for (int i = 0; i < 4; ++i) {
      wav[cursor++] = static_cast<uint8_t>(tag[i]);
    }
  };
  // Appends the low `width` bytes of `value`, least-significant byte first.
  auto putLE = [&](uint32_t value, int width) {
    for (int i = 0; i < width; ++i) {
      wav[cursor++] = (value >> (8 * i)) & 0xFF;
    }
  };

  const uint32_t channelCount = 1; // Mono
  const uint32_t bytesPerSecond = SAMPLE_RATE * 1 * I2S_SAMPLE_BITS / 8;

  putTag("RIFF");
  putLE(dataSize + headerSize - 8, 4);          // RIFF chunk size
  putTag("WAVE");
  putTag("fmt ");
  putLE(16, 4);                                 // fmt subchunk size for PCM
  putLE(1, 2);                                  // Audio format: PCM
  putLE(channelCount, 2);
  putLE(SAMPLE_RATE, 4);
  putLE(bytesPerSecond, 4);
  putLE(channelCount * I2S_SAMPLE_BITS / 8, 2); // Block align
  putLE(I2S_SAMPLE_BITS, 2);
  putTag("data");
  putLE(dataSize, 4);                           // data subchunk size

  file.write(wav, headerSize);
}
// Function to update the WAV header after recording
void updateWavHeader(const char* filepath, uint32_t dataSize) {
File file = SPIFFS.open(filepath, "r+");
if(!file) {
Serial.println("Failed to open file for header update");
return;
}
// Update the RIFF chunk size (file size - 8)
uint32_t fileSize = dataSize + headerSize - 8;
file.seek(4);
file.write((uint8_t*)&fileSize, 4);
// Update the data chunk size
file.seek(40);
file.write((uint8_t*)&dataSize, 4);
file.close();
}
// Function to upload file to the server
void uploadFile(const char* filepath) {
if (WiFi.status() != WL_CONNECTED) {
Serial.println("WiFi not connected, cannot upload");
return;
}
File file = SPIFFS.open(filepath, "r");
if(!file) {
Serial.println("Failed to open audio file for upload");
return;
}
uint32_t fileSize = file.size();
Serial.printf("Uploading file, size: %lu bytes\n", fileSize);
HTTPClient client;
String url = String(serverHost) + uploadEndpoint;
client.begin(url);
client.addHeader("Content-Type", "audio/wav");
client.addHeader("Content-Length", String(fileSize));
Serial.print("Uploading to: ");
Serial.println(url);
int httpCode = client.sendRequest("POST", &file, fileSize);
file.close();
if(httpCode == HTTP_CODE_OK) {
String response = client.getString();
Serial.println("Server response: " + response);
uploadComplete = true;
} else {
Serial.printf("Upload failed, error code: %d\n", httpCode);
}
client.end();
}
And here is the server file:
const express = require('express');
const { SpeechClient } = require('@google-cloud/speech');
const fs = require('fs');
const path = require('path');
// Ports and paths used by this server
const PORT_HTTP = 8888;          // Express endpoint for audio uploads
const PORT_SOCKET = 3001;        // Stand-alone Socket.IO server
const UPLOAD_DIR = './uploads';  // Where uploaded recordings are stored

// Express application, wrapped in a plain Node HTTP server
const app = express();
const httpServer = require('http').createServer(app);

// Socket.IO listens on its own port, with Engine.IO v3 clients allowed and
// CORS open to any origin
const socketOptions = {
  allowEIO3: true,
  cors: { origin: "*" },
};
const io = require('socket.io')(PORT_SOCKET, socketOptions);

// Google Speech-to-Text client
const speech = new SpeechClient();

// Create the upload directory on first start
if (!fs.existsSync(UPLOAD_DIR)) {
  fs.mkdirSync(UPLOAD_DIR);
}

// Parse audio/wav request bodies (up to 10 MB) into a raw Buffer on req.body
const rawWavParser = express.raw({ type: 'audio/wav', limit: '10mb' });
app.use(rawWavParser);
// HTTP file upload endpoint
// Saves the posted WAV body under UPLOAD_DIR, transcribes it, broadcasts the
// text as a 'processText' Socket.IO event and returns it in the response.
// Responds 400 for an unusable body or an empty transcription, 500 on errors.
app.post('/uploadAudio', async (req, res) => {
  // express.raw() only produces a Buffer when the Content-Type matched;
  // otherwise req.body is a plain object and writing it to disk would throw.
  if (!Buffer.isBuffer(req.body) || req.body.length === 0) {
    console.error('Rejected upload: request body is empty or not audio/wav');
    res.status(400).send('Empty or invalid audio upload');
    return;
  }
  try {
    // Create a unique filename using template literals
    const fileName = `recording_${Date.now()}.wav`;
    const filePath = path.join(UPLOAD_DIR, fileName);
    // Save the audio file without blocking the event loop
    await fs.promises.writeFile(filePath, req.body);
    console.log(`Audio file saved: ${fileName} (${req.body.length} bytes)`);
    // Perform speech-to-text transcription
    const transcription = await speechToText(filePath);
    console.log('Transcription:', transcription);
    if (!transcription) {
      res.status(400).send("speech recognition failed");
      return;
    }
    // Emit transcription to connected Java client via Socket.IO
    io.emit('processText', transcription);
    res.status(200).send(transcription);
  } catch (error) {
    console.error('Error processing audio:', error);
    res.status(500).send('Audio processing failed');
  }
});
// Socket.IO connection handling: log connects/disconnects and re-broadcast
// every 'chatgptResponse' message to all connected clients.
io.on('connection', (socket) => {
  console.log('Client connected via Socket.IO:', socket.id);

  // Forward the response to ESP32 or any other connected client
  const relayChatGptResponse = (response) => {
    console.log('Received chatgptResponse:', response);
    io.emit('chatgptResponse', response);
  };

  const logDisconnect = () => {
    console.log('Client disconnected:', socket.id);
  };

  socket.on('chatgptResponse', relayChatGptResponse);
  socket.on('disconnect', logDisconnect);
});
// Speech-to-text function using Google Speech API
// Reads the file at filePath, sends it base64-encoded as LINEAR16 / 16 kHz /
// en-US audio and resolves to the top transcript of each result joined by
// newlines ('' when there are no results). Errors are logged and rethrown.
async function speechToText(filePath) {
  try {
    const wavBase64 = fs.readFileSync(filePath).toString('base64');
    const request = {
      audio: { content: wavBase64 },
      config: {
        encoding: 'LINEAR16',
        sampleRateHertz: 16000,
        languageCode: 'en-US',
      },
    };

    console.log("Sending request to Google Speech API...");
    const [response] = await speech.recognize(request);
    console.log('Google Speech Response received');

    const results = response.results;
    if (!results || results.length === 0) {
      console.log('No transcription results returned');
      return '';
    }

    // Keep only the first alternative of every result
    const transcripts = [];
    for (const result of results) {
      transcripts.push(result.alternatives[0].transcript);
    }
    return transcripts.join('\n');
  } catch (error) {
    console.error('Detailed transcription error:', error);
    throw error; // Rethrow so it's caught in the upload handler
  }
}
// Start the HTTP server for file uploads and announce both listeners
const onHttpListening = () => {
  console.log(`HTTP server running on port ${PORT_HTTP}`);
};
httpServer.listen(PORT_HTTP, onHttpListening);
// The Socket.IO server started listening when it was constructed
console.log(`Socket.IO server running on port ${PORT_SOCKET}`);
I don't know where the error is. Please help me debug it; this project is due in a few days.
recordAudio()to check if the file exists, show its size, print some of its contents as hex.writeWavHeader()stores in the file. So the issue isn't that the file isn't getting created or that the data is being stored incorrectly. So what does error -11 mean when you're trying to do the upload? It's easy to find out with a web search.