
I am trying to implement a pronunciation assessment system using Azure's Speech JS SDK (see the docs).

I get the following error in the console:

"Could not deserialize speech context. websocket error code: 1007"

Here is my implementation:

assessPronunciation(fileUrl) {
  const speechConfig = window.SpeechSDK.SpeechConfig.fromSubscription("xxx", "westeurope");
  speechConfig.speechRecognitionLanguage = "en-GB";

  // Fetch the WAV file and create an AudioConfig
  fetch(fileUrl)
    .then(response => response.blob())
    .then(blob => {
      // Convert the blob to a File object
      const file = new File([blob], "audio.wav", { type: "audio/wav" });

      // Create an AudioConfig using the File object
      const audioConfig = window.SpeechSDK.AudioConfig.fromWavFileInput(file);

      var pronunciationAssessmentConfig = new window.SpeechSDK.PronunciationAssessmentConfig({
        referenceText: "Hello this is a test",
        gradingSystem: "HundredMark",
        granularity: "Phoneme"
      });

      var speechRecognizer = new window.SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
      pronunciationAssessmentConfig.applyTo(speechRecognizer);

      speechRecognizer.sessionStarted = (s, e) => {
        console.log(`SESSION ID: ${e.sessionId}`);
      };

      speechRecognizer.recognizeOnceAsync(
        function(speechRecognitionResult) {
          if (speechRecognitionResult.reason === window.SpeechSDK.ResultReason.RecognizedSpeech) {
            // The pronunciation assessment result as a Speech SDK object
            var pronunciationAssessmentResult = window.SpeechSDK.PronunciationAssessmentResult.fromResult(speechRecognitionResult);
            console.log("pronunciationAssessmentResult", pronunciationAssessmentResult);

            // The pronunciation assessment result as a JSON string
            var pronunciationAssessmentResultJson = speechRecognitionResult.properties.getProperty(window.SpeechSDK.PropertyId.SpeechServiceResponse_JsonResult);
            console.log("pronunciationAssessmentResultJson", pronunciationAssessmentResultJson);
          } else {
            console.error("Speech not recognized. Reason:", speechRecognitionResult);
          }
        },
        function(error) {
          console.error("Error during recognition:", error);
          if (error instanceof window.SpeechSDK.SpeechRecognitionCanceledEventArgs) {
            console.error("Recognition canceled. Reason:", error.reason);
            console.error("Error details:", error.errorDetails);
          }
        }
      );
    })
    .catch(error => {
      console.error("Error fetching WAV file:", error);
    });
}

I checked the recording (fileUrl) and it is a valid WAV file, as expected.
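
One quick way to verify the format, assuming a canonical 44-byte PCM WAV header, is to read the fmt chunk fields directly (inspectWav is just an illustrative name):

// Sanity check: parse the fmt chunk of a canonical PCM WAV header.
// Pronunciation assessment expects 16-bit PCM, mono, 16 kHz.
async function inspectWav(fileUrl) {
  const buffer = await (await fetch(fileUrl)).arrayBuffer();
  const view = new DataView(buffer);
  console.log("audio format:", view.getUint16(20, true));    // 1 = PCM
  console.log("channels:", view.getUint16(22, true));        // expect 1
  console.log("sample rate:", view.getUint32(24, true));     // expect 16000
  console.log("bits per sample:", view.getUint16(34, true)); // expect 16
}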

Recording configuration:

startRecording(event) {
  event.preventDefault();
  if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
    navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
      this.recorder = new RecordRTC(stream, {
        type: 'audio',
        mimeType: 'audio/wav',
        recorderType: RecordRTC.StereoAudioRecorder,
        desiredSampRate: 16000,
        numberOfAudioChannels: 1,
        audioBitsPerSecond: 128000
      });
      this.startRecorder(event);
    }).catch((error) => {
      console.log("The following error occurred: " + error);
      alert("Please grant permission for microphone access");
    });
  } else {
    alert("Your browser does not support audio recording, please use a different browser or update your current browser");
  }
}
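
For completeness, the recording reaches assessPronunciation roughly like this (a sketch assuming the standard RecordRTC stop flow; the handler name and the URL.createObjectURL handoff are illustrative, not my exact code):

stopRecording(event) {
  event.preventDefault();
  // RecordRTC: stopRecording takes a callback; getBlob returns the WAV blob
  this.recorder.stopRecording(() => {
    const blob = this.recorder.getBlob();
    const fileUrl = URL.createObjectURL(blob); // illustrative handoff
    this.assessPronunciation(fileUrl);
  });
}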

Any idea what the issue is? Thanks.

SOLUTION

Switching to a push stream and passing the config as positional arguments (not an object) fixed it:

var audioConfig = window.SpeechSDK.AudioConfig.fromStreamInput(pushStream);
var pronunciationAssessmentConfig = new window.SpeechSDK.PronunciationAssessmentConfig(
  "My voice is my passport, verify me.",
  window.SpeechSDK.PronunciationAssessmentGradingSystem.HundredMark,
  window.SpeechSDK.PronunciationAssessmentGranularity.Phoneme
);
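
Here pushStream is built from the fetched recording, mirroring the Node workaround from the answer below in the browser (a sketch; like the workaround, it writes the whole file, header included, into the stream):

const pushStream = window.SpeechSDK.AudioInputStream.createPushStream();
fetch(fileUrl)
  .then(response => response.arrayBuffer())
  .then(arrayBuffer => {
    // Write the raw bytes into the stream, then close it so recognition can finish
    pushStream.write(arrayBuffer);
    pushStream.close();
  });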
  • Use AudioConfig.fromWavFileInput(blob) and ensure the WAV file is 16-bit PCM, mono, 16 kHz. (DasariKamali)
  • @DasariKamali when I use the blob, I get the following error: "Error occurred while processing 'undefined'. TypeError: Cannot read properties of undefined (reading 'lastIndexOf')" in the speechRecognizer.recognizeOnceAsync() function.

1 Answer


Try this code block out:

var sdk = require("microsoft-cognitiveservices-speech-sdk");
var fs = require("fs");

// AudioConfig.fromWavFileInput is not supported in Node (see catch 1 below),
// so push the file through a stream instead:
// const audioConfig = sdk.AudioConfig.fromWavFileInput('myVoiceIsMyPassportVerifyMe01.wav');
var filename = "myVoiceIsMyPassportVerifyMe01.wav"; // 16000 Hz, mono
var pushStream = sdk.AudioInputStream.createPushStream();
fs.createReadStream(filename).on('data', function (arrayBuffer) {
  pushStream.write(arrayBuffer.slice());
}).on('end', function () {
  pushStream.close();
});
var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);

const conf = sdk.SpeechConfig.fromSubscription(
  'xxxx',
  'eastus'
);
conf.speechRecognitionLanguage = "en-GB";

var speechRecognizer = new sdk.SpeechRecognizer(conf, audioConfig);

// Pass the config values positionally; an object literal here breaks the
// speech context serialization (the 1007 error in the question).
var pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig(
  "My voice is my passport, verify me.",
  sdk.PronunciationAssessmentGradingSystem.HundredMark,
  sdk.PronunciationAssessmentGranularity.Phoneme
);
pronunciationAssessmentConfig.applyTo(speechRecognizer);

speechRecognizer.sessionStarted = (s, e) => {
  console.log('SESSION ID: ' + e.sessionId);
};

speechRecognizer.recognizeOnceAsync(
  function (speechRecognitionResult) {
    if (speechRecognitionResult.reason === sdk.ResultReason.RecognizedSpeech) {
      // The pronunciation assessment result as a Speech SDK object
      var pronunciationAssessmentResult = sdk.PronunciationAssessmentResult.fromResult(speechRecognitionResult);
      console.log("pronunciationAssessmentResult", pronunciationAssessmentResult);

      // The pronunciation assessment result as a JSON string
      var pronunciationAssessmentResultJson = speechRecognitionResult.properties.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult);
      console.log("pronunciationAssessmentResultJson", pronunciationAssessmentResultJson);
    } else {
      console.error("Speech not recognized. Reason:", speechRecognitionResult);
    }
  },
  function (error) {
    console.error("Error during recognition:", error);
    if (error instanceof sdk.SpeechRecognitionCanceledEventArgs) {
      console.error("Recognition canceled. Reason:", error.reason);
      console.error("Error details:", error.errorDetails);
    }
  }
);

A few catches:

  1. AudioConfig.fromWavFileInput might not be supported in Node. I just used the workaround from this issue, and it worked: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/813

  2. The PronunciationAssessmentConfig values need to be passed as individual positional parameters, not as a JSON object; passing an object there is likely what produced the "Could not deserialize speech context" (1007) error. If you do want a JSON-style config, see the sketch after this list.

  3. I used a sample WAV from the SDK samples; swap in your own file: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/sampledata/audiofiles/myVoiceIsMyPassportVerifyMe01.wav
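
On catch 2: the SDK does also expose a fromJSON factory for a JSON-style config, but it takes a JSON string, not an object literal (a sketch; verify the exact property names against your SDK version):

// Equivalent config built from a JSON string via the fromJSON factory
var pronunciationAssessmentConfig = sdk.PronunciationAssessmentConfig.fromJSON(
  JSON.stringify({
    referenceText: "My voice is my passport, verify me.",
    gradingSystem: "HundredMark",
    granularity: "Phoneme"
  })
);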



2 Comments

Thank you, the workaround worked, along with the PronunciationAssessmentConfig without any keys (see the edited question). You made my day!
Aha, great. That's even simpler. Happy coding. If you think it is helpful, some bounty please. :D
