
I am trying to implement a pronunciation assessment system using Azure's Speech JS SDK (see the docs).

I get the following error in the console:

"Could not deserialize speech context. websocket error code: 1007"

Here is my implementation:

assessPronunciation(fileUrl) {
  const speechConfig = window.SpeechSDK.SpeechConfig.fromSubscription("xxx", "westeurope");
  speechConfig.speechRecognitionLanguage = "en-GB";

  // Fetch the WAV file and create an AudioConfig
  fetch(fileUrl)
    .then(response => response.blob())
    .then(blob => {
      // Convert the blob to a File object
      const file = new File([blob], "audio.wav", { type: "audio/wav" });

      // Create an AudioConfig using the File object
      const audioConfig = window.SpeechSDK.AudioConfig.fromWavFileInput(file);

      var pronunciationAssessmentConfig = new window.SpeechSDK.PronunciationAssessmentConfig({
        referenceText: "Hello this is a test",
        gradingSystem: "HundredMark",
        granularity: "Phoneme"
      });

      var speechRecognizer = new window.SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
      pronunciationAssessmentConfig.applyTo(speechRecognizer);

      speechRecognizer.sessionStarted = (s, e) => {
        console.log(`SESSION ID: ${e.sessionId}`);
      };

      speechRecognizer.recognizeOnceAsync(
        function(speechRecognitionResult) {
          if (speechRecognitionResult.reason === window.SpeechSDK.ResultReason.RecognizedSpeech) {
            // The pronunciation assessment result as a Speech SDK object
            var pronunciationAssessmentResult = window.SpeechSDK.PronunciationAssessmentResult.fromResult(speechRecognitionResult);
            console.log("pronunciationAssessmentResult", pronunciationAssessmentResult);

            // The pronunciation assessment result as a JSON string
            var pronunciationAssessmentResultJson = speechRecognitionResult.properties.getProperty(window.SpeechSDK.PropertyId.SpeechServiceResponse_JsonResult);
            console.log("pronunciationAssessmentResultJson", pronunciationAssessmentResultJson);
          } else {
            console.error("Speech not recognized. Reason:", speechRecognitionResult);
          }
        },
        function(error) {
          console.error("Error during recognition:", error);
          if (error instanceof window.SpeechSDK.SpeechRecognitionCanceledEventArgs) {
            console.error("Recognition canceled. Reason:", error.reason);
            console.error("Error details:", error.errorDetails);
          }
        }
      );
    })
    .catch(error => {
      console.error("Error fetching WAV file:", error);
    });
}

I checked the recording (fileUrl) and it is a valid WAV file, as expected.
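
One quick way to verify the format, assuming a canonical 44-byte PCM WAV header, is to read the fmt chunk fields directly (inspectWav is just an illustrative name):

// Sanity check: parse the fmt chunk of a canonical PCM WAV header.
// Pronunciation assessment expects 16-bit PCM, mono, 16 kHz.
async function inspectWav(fileUrl) {
  const buffer = await (await fetch(fileUrl)).arrayBuffer();
  const view = new DataView(buffer);
  console.log("audio format:", view.getUint16(20, true));    // 1 = PCM
  console.log("channels:", view.getUint16(22, true));        // expect 1
  console.log("sample rate:", view.getUint32(24, true));     // expect 16000
  console.log("bits per sample:", view.getUint16(34, true)); // expect 16
}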

Recording configuration:

startRecording(event) {
  event.preventDefault();
  if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
    navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
      this.recorder = new RecordRTC(stream, {
        type: 'audio',
        mimeType: 'audio/wav',
        recorderType: RecordRTC.StereoAudioRecorder,
        desiredSampRate: 16000,
        numberOfAudioChannels: 1,
        audioBitsPerSecond: 128000
      });
      this.startRecorder(event);
    }).catch((error) => {
      console.log("The following error occurred: " + error);
      alert("Please grant permission for microphone access");
    });
  } else {
    alert("Your browser does not support audio recording, please use a different browser or update your current browser");
  }
}
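
For completeness, the recording reaches assessPronunciation roughly like this (a sketch assuming the standard RecordRTC stop flow; the handler name and the URL.createObjectURL handoff are illustrative, not my exact code):

stopRecording(event) {
  event.preventDefault();
  // RecordRTC: stopRecording takes a callback; getBlob returns the WAV blob
  this.recorder.stopRecording(() => {
    const blob = this.recorder.getBlob();
    const fileUrl = URL.createObjectURL(blob); // illustrative handoff
    this.assessPronunciation(fileUrl);
  });
}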

Any idea what the issue is? Thanks.

SOLUTION

Switching to a push stream and passing the config as positional arguments (not an object) fixed it:

var audioConfig = window.SpeechSDK.AudioConfig.fromStreamInput(pushStream);
var pronunciationAssessmentConfig = new window.SpeechSDK.PronunciationAssessmentConfig(
  "My voice is my passport, verify me.",
  window.SpeechSDK.PronunciationAssessmentGradingSystem.HundredMark,
  window.SpeechSDK.PronunciationAssessmentGranularity.Phoneme
);
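
Here pushStream is built from the fetched recording, mirroring the Node workaround from the answer below in the browser (a sketch; like the workaround, it writes the whole file, header included, into the stream):

const pushStream = window.SpeechSDK.AudioInputStream.createPushStream();
fetch(fileUrl)
  .then(response => response.arrayBuffer())
  .then(arrayBuffer => {
    // Write the raw bytes into the stream, then close it so recognition can finish
    pushStream.write(arrayBuffer);
    pushStream.close();
  });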
  • Use AudioConfig.fromWavFileInput(blob) and ensure the WAV file is 16-bit PCM, mono, 16 kHz. (DasariKamali)
  • @DasariKamali when I use the blob, I get the following error: "Error occurred while processing 'undefined'. TypeError: Cannot read properties of undefined (reading 'lastIndexOf')" in the speechRecognizer.recognizeOnceAsync() function.

1 Answer


Try this code block out:

var sdk = require("microsoft-cognitiveservices-speech-sdk");
var fs = require("fs");

// AudioConfig.fromWavFileInput is not supported in Node (see catch 1 below),
// so push the file through a stream instead:
// const audioConfig = sdk.AudioConfig.fromWavFileInput('myVoiceIsMyPassportVerifyMe01.wav');
var filename = "myVoiceIsMyPassportVerifyMe01.wav"; // 16000 Hz, mono
var pushStream = sdk.AudioInputStream.createPushStream();
fs.createReadStream(filename).on('data', function (arrayBuffer) {
  pushStream.write(arrayBuffer.slice());
}).on('end', function () {
  pushStream.close();
});
var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);

const conf = sdk.SpeechConfig.fromSubscription(
  'xxxx',
  'eastus'
);
conf.speechRecognitionLanguage = "en-GB";

var speechRecognizer = new sdk.SpeechRecognizer(conf, audioConfig);

// Pass the config values positionally; an object literal here breaks the
// speech context serialization (the 1007 error in the question).
var pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig(
  "My voice is my passport, verify me.",
  sdk.PronunciationAssessmentGradingSystem.HundredMark,
  sdk.PronunciationAssessmentGranularity.Phoneme
);
pronunciationAssessmentConfig.applyTo(speechRecognizer);

speechRecognizer.sessionStarted = (s, e) => {
  console.log('SESSION ID: ' + e.sessionId);
};

speechRecognizer.recognizeOnceAsync(
  function (speechRecognitionResult) {
    if (speechRecognitionResult.reason === sdk.ResultReason.RecognizedSpeech) {
      // The pronunciation assessment result as a Speech SDK object
      var pronunciationAssessmentResult = sdk.PronunciationAssessmentResult.fromResult(speechRecognitionResult);
      console.log("pronunciationAssessmentResult", pronunciationAssessmentResult);

      // The pronunciation assessment result as a JSON string
      var pronunciationAssessmentResultJson = speechRecognitionResult.properties.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult);
      console.log("pronunciationAssessmentResultJson", pronunciationAssessmentResultJson);
    } else {
      console.error("Speech not recognized. Reason:", speechRecognitionResult);
    }
  },
  function (error) {
    console.error("Error during recognition:", error);
    if (error instanceof sdk.SpeechRecognitionCanceledEventArgs) {
      console.error("Recognition canceled. Reason:", error.reason);
      console.error("Error details:", error.errorDetails);
    }
  }
);

A few catches:

  1. AudioConfig.fromWavFileInput might not be supported in Node. I just used the workaround from this issue, and it worked: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/813

  2. The PronunciationAssessmentConfig values need to be passed as individual positional parameters, not as a JSON object; passing an object there is likely what produced the "Could not deserialize speech context" (1007) error. If you do want a JSON-style config, see the sketch after this list.

  3. I used a sample WAV from the SDK samples; swap in your own file: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/sampledata/audiofiles/myVoiceIsMyPassportVerifyMe01.wav
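
On catch 2: the SDK does also expose a fromJSON factory for a JSON-style config, but it takes a JSON string, not an object literal (a sketch; verify the exact property names against your SDK version):

// Equivalent config built from a JSON string via the fromJSON factory
var pronunciationAssessmentConfig = sdk.PronunciationAssessmentConfig.fromJSON(
  JSON.stringify({
    referenceText: "My voice is my passport, verify me.",
    gradingSystem: "HundredMark",
    granularity: "Phoneme"
  })
);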



2 Comments

Thank you, the workaround worked, along with the PronunciationAssessmentConfig without any keys (see the edited question). You made my day!
Aha, great. That's even simpler. Happy coding. If you think it is helpful, some bounty please. :D
