Transcribe a local file with recognition metadata (beta)
Stay organized with collections
Save and categorize content based on your preferences.
Transcribe a local audio file, including recognition metadata in the request.
Code sample
Java
To learn how to install and use the client library for Cloud STT, see Cloud STT client libraries. For more information, see the Cloud STT Java API reference documentation.
To authenticate to Cloud STT, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
/**
 * Transcribes the given local audio file, attaching recognition metadata to the request.
 *
 * <p>The whole file is read into memory and sent inline with the recognize call. For every
 * result that carries at least one alternative, the first (most likely) transcript is printed
 * to standard output.
 *
 * @param fileName the path to an audio file.
 * @throws Exception if reading the file, creating the client, or the recognize call fails.
 */
public static void transcribeFileWithMetadata(String fileName) throws Exception {
  // Get the contents of the local audio file
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  // try-with-resources closes the client once the request has been handled
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Wrap the raw bytes as the inline audio content of the request
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();

    // Construct a recognition metadata object.
    // Most metadata fields are specified as enums
    // (InteractionType, MicrophoneDistance, RecordingDeviceType)
    RecognitionMetadata metadata =
        RecognitionMetadata.newBuilder()
            .setInteractionType(InteractionType.DISCUSSION)
            .setMicrophoneDistance(MicrophoneDistance.NEARFIELD)
            .setRecordingDeviceType(RecordingDeviceType.SMARTPHONE)
            // Some metadata fields are free form strings
            .setRecordingDeviceName("Pixel 2 XL")
            // And some are integers, for instance the 6 digit NAICS code
            // https://www.naics.com/search/
            .setIndustryNaicsCodeOfAudio(519190)
            .build();

    // Configure the request and attach the recognition metadata to it
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(8000)
            .setMetadata(metadata) // Add the metadata to the config
            .build();

    // Perform the transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

    // Print out the results
    for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
      // Skip a result without alternatives instead of failing on getAlternatives(0)
      if (result.getAlternativesCount() == 0) {
        continue;
      }
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternatives(0);
      System.out.format("Transcript: %s\n\n", alternative.getTranscript());
    }
  }
}

Node.js
To learn how to install and use the client library for Cloud STT, see Cloud STT client libraries. For more information, see the Cloud STT Node.js API reference documentation.
To authenticate to Cloud STT, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
// Imports the Google Cloud client library for Beta API
/**
 * TODO(developer): Update client library import to use new
 * version of API when desired features become available
 */
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');

// Creates a client
const client = new speech.SpeechClient();
/**
 * Transcribes a local audio file with a recognize request that carries
 * recognition metadata, then logs the first transcript of each result.
 *
 * `filename`, `encoding`, `sampleRateHertz` and `languageCode` are not defined
 * until the TODO lines below are uncommented and filled in.
 *
 * @returns {Promise<void>} Resolves once every transcript has been logged.
 */
async function syncRecognizeWithMetaData() {
  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const recognitionMetadata = {
    interactionType: 'DISCUSSION',
    microphoneDistance: 'NEARFIELD',
    recordingDeviceType: 'SMARTPHONE',
    recordingDeviceName: 'Pixel 2 XL',
    industryNaicsCodeOfAudio: 519190,
  };

  const config = {
    encoding,
    sampleRateHertz,
    languageCode,
    metadata: recognitionMetadata,
  };

  // The file content is sent inline with the request as a base64 string
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {config, audio};

  // Detects speech in the audio file
  const [response] = await client.recognize(request);
  for (const result of response.results) {
    // Only the first alternative is reported; skip results that have none
    const [alternative] = result.alternatives;
    if (alternative) {
      console.log(alternative.transcript);
    }
  }
}

Python
To learn how to install and use the client library for Cloud STT, see Cloud STT client libraries. For more information, see the Cloud STT Python API reference documentation.
To authenticate to Cloud STT, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()

speech_file = "resources/commercial_mono.wav"

# Read the whole audio file; its bytes are sent inline with the request.
with open(speech_file, "rb") as audio_file:
    content = audio_file.read()

# Here we construct a recognition metadata object.
# Most metadata fields are specified as enums nested in
# speech.RecognitionMetadata
metadata = speech.RecognitionMetadata()
metadata.interaction_type = speech.RecognitionMetadata.InteractionType.DISCUSSION
metadata.microphone_distance = (
    speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD
)
metadata.recording_device_type = (
    speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE
)

# Some metadata fields are free form strings
metadata.recording_device_name = "Pixel 2 XL"

# And some are integers, for instance the 6 digit NAICS code
# https://www.naics.com/search/
metadata.industry_naics_code_of_audio = 519190

audio = speech.RecognitionAudio(content=content)
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=8000,
    language_code="en-US",
    # Add this in the request to send metadata.
    metadata=metadata,
)

response = client.recognize(config=config, audio=audio)

# Report the first alternative of every result.
for i, result in enumerate(response.results):
    alternative = result.alternatives[0]
    print("-" * 20)
    print(f"First alternative of result {i}")
    print(f"Transcript: {alternative.transcript}")
return response.results

What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.