How to implement a global voice navigation system in React using the Web Speech API and Context API?
Best practices
I'm building a React application with a global voice navigation feature using the Web Speech API. I'm managing speech recognition, speech synthesis, keyboard shortcuts, and page-specific voice commands through a React Context provider.
I also support multiple languages and use refs to avoid stale closures in event listeners.
Is this a good approach, or is there a better way to structure this implementation?
// WhisperContext.jsx — Global voice navigation system for WhisperMode
// Uses stable refs throughout to avoid stale closure crashes on state changes.
import { createContext, useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { Mic } from 'lucide-react';
import { useUser } from './UserContext.jsx';
// Context object shared by WhisperProvider (writer) and useWhisper (reader).
// The default value is null so that useWhisper can detect a missing provider.
const WhisperContext = createContext(null);
// Maps the app's locale code → BCP-47 language tag handed to the Web Speech API
// (used for both the utterance language and the recognition language).
// Frozen because it is shared, read-only lookup data: an accidental write now
// throws in module (strict) code instead of silently changing the voice language.
const LANG_MAP = Object.freeze({
  en: 'en-IN',
  hi: 'hi-IN',
  mr: 'mr-IN',
  kn: 'kn-IN',
  ta: 'ta-IN',
  te: 'te-IN',
});
import { useNavigate } from 'react-router-dom';
/**
 * Context provider for the global voice-navigation ("whisper mode") system.
 *
 * Reads `whisperMode` and the active `language` from `useUser()` and exposes
 * `speak`, `registerHandler`, `unregisterHandler`, `isListening` and
 * `whisperMode` to descendants through WhisperContext.
 *
 * @param {object} props
 * @param {*} props.children - Rendered inside the provider.
 */
export function WhisperProvider({ children }) {
const navigate = useNavigate();
const { t } = useTranslation();
const { whisperMode, language: activeLang } = useUser();
// True while a recognition session is running; drives the "Listening..." indicator.
const [isListening, setIsListening] = useState(false);
// Last text passed to speak(); rendered into the ARIA live region.
const [ariaText, setAriaText] = useState('');
// Values needed inside event callbacks live in refs, so the callbacks can read
// the current value without being re-created (and the listeners re-attached)
// every time state or props change.
const whisperModeRef = useRef(whisperMode); // latest whisperMode flag
const activeLangRef = useRef(activeLang); // latest locale code, looked up in LANG_MAP
const handlerRef = useRef(null); // page-specific transcript handler, set by registerHandler
const isListeningRef = useRef(false); // synchronous mirror of isListening
const recognitionRef = useRef(null); // current SpeechRecognition instance, if any
const audioCtxRef = useRef(null); // AudioContext, created lazily by playBeep
const spaceHoldTimerRef = useRef(null); // timer id for the space-bar hold delay
const lastTapTimeRef = useRef(0); // Date.now() of the previous touchend
const currentSummaryRef = useRef(""); // summary text of the currently registered page
// Keep the language ref current so speak/startListening pick up language changes.
useEffect(() => { activeLangRef.current = activeLang; }, [activeLang]);
// ── Audio beep ────────────────────────────────────────────────────────────────
/**
 * Plays a short sine-wave beep as audible feedback.
 *
 * The AudioContext is created on first use and cached in audioCtxRef. Playback
 * is best-effort: any failure (no Web Audio support, autoplay policy) is
 * swallowed on purpose so that a missing beep never breaks voice navigation.
 *
 * @param {number} [freq=880] - Tone frequency in Hz.
 * @param {number} [durationMs=150] - Tone length in milliseconds.
 */
const playBeep = useCallback((freq = 880, durationMs = 150) => {
  try {
    if (!audioCtxRef.current) {
      const AudioCtx = window.AudioContext || window.webkitAudioContext;
      audioCtxRef.current = new AudioCtx();
    }
    const ctx = audioCtxRef.current;

    // Under autoplay policies a context can be (or become) "suspended"; sound
    // scheduled on it is then never heard. Ask for it to resume; the request is
    // fire-and-forget and a rejection just means the browser still blocks audio.
    if (ctx.state === 'suspended') {
      Promise.resolve(ctx.resume()).catch(() => { /* still blocked — stay silent */ });
    }

    const osc = ctx.createOscillator();
    const gain = ctx.createGain();
    osc.connect(gain);
    gain.connect(ctx.destination);
    osc.type = 'sine';
    osc.frequency.value = freq;

    // Fade from 0.25 to near-silence over the beep duration (an exponential
    // ramp cannot target exactly 0, hence 0.001).
    gain.gain.setValueAtTime(0.25, ctx.currentTime);
    gain.gain.exponentialRampToValueAtTime(0.001, ctx.currentTime + durationMs / 1000);
    osc.start(ctx.currentTime);
    osc.stop(ctx.currentTime + durationMs / 1000);
  } catch (_) { /* AudioContext may need user gesture */ }
}, []);
// ── Speech Synthesis ──────────────────────────────────────────────────────────
/**
 * Speaks `text` aloud, replacing anything that is currently being spoken, and
 * mirrors the text into the ARIA live region.
 *
 * Does nothing when `text` is empty or speech synthesis is unavailable.
 * The callback identity never changes: the language is read from activeLangRef
 * at call time instead of being captured.
 *
 * @param {string} text - Text to speak.
 * @param {Function} [onEnd] - Assigned as the utterance's `onend` handler.
 */
const speak = useCallback((text, onEnd) => {
  if (!text) return;
  if (typeof window === 'undefined') return;

  const synth = window.speechSynthesis;
  if (!synth) return;

  // Drop whatever is queued or playing so the new text is heard right away.
  synth.cancel();

  const utterance = new SpeechSynthesisUtterance(text);
  utterance.lang = LANG_MAP[activeLangRef.current] || 'en-IN';
  utterance.rate = 0.95;
  utterance.pitch = 1.0;
  if (onEnd) {
    utterance.onend = onEnd;
  }

  synth.speak(utterance);
  setAriaText(text);
}, []); // no dependencies — everything mutable is read through refs
// Keep whisperModeRef in sync with the latest value and announce the current
// page summary at the moment whisper mode is switched on.
useEffect(() => {
  const wasEnabled = whisperModeRef.current;
  whisperModeRef.current = whisperMode;

  // Announce only on an off → on transition. The ref is initialised with the
  // first whisperMode value, so on mount wasEnabled === whisperMode and nothing
  // is spoken here; a page registered while the mode is already on is announced
  // by registerHandler's delayed speak. Speaking here as well on mount made that
  // summary play twice (once immediately, then again when the delay elapsed).
  if (whisperMode && !wasEnabled && currentSummaryRef.current) {
    speak(currentSummaryRef.current);
  }
}, [whisperMode, speak]);
// ── Start SpeechRecognition ───────────────────────────────────────────────────
/**
 * Starts one single-shot speech-recognition session.
 *
 * No-op when whisper mode is off or a session is already running/starting.
 * On a result, the transcript is first matched against the global navigation
 * commands; if none matches it is passed to the page handler stored in
 * handlerRef (when one is registered).
 *
 * whisperMode, the language and the listening flag are read from refs. The
 * callback is still re-created when `speak`, `playBeep`, `t` or `navigate`
 * change identity, so effects that depend on it re-attach their listeners then.
 */
const startListening = useCallback(() => {
  if (!whisperModeRef.current) return;
  if (isListeningRef.current) return; // already listening, or a start is pending

  const SpeechRec = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRec) {
    speak('Sorry, your browser does not support voice recognition.');
    return;
  }

  // Global navigation commands as [pattern, route], tested in order — the first
  // match wins, so more specific phrases must stay above broader ones
  // (e.g. "बचत योजना" for /goalpath before the bare "योजना" for /schemeseeker).
  // NOTE(review): these are substring matches. Short keywords such as "कर",
  // "घर" and "tax" also match inside longer words ("करना", "syntax") and would
  // then navigate instead of reaching the page handler — confirm this is wanted.
  // The Kannada keyword for "profile" previously contained a vowel sign from a
  // different script and could not match Kannada text; it is spelled in Kannada here.
  const navCommands = [
    [/go to profile|go profile|open profile|प्रोफाइल|प्रोफ़ाइल|ಪ್ರೊಫೈಲ್|சுயவிவரம்|ప్రొఫైల్/, '/profile'],
    [/go to home|go to dashboard|go home|dashboard|मुख्य पृष्ठ|घर|ಹೋಮ್|முகப்பு|హోమ్/, '/home'],
    [/go to goalpath|go to goal path|बचत योजना|लक्ष्य|ಬಚತ್|கோல்பாத்|గోల్పాత్/, '/goalpath'],
    [/go to financelens|go to finance lens|वित्त|फाइनेंस|ಫೈನಾನ್ಸ್|நிதி|ఫైనాన్స్/, '/financelens'],
    [/go to schemeseeker|go to scheme seeker|योजना|ಯೋಜನೆ|திட்டம்|పథకం/, '/schemeseeker'],
    [/go to fraulert|go to fraudalert|go to fraud alert|घोटाला|धोखा|ಮೋಸ|மோசடி|మోసం|scam/, '/fraulert'],
    [/go to mandimitra|go to mandi mitra|मंडी|மண்டி|మండి/, '/mandimitra'],
    [/go to benefactor|tax|कर|टैक्स|வரி|పన్ను/, '/benefactor'],
  ];

  // Stop any ongoing speech so the microphone does not pick it up.
  window.speechSynthesis?.cancel();
  playBeep(880, 120);

  const rec = new SpeechRec();
  rec.lang = LANG_MAP[activeLangRef.current] || 'en-IN';
  rec.continuous = false;
  rec.interimResults = false;
  rec.maxAlternatives = 1;
  recognitionRef.current = rec;

  // Claim the session synchronously. `onstart` fires asynchronously, so relying
  // on it alone let a second trigger in between create a parallel recognizer.
  isListeningRef.current = true;

  // Resets the listening state, but only while `rec` is still the current
  // session. A late `end`/`error` event from a recognizer that was aborted and
  // replaced must not wipe the state of the session that replaced it.
  const releaseSession = () => {
    if (recognitionRef.current !== rec) return;
    recognitionRef.current = null;
    isListeningRef.current = false;
    setIsListening(false);
  };

  rec.onstart = () => {
    if (recognitionRef.current !== rec) return; // superseded before it started
    isListeningRef.current = true;
    setIsListening(true);
  };

  rec.onend = releaseSession;

  rec.onerror = (evt) => {
    releaseSession();
    // 'aborted' (we cancelled) and 'no-speech' (user stayed silent) are not
    // worth an audible error.
    if (evt.error !== 'aborted' && evt.error !== 'no-speech') {
      playBeep(220, 200);
      speak(t('whispermode.recognition_error'));
    }
  };

  rec.onresult = (evt) => {
    const transcript = (evt.results[0]?.[0]?.transcript || '').trim();
    if (!transcript) return;

    const lc = transcript.toLowerCase();
    playBeep(440, 80);

    const matched = navCommands.find(([pattern]) => pattern.test(lc));
    if (matched) {
      navigate(matched[1]);
      return;
    }

    // Not a global command — hand the original (non-lowercased) transcript to
    // the page-specific handler, if one is registered.
    if (handlerRef.current) {
      handlerRef.current(transcript);
    }
  };

  try {
    rec.start();
  } catch (_) {
    // start() failed, so no start/end event will follow for this instance:
    // release the claim, otherwise every later attempt would be rejected as
    // "already listening".
    releaseSession();
  }
}, [speak, playBeep, t, navigate]);
// ── Register a page-specific handler + announce the page ─────────────────────
// Timer id of the pending delayed page announcement (null when none is pending).
// Tracked here so that it can be cancelled even when the caller discards the
// cancel function returned by registerHandler.
const announceTimerRef = useRef(null);

// Cancels the pending delayed announcement, if there is one.
const clearAnnounceTimer = useCallback(() => {
  if (announceTimerRef.current !== null) {
    clearTimeout(announceTimerRef.current);
    announceTimerRef.current = null;
  }
}, []);

/**
 * Registers the page-specific transcript handler and the page summary.
 *
 * When whisper mode is on and a summary is given, the summary is spoken after
 * a 700 ms delay. Registering again replaces the handler and summary and
 * cancels an announcement that has not been spoken yet.
 *
 * @param {Function} handler - Called with the transcript when no global
 *   navigation command matched.
 * @param {string} [summary] - Text announced for the page.
 * @returns {Function|undefined} A function cancelling the delayed announcement
 *   when one was scheduled, otherwise undefined.
 */
const registerHandler = useCallback((handler, summary) => {
  handlerRef.current = handler;
  currentSummaryRef.current = summary || "";

  // A previous page's announcement must not fire after this registration.
  clearAnnounceTimer();

  if (whisperModeRef.current && summary) {
    const tid = setTimeout(() => {
      announceTimerRef.current = null;
      speak(summary);
    }, 700);
    announceTimerRef.current = tid;
    return () => {
      clearTimeout(tid);
      // Only forget the tracked id if it is still ours; a later registration
      // may already have stored its own timer.
      if (announceTimerRef.current === tid) announceTimerRef.current = null;
    };
  }
  return undefined;
}, [speak, clearAnnounceTimer]);

/**
 * Removes the page-specific handler and silences the voice system: cancels a
 * pending announcement, stops speech, aborts an active recognition session and
 * resets the listening state.
 */
const unregisterHandler = useCallback(() => {
  clearAnnounceTimer();
  handlerRef.current = null;
  window.speechSynthesis?.cancel();
  recognitionRef.current?.abort();
  recognitionRef.current = null;
  isListeningRef.current = false;
  setIsListening(false);
}, [clearAnnounceTimer]);

// Do not let a pending announcement fire after the provider has unmounted.
useEffect(() => clearAnnounceTimer, [clearAnnounceTimer]);
// ── Global keyboard listener (spacebar hold — desktop) ────────────────────────
// Holding the space bar for 250 ms starts listening; releasing it earlier
// cancels the pending start. whisperMode is read from its ref at event time.
// The listeners are re-attached whenever startListening changes identity.
useEffect(() => {
  // True when the focused element accepts typed text, where space must keep
  // its normal meaning.
  const isTypingTarget = (el) =>
    Boolean(el) &&
    (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA' || el.isContentEditable);

  function handleSpaceDown(event) {
    if (!whisperModeRef.current) return;
    const isFreshSpacePress = event.code === 'Space' && !event.repeat;
    if (!isFreshSpacePress) return;
    if (isTypingTarget(document.activeElement)) return;

    event.preventDefault();
    spaceHoldTimerRef.current = setTimeout(() => startListening(), 250);
  }

  function handleSpaceUp(event) {
    if (event.code !== 'Space') return;
    const pendingTimer = spaceHoldTimerRef.current;
    if (!pendingTimer) return;
    clearTimeout(pendingTimer);
    spaceHoldTimerRef.current = null;
  }

  window.addEventListener('keydown', handleSpaceDown);
  window.addEventListener('keyup', handleSpaceUp);

  return function detachKeyboardListeners() {
    window.removeEventListener('keydown', handleSpaceDown);
    window.removeEventListener('keyup', handleSpaceUp);
  };
}, [startListening]);
// ── Mobile double-tap listener ────────────────────────────────────────────────
// Two touchend events less than 350 ms apart start listening. whisperMode is
// read from its ref at event time. The listener is re-attached whenever
// startListening changes identity.
useEffect(() => {
  const DOUBLE_TAP_WINDOW_MS = 350;

  const onTouchEnd = (e) => {
    if (!whisperModeRef.current) return;

    // Leave taps alone while the user is editing text. contentEditable is
    // included so this agrees with the keyboard listener's exclusions.
    const active = document.activeElement;
    if (
      active &&
      (active.tagName === 'INPUT' ||
        active.tagName === 'TEXTAREA' ||
        active.isContentEditable)
    ) return;

    const now = Date.now();
    if (now - lastTapTimeRef.current < DOUBLE_TAP_WINDOW_MS) {
      e.preventDefault();
      // Consume the gesture: without the reset a third quick tap would pair up
      // with the second one and trigger startListening again.
      lastTapTimeRef.current = 0;
      startListening();
      return;
    }
    lastTapTimeRef.current = now;
  };

  // passive: false is required for preventDefault() to take effect.
  window.addEventListener('touchend', onTouchEnd, { passive: false });
  return () => window.removeEventListener('touchend', onTouchEnd);
}, [startListening]);
// ── Cleanup on unmount ────────────────────────────────────────────────────────
// Releases everything the provider may still hold when it unmounts: queued
// speech, an active recognition session, the pending space-hold timer and the
// AudioContext created by playBeep.
useEffect(() => {
  return () => {
    window.speechSynthesis?.cancel();
    recognitionRef.current?.abort();

    if (spaceHoldTimerRef.current) {
      clearTimeout(spaceHoldTimerRef.current);
      spaceHoldTimerRef.current = null;
    }

    // An AudioContext keeps audio resources until it is closed, so close it
    // here. The ref is cleared first so a later playBeep (e.g. after a Strict
    // Mode re-mount) lazily creates a fresh context instead of reusing a
    // closed one.
    const ctx = audioCtxRef.current;
    audioCtxRef.current = null;
    if (ctx && typeof ctx.close === 'function') {
      // Best-effort: closing may fail if the context is already closed.
      Promise.resolve()
        .then(() => ctx.close())
        .catch(() => { /* nothing left to release */ });
    }
  };
}, []);
return (
<WhisperContext.Provider value={{ speak, registerHandler, unregisterHandler, isListening, whisperMode }}>
{children}
{/* ARIA live region — screen readers pick this up automatically */}
<div
aria-live="assertive"
aria-atomic="true"
style={{ position: 'absolute', left: '-9999px', width: '1px', height: '1px', overflow: 'hidden' }}
>
{ariaText}
</div>
{/* Floating listening indicator */}
{isListening && (
<div
style={{
position: 'fixed',
bottom: '28px',
right: '28px',
zIndex: 99999,
background: 'rgba(7, 20, 11, 0.93)',
backdropFilter: 'blur(16px)',
border: '1.5px solid rgba(74, 222, 128, 0.55)',
borderRadius: '20px',
padding: '14px 22px',
display: 'flex',
alignItems: 'center',
gap: '10px',
boxShadow: '0 8px 32px rgba(0,0,0,0.45), 0 0 0 4px rgba(74,222,128,0.12)',
color: '#4ade80',
fontFamily: 'Outfit, sans-serif',
fontSize: '15px',
fontWeight: 600,
}}
>
<Mic
size={18}
style={{ animation: 'pulse 1s ease-in-out infinite', filter: 'drop-shadow(0 0 6px #4ade80)' }}
/>
Listening...
</div>
)}
</WhisperContext.Provider>
);
}
/**
 * Hook returning the value supplied by the nearest WhisperProvider.
 *
 * @returns {object} The WhisperContext value.
 * @throws {Error} When no WhisperProvider is present above the caller
 *   (the context value is then falsy).
 */
export function useWhisper() {
  const whisper = useContext(WhisperContext);
  if (whisper) {
    return whisper;
  }
  throw new Error('useWhisper must be used within a WhisperProvider');
}
export default WhisperContext;
lang-js