How to implement a global voice navigation system in React using the Web Speech API and Context API?

Question

I'm building a React application with a global voice navigation feature using the Web Speech API. I'm managing speech recognition, speech synthesis, keyboard shortcuts, and page-specific voice commands through a React Context provider.

I also support multiple languages and use refs to avoid stale closures in event listeners.

Is this a good approach, or is there a better way to structure this implementation?

// WhisperContext.jsx — Global voice navigation system for WhisperMode
// Uses stable refs throughout to avoid stale closure crashes on state changes.
import { createContext, useContext, useState, useEffect, useRef, useCallback, useMemo } from 'react';
import { useTranslation } from 'react-i18next';
import { Mic } from 'lucide-react';
import { useUser } from './UserContext.jsx';
// Context object shared by WhisperProvider and useWhisper. The default value is
// null, which is what lets useWhisper detect a missing provider and throw.
const WhisperContext = createContext(null);
// Locale codes supported by the app, each paired with its Indian regional
// variant (`<code>-IN`) as the BCP-47 tag handed to the Web Speech API.
const LANG_MAP = Object.fromEntries(
  ['en', 'hi', 'mr', 'kn', 'ta', 'te'].map((code) => [code, `${code}-IN`]),
);
import { useNavigate } from 'react-router-dom';
export function WhisperProvider({ children }) {
 const navigate = useNavigate();
 const { t } = useTranslation();
 const { whisperMode, language: activeLang } = useUser();
 const [isListening, setIsListening] = useState(false);
 const [ariaText, setAriaText] = useState('');
 // Use refs for values we need inside event callbacks so we don't
 // re-attach listeners on every re-render (avoids the crash).
 const whisperModeRef = useRef(whisperMode);
 const activeLangRef = useRef(activeLang);
 const handlerRef = useRef(null);
 const isListeningRef = useRef(false);
 const recognitionRef = useRef(null);
 const audioCtxRef = useRef(null);
 const spaceHoldTimerRef = useRef(null);
 const lastTapTimeRef = useRef(0);
 const currentSummaryRef = useRef("");
 useEffect(() => { activeLangRef.current = activeLang; }, [activeLang]);
 // ── Audio beep ────────────────────────────────────────────────────────────────
 const playBeep = useCallback((freq = 880, durationMs = 150) => {
 try {
 if (!audioCtxRef.current) {
 audioCtxRef.current = new (window.AudioContext || window.webkitAudioContext)();
 }
 const ctx = audioCtxRef.current;
 const osc = ctx.createOscillator();
 const gain = ctx.createGain();
 osc.connect(gain);
 gain.connect(ctx.destination);
 osc.type = 'sine';
 osc.frequency.value = freq;
 gain.gain.setValueAtTime(0.25, ctx.currentTime);
 gain.gain.exponentialRampToValueAtTime(0.001, ctx.currentTime + durationMs / 1000);
 osc.start(ctx.currentTime);
 osc.stop(ctx.currentTime + durationMs / 1000);
 } catch (_) { /* AudioContext may need user gesture */ }
 }, []);
 // ── Speech Synthesis ──────────────────────────────────────────────────────────
 const speak = useCallback((text, onEnd) => {
 if (!text || typeof window === 'undefined' || !window.speechSynthesis) return;
 window.speechSynthesis.cancel();
 const utter = new SpeechSynthesisUtterance(text);
 utter.lang = LANG_MAP[activeLangRef.current] || 'en-IN';
 utter.rate = 0.95;
 utter.pitch = 1.0;
 if (onEnd) utter.onend = onEnd;
 window.speechSynthesis.speak(utter);
 setAriaText(text);
 }, []); // stable — reads lang from ref
 // Keep refs in sync with latest prop values and announce immediately when enabled
 useEffect(() => {
 whisperModeRef.current = whisperMode;
 if (whisperMode && currentSummaryRef.current) {
 speak(currentSummaryRef.current);
 }
 }, [whisperMode, speak]);
 // ── Start SpeechRecognition ───────────────────────────────────────────────────
 // Defined as a stable ref-based function so keyboard handlers never go stale
 const startListening = useCallback(() => {
 if (!whisperModeRef.current) return;
 if (isListeningRef.current) return; // already listening
 const SpeechRec = window.SpeechRecognition || window.webkitSpeechRecognition;
 if (!SpeechRec) {
 speak('Sorry, your browser does not support voice recognition.');
 return;
 }
 window.speechSynthesis?.cancel();
 playBeep(880, 120);
 const rec = new SpeechRec();
 rec.lang = LANG_MAP[activeLangRef.current] || 'en-IN';
 rec.continuous = false;
 rec.interimResults = false;
 rec.maxAlternatives = 1;
 recognitionRef.current = rec;
 rec.onstart = () => {
 isListeningRef.current = true;
 setIsListening(true);
 };
 rec.onend = () => {
 isListeningRef.current = false;
 setIsListening(false);
 recognitionRef.current = null;
 };
 rec.onerror = (evt) => {
 isListeningRef.current = false;
 setIsListening(false);
 recognitionRef.current = null;
 if (evt.error !== 'aborted' && evt.error !== 'no-speech') {
 playBeep(220, 200);
 speak(t('whispermode.recognition_error'));
 }
 };
 rec.onresult = (evt) => {
 const transcript = (evt.results[0]?.[0]?.transcript || '').trim();
 if (transcript) {
 const lc = transcript.toLowerCase();
 playBeep(440, 80);
 // Global Navigation Commands
 if (/go to profile|go profile|open profile|प्रोफाइल|प्रोफ़ाइल|ಪ್ರೊಫൈಲ್|சுயவிவரம்|ప్రొఫైల్/.test(lc)) {
 navigate('/profile');
 return;
 }
 if (/go to home|go to dashboard|go home|dashboard|मुख्य पृष्ठ|घर|ಹೋಮ್|முகப்பு|హోమ్/.test(lc)) {
 navigate('/home');
 return;
 }
 if (/go to goalpath|go to goal path|बचत योजना|लक्ष्य|ಬಚತ್|கோல்பாத்|గోల్‌పాత్/.test(lc)) {
 navigate('/goalpath');
 return;
 }
 if (/go to financelens|go to finance lens|वित्त|फाइनेंस|ಫೈನಾನ್ಸ್|நிதி|ఫైనాన్స్/.test(lc)) {
 navigate('/financelens');
 return;
 }
 if (/go to schemeseeker|go to scheme seeker|योजना|ಯೋಜನೆ|திட்டம்|పథకం/.test(lc)) {
 navigate('/schemeseeker');
 return;
 }
 if (/go to fraulert|go to fraudalert|go to fraud alert|घोटाला|धोखा|ಮೋಸ|மோசடி|మోసం|scam/.test(lc)) {
 navigate('/fraulert');
 return;
 }
 if (/go to mandimitra|go to mandi mitra|मंडी|மண்டி|మండి/.test(lc)) {
 navigate('/mandimitra');
 return;
 }
 if (/go to benefactor|tax|कर|टैक्स|வரி|పన్ను/.test(lc)) {
 navigate('/benefactor');
 return;
 }
 if (handlerRef.current) {
 handlerRef.current(transcript);
 }
 }
 };
 try { rec.start(); } catch (_) { /* recognition already started */ }
 }, [speak, playBeep, t]); // stable — reads whisperMode + isListening from refs
 // ── Register a page-specific handler + announce the page ─────────────────────
 const registerHandler = useCallback((handler, summary) => {
 handlerRef.current = handler;
 currentSummaryRef.current = summary || "";
 if (whisperModeRef.current && summary) {
 const tid = setTimeout(() => speak(summary), 700);
 return () => clearTimeout(tid);
 }
 }, [speak]); // stable
 const unregisterHandler = useCallback(() => {
 handlerRef.current = null;
 window.speechSynthesis?.cancel();
 recognitionRef.current?.abort();
 recognitionRef.current = null;
 isListeningRef.current = false;
 setIsListening(false);
 }, []);
 // ── Global keyboard listener (spacebar hold — desktop) ────────────────────────
 // Attached once on mount; reads all state from refs to avoid stale closures.
 useEffect(() => {
 const onKeyDown = (e) => {
 if (!whisperModeRef.current) return;
 if (e.code !== 'Space' || e.repeat) return;
 const active = document.activeElement;
 if (
 active &&
 (active.tagName === 'INPUT' ||
 active.tagName === 'TEXTAREA' ||
 active.isContentEditable)
 ) return;
 e.preventDefault();
 spaceHoldTimerRef.current = setTimeout(() => startListening(), 250);
 };
 const onKeyUp = (e) => {
 if (e.code !== 'Space') return;
 if (spaceHoldTimerRef.current) {
 clearTimeout(spaceHoldTimerRef.current);
 spaceHoldTimerRef.current = null;
 }
 };
 window.addEventListener('keydown', onKeyDown);
 window.addEventListener('keyup', onKeyUp);
 return () => {
 window.removeEventListener('keydown', onKeyDown);
 window.removeEventListener('keyup', onKeyUp);
 };
 }, [startListening]); // startListening is stable — attached once
 // ── Mobile double-tap listener ────────────────────────────────────────────────
 useEffect(() => {
 const onTouchEnd = (e) => {
 if (!whisperModeRef.current) return;
 const active = document.activeElement;
 if (active && (active.tagName === 'INPUT' || active.tagName === 'TEXTAREA')) return;
 const now = Date.now();
 if (now - lastTapTimeRef.current < 350) {
 e.preventDefault();
 startListening();
 }
 lastTapTimeRef.current = now;
 };
 window.addEventListener('touchend', onTouchEnd, { passive: false });
 return () => window.removeEventListener('touchend', onTouchEnd);
 }, [startListening]); // startListening is stable
 // ── Cleanup on unmount ────────────────────────────────────────────────────────
 useEffect(() => {
 return () => {
 window.speechSynthesis?.cancel();
 recognitionRef.current?.abort();
 if (spaceHoldTimerRef.current) clearTimeout(spaceHoldTimerRef.current);
 };
 }, []);
 return (
 <WhisperContext.Provider value={{ speak, registerHandler, unregisterHandler, isListening, whisperMode }}>
 {children}
 {/* ARIA live region — screen readers pick this up automatically */}
 <div
 aria-live="assertive"
 aria-atomic="true"
 style={{ position: 'absolute', left: '-9999px', width: '1px', height: '1px', overflow: 'hidden' }}
 >
 {ariaText}
 </div>
 {/* Floating listening indicator */}
 {isListening && (
 <div
 style={{
 position: 'fixed',
 bottom: '28px',
 right: '28px',
 zIndex: 99999,
 background: 'rgba(7, 20, 11, 0.93)',
 backdropFilter: 'blur(16px)',
 border: '1.5px solid rgba(74, 222, 128, 0.55)',
 borderRadius: '20px',
 padding: '14px 22px',
 display: 'flex',
 alignItems: 'center',
 gap: '10px',
 boxShadow: '0 8px 32px rgba(0,0,0,0.45), 0 0 0 4px rgba(74,222,128,0.12)',
 color: '#4ade80',
 fontFamily: 'Outfit, sans-serif',
 fontSize: '15px',
 fontWeight: 600,
 }}
 >
 <Mic
 size={18}
 style={{ animation: 'pulse 1s ease-in-out infinite', filter: 'drop-shadow(0 0 6px #4ade80)' }}
 />
 Listening...
 </div>
 )}
 </WhisperContext.Provider>
 );
}
/**
 * Hook giving access to the voice-navigation context value.
 *
 * @returns {object} The value supplied by the nearest WhisperProvider.
 * @throws {Error} When called outside of a WhisperProvider.
 */
export function useWhisper() {
  const whisper = useContext(WhisperContext);
  if (whisper) {
    return whisper;
  }
  throw new Error('useWhisper must be used within a WhisperProvider');
}
export default WhisperContext;

Collectives™ on Stack Overflow

How to implement a global voice navigation system in React using the Web Speech API and Context API?