From 8bc63b1ffd55dc03ab3b48411845c93e1d51c254 Mon Sep 17 00:00:00 2001
From: Luis Pabon <lpabon@google.com>
Date: Tue, 14 Oct 2025 20:41:12 +0000
Subject: [PATCH 1/8] Feature with Unit Test
---
 src/google/adk/agents/llm_agent.py | 16 +++
 src/google/adk/agents/llm_agent_config.py | 4 +
 src/google/adk/flows/llm_flows/basic.py | 7 +-
 .../unittests/agents/test_llm_agent_fields.py | 26 ++++
 .../flows/llm_flows/test_basic_processor.py | 126 +++++++++++++++++-
 5 files changed, 175 insertions(+), 4 deletions(-)
diff --git a/src/google/adk/agents/llm_agent.py b/src/google/adk/agents/llm_agent.py
index c143568252..40146d0ee8 100644
--- a/src/google/adk/agents/llm_agent.py
+++ b/src/google/adk/agents/llm_agent.py
@@ -263,6 +263,9 @@ class LlmAgent(BaseAgent):
 settings, etc.
 """
 
+ speech_config: Optional[types.SpeechConfig] = None
+ """The agent's speech configurations."""
+
 # LLM-based agent transfer configs - Start
 disallow_transfer_to_parent: bool = False
 """Disallows LLM-controlled transferring to the parent agent.
@@ -697,6 +700,7 @@ def __maybe_save_output_to_state(self, event: Event):
 @model_validator(mode='after')
 def __model_validator_after(self) -> LlmAgent:
 self.__check_output_schema()
+ self.__check_speech_config()
 return self
 
 def __check_output_schema(self):
@@ -722,6 +726,16 @@ def __check_output_schema(self):
 ' sub_agents must be empty to disable agent transfer.'
 )
 
+ def __check_speech_config(self):
+ if self.speech_config:
+ logger.warning(
+ 'Agent %s has a speech_config set. This configuration is only'
+ ' effective when using the agent in a live/streaming mode'
+ ' (e.g., via run_live) and with a model that supports speech'
+ ' input/output.',
+ self.name,
+ )
+
 @field_validator('generate_content_config', mode='after')
 @classmethod
 def validate_generate_content_config(
@@ -851,6 +865,8 @@ def _parse_config(
 )
 if config.generate_content_config:
 kwargs['generate_content_config'] = config.generate_content_config
+ if config.speech_config:
+ kwargs['speech_config'] = config.speech_config
 
 return kwargs
 
diff --git a/src/google/adk/agents/llm_agent_config.py b/src/google/adk/agents/llm_agent_config.py
index 4203a5923b..4214da879a 100644
--- a/src/google/adk/agents/llm_agent_config.py
+++ b/src/google/adk/agents/llm_agent_config.py
@@ -188,3 +188,7 @@ class LlmAgentConfig(BaseAgentConfig):
 generate_content_config: Optional[types.GenerateContentConfig] = Field(
 default=None, description='Optional. LlmAgent.generate_content_config.'
 )
+
+ speech_config: Optional[types.SpeechConfig] = Field(
+ default=None, description='Optional. LlmAgent.speech_config'
+ )
diff --git a/src/google/adk/flows/llm_flows/basic.py b/src/google/adk/flows/llm_flows/basic.py
index 013c7ad054..b946112d18 100644
--- a/src/google/adk/flows/llm_flows/basic.py
+++ b/src/google/adk/flows/llm_flows/basic.py
@@ -58,9 +58,12 @@ async def run_async(
 llm_request.live_connect_config.response_modalities = (
 invocation_context.run_config.response_modalities
 )
- llm_request.live_connect_config.speech_config = (
- invocation_context.run_config.speech_config
+
+ speech_config_to_use = (
+ agent.speech_config or invocation_context.run_config.speech_config
 )
+ llm_request.live_connect_config.speech_config = speech_config_to_use
+
 llm_request.live_connect_config.output_audio_transcription = (
 invocation_context.run_config.output_audio_transcription
 )
diff --git a/tests/unittests/agents/test_llm_agent_fields.py b/tests/unittests/agents/test_llm_agent_fields.py
index 5540e55b0d..bc8b387f9c 100644
--- a/tests/unittests/agents/test_llm_agent_fields.py
+++ b/tests/unittests/agents/test_llm_agent_fields.py
@@ -167,6 +167,32 @@ async def _global_instruction_provider(ctx: ReadonlyContext) -> str:
 assert bypass_state_injection
 
 
+def test_speech_config_logs_warning(caplog: pytest.LogCaptureFixture):
+ with caplog.at_level('WARNING'):
+
+ agent = LlmAgent(
+ name='test_agent',
+ speech_config=types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name='Kore',
+ )
+ )
+ ),
+ )
+
+ assert agent.speech_config is not None
+ assert (
+ agent.speech_config.voice_config.prebuilt_voice_config.voice_name
+ == 'Kore'
+ )
+ assert (
+ 'Agent test_agent has a speech_config set.' in caplog.text
+ and 'only effective when using the agent in a live/streaming mode'
+ in caplog.text
+ )
+
+
 def test_output_schema_will_disable_transfer(caplog: pytest.LogCaptureFixture):
 with caplog.at_level('WARNING'):
 
diff --git a/tests/unittests/flows/llm_flows/test_basic_processor.py b/tests/unittests/flows/llm_flows/test_basic_processor.py
index 770f358949..773614b6d6 100644
--- a/tests/unittests/flows/llm_flows/test_basic_processor.py
+++ b/tests/unittests/flows/llm_flows/test_basic_processor.py
@@ -21,6 +21,7 @@
 from google.adk.models.llm_request import LlmRequest
 from google.adk.sessions.in_memory_session_service import InMemorySessionService
 from google.adk.tools.function_tool import FunctionTool
+from google.genai import types
 from pydantic import BaseModel
 from pydantic import Field
 import pytest
@@ -38,7 +39,9 @@ def dummy_tool(query: str) -> str:
 return f'Result: {query}'
 
 
-async def _create_invocation_context(agent: LlmAgent) -> InvocationContext:
+async def _create_invocation_context(
+ agent: LlmAgent, run_config: RunConfig = RunConfig()
+) -> InvocationContext:
 """Helper to create InvocationContext for testing."""
 session_service = InMemorySessionService()
 session = await session_service.create_session(
@@ -49,7 +52,7 @@ async def _create_invocation_context(agent: LlmAgent) -> InvocationContext:
 agent=agent,
 session=session,
 session_service=session_service,
- run_config=RunConfig(),
+ run_config=run_config,
 )
 
 
@@ -143,3 +146,122 @@ async def test_sets_model_name(self):
 
 # Should have set the model name
 assert llm_request.model == 'gemini-1.5-flash'
+
+ @pytest.mark.asyncio
+ async def test_speech_config_agent_overrides_run_config(self):
+ """Tests that agent's speech_config is prioritized over the RunConfig's."""
+ agent_speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name='Kore',
+ )
+ )
+ )
+ run_speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name='Puck',
+ )
+ )
+ )
+
+ agent = LlmAgent(
+ name='test_agent',
+ model='gemini-1.5-flash',
+ speech_config=agent_speech_config,
+ )
+ run_config = RunConfig(speech_config=run_speech_config)
+ invocation_context = await _create_invocation_context(agent, run_config)
+ llm_request = LlmRequest()
+ processor = _BasicLlmRequestProcessor()
+
+ # Process the request
+ async for _ in processor.run_async(invocation_context, llm_request):
+ pass
+
+ # Assert that the agent's override was used
+ assert llm_request.live_connect_config.speech_config == agent_speech_config
+ assert (
+ llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
+ == 'Kore'
+ )
+
+ @pytest.mark.asyncio
+ async def test_speech_config_uses_agent_as_fallback(self):
+ """Tests that the agent's speech_config is used when RunConfig's is None."""
+ agent_speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name='Kore',
+ )
+ )
+ )
+
+ agent = LlmAgent(
+ name='test_agent',
+ model='gemini-1.5-flash',
+ speech_config=agent_speech_config,
+ )
+ run_config = RunConfig(speech_config=None) # No runtime config
+ invocation_context = await _create_invocation_context(agent, run_config)
+ llm_request = LlmRequest()
+ processor = _BasicLlmRequestProcessor()
+
+ # Process the request
+ async for _ in processor.run_async(invocation_context, llm_request):
+ pass
+
+ # Assert that the agent's config was used as a fallback
+ assert llm_request.live_connect_config.speech_config == agent_speech_config
+ assert (
+ llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
+ == 'Kore'
+ )
+
+ @pytest.mark.asyncio
+ async def test_speech_config_uses_run_config_when_agent_is_none(self):
+ """Tests that RunConfig's speech_config is used when the agent's is None."""
+ run_speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name='Puck',
+ )
+ )
+ )
+
+ agent = LlmAgent(
+ name='test_agent', model='gemini-1.5-flash', speech_config=None
+ ) # No agent config
+ run_config = RunConfig(speech_config=run_speech_config)
+ invocation_context = await _create_invocation_context(agent, run_config)
+ llm_request = LlmRequest()
+ processor = _BasicLlmRequestProcessor()
+
+ # Process the request
+ async for _ in processor.run_async(invocation_context, llm_request):
+ pass
+
+ # Assert that the runtime config was used
+ assert llm_request.live_connect_config.speech_config == run_speech_config
+ assert (
+ llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
+ == 'Puck'
+ )
+
+ @pytest.mark.asyncio
+ async def test_speech_config_is_none_when_both_are_none(self):
+ """Tests that speech_config is None when neither agent nor RunConfig has it."""
+ agent = LlmAgent(
+ name='test_agent', model='gemini-1.5-flash', speech_config=None
+ )
+ run_config = RunConfig(speech_config=None) # No runtime config
+ invocation_context = await _create_invocation_context(agent, run_config)
+ llm_request = LlmRequest()
+ processor = _BasicLlmRequestProcessor()
+
+ # Process the request
+ async for _ in processor.run_async(invocation_context, llm_request):
+ pass
+
+ # Assert that the final config is None
+ assert llm_request.live_connect_config.speech_config is None
From 02d9ab3cfd07f780e74321c65ebef70b0af0f269 Mon Sep 17 00:00:00 2001
From: Luis Pabon <lpabon@google.com>
Date: Tue, 14 Oct 2025 23:18:25 +0000
Subject: [PATCH 2/8] Added voices to sample
---
 .../live_bidi_streaming_multi_agent/agent.py | 25 +++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/contributing/samples/live_bidi_streaming_multi_agent/agent.py b/contributing/samples/live_bidi_streaming_multi_agent/agent.py
index 413e33a727..defc95ff50 100644
--- a/contributing/samples/live_bidi_streaming_multi_agent/agent.py
+++ b/contributing/samples/live_bidi_streaming_multi_agent/agent.py
@@ -42,6 +42,13 @@ def roll_die(sides: int) -> int:
 ),
 ]
 ),
+ speech_config=types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Kore",
+ )
+ )
+ ),
 )
 
 
@@ -85,6 +92,13 @@ def check_prime(nums: list[int]) -> str:
 ),
 ]
 ),
+ speech_config=types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Puck",
+ )
+ )
+ ),
 )
 
 
@@ -100,8 +114,8 @@ def get_current_weather(location: str):
 
 root_agent = Agent(
 # find supported models here: https://google.github.io/adk-docs/get-started/streaming/quickstart-streaming/
- model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
- # model="gemini-live-2.5-flash-preview", # for AI studio key
+ # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
+ model="gemini-live-2.5-flash-preview", # for AI studio key
 name="root_agent",
 instruction="""
 You are a helpful assistant that can check time, roll dice and check if numbers are prime.
@@ -126,4 +140,11 @@ def get_current_weather(location: str):
 ),
 ]
 ),
+ speech_config=types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Zephyr",
+ )
+ )
+ ),
 )
From 626e86bc14e17139f3339e829acc0c71c278f3ef Mon Sep 17 00:00:00 2001
From: Hangfei Lin <hangfeilin@gmail.com>
Date: Tue, 14 Oct 2025 19:39:17 -0700
Subject: [PATCH 3/8] Update run_config.py
---
 src/google/adk/agents/run_config.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/google/adk/agents/run_config.py b/src/google/adk/agents/run_config.py
index 9fe82fabf1..ebe77dcb3c 100644
--- a/src/google/adk/agents/run_config.py
+++ b/src/google/adk/agents/run_config.py
@@ -35,7 +35,10 @@ class StreamingMode(Enum):
 
 
 class RunConfig(BaseModel):
- """Configs for runtime behavior of agents."""
+ """Configs for runtime behavior of agents.
+
+ The configs here will be overriden by agent-spcific configurations.
+ """
 
 model_config = ConfigDict(
 extra='forbid',
From 5c1fc02e46ea934a13aff1e603687c80c4e3b3a1 Mon Sep 17 00:00:00 2001
From: Hangfei Lin <hangfeilin@gmail.com>
Date: Wed, 15 Oct 2025 12:41:48 -0700
Subject: [PATCH 4/8] Update run_config.py
---
 src/google/adk/agents/run_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/google/adk/agents/run_config.py b/src/google/adk/agents/run_config.py
index ebe77dcb3c..890923385c 100644
--- a/src/google/adk/agents/run_config.py
+++ b/src/google/adk/agents/run_config.py
@@ -37,7 +37,7 @@ class StreamingMode(Enum):
 class RunConfig(BaseModel):
 """Configs for runtime behavior of agents.
 
- The configs here will be overriden by agent-spcific configurations.
+ The configs here will be overriden by agent-specific configurations.
 """
 
 model_config = ConfigDict(
From 54cd5a210b868b155f930685094aba971dffaf90 Mon Sep 17 00:00:00 2001
From: Luis Pabon <lpabon@google.com>
Date: Wed, 15 Oct 2025 22:35:39 +0000
Subject: [PATCH 5/8] Moved to Gemini class; run_config overrides
---
 .../live_bidi_streaming_multi_agent/agent.py | 57 +++++---
 src/google/adk/flows/llm_flows/basic.py | 5 +-
 src/google/adk/models/google_llm.py | 8 +
 .../unittests/agents/test_llm_agent_fields.py | 26 ----
 .../flows/llm_flows/test_basic_processor.py | 126 +---------------
 tests/unittests/models/test_google_llm.py | 138 ++++++++++++++++++
 6 files changed, 184 insertions(+), 176 deletions(-)
diff --git a/contributing/samples/live_bidi_streaming_multi_agent/agent.py b/contributing/samples/live_bidi_streaming_multi_agent/agent.py
index defc95ff50..ddb36b2845 100644
--- a/contributing/samples/live_bidi_streaming_multi_agent/agent.py
+++ b/contributing/samples/live_bidi_streaming_multi_agent/agent.py
@@ -16,6 +16,7 @@
 
 from google.adk.agents.llm_agent import Agent
 from google.adk.examples.example import Example
+from google.adk.models.google_llm import Gemini
 from google.adk.tools.example_tool import ExampleTool
 from google.genai import types
 
@@ -28,6 +29,17 @@ def roll_die(sides: int) -> int:
 
 roll_agent = Agent(
 name="roll_agent",
+ model=Gemini(
+ # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
+ model="gemini-live-2.5-flash-preview", # for AI studio key
+ speech_config=types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Kore",
+ )
+ )
+ ),
+ ),
 description="Handles rolling dice of different sizes.",
 instruction="""
 You are responsible for rolling dice based on the user's request.
@@ -42,13 +54,6 @@ def roll_die(sides: int) -> int:
 ),
 ]
 ),
- speech_config=types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name="Kore",
- )
- )
- ),
 )
 
 
@@ -76,6 +81,17 @@ def check_prime(nums: list[int]) -> str:
 
 prime_agent = Agent(
 name="prime_agent",
+ model=Gemini(
+ # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
+ model="gemini-live-2.5-flash-preview", # for AI studio key
+ speech_config=types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Puck",
+ )
+ )
+ ),
+ ),
 description="Handles checking if numbers are prime.",
 instruction="""
 You are responsible for checking whether numbers are prime.
@@ -92,13 +108,6 @@ def check_prime(nums: list[int]) -> str:
 ),
 ]
 ),
- speech_config=types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name="Puck",
- )
- )
- ),
 )
 
 
@@ -114,8 +123,17 @@ def get_current_weather(location: str):
 
 root_agent = Agent(
 # find supported models here: https://google.github.io/adk-docs/get-started/streaming/quickstart-streaming/
- # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
- model="gemini-live-2.5-flash-preview", # for AI studio key
+ model=Gemini(
+ # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
+ model="gemini-live-2.5-flash-preview", # for AI studio key
+ speech_config=types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Zephyr",
+ )
+ )
+ ),
+ ),
 name="root_agent",
 instruction="""
 You are a helpful assistant that can check time, roll dice and check if numbers are prime.
@@ -140,11 +158,4 @@ def get_current_weather(location: str):
 ),
 ]
 ),
- speech_config=types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name="Zephyr",
- )
- )
- ),
 )
diff --git a/src/google/adk/flows/llm_flows/basic.py b/src/google/adk/flows/llm_flows/basic.py
index c0bc50edfc..789eeb0a56 100644
--- a/src/google/adk/flows/llm_flows/basic.py
+++ b/src/google/adk/flows/llm_flows/basic.py
@@ -59,10 +59,9 @@ async def run_async(
 invocation_context.run_config.response_modalities
 )
 
- speech_config_to_use = (
- agent.speech_config or invocation_context.run_config.speech_config
+ llm_request.live_connect_config.speech_config = (
+ invocation_context.run_config.speech_config
 )
- llm_request.live_connect_config.speech_config = speech_config_to_use
 
 llm_request.live_connect_config.output_audio_transcription = (
 invocation_context.run_config.output_audio_transcription
diff --git a/src/google/adk/models/google_llm.py b/src/google/adk/models/google_llm.py
index 411162bb0c..bf57562262 100644
--- a/src/google/adk/models/google_llm.py
+++ b/src/google/adk/models/google_llm.py
@@ -60,6 +60,8 @@ class Gemini(BaseLlm):
 
 model: str = 'gemini-2.5-flash'
 
+ speech_config: Optional[types.SpeechConfig] = None
+
 retry_options: Optional[types.HttpRetryOptions] = None
 """Allow Gemini to retry failed responses.
 
@@ -261,6 +263,12 @@ async def connect(self, llm_request: LlmRequest) -> BaseLlmConnection:
 self._live_api_version
 )
 
+ if (
+ llm_request.live_connect_config
+ and llm_request.live_connect_config.speech_config is None
+ ):
+ llm_request.live_connect_config.speech_config = self.speech_config
+
 llm_request.live_connect_config.system_instruction = types.Content(
 role='system',
 parts=[
diff --git a/tests/unittests/agents/test_llm_agent_fields.py b/tests/unittests/agents/test_llm_agent_fields.py
index bc8b387f9c..5540e55b0d 100644
--- a/tests/unittests/agents/test_llm_agent_fields.py
+++ b/tests/unittests/agents/test_llm_agent_fields.py
@@ -167,32 +167,6 @@ async def _global_instruction_provider(ctx: ReadonlyContext) -> str:
 assert bypass_state_injection
 
 
-def test_speech_config_logs_warning(caplog: pytest.LogCaptureFixture):
- with caplog.at_level('WARNING'):
-
- agent = LlmAgent(
- name='test_agent',
- speech_config=types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name='Kore',
- )
- )
- ),
- )
-
- assert agent.speech_config is not None
- assert (
- agent.speech_config.voice_config.prebuilt_voice_config.voice_name
- == 'Kore'
- )
- assert (
- 'Agent test_agent has a speech_config set.' in caplog.text
- and 'only effective when using the agent in a live/streaming mode'
- in caplog.text
- )
-
-
 def test_output_schema_will_disable_transfer(caplog: pytest.LogCaptureFixture):
 with caplog.at_level('WARNING'):
 
diff --git a/tests/unittests/flows/llm_flows/test_basic_processor.py b/tests/unittests/flows/llm_flows/test_basic_processor.py
index 773614b6d6..770f358949 100644
--- a/tests/unittests/flows/llm_flows/test_basic_processor.py
+++ b/tests/unittests/flows/llm_flows/test_basic_processor.py
@@ -21,7 +21,6 @@
 from google.adk.models.llm_request import LlmRequest
 from google.adk.sessions.in_memory_session_service import InMemorySessionService
 from google.adk.tools.function_tool import FunctionTool
-from google.genai import types
 from pydantic import BaseModel
 from pydantic import Field
 import pytest
@@ -39,9 +38,7 @@ def dummy_tool(query: str) -> str:
 return f'Result: {query}'
 
 
-async def _create_invocation_context(
- agent: LlmAgent, run_config: RunConfig = RunConfig()
-) -> InvocationContext:
+async def _create_invocation_context(agent: LlmAgent) -> InvocationContext:
 """Helper to create InvocationContext for testing."""
 session_service = InMemorySessionService()
 session = await session_service.create_session(
@@ -52,7 +49,7 @@ async def _create_invocation_context(
 agent=agent,
 session=session,
 session_service=session_service,
- run_config=run_config,
+ run_config=RunConfig(),
 )
 
 
@@ -146,122 +143,3 @@ async def test_sets_model_name(self):
 
 # Should have set the model name
 assert llm_request.model == 'gemini-1.5-flash'
-
- @pytest.mark.asyncio
- async def test_speech_config_agent_overrides_run_config(self):
- """Tests that agent's speech_config is prioritized over the RunConfig's."""
- agent_speech_config = types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name='Kore',
- )
- )
- )
- run_speech_config = types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name='Puck',
- )
- )
- )
-
- agent = LlmAgent(
- name='test_agent',
- model='gemini-1.5-flash',
- speech_config=agent_speech_config,
- )
- run_config = RunConfig(speech_config=run_speech_config)
- invocation_context = await _create_invocation_context(agent, run_config)
- llm_request = LlmRequest()
- processor = _BasicLlmRequestProcessor()
-
- # Process the request
- async for _ in processor.run_async(invocation_context, llm_request):
- pass
-
- # Assert that the agent's override was used
- assert llm_request.live_connect_config.speech_config == agent_speech_config
- assert (
- llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
- == 'Kore'
- )
-
- @pytest.mark.asyncio
- async def test_speech_config_uses_agent_as_fallback(self):
- """Tests that the agent's speech_config is used when RunConfig's is None."""
- agent_speech_config = types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name='Kore',
- )
- )
- )
-
- agent = LlmAgent(
- name='test_agent',
- model='gemini-1.5-flash',
- speech_config=agent_speech_config,
- )
- run_config = RunConfig(speech_config=None) # No runtime config
- invocation_context = await _create_invocation_context(agent, run_config)
- llm_request = LlmRequest()
- processor = _BasicLlmRequestProcessor()
-
- # Process the request
- async for _ in processor.run_async(invocation_context, llm_request):
- pass
-
- # Assert that the agent's config was used as a fallback
- assert llm_request.live_connect_config.speech_config == agent_speech_config
- assert (
- llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
- == 'Kore'
- )
-
- @pytest.mark.asyncio
- async def test_speech_config_uses_run_config_when_agent_is_none(self):
- """Tests that RunConfig's speech_config is used when the agent's is None."""
- run_speech_config = types.SpeechConfig(
- voice_config=types.VoiceConfig(
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name='Puck',
- )
- )
- )
-
- agent = LlmAgent(
- name='test_agent', model='gemini-1.5-flash', speech_config=None
- ) # No agent config
- run_config = RunConfig(speech_config=run_speech_config)
- invocation_context = await _create_invocation_context(agent, run_config)
- llm_request = LlmRequest()
- processor = _BasicLlmRequestProcessor()
-
- # Process the request
- async for _ in processor.run_async(invocation_context, llm_request):
- pass
-
- # Assert that the runtime config was used
- assert llm_request.live_connect_config.speech_config == run_speech_config
- assert (
- llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name
- == 'Puck'
- )
-
- @pytest.mark.asyncio
- async def test_speech_config_is_none_when_both_are_none(self):
- """Tests that speech_config is None when neither agent nor RunConfig has it."""
- agent = LlmAgent(
- name='test_agent', model='gemini-1.5-flash', speech_config=None
- )
- run_config = RunConfig(speech_config=None) # No runtime config
- invocation_context = await _create_invocation_context(agent, run_config)
- llm_request = LlmRequest()
- processor = _BasicLlmRequestProcessor()
-
- # Process the request
- async for _ in processor.run_async(invocation_context, llm_request):
- pass
-
- # Assert that the final config is None
- assert llm_request.live_connect_config.speech_config is None
diff --git a/tests/unittests/models/test_google_llm.py b/tests/unittests/models/test_google_llm.py
index 1b5979bdf9..180e988862 100644
--- a/tests/unittests/models/test_google_llm.py
+++ b/tests/unittests/models/test_google_llm.py
@@ -1858,3 +1858,141 @@ def mock_model_dump(*args, **kwargs):
 # Should still succeed using repr()
 assert "Config:" in log_output
 assert "GenerateContentConfig" in log_output
+
+
+@pytest.mark.asyncio
+async def test_connect_uses_gemini_speech_config_when_request_is_none(
+ gemini_llm, llm_request
+):
+ """Tests that Gemini's speech_config is used when live_connect_config's is None."""
+ # Arrange: Set a speech_config on the Gemini instance with the voice "Kore"
+ gemini_llm.speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Kore",
+ )
+ )
+ )
+ llm_request.live_connect_config = (
+ types.LiveConnectConfig()
+ ) # speech_config is None
+
+ mock_live_session = mock.AsyncMock()
+
+ with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
+
+ class MockLiveConnect:
+
+ async def __aenter__(self):
+ return mock_live_session
+
+ async def __aexit__(self, *args):
+ pass
+
+ mock_live_client.aio.live.connect.return_value = MockLiveConnect()
+
+ # Act
+ async with gemini_llm.connect(llm_request) as connection:
+ # Assert
+ mock_live_client.aio.live.connect.assert_called_once()
+ call_args = mock_live_client.aio.live.connect.call_args
+ config_arg = call_args.kwargs["config"]
+
+ # Verify the speech_config from the Gemini instance was used
+ assert config_arg.speech_config is not None
+ assert (
+ config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name
+ == "Kore"
+ )
+ assert isinstance(connection, GeminiLlmConnection)
+
+
+@pytest.mark.asyncio
+async def test_connect_request_speech_config_overrides_gemini_config(
+ gemini_llm, llm_request
+):
+ """Tests that live_connect_config's speech_config is preserved even if Gemini has one."""
+ # Arrange: Set different speech_configs on both the Gemini instance ("Puck") and the request ("Zephyr")
+ gemini_llm.speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Puck",
+ )
+ )
+ )
+ request_speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Zephyr",
+ )
+ )
+ )
+ llm_request.live_connect_config = types.LiveConnectConfig(
+ speech_config=request_speech_config
+ )
+
+ mock_live_session = mock.AsyncMock()
+
+ with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
+
+ class MockLiveConnect:
+
+ async def __aenter__(self):
+ return mock_live_session
+
+ async def __aexit__(self, *args):
+ pass
+
+ mock_live_client.aio.live.connect.return_value = MockLiveConnect()
+
+ # Act
+ async with gemini_llm.connect(llm_request) as connection:
+ # Assert
+ mock_live_client.aio.live.connect.assert_called_once()
+ call_args = mock_live_client.aio.live.connect.call_args
+ config_arg = call_args.kwargs["config"]
+
+ # Verify the speech_config from the request ("Zephyr") was preserved and not overwritten
+ assert config_arg.speech_config is not None
+ assert (
+ config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name
+ == "Zephyr"
+ )
+ assert isinstance(connection, GeminiLlmConnection)
+
+
+@pytest.mark.asyncio
+async def test_connect_speech_config_remains_none_when_both_are_none(
+ gemini_llm, llm_request
+):
+ """Tests that speech_config is None when neither Gemini nor the request has it."""
+ # Arrange: Ensure both Gemini instance and request have no speech_config
+ gemini_llm.speech_config = None
+ llm_request.live_connect_config = (
+ types.LiveConnectConfig()
+ ) # speech_config is None
+
+ mock_live_session = mock.AsyncMock()
+
+ with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
+
+ class MockLiveConnect:
+
+ async def __aenter__(self):
+ return mock_live_session
+
+ async def __aexit__(self, *args):
+ pass
+
+ mock_live_client.aio.live.connect.return_value = MockLiveConnect()
+
+ # Act
+ async with gemini_llm.connect(llm_request) as connection:
+ # Assert
+ mock_live_client.aio.live.connect.assert_called_once()
+ call_args = mock_live_client.aio.live.connect.call_args
+ config_arg = call_args.kwargs["config"]
+
+ # Verify the final speech_config is still None
+ assert config_arg.speech_config is None
+ assert isinstance(connection, GeminiLlmConnection)
From 38a5494ffb96e704175a64a3ddaeb4346af73ac9 Mon Sep 17 00:00:00 2001
From: Luis Pabon <lpabon@google.com>
Date: Wed, 15 Oct 2025 22:35:39 +0000
Subject: [PATCH 6/8] Moved to Gemini class; run_config overrides
---
 src/google/adk/agents/llm_agent.py | 16 ----------------
 src/google/adk/agents/llm_agent_config.py | 4 ----
 src/google/adk/flows/llm_flows/basic.py | 2 --
 3 files changed, 22 deletions(-)
diff --git a/src/google/adk/agents/llm_agent.py b/src/google/adk/agents/llm_agent.py
index 40146d0ee8..c143568252 100644
--- a/src/google/adk/agents/llm_agent.py
+++ b/src/google/adk/agents/llm_agent.py
@@ -263,9 +263,6 @@ class LlmAgent(BaseAgent):
 settings, etc.
 """
 
- speech_config: Optional[types.SpeechConfig] = None
- """The agent's speech configurations."""
-
 # LLM-based agent transfer configs - Start
 disallow_transfer_to_parent: bool = False
 """Disallows LLM-controlled transferring to the parent agent.
@@ -700,7 +697,6 @@ def __maybe_save_output_to_state(self, event: Event):
 @model_validator(mode='after')
 def __model_validator_after(self) -> LlmAgent:
 self.__check_output_schema()
- self.__check_speech_config()
 return self
 
 def __check_output_schema(self):
@@ -726,16 +722,6 @@ def __check_output_schema(self):
 ' sub_agents must be empty to disable agent transfer.'
 )
 
- def __check_speech_config(self):
- if self.speech_config:
- logger.warning(
- 'Agent %s has a speech_config set. This configuration is only'
- ' effective when using the agent in a live/streaming mode'
- ' (e.g., via run_live) and with a model that supports speech'
- ' input/output.',
- self.name,
- )
-
 @field_validator('generate_content_config', mode='after')
 @classmethod
 def validate_generate_content_config(
@@ -865,8 +851,6 @@ def _parse_config(
 )
 if config.generate_content_config:
 kwargs['generate_content_config'] = config.generate_content_config
- if config.speech_config:
- kwargs['speech_config'] = config.speech_config
 
 return kwargs
 
diff --git a/src/google/adk/agents/llm_agent_config.py b/src/google/adk/agents/llm_agent_config.py
index 4214da879a..4203a5923b 100644
--- a/src/google/adk/agents/llm_agent_config.py
+++ b/src/google/adk/agents/llm_agent_config.py
@@ -188,7 +188,3 @@ class LlmAgentConfig(BaseAgentConfig):
 generate_content_config: Optional[types.GenerateContentConfig] = Field(
 default=None, description='Optional. LlmAgent.generate_content_config.'
 )
-
- speech_config: Optional[types.SpeechConfig] = Field(
- default=None, description='Optional. LlmAgent.speech_config'
- )
diff --git a/src/google/adk/flows/llm_flows/basic.py b/src/google/adk/flows/llm_flows/basic.py
index 789eeb0a56..24cc7fd67c 100644
--- a/src/google/adk/flows/llm_flows/basic.py
+++ b/src/google/adk/flows/llm_flows/basic.py
@@ -58,11 +58,9 @@ async def run_async(
 llm_request.live_connect_config.response_modalities = (
 invocation_context.run_config.response_modalities
 )
-
 llm_request.live_connect_config.speech_config = (
 invocation_context.run_config.speech_config
 )
-
 llm_request.live_connect_config.output_audio_transcription = (
 invocation_context.run_config.output_audio_transcription
 )
From 07870c6ebad8ac76e80ac4142ea3a43e855167c0 Mon Sep 17 00:00:00 2001
From: Luis Pabon <lpabon@google.com>
Date: Wed, 15 Oct 2025 22:52:49 +0000
Subject: [PATCH 7/8] Make agent config override run_config
---
 src/google/adk/models/google_llm.py | 5 +----
 tests/unittests/models/test_google_llm.py | 6 +++---
 2 files changed, 4 insertions(+), 7 deletions(-)
diff --git a/src/google/adk/models/google_llm.py b/src/google/adk/models/google_llm.py
index bf57562262..b96e56e169 100644
--- a/src/google/adk/models/google_llm.py
+++ b/src/google/adk/models/google_llm.py
@@ -263,10 +263,7 @@ async def connect(self, llm_request: LlmRequest) -> BaseLlmConnection:
 self._live_api_version
 )
 
- if (
- llm_request.live_connect_config
- and llm_request.live_connect_config.speech_config is None
- ):
+ if self.speech_config is not None:
 llm_request.live_connect_config.speech_config = self.speech_config
 
 llm_request.live_connect_config.system_instruction = types.Content(
diff --git a/tests/unittests/models/test_google_llm.py b/tests/unittests/models/test_google_llm.py
index 180e988862..e9cfb3b842 100644
--- a/tests/unittests/models/test_google_llm.py
+++ b/tests/unittests/models/test_google_llm.py
@@ -1908,7 +1908,7 @@ async def __aexit__(self, *args):
 
 
 @pytest.mark.asyncio
-async def test_connect_request_speech_config_overrides_gemini_config(
+async def test_connect_request_gemini_config_overrides_speech_config(
 gemini_llm, llm_request
 ):
 """Tests that live_connect_config's speech_config is preserved even if Gemini has one."""
@@ -1952,11 +1952,11 @@ async def __aexit__(self, *args):
 call_args = mock_live_client.aio.live.connect.call_args
 config_arg = call_args.kwargs["config"]
 
- # Verify the speech_config from the request ("Zephyr") was preserved and not overwritten
+ # Verify the speech_config from the request ("Zephyr") was overwritten by Gemini's speech_config ("Puck")
 assert config_arg.speech_config is not None
 assert (
 config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name
- == "Zephyr"
+ == "Puck"
 )
 assert isinstance(connection, GeminiLlmConnection)
 
From 2b4d3ac34f2b63c8ffe4d573216644cb264e1d18 Mon Sep 17 00:00:00 2001
From: Luis Pabon <lpabon@google.com>
Date: Wed, 15 Oct 2025 22:58:10 +0000
Subject: [PATCH 8/8] Added last case
---
 tests/unittests/models/test_google_llm.py | 48 +++++++++++++++++++++++
 1 file changed, 48 insertions(+)
diff --git a/tests/unittests/models/test_google_llm.py b/tests/unittests/models/test_google_llm.py
index e9cfb3b842..f3356975f5 100644
--- a/tests/unittests/models/test_google_llm.py
+++ b/tests/unittests/models/test_google_llm.py
@@ -1907,6 +1907,54 @@ async def __aexit__(self, *args):
 assert isinstance(connection, GeminiLlmConnection)
 
 
+@pytest.mark.asyncio
+async def test_connect_uses_request_speech_config_when_gemini_is_none(
+ gemini_llm, llm_request
+):
+ """Tests that request's speech_config is used when Gemini's is None."""
+ # Arrange: Set a speech_config on the request instance with the voice "Kore"
+ gemini_llm.speech_config = None
+ request_speech_config = types.SpeechConfig(
+ voice_config=types.VoiceConfig(
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
+ voice_name="Kore",
+ )
+ )
+ )
+ llm_request.live_connect_config = types.LiveConnectConfig(
+ speech_config=request_speech_config
+ )
+
+ mock_live_session = mock.AsyncMock()
+
+ with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
+
+ class MockLiveConnect:
+
+ async def __aenter__(self):
+ return mock_live_session
+
+ async def __aexit__(self, *args):
+ pass
+
+ mock_live_client.aio.live.connect.return_value = MockLiveConnect()
+
+ # Act
+ async with gemini_llm.connect(llm_request) as connection:
+ # Assert
+ mock_live_client.aio.live.connect.assert_called_once()
+ call_args = mock_live_client.aio.live.connect.call_args
+ config_arg = call_args.kwargs["config"]
+
+ # Verify the speech_config from the request instance was used
+ assert config_arg.speech_config is not None
+ assert (
+ config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name
+ == "Kore"
+ )
+ assert isinstance(connection, GeminiLlmConnection)
+
+
 @pytest.mark.asyncio
 async def test_connect_request_gemini_config_overrides_speech_config(
 gemini_llm, llm_request
</div><div class="naked_ctrl">
<form action="/index.cgi/larger-text" method="get" name="gate">
<p><a href="http://altstyle.alfasado.net">AltStyle</a> によって変換されたページ <a href="https://patch-diff.githubusercontent.com/raw/google/adk-python/pull/3170.patch">(-&gt;オリジナル)</a>
/ <label>アドレス: <input type="text" name="naked_post_url" value="https://patch-diff.githubusercontent.com/raw/google/adk-python/pull/3170.patch" size="22" /></label> <label>モード: <select name="naked_post_mode">
<option value="default">デフォルト</option>
<option value="speech">音声ブラウザ</option>
<option value="ruby">ルビ付き</option>
<option value="contrast">配色反転</option>
<option value="larger-text" selected="selected">文字拡大</option>
<option value="mobile">モバイル</option>
</select>
<input type="submit" value="表示" />
</p>
</form>
</div>