From 8bc63b1ffd55dc03ab3b48411845c93e1d51c254 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: 2025年10月14日 20:41:12 +0000 Subject: [PATCH 1/8] Feature with Unit Test --- src/google/adk/agents/llm_agent.py | 16 +++ src/google/adk/agents/llm_agent_config.py | 4 + src/google/adk/flows/llm_flows/basic.py | 7 +- .../unittests/agents/test_llm_agent_fields.py | 26 ++++ .../flows/llm_flows/test_basic_processor.py | 126 +++++++++++++++++- 5 files changed, 175 insertions(+), 4 deletions(-) diff --git a/src/google/adk/agents/llm_agent.py b/src/google/adk/agents/llm_agent.py index c143568252..40146d0ee8 100644 --- a/src/google/adk/agents/llm_agent.py +++ b/src/google/adk/agents/llm_agent.py @@ -263,6 +263,9 @@ class LlmAgent(BaseAgent): settings, etc. """ + speech_config: Optional[types.SpeechConfig] = None + """The agent's speech configurations.""" + # LLM-based agent transfer configs - Start disallow_transfer_to_parent: bool = False """Disallows LLM-controlled transferring to the parent agent. @@ -697,6 +700,7 @@ def __maybe_save_output_to_state(self, event: Event): @model_validator(mode='after') def __model_validator_after(self) -> LlmAgent: self.__check_output_schema() + self.__check_speech_config() return self def __check_output_schema(self): @@ -722,6 +726,16 @@ def __check_output_schema(self): ' sub_agents must be empty to disable agent transfer.' ) + def __check_speech_config(self): + if self.speech_config: + logger.warning( + 'Agent %s has a speech_config set. 
This configuration is only' + ' effective when using the agent in a live/streaming mode' + ' (e.g., via run_live) and with a model that supports speech' + ' input/output.', + self.name, + ) + @field_validator('generate_content_config', mode='after') @classmethod def validate_generate_content_config( @@ -851,6 +865,8 @@ def _parse_config( ) if config.generate_content_config: kwargs['generate_content_config'] = config.generate_content_config + if config.speech_config: + kwargs['speech_config'] = config.speech_config return kwargs diff --git a/src/google/adk/agents/llm_agent_config.py b/src/google/adk/agents/llm_agent_config.py index 4203a5923b..4214da879a 100644 --- a/src/google/adk/agents/llm_agent_config.py +++ b/src/google/adk/agents/llm_agent_config.py @@ -188,3 +188,7 @@ class LlmAgentConfig(BaseAgentConfig): generate_content_config: Optional[types.GenerateContentConfig] = Field( default=None, description='Optional. LlmAgent.generate_content_config.' ) + + speech_config: Optional[types.SpeechConfig] = Field( + default=None, description='Optional. 
LlmAgent.speech_config' + ) diff --git a/src/google/adk/flows/llm_flows/basic.py b/src/google/adk/flows/llm_flows/basic.py index 013c7ad054..b946112d18 100644 --- a/src/google/adk/flows/llm_flows/basic.py +++ b/src/google/adk/flows/llm_flows/basic.py @@ -58,9 +58,12 @@ async def run_async( llm_request.live_connect_config.response_modalities = ( invocation_context.run_config.response_modalities ) - llm_request.live_connect_config.speech_config = ( - invocation_context.run_config.speech_config + + speech_config_to_use = ( + agent.speech_config or invocation_context.run_config.speech_config ) + llm_request.live_connect_config.speech_config = speech_config_to_use + llm_request.live_connect_config.output_audio_transcription = ( invocation_context.run_config.output_audio_transcription ) diff --git a/tests/unittests/agents/test_llm_agent_fields.py b/tests/unittests/agents/test_llm_agent_fields.py index 5540e55b0d..bc8b387f9c 100644 --- a/tests/unittests/agents/test_llm_agent_fields.py +++ b/tests/unittests/agents/test_llm_agent_fields.py @@ -167,6 +167,32 @@ async def _global_instruction_provider(ctx: ReadonlyContext) -> str: assert bypass_state_injection +def test_speech_config_logs_warning(caplog: pytest.LogCaptureFixture): + with caplog.at_level('WARNING'): + + agent = LlmAgent( + name='test_agent', + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name='Kore', + ) + ) + ), + ) + + assert agent.speech_config is not None + assert ( + agent.speech_config.voice_config.prebuilt_voice_config.voice_name + == 'Kore' + ) + assert ( + 'Agent test_agent has a speech_config set.' 
in caplog.text + and 'only effective when using the agent in a live/streaming mode' + in caplog.text + ) + + def test_output_schema_will_disable_transfer(caplog: pytest.LogCaptureFixture): with caplog.at_level('WARNING'): diff --git a/tests/unittests/flows/llm_flows/test_basic_processor.py b/tests/unittests/flows/llm_flows/test_basic_processor.py index 770f358949..773614b6d6 100644 --- a/tests/unittests/flows/llm_flows/test_basic_processor.py +++ b/tests/unittests/flows/llm_flows/test_basic_processor.py @@ -21,6 +21,7 @@ from google.adk.models.llm_request import LlmRequest from google.adk.sessions.in_memory_session_service import InMemorySessionService from google.adk.tools.function_tool import FunctionTool +from google.genai import types from pydantic import BaseModel from pydantic import Field import pytest @@ -38,7 +39,9 @@ def dummy_tool(query: str) -> str: return f'Result: {query}' -async def _create_invocation_context(agent: LlmAgent) -> InvocationContext: +async def _create_invocation_context( + agent: LlmAgent, run_config: RunConfig = RunConfig() +) -> InvocationContext: """Helper to create InvocationContext for testing.""" session_service = InMemorySessionService() session = await session_service.create_session( @@ -49,7 +52,7 @@ async def _create_invocation_context(agent: LlmAgent) -> InvocationContext: agent=agent, session=session, session_service=session_service, - run_config=RunConfig(), + run_config=run_config, ) @@ -143,3 +146,122 @@ async def test_sets_model_name(self): # Should have set the model name assert llm_request.model == 'gemini-1.5-flash' + + @pytest.mark.asyncio + async def test_speech_config_agent_overrides_run_config(self): + """Tests that agent's speech_config is prioritized over the RunConfig's.""" + agent_speech_config = types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name='Kore', + ) + ) + ) + run_speech_config = types.SpeechConfig( + voice_config=types.VoiceConfig( + 
prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name='Puck', + ) + ) + ) + + agent = LlmAgent( + name='test_agent', + model='gemini-1.5-flash', + speech_config=agent_speech_config, + ) + run_config = RunConfig(speech_config=run_speech_config) + invocation_context = await _create_invocation_context(agent, run_config) + llm_request = LlmRequest() + processor = _BasicLlmRequestProcessor() + + # Process the request + async for _ in processor.run_async(invocation_context, llm_request): + pass + + # Assert that the agent's override was used + assert llm_request.live_connect_config.speech_config == agent_speech_config + assert ( + llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name + == 'Kore' + ) + + @pytest.mark.asyncio + async def test_speech_config_uses_agent_as_fallback(self): + """Tests that the agent's speech_config is used when RunConfig's is None.""" + agent_speech_config = types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name='Kore', + ) + ) + ) + + agent = LlmAgent( + name='test_agent', + model='gemini-1.5-flash', + speech_config=agent_speech_config, + ) + run_config = RunConfig(speech_config=None) # No runtime config + invocation_context = await _create_invocation_context(agent, run_config) + llm_request = LlmRequest() + processor = _BasicLlmRequestProcessor() + + # Process the request + async for _ in processor.run_async(invocation_context, llm_request): + pass + + # Assert that the agent's config was used as a fallback + assert llm_request.live_connect_config.speech_config == agent_speech_config + assert ( + llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name + == 'Kore' + ) + + @pytest.mark.asyncio + async def test_speech_config_uses_run_config_when_agent_is_none(self): + """Tests that RunConfig's speech_config is used when the agent's is None.""" + run_speech_config = types.SpeechConfig( + 
voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name='Puck', + ) + ) + ) + + agent = LlmAgent( + name='test_agent', model='gemini-1.5-flash', speech_config=None + ) # No agent config + run_config = RunConfig(speech_config=run_speech_config) + invocation_context = await _create_invocation_context(agent, run_config) + llm_request = LlmRequest() + processor = _BasicLlmRequestProcessor() + + # Process the request + async for _ in processor.run_async(invocation_context, llm_request): + pass + + # Assert that the runtime config was used + assert llm_request.live_connect_config.speech_config == run_speech_config + assert ( + llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name + == 'Puck' + ) + + @pytest.mark.asyncio + async def test_speech_config_is_none_when_both_are_none(self): + """Tests that speech_config is None when neither agent nor RunConfig has it.""" + agent = LlmAgent( + name='test_agent', model='gemini-1.5-flash', speech_config=None + ) + run_config = RunConfig(speech_config=None) # No runtime config + invocation_context = await _create_invocation_context(agent, run_config) + llm_request = LlmRequest() + processor = _BasicLlmRequestProcessor() + + # Process the request + async for _ in processor.run_async(invocation_context, llm_request): + pass + + # Assert that the final config is None + assert llm_request.live_connect_config.speech_config is None From 02d9ab3cfd07f780e74321c65ebef70b0af0f269 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: 2025年10月14日 23:18:25 +0000 Subject: [PATCH 2/8] Added voices to sample --- .../live_bidi_streaming_multi_agent/agent.py | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/contributing/samples/live_bidi_streaming_multi_agent/agent.py b/contributing/samples/live_bidi_streaming_multi_agent/agent.py index 413e33a727..defc95ff50 100644 --- a/contributing/samples/live_bidi_streaming_multi_agent/agent.py +++ 
b/contributing/samples/live_bidi_streaming_multi_agent/agent.py @@ -42,6 +42,13 @@ def roll_die(sides: int) -> int: ), ] ), + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Kore", + ) + ) + ), ) @@ -85,6 +92,13 @@ def check_prime(nums: list[int]) -> str: ), ] ), + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Puck", + ) + ) + ), ) @@ -100,8 +114,8 @@ def get_current_weather(location: str): root_agent = Agent( # find supported models here: https://google.github.io/adk-docs/get-started/streaming/quickstart-streaming/ - model="gemini-2.0-flash-live-preview-04-09", # for Vertex project - # model="gemini-live-2.5-flash-preview", # for AI studio key + # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project + model="gemini-live-2.5-flash-preview", # for AI studio key name="root_agent", instruction=""" You are a helpful assistant that can check time, roll dice and check if numbers are prime. @@ -126,4 +140,11 @@ def get_current_weather(location: str): ), ] ), + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Zephyr", + ) + ) + ), ) From 626e86bc14e17139f3339e829acc0c71c278f3ef Mon Sep 17 00:00:00 2001 From: Hangfei Lin Date: 2025年10月14日 19:39:17 -0700 Subject: [PATCH 3/8] Update run_config.py --- src/google/adk/agents/run_config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/google/adk/agents/run_config.py b/src/google/adk/agents/run_config.py index 9fe82fabf1..ebe77dcb3c 100644 --- a/src/google/adk/agents/run_config.py +++ b/src/google/adk/agents/run_config.py @@ -35,7 +35,10 @@ class StreamingMode(Enum): class RunConfig(BaseModel): - """Configs for runtime behavior of agents.""" + """Configs for runtime behavior of agents. 
+ + The configs here will be overridden by agent-spcific configurations. + """ model_config = ConfigDict( extra='forbid', From 5c1fc02e46ea934a13aff1e603687c80c4e3b3a1 Mon Sep 17 00:00:00 2001 From: Hangfei Lin Date: 2025年10月15日 12:41:48 -0700 Subject: [PATCH 4/8] Update run_config.py --- src/google/adk/agents/run_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/agents/run_config.py b/src/google/adk/agents/run_config.py index ebe77dcb3c..890923385c 100644 --- a/src/google/adk/agents/run_config.py +++ b/src/google/adk/agents/run_config.py @@ -37,7 +37,7 @@ class StreamingMode(Enum): class RunConfig(BaseModel): """Configs for runtime behavior of agents. - The configs here will be overridden by agent-spcific configurations. + The configs here will be overridden by agent-specific configurations. """ model_config = ConfigDict( From 54cd5a210b868b155f930685094aba971dffaf90 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: 2025年10月15日 22:35:39 +0000 Subject: [PATCH 5/8] Moved to Gemini class; run_config overrides --- .../live_bidi_streaming_multi_agent/agent.py | 57 +++++--- src/google/adk/flows/llm_flows/basic.py | 5 +- src/google/adk/models/google_llm.py | 8 + .../unittests/agents/test_llm_agent_fields.py | 26 ---- .../flows/llm_flows/test_basic_processor.py | 126 +--------------- tests/unittests/models/test_google_llm.py | 138 ++++++++++++++++++ 6 files changed, 184 insertions(+), 176 deletions(-) diff --git a/contributing/samples/live_bidi_streaming_multi_agent/agent.py b/contributing/samples/live_bidi_streaming_multi_agent/agent.py index defc95ff50..ddb36b2845 100644 --- a/contributing/samples/live_bidi_streaming_multi_agent/agent.py +++ b/contributing/samples/live_bidi_streaming_multi_agent/agent.py @@ -16,6 +16,7 @@ from google.adk.agents.llm_agent import Agent from google.adk.examples.example import Example +from google.adk.models.google_llm import Gemini from google.adk.tools.example_tool import ExampleTool from google.genai 
import types @@ -28,6 +29,17 @@ def roll_die(sides: int) -> int: roll_agent = Agent( name="roll_agent", + model=Gemini( + # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project + model="gemini-live-2.5-flash-preview", # for AI studio key + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Kore", + ) + ) + ), + ), description="Handles rolling dice of different sizes.", instruction=""" You are responsible for rolling dice based on the user's request. @@ -42,13 +54,6 @@ def roll_die(sides: int) -> int: ), ] ), - speech_config=types.SpeechConfig( - voice_config=types.VoiceConfig( - prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name="Kore", - ) - ) - ), ) @@ -76,6 +81,17 @@ def check_prime(nums: list[int]) -> str: prime_agent = Agent( name="prime_agent", + model=Gemini( + # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project + model="gemini-live-2.5-flash-preview", # for AI studio key + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Puck", + ) + ) + ), + ), description="Handles checking if numbers are prime.", instruction=""" You are responsible for checking whether numbers are prime. 
@@ -92,13 +108,6 @@ def check_prime(nums: list[int]) -> str: ), ] ), - speech_config=types.SpeechConfig( - voice_config=types.VoiceConfig( - prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name="Puck", - ) - ) - ), ) @@ -114,8 +123,17 @@ def get_current_weather(location: str): root_agent = Agent( # find supported models here: https://google.github.io/adk-docs/get-started/streaming/quickstart-streaming/ - # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project - model="gemini-live-2.5-flash-preview", # for AI studio key + model=Gemini( + # model="gemini-2.0-flash-live-preview-04-09", # for Vertex project + model="gemini-live-2.5-flash-preview", # for AI studio key + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Zephyr", + ) + ) + ), + ), name="root_agent", instruction=""" You are a helpful assistant that can check time, roll dice and check if numbers are prime. @@ -140,11 +158,4 @@ def get_current_weather(location: str): ), ] ), - speech_config=types.SpeechConfig( - voice_config=types.VoiceConfig( - prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name="Zephyr", - ) - ) - ), ) diff --git a/src/google/adk/flows/llm_flows/basic.py b/src/google/adk/flows/llm_flows/basic.py index c0bc50edfc..789eeb0a56 100644 --- a/src/google/adk/flows/llm_flows/basic.py +++ b/src/google/adk/flows/llm_flows/basic.py @@ -59,10 +59,9 @@ async def run_async( invocation_context.run_config.response_modalities ) - speech_config_to_use = ( - agent.speech_config or invocation_context.run_config.speech_config + llm_request.live_connect_config.speech_config = ( + invocation_context.run_config.speech_config ) - llm_request.live_connect_config.speech_config = speech_config_to_use llm_request.live_connect_config.output_audio_transcription = ( invocation_context.run_config.output_audio_transcription diff --git a/src/google/adk/models/google_llm.py b/src/google/adk/models/google_llm.py 
index 411162bb0c..bf57562262 100644 --- a/src/google/adk/models/google_llm.py +++ b/src/google/adk/models/google_llm.py @@ -60,6 +60,8 @@ class Gemini(BaseLlm): model: str = 'gemini-2.5-flash' + speech_config: Optional[types.SpeechConfig] = None + retry_options: Optional[types.HttpRetryOptions] = None """Allow Gemini to retry failed responses. @@ -261,6 +263,12 @@ async def connect(self, llm_request: LlmRequest) -> BaseLlmConnection: self._live_api_version ) + if ( + llm_request.live_connect_config + and llm_request.live_connect_config.speech_config is None + ): + llm_request.live_connect_config.speech_config = self.speech_config + llm_request.live_connect_config.system_instruction = types.Content( role='system', parts=[ diff --git a/tests/unittests/agents/test_llm_agent_fields.py b/tests/unittests/agents/test_llm_agent_fields.py index bc8b387f9c..5540e55b0d 100644 --- a/tests/unittests/agents/test_llm_agent_fields.py +++ b/tests/unittests/agents/test_llm_agent_fields.py @@ -167,32 +167,6 @@ async def _global_instruction_provider(ctx: ReadonlyContext) -> str: assert bypass_state_injection -def test_speech_config_logs_warning(caplog: pytest.LogCaptureFixture): - with caplog.at_level('WARNING'): - - agent = LlmAgent( - name='test_agent', - speech_config=types.SpeechConfig( - voice_config=types.VoiceConfig( - prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name='Kore', - ) - ) - ), - ) - - assert agent.speech_config is not None - assert ( - agent.speech_config.voice_config.prebuilt_voice_config.voice_name - == 'Kore' - ) - assert ( - 'Agent test_agent has a speech_config set.' 
in caplog.text - and 'only effective when using the agent in a live/streaming mode' - in caplog.text - ) - - def test_output_schema_will_disable_transfer(caplog: pytest.LogCaptureFixture): with caplog.at_level('WARNING'): diff --git a/tests/unittests/flows/llm_flows/test_basic_processor.py b/tests/unittests/flows/llm_flows/test_basic_processor.py index 773614b6d6..770f358949 100644 --- a/tests/unittests/flows/llm_flows/test_basic_processor.py +++ b/tests/unittests/flows/llm_flows/test_basic_processor.py @@ -21,7 +21,6 @@ from google.adk.models.llm_request import LlmRequest from google.adk.sessions.in_memory_session_service import InMemorySessionService from google.adk.tools.function_tool import FunctionTool -from google.genai import types from pydantic import BaseModel from pydantic import Field import pytest @@ -39,9 +38,7 @@ def dummy_tool(query: str) -> str: return f'Result: {query}' -async def _create_invocation_context( - agent: LlmAgent, run_config: RunConfig = RunConfig() -) -> InvocationContext: +async def _create_invocation_context(agent: LlmAgent) -> InvocationContext: """Helper to create InvocationContext for testing.""" session_service = InMemorySessionService() session = await session_service.create_session( @@ -52,7 +49,7 @@ async def _create_invocation_context( agent=agent, session=session, session_service=session_service, - run_config=run_config, + run_config=RunConfig(), ) @@ -146,122 +143,3 @@ async def test_sets_model_name(self): # Should have set the model name assert llm_request.model == 'gemini-1.5-flash' - - @pytest.mark.asyncio - async def test_speech_config_agent_overrides_run_config(self): - """Tests that agent's speech_config is prioritized over the RunConfig's.""" - agent_speech_config = types.SpeechConfig( - voice_config=types.VoiceConfig( - prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name='Kore', - ) - ) - ) - run_speech_config = types.SpeechConfig( - voice_config=types.VoiceConfig( - 
prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name='Puck', - ) - ) - ) - - agent = LlmAgent( - name='test_agent', - model='gemini-1.5-flash', - speech_config=agent_speech_config, - ) - run_config = RunConfig(speech_config=run_speech_config) - invocation_context = await _create_invocation_context(agent, run_config) - llm_request = LlmRequest() - processor = _BasicLlmRequestProcessor() - - # Process the request - async for _ in processor.run_async(invocation_context, llm_request): - pass - - # Assert that the agent's override was used - assert llm_request.live_connect_config.speech_config == agent_speech_config - assert ( - llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name - == 'Kore' - ) - - @pytest.mark.asyncio - async def test_speech_config_uses_agent_as_fallback(self): - """Tests that the agent's speech_config is used when RunConfig's is None.""" - agent_speech_config = types.SpeechConfig( - voice_config=types.VoiceConfig( - prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name='Kore', - ) - ) - ) - - agent = LlmAgent( - name='test_agent', - model='gemini-1.5-flash', - speech_config=agent_speech_config, - ) - run_config = RunConfig(speech_config=None) # No runtime config - invocation_context = await _create_invocation_context(agent, run_config) - llm_request = LlmRequest() - processor = _BasicLlmRequestProcessor() - - # Process the request - async for _ in processor.run_async(invocation_context, llm_request): - pass - - # Assert that the agent's config was used as a fallback - assert llm_request.live_connect_config.speech_config == agent_speech_config - assert ( - llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name - == 'Kore' - ) - - @pytest.mark.asyncio - async def test_speech_config_uses_run_config_when_agent_is_none(self): - """Tests that RunConfig's speech_config is used when the agent's is None.""" - run_speech_config = types.SpeechConfig( - 
voice_config=types.VoiceConfig( - prebuilt_voice_config=types.PrebuiltVoiceConfig( - voice_name='Puck', - ) - ) - ) - - agent = LlmAgent( - name='test_agent', model='gemini-1.5-flash', speech_config=None - ) # No agent config - run_config = RunConfig(speech_config=run_speech_config) - invocation_context = await _create_invocation_context(agent, run_config) - llm_request = LlmRequest() - processor = _BasicLlmRequestProcessor() - - # Process the request - async for _ in processor.run_async(invocation_context, llm_request): - pass - - # Assert that the runtime config was used - assert llm_request.live_connect_config.speech_config == run_speech_config - assert ( - llm_request.live_connect_config.speech_config.voice_config.prebuilt_voice_config.voice_name - == 'Puck' - ) - - @pytest.mark.asyncio - async def test_speech_config_is_none_when_both_are_none(self): - """Tests that speech_config is None when neither agent nor RunConfig has it.""" - agent = LlmAgent( - name='test_agent', model='gemini-1.5-flash', speech_config=None - ) - run_config = RunConfig(speech_config=None) # No runtime config - invocation_context = await _create_invocation_context(agent, run_config) - llm_request = LlmRequest() - processor = _BasicLlmRequestProcessor() - - # Process the request - async for _ in processor.run_async(invocation_context, llm_request): - pass - - # Assert that the final config is None - assert llm_request.live_connect_config.speech_config is None diff --git a/tests/unittests/models/test_google_llm.py b/tests/unittests/models/test_google_llm.py index 1b5979bdf9..180e988862 100644 --- a/tests/unittests/models/test_google_llm.py +++ b/tests/unittests/models/test_google_llm.py @@ -1858,3 +1858,141 @@ def mock_model_dump(*args, **kwargs): # Should still succeed using repr() assert "Config:" in log_output assert "GenerateContentConfig" in log_output + + +@pytest.mark.asyncio +async def test_connect_uses_gemini_speech_config_when_request_is_none( + gemini_llm, llm_request +): + 
"""Tests that Gemini's speech_config is used when live_connect_config's is None.""" + # Arrange: Set a speech_config on the Gemini instance with the voice "Kore" + gemini_llm.speech_config = types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Kore", + ) + ) + ) + llm_request.live_connect_config = ( + types.LiveConnectConfig() + ) # speech_config is None + + mock_live_session = mock.AsyncMock() + + with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client: + + class MockLiveConnect: + + async def __aenter__(self): + return mock_live_session + + async def __aexit__(self, *args): + pass + + mock_live_client.aio.live.connect.return_value = MockLiveConnect() + + # Act + async with gemini_llm.connect(llm_request) as connection: + # Assert + mock_live_client.aio.live.connect.assert_called_once() + call_args = mock_live_client.aio.live.connect.call_args + config_arg = call_args.kwargs["config"] + + # Verify the speech_config from the Gemini instance was used + assert config_arg.speech_config is not None + assert ( + config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name + == "Kore" + ) + assert isinstance(connection, GeminiLlmConnection) + + +@pytest.mark.asyncio +async def test_connect_request_speech_config_overrides_gemini_config( + gemini_llm, llm_request +): + """Tests that live_connect_config's speech_config is preserved even if Gemini has one.""" + # Arrange: Set different speech_configs on both the Gemini instance ("Puck") and the request ("Zephyr") + gemini_llm.speech_config = types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Puck", + ) + ) + ) + request_speech_config = types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Zephyr", + ) + ) + ) + llm_request.live_connect_config = types.LiveConnectConfig( + 
speech_config=request_speech_config + ) + + mock_live_session = mock.AsyncMock() + + with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client: + + class MockLiveConnect: + + async def __aenter__(self): + return mock_live_session + + async def __aexit__(self, *args): + pass + + mock_live_client.aio.live.connect.return_value = MockLiveConnect() + + # Act + async with gemini_llm.connect(llm_request) as connection: + # Assert + mock_live_client.aio.live.connect.assert_called_once() + call_args = mock_live_client.aio.live.connect.call_args + config_arg = call_args.kwargs["config"] + + # Verify the speech_config from the request ("Zephyr") was preserved and not overwritten + assert config_arg.speech_config is not None + assert ( + config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name + == "Zephyr" + ) + assert isinstance(connection, GeminiLlmConnection) + + +@pytest.mark.asyncio +async def test_connect_speech_config_remains_none_when_both_are_none( + gemini_llm, llm_request +): + """Tests that speech_config is None when neither Gemini nor the request has it.""" + # Arrange: Ensure both Gemini instance and request have no speech_config + gemini_llm.speech_config = None + llm_request.live_connect_config = ( + types.LiveConnectConfig() + ) # speech_config is None + + mock_live_session = mock.AsyncMock() + + with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client: + + class MockLiveConnect: + + async def __aenter__(self): + return mock_live_session + + async def __aexit__(self, *args): + pass + + mock_live_client.aio.live.connect.return_value = MockLiveConnect() + + # Act + async with gemini_llm.connect(llm_request) as connection: + # Assert + mock_live_client.aio.live.connect.assert_called_once() + call_args = mock_live_client.aio.live.connect.call_args + config_arg = call_args.kwargs["config"] + + # Verify the final speech_config is still None + assert config_arg.speech_config is None + assert isinstance(connection, 
GeminiLlmConnection) From 38a5494ffb96e704175a64a3ddaeb4346af73ac9 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: 2025年10月15日 22:35:39 +0000 Subject: [PATCH 6/8] Moved to Gemini class; run_config overrides --- src/google/adk/agents/llm_agent.py | 16 ---------------- src/google/adk/agents/llm_agent_config.py | 4 ---- src/google/adk/flows/llm_flows/basic.py | 2 -- 3 files changed, 22 deletions(-) diff --git a/src/google/adk/agents/llm_agent.py b/src/google/adk/agents/llm_agent.py index 40146d0ee8..c143568252 100644 --- a/src/google/adk/agents/llm_agent.py +++ b/src/google/adk/agents/llm_agent.py @@ -263,9 +263,6 @@ class LlmAgent(BaseAgent): settings, etc. """ - speech_config: Optional[types.SpeechConfig] = None - """The agent's speech configurations.""" - # LLM-based agent transfer configs - Start disallow_transfer_to_parent: bool = False """Disallows LLM-controlled transferring to the parent agent. @@ -700,7 +697,6 @@ def __maybe_save_output_to_state(self, event: Event): @model_validator(mode='after') def __model_validator_after(self) -> LlmAgent: self.__check_output_schema() - self.__check_speech_config() return self def __check_output_schema(self): @@ -726,16 +722,6 @@ def __check_output_schema(self): ' sub_agents must be empty to disable agent transfer.' ) - def __check_speech_config(self): - if self.speech_config: - logger.warning( - 'Agent %s has a speech_config set. 
This configuration is only' - ' effective when using the agent in a live/streaming mode' - ' (e.g., via run_live) and with a model that supports speech' - ' input/output.', - self.name, - ) - @field_validator('generate_content_config', mode='after') @classmethod def validate_generate_content_config( @@ -865,8 +851,6 @@ def _parse_config( ) if config.generate_content_config: kwargs['generate_content_config'] = config.generate_content_config - if config.speech_config: - kwargs['speech_config'] = config.speech_config return kwargs diff --git a/src/google/adk/agents/llm_agent_config.py b/src/google/adk/agents/llm_agent_config.py index 4214da879a..4203a5923b 100644 --- a/src/google/adk/agents/llm_agent_config.py +++ b/src/google/adk/agents/llm_agent_config.py @@ -188,7 +188,3 @@ class LlmAgentConfig(BaseAgentConfig): generate_content_config: Optional[types.GenerateContentConfig] = Field( default=None, description='Optional. LlmAgent.generate_content_config.' ) - - speech_config: Optional[types.SpeechConfig] = Field( - default=None, description='Optional. 
LlmAgent.speech_config' - ) diff --git a/src/google/adk/flows/llm_flows/basic.py b/src/google/adk/flows/llm_flows/basic.py index 789eeb0a56..24cc7fd67c 100644 --- a/src/google/adk/flows/llm_flows/basic.py +++ b/src/google/adk/flows/llm_flows/basic.py @@ -58,11 +58,9 @@ async def run_async( llm_request.live_connect_config.response_modalities = ( invocation_context.run_config.response_modalities ) - llm_request.live_connect_config.speech_config = ( invocation_context.run_config.speech_config ) - llm_request.live_connect_config.output_audio_transcription = ( invocation_context.run_config.output_audio_transcription ) From 07870c6ebad8ac76e80ac4142ea3a43e855167c0 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: 2025年10月15日 22:52:49 +0000 Subject: [PATCH 7/8] Make agent config override run_config --- src/google/adk/models/google_llm.py | 5 +---- tests/unittests/models/test_google_llm.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/google/adk/models/google_llm.py b/src/google/adk/models/google_llm.py index bf57562262..b96e56e169 100644 --- a/src/google/adk/models/google_llm.py +++ b/src/google/adk/models/google_llm.py @@ -263,10 +263,7 @@ async def connect(self, llm_request: LlmRequest) -> BaseLlmConnection: self._live_api_version ) - if ( - llm_request.live_connect_config - and llm_request.live_connect_config.speech_config is None - ): + if self.speech_config is not None: llm_request.live_connect_config.speech_config = self.speech_config llm_request.live_connect_config.system_instruction = types.Content( diff --git a/tests/unittests/models/test_google_llm.py b/tests/unittests/models/test_google_llm.py index 180e988862..e9cfb3b842 100644 --- a/tests/unittests/models/test_google_llm.py +++ b/tests/unittests/models/test_google_llm.py @@ -1908,7 +1908,7 @@ async def __aexit__(self, *args): @pytest.mark.asyncio -async def test_connect_request_speech_config_overrides_gemini_config( +async def 
test_connect_request_gemini_config_overrides_speech_config( gemini_llm, llm_request ): """Tests that live_connect_config's speech_config is preserved even if Gemini has one.""" @@ -1952,11 +1952,11 @@ async def __aexit__(self, *args): call_args = mock_live_client.aio.live.connect.call_args config_arg = call_args.kwargs["config"] - # Verify the speech_config from the request ("Zephyr") was preserved and not overwritten + # Verify the speech_config from the request ("Zephyr") was overwritten by Gemini's speech_config ("Puck") assert config_arg.speech_config is not None assert ( config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name - == "Zephyr" + == "Puck" ) assert isinstance(connection, GeminiLlmConnection) From 2b4d3ac34f2b63c8ffe4d573216644cb264e1d18 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: 2025年10月15日 22:58:10 +0000 Subject: [PATCH 8/8] Added last case --- tests/unittests/models/test_google_llm.py | 48 +++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/unittests/models/test_google_llm.py b/tests/unittests/models/test_google_llm.py index e9cfb3b842..f3356975f5 100644 --- a/tests/unittests/models/test_google_llm.py +++ b/tests/unittests/models/test_google_llm.py @@ -1907,6 +1907,54 @@ async def __aexit__(self, *args): assert isinstance(connection, GeminiLlmConnection) +@pytest.mark.asyncio +async def test_connect_uses_request_speech_config_when_gemini_is_none( + gemini_llm, llm_request +): + """Tests that request's speech_config is used when Gemini's is None.""" + # Arrange: Set a speech_config on the request instance with the voice "Kore" + gemini_llm.speech_config = None + request_speech_config = types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig( + voice_name="Kore", + ) + ) + ) + llm_request.live_connect_config = types.LiveConnectConfig( + speech_config=request_speech_config + ) + + mock_live_session = mock.AsyncMock() + + with mock.patch.object(gemini_llm, 
"_live_api_client") as mock_live_client: + + class MockLiveConnect: + + async def __aenter__(self): + return mock_live_session + + async def __aexit__(self, *args): + pass + + mock_live_client.aio.live.connect.return_value = MockLiveConnect() + + # Act + async with gemini_llm.connect(llm_request) as connection: + # Assert + mock_live_client.aio.live.connect.assert_called_once() + call_args = mock_live_client.aio.live.connect.call_args + config_arg = call_args.kwargs["config"] + + # Verify the speech_config from the request instance was used + assert config_arg.speech_config is not None + assert ( + config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name + == "Kore" + ) + assert isinstance(connection, GeminiLlmConnection) + + @pytest.mark.asyncio async def test_connect_request_gemini_config_overrides_speech_config( gemini_llm, llm_request

AltStyle によって変換されたページ (->オリジナル) /