Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 984086b

Browse files
feat(ai): add support for the Live API (#9224)
1 parent cbef6c6 commit 984086b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+4197
-8
lines changed

‎.changeset/blue-pets-sin.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
'firebase': minor
3+
'@firebase/ai': minor
4+
---
5+
6+
Add support for the Gemini Live API.

‎common/api-review/ai.api.md

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export const AIErrorCode: {
4141
readonly REQUEST_ERROR: "request-error";
4242
readonly RESPONSE_ERROR: "response-error";
4343
readonly FETCH_ERROR: "fetch-error";
44+
readonly SESSION_CLOSED: "session-closed";
4445
readonly INVALID_CONTENT: "invalid-content";
4546
readonly API_NOT_ENABLED: "api-not-enabled";
4647
readonly INVALID_SCHEMA: "invalid-schema";
@@ -94,6 +95,11 @@ export class ArraySchema extends Schema {
9495
toJSON(): SchemaRequest;
9596
}
9697

98+
// @beta
99+
export interface AudioConversationController {
100+
stop: () => Promise<void>;
101+
}
102+
97103
// @public
98104
export abstract class Backend {
99105
protected constructor(type: BackendType);
@@ -290,6 +296,7 @@ export type FinishReason = (typeof FinishReason)[keyof typeof FinishReason];
290296
export interface FunctionCall {
291297
// (undocumented)
292298
args: object;
299+
id?: string;
293300
// (undocumented)
294301
name: string;
295302
}
@@ -342,6 +349,7 @@ export interface FunctionDeclarationsTool {
342349

343350
// @public
344351
export interface FunctionResponse {
352+
id?: string;
345353
// (undocumented)
346354
name: string;
347355
// (undocumented)
@@ -480,6 +488,9 @@ export function getGenerativeModel(ai: AI, modelParams: ModelParams | HybridPara
480488
// @beta
481489
export function getImagenModel(ai: AI, modelParams: ImagenModelParams, requestOptions?: RequestOptions): ImagenModel;
482490

491+
// @beta
492+
export function getLiveGenerativeModel(ai: AI, modelParams: LiveModelParams): LiveGenerativeModel;
493+
483494
// @public
484495
export class GoogleAIBackend extends Backend {
485496
constructor();
@@ -813,6 +824,96 @@ export interface LanguageModelPromptOptions {
813824
responseConstraint?: object;
814825
}
815826

827+
// @beta
828+
export interface LiveGenerationConfig {
829+
frequencyPenalty?: number;
830+
maxOutputTokens?: number;
831+
presencePenalty?: number;
832+
responseModalities?: ResponseModality[];
833+
speechConfig?: SpeechConfig;
834+
temperature?: number;
835+
topK?: number;
836+
topP?: number;
837+
}
838+
839+
// @beta
840+
export class LiveGenerativeModel extends AIModel {
841+
// Warning: (ae-forgotten-export) The symbol "WebSocketHandler" needs to be exported by the entry point index.d.ts
842+
//
843+
// @internal
844+
constructor(ai: AI, modelParams: LiveModelParams,
845+
_webSocketHandler: WebSocketHandler);
846+
connect(): Promise<LiveSession>;
847+
// (undocumented)
848+
generationConfig: LiveGenerationConfig;
849+
// (undocumented)
850+
systemInstruction?: Content;
851+
// (undocumented)
852+
toolConfig?: ToolConfig;
853+
// (undocumented)
854+
tools?: Tool[];
855+
}
856+
857+
// @beta
858+
export interface LiveModelParams {
859+
// (undocumented)
860+
generationConfig?: LiveGenerationConfig;
861+
// (undocumented)
862+
model: string;
863+
// (undocumented)
864+
systemInstruction?: string | Part | Content;
865+
// (undocumented)
866+
toolConfig?: ToolConfig;
867+
// (undocumented)
868+
tools?: Tool[];
869+
}
870+
871+
// @beta
872+
export const LiveResponseType: {
873+
SERVER_CONTENT: string;
874+
TOOL_CALL: string;
875+
TOOL_CALL_CANCELLATION: string;
876+
};
877+
878+
// @beta
879+
export type LiveResponseType = (typeof LiveResponseType)[keyof typeof LiveResponseType];
880+
881+
// @beta
882+
export interface LiveServerContent {
883+
interrupted?: boolean;
884+
modelTurn?: Content;
885+
turnComplete?: boolean;
886+
// (undocumented)
887+
type: 'serverContent';
888+
}
889+
890+
// @beta
891+
export interface LiveServerToolCall {
892+
functionCalls: FunctionCall[];
893+
// (undocumented)
894+
type: 'toolCall';
895+
}
896+
897+
// @beta
898+
export interface LiveServerToolCallCancellation {
899+
functionIds: string[];
900+
// (undocumented)
901+
type: 'toolCallCancellation';
902+
}
903+
904+
// @beta
905+
export class LiveSession {
906+
// @internal
907+
constructor(webSocketHandler: WebSocketHandler, serverMessages: AsyncGenerator<unknown>);
908+
close(): Promise<void>;
909+
inConversation: boolean;
910+
isClosed: boolean;
911+
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
912+
send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
913+
sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
914+
sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
915+
}
916+
816917
// @public
817918
export const Modality: {
818919
readonly MODALITY_UNSPECIFIED: "MODALITY_UNSPECIFIED";
@@ -885,6 +986,11 @@ export type Part = TextPart | InlineDataPart | FunctionCallPart | FunctionRespon
885986
// @public
886987
export const POSSIBLE_ROLES: readonly ["user", "model", "function", "system"];
887988

989+
// @beta
990+
export interface PrebuiltVoiceConfig {
991+
voiceName?: string;
992+
}
993+
888994
// @public
889995
export interface PromptFeedback {
890996
// (undocumented)
@@ -904,6 +1010,7 @@ export interface RequestOptions {
9041010
export const ResponseModality: {
9051011
readonly TEXT: "TEXT";
9061012
readonly IMAGE: "IMAGE";
1013+
readonly AUDIO: "AUDIO";
9071014
};
9081015

9091016
// @beta
@@ -1048,6 +1155,19 @@ export interface Segment {
10481155
text: string;
10491156
}
10501157

1158+
// @beta
1159+
export interface SpeechConfig {
1160+
voiceConfig?: VoiceConfig;
1161+
}
1162+
1163+
// @beta
1164+
export function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
1165+
1166+
// @beta
1167+
export interface StartAudioConversationOptions {
1168+
functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
1169+
}
1170+
10511171
// @public
10521172
export interface StartChatParams extends BaseParams {
10531173
// (undocumented)
@@ -1130,6 +1250,11 @@ export interface VideoMetadata {
11301250
startOffset: string;
11311251
}
11321252

1253+
// @beta
1254+
export interface VoiceConfig {
1255+
prebuiltVoiceConfig?: PrebuiltVoiceConfig;
1256+
}
1257+
11331258
// @public (undocumented)
11341259
export interface WebAttribution {
11351260
// (undocumented)

‎docs-devsite/_toc.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ toc:
1616
path: /docs/reference/js/ai.anyofschema.md
1717
- title: ArraySchema
1818
path: /docs/reference/js/ai.arrayschema.md
19+
- title: AudioConversationController
20+
path: /docs/reference/js/ai.audioconversationcontroller.md
1921
- title: Backend
2022
path: /docs/reference/js/ai.backend.md
2123
- title: BaseParams
@@ -124,6 +126,20 @@ toc:
124126
path: /docs/reference/js/ai.languagemodelmessagecontent.md
125127
- title: LanguageModelPromptOptions
126128
path: /docs/reference/js/ai.languagemodelpromptoptions.md
129+
- title: LiveGenerationConfig
130+
path: /docs/reference/js/ai.livegenerationconfig.md
131+
- title: LiveGenerativeModel
132+
path: /docs/reference/js/ai.livegenerativemodel.md
133+
- title: LiveModelParams
134+
path: /docs/reference/js/ai.livemodelparams.md
135+
- title: LiveServerContent
136+
path: /docs/reference/js/ai.liveservercontent.md
137+
- title: LiveServerToolCall
138+
path: /docs/reference/js/ai.liveservertoolcall.md
139+
- title: LiveServerToolCallCancellation
140+
path: /docs/reference/js/ai.liveservertoolcallcancellation.md
141+
- title: LiveSession
142+
path: /docs/reference/js/ai.livesession.md
127143
- title: ModalityTokenCount
128144
path: /docs/reference/js/ai.modalitytokencount.md
129145
- title: ModelParams
@@ -136,6 +152,8 @@ toc:
136152
path: /docs/reference/js/ai.objectschemarequest.md
137153
- title: OnDeviceParams
138154
path: /docs/reference/js/ai.ondeviceparams.md
155+
- title: PrebuiltVoiceConfig
156+
path: /docs/reference/js/ai.prebuiltvoiceconfig.md
139157
- title: PromptFeedback
140158
path: /docs/reference/js/ai.promptfeedback.md
141159
- title: RequestOptions
@@ -160,6 +178,10 @@ toc:
160178
path: /docs/reference/js/ai.searchentrypoint.md
161179
- title: Segment
162180
path: /docs/reference/js/ai.segment.md
181+
- title: SpeechConfig
182+
path: /docs/reference/js/ai.speechconfig.md
183+
- title: StartAudioConversationOptions
184+
path: /docs/reference/js/ai.startaudioconversationoptions.md
163185
- title: StartChatParams
164186
path: /docs/reference/js/ai.startchatparams.md
165187
- title: StringSchema
@@ -176,6 +198,8 @@ toc:
176198
path: /docs/reference/js/ai.vertexaibackend.md
177199
- title: VideoMetadata
178200
path: /docs/reference/js/ai.videometadata.md
201+
- title: VoiceConfig
202+
path: /docs/reference/js/ai.voiceconfig.md
179203
- title: WebAttribution
180204
path: /docs/reference/js/ai.webattribution.md
181205
- title: WebGroundingChunk
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Project: /docs/reference/js/_project.yaml
2+
Book: /docs/reference/_book.yaml
3+
page_type: reference
4+
5+
{% comment %}
6+
DO NOT EDIT THIS FILE!
7+
This is generated by the JS SDK team, and any local changes will be
8+
overwritten. Changes should be made in the source code at
9+
https://github.com/firebase/firebase-js-sdk
10+
{% endcomment %}
11+
12+
# AudioConversationController interface
13+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
14+
>
15+
16+
A controller for managing an active audio conversation.
17+
18+
<b>Signature:</b>
19+
20+
```typescript
21+
export interface AudioConversationController
22+
```
23+
24+
## Properties
25+
26+
| Property | Type | Description |
27+
| --- | --- | --- |
28+
| [stop](./ai.audioconversationcontroller.md#audioconversationcontrollerstop) | () =&gt; Promise&lt;void&gt; | <b><i>(Public Preview)</i></b> Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete. |
29+
30+
## AudioConversationController.stop
31+
32+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
33+
>
34+
35+
Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete.
36+
37+
<b>Signature:</b>
38+
39+
```typescript
40+
stop: () => Promise<void>;
41+
```

‎docs-devsite/ai.functioncall.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ export interface FunctionCall
2323
| Property | Type | Description |
2424
| --- | --- | --- |
2525
| [args](./ai.functioncall.md#functioncallargs) | object | |
26+
| [id](./ai.functioncall.md#functioncallid) | string | The id of the function call. This must be sent back in the associated [FunctionResponse](./ai.functionresponse.md#functionresponse_interface)<!-- -->. |
2627
| [name](./ai.functioncall.md#functioncallname) | string | |
2728

2829
## FunctionCall.args
@@ -33,6 +34,18 @@ export interface FunctionCall
3334
args: object;
3435
```
3536

37+
## FunctionCall.id
38+
39+
The id of the function call. This must be sent back in the associated [FunctionResponse](./ai.functionresponse.md#functionresponse_interface)<!-- -->.
40+
41+
This property is only supported in the Gemini Developer API ([GoogleAIBackend](./ai.googleaibackend.md#googleaibackend_class)<!-- -->). When using the Vertex AI Gemini API ([VertexAIBackend](./ai.vertexaibackend.md#vertexaibackend_class)<!-- -->), this property will be `undefined`<!-- -->.
42+
43+
<b>Signature:</b>
44+
45+
```typescript
46+
id?: string;
47+
```
48+
3649
## FunctionCall.name
3750

3851
<b>Signature:</b>

‎docs-devsite/ai.functionresponse.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,22 @@ export interface FunctionResponse
2222

2323
| Property | Type | Description |
2424
| --- | --- | --- |
25+
| [id](./ai.functionresponse.md#functionresponseid) | string | The id of the [FunctionCall](./ai.functioncall.md#functioncall_interface)<!-- -->. |
2526
| [name](./ai.functionresponse.md#functionresponsename) | string | |
2627
| [response](./ai.functionresponse.md#functionresponseresponse) | object | |
2728

29+
## FunctionResponse.id
30+
31+
The id of the [FunctionCall](./ai.functioncall.md#functioncall_interface)<!-- -->.
32+
33+
This property is only supported in the Gemini Developer API ([GoogleAIBackend](./ai.googleaibackend.md#googleaibackend_class)<!-- -->). When using the Vertex AI Gemini API ([VertexAIBackend](./ai.vertexaibackend.md#vertexaibackend_class)<!-- -->), this property will be `undefined`<!-- -->.
34+
35+
<b>Signature:</b>
36+
37+
```typescript
38+
id?: string;
39+
```
40+
2841
## FunctionResponse.name
2942

3043
<b>Signature:</b>

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /