Counting tokens

For a detailed guide on counting tokens using the Gemini API, including how images, audio and video are counted, see the Token counting guide and accompanying Cookbook recipe.

Method: models.countTokens

Runs a model's tokenizer on input Content and returns the token count. Refer to the tokens guide to learn more about tokens.

Endpoint

post https://generativelanguage.googleapis.com/v1beta/{model=models/*}:countTokens

Path parameters

model string

Required. The model's resource name. This serves as an ID for the Model to use.

This name should match a model name returned by the models.list method.

Format: models/{model}. For example: models/gemini-2.0-flash.

Request body

The request body contains data with the following structure:

Fields
contents[] object (Content)

Optional. The input given to the model as a prompt. This field is ignored when generateContentRequest is set.

generateContentRequest object (GenerateContentRequest)

Optional. The overall input given to the Model. This includes the prompt as well as other model-steering information such as system instructions and/or function declarations for function calling. contents and generateContentRequest are mutually exclusive: you can send either model plus contents or a generateContentRequest, but never both.
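
To make the mutual exclusivity concrete, here is a minimal Python sketch of the two request shapes at the REST layer. It uses the third-party requests package; the payload field names follow the structures documented on this page, and the prompt text is illustrative only:

import os
import requests

url = (
    "https://generativelanguage.googleapis.com/v1beta/"
    "models/gemini-2.0-flash:countTokens"
)
key = {"key": os.environ["GEMINI_API_KEY"]}

# Shape 1: model in the URL path plus bare contents.
r1 = requests.post(url, params=key, json={
    "contents": [{"parts": [{"text": "Hello"}]}],
})

# Shape 2: a full generateContentRequest, which can also carry model-steering
# fields such as systemInstruction. Note the model is repeated inside it.
r2 = requests.post(url, params=key, json={
    "generateContentRequest": {
        "model": "models/gemini-2.0-flash",
        "contents": [{"parts": [{"text": "Hello"}]}],
        "systemInstruction": {"parts": [{"text": "Answer tersely."}]},
    },
})

print(r1.json()["totalTokens"], r2.json()["totalTokens"])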

Example request

Text

Python

from google import genai
client = genai.Client()
prompt = "The quick brown fox jumps over the lazy dog."
# Count tokens using the new client method.
total_tokens = client.models.count_tokens(
 model="gemini-2.0-flash", contents=prompt
)
print("total_tokens: ", total_tokens)
# ( e.g., total_tokens: 10 )
response = client.models.generate_content(
 model="gemini-2.0-flash", contents=prompt
)
# The usage_metadata provides detailed token counts.
print(response.usage_metadata)
# ( e.g., prompt_token_count: 11, candidates_token_count: 73, total_token_count: 84 )

Node.js

// Make sure to include the following import:
// import {GoogleGenAI} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const prompt = "The quick brown fox jumps over the lazy dog.";
const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: prompt,
});
console.log(countTokensResponse.totalTokens);
const generateResponse = await ai.models.generateContent({
  model: "gemini-2.0-flash",
  contents: prompt,
});
console.log(generateResponse.usageMetadata);

Go

ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
  APIKey:  os.Getenv("GEMINI_API_KEY"),
  Backend: genai.BackendGeminiAPI,
})
if err != nil {
  log.Fatal(err)
}
prompt := "The quick brown fox jumps over the lazy dog."
// Convert the prompt to a slice of *genai.Content using the helper.
contents := []*genai.Content{
  genai.NewContentFromText(prompt, genai.RoleUser),
}
countResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
  log.Fatal(err)
}
fmt.Println("total_tokens:", countResp.TotalTokens)
response, err := client.Models.GenerateContent(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
  log.Fatal(err)
}
usageMetadata, err := json.MarshalIndent(response.UsageMetadata, "", "  ")
if err != nil {
  log.Fatal(err)
}
fmt.Println(string(usageMetadata))

Shell

curl https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:countTokens?key=$GEMINI_API_KEY \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts": [{
        "text": "The quick brown fox jumps over the lazy dog."
      }]
    }]
  }'

Chat

Python

from google import genai
from google.genai import types
client = genai.Client()
chat = client.chats.create(
 model="gemini-2.0-flash",
 history=[
 types.Content(
 role="user", parts=[types.Part(text="Hi my name is Bob")]
 ),
 types.Content(role="model", parts=[types.Part(text="Hi Bob!")]),
 ],
)
# Count tokens for the chat history.
print(
 client.models.count_tokens(
 model="gemini-2.0-flash", contents=chat.get_history()
 )
)
# ( e.g., total_tokens: 10 )
response = chat.send_message(
 message="In one sentence, explain how a computer works to a young child."
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: 25, candidates_token_count: 21, total_token_count: 46 )
# You can count tokens for the combined history and a new message.
extra = types.UserContent(
 parts=[
 types.Part(
 text="What is the meaning of life?",
 )
 ]
)
history = chat.get_history()
history.append(extra)
print(client.models.count_tokens(model="gemini-2.0-flash", contents=history))
# ( e.g., total_tokens: 56 )

Node.js

// Make sure to include the following import:
// import {GoogleGenAI} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
// Initial chat history.
const history = [
  { role: "user", parts: [{ text: "Hi my name is Bob" }] },
  { role: "model", parts: [{ text: "Hi Bob!" }] },
];
const chat = ai.chats.create({
  model: "gemini-2.0-flash",
  history: history,
});
// Count tokens for the current chat history.
const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: chat.getHistory(),
});
console.log(countTokensResponse.totalTokens);
const chatResponse = await chat.sendMessage({
  message: "In one sentence, explain how a computer works to a young child.",
});
console.log(chatResponse.usageMetadata);
// Add an extra user message to the history.
const extraMessage = {
  role: "user",
  parts: [{ text: "What is the meaning of life?" }],
};
const combinedHistory = chat.getHistory();
combinedHistory.push(extraMessage);
const combinedCountTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: combinedHistory,
});
console.log(
  "Combined history token count:",
  combinedCountTokensResponse.totalTokens,
);

Go

ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
  APIKey:  os.Getenv("GEMINI_API_KEY"),
  Backend: genai.BackendGeminiAPI,
})
if err != nil {
  log.Fatal(err)
}
// Initialize chat with some history.
history := []*genai.Content{
  {Role: genai.RoleUser, Parts: []*genai.Part{{Text: "Hi my name is Bob"}}},
  {Role: genai.RoleModel, Parts: []*genai.Part{{Text: "Hi Bob!"}}},
}
chat, err := client.Chats.Create(ctx, "gemini-2.0-flash", nil, history)
if err != nil {
  log.Fatal(err)
}
firstTokenResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", chat.History(false), nil)
if err != nil {
  log.Fatal(err)
}
fmt.Println(firstTokenResp.TotalTokens)
resp, err := chat.SendMessage(ctx, genai.Part{
  Text: "In one sentence, explain how a computer works to a young child."},
)
if err != nil {
  log.Fatal(err)
}
fmt.Printf("%#v\n", resp.UsageMetadata)
// Append an extra user message and recount.
extra := genai.NewContentFromText("What is the meaning of life?", genai.RoleUser)
hist := chat.History(false)
hist = append(hist, extra)
secondTokenResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", hist, nil)
if err != nil {
  log.Fatal(err)
}
fmt.Println(secondTokenResp.TotalTokens)

Shell

curl https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:countTokens?key=$GEMINI_API_KEY \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [
      {
        "role": "user",
        "parts": [{"text": "Hi, my name is Bob."}]
      },
      {
        "role": "model",
        "parts": [{"text": "Hi Bob"}]
      }
    ]
  }'

Inline media

Python

from google import genai
import PIL.Image
client = genai.Client()
prompt = "Tell me about this image"
your_image_file = PIL.Image.open(media / "organ.jpg")
# Count tokens for combined text and inline image.
print(
 client.models.count_tokens(
 model="gemini-2.0-flash", contents=[prompt, your_image_file]
 )
)
# ( e.g., total_tokens: 263 )
response = client.models.generate_content(
 model="gemini-2.0-flash", contents=[prompt, your_image_file]
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: 264, candidates_token_count: 80, total_token_count: 345 )

Node.js

// Make sure to include the following imports:
// import {GoogleGenAI, createUserContent, createPartFromBase64} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const prompt = "Tell me about this image";
const imageBuffer = fs.readFileSync(path.join(media, "organ.jpg"));
// Convert the buffer to a base64 string.
const imageBase64 = imageBuffer.toString("base64");
// Build contents using createUserContent and createPartFromBase64.
const contents = createUserContent([
  prompt,
  createPartFromBase64(imageBase64, "image/jpeg"),
]);
const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: contents,
});
console.log(countTokensResponse.totalTokens);
const generateResponse = await ai.models.generateContent({
  model: "gemini-2.0-flash",
  contents: contents,
});
console.log(generateResponse.usageMetadata);

Go

// This sample uses the legacy github.com/google/generative-ai-go SDK.
model := client.GenerativeModel("gemini-1.5-flash")
prompt := "Tell me about this image"
imageFile, err := os.ReadFile(filepath.Join(testDataDir, "personWorkingOnComputer.jpg"))
if err != nil {
  log.Fatal(err)
}
// Call `CountTokens` to get the input token count
// of the combined text and file (`total_tokens`).
// An image's display or file size does not affect its token count.
// Optionally, you can call `CountTokens` for the text and file separately.
tokResp, err := model.CountTokens(ctx, genai.Text(prompt), genai.ImageData("jpeg", imageFile))
if err != nil {
  log.Fatal(err)
}
fmt.Println("total_tokens:", tokResp.TotalTokens)
// ( total_tokens: 264 )
resp, err := model.GenerateContent(ctx, genai.Text(prompt), genai.ImageData("jpeg", imageFile))
if err != nil {
  log.Fatal(err)
}
fmt.Println("prompt_token_count:", resp.UsageMetadata.PromptTokenCount)
fmt.Println("candidates_token_count:", resp.UsageMetadata.CandidatesTokenCount)
fmt.Println("total_token_count:", resp.UsageMetadata.TotalTokenCount)
// ( prompt_token_count: 264, candidates_token_count: 100, total_token_count: 364 )

Shell

curl"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:countTokens?key=$GEMINI_API_KEY"\
-H'Content-Type: application/json'\
-XPOST\
-d'{
 "contents": [{
 "parts":[
 {"text": "Tell me about this instrument"},
 {
 "inline_data": {
 "mime_type":"image/jpeg",
 "data": "'$(base64$B64FLAGS$IMG_PATH)'"
 }
 }
 ]
 }]
 }'2>/dev/null

Video

Python

from google import genai
import time
client = genai.Client()
prompt = "Tell me about this video"
your_file = client.files.upload(file=media / "Big_Buck_Bunny.mp4")
# Poll until the video file is completely processed (state becomes ACTIVE).
while not your_file.state or your_file.state.name != "ACTIVE":
 print("Processing video...")
 print("File state:", your_file.state)
 time.sleep(5)
 your_file = client.files.get(name=your_file.name)
print(
 client.models.count_tokens(
 model="gemini-2.0-flash", contents=[prompt, your_file]
 )
)
# ( e.g., total_tokens: 300 )
response = client.models.generate_content(
 model="gemini-2.0-flash", contents=[prompt, your_file]
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: 301, candidates_token_count: 60, total_token_count: 361 )

Node.js

// Make sure to include the following imports:
// import {GoogleGenAI, createUserContent, createPartFromUri} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const prompt = "Tell me about this video";
let videoFile = await ai.files.upload({
  file: path.join(media, "Big_Buck_Bunny.mp4"),
  config: { mimeType: "video/mp4" },
});
// Poll until the video file is completely processed (state becomes ACTIVE).
while (!videoFile.state || videoFile.state.toString() !== "ACTIVE") {
  console.log("Processing video...");
  console.log("File state: ", videoFile.state);
  await sleep(5000); // `sleep` is a small promise-based delay helper.
  videoFile = await ai.files.get({ name: videoFile.name });
}
const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: createUserContent([
    prompt,
    createPartFromUri(videoFile.uri, videoFile.mimeType),
  ]),
});
console.log(countTokensResponse.totalTokens);
const generateResponse = await ai.models.generateContent({
  model: "gemini-2.0-flash",
  contents: createUserContent([
    prompt,
    createPartFromUri(videoFile.uri, videoFile.mimeType),
  ]),
});
console.log(generateResponse.usageMetadata);

Go

ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
  APIKey:  os.Getenv("GEMINI_API_KEY"),
  Backend: genai.BackendGeminiAPI,
})
if err != nil {
  log.Fatal(err)
}
file, err := client.Files.UploadFromPath(
  ctx,
  filepath.Join(getMedia(), "Big_Buck_Bunny.mp4"),
  &genai.UploadFileConfig{
    MIMEType: "video/mp4",
  },
)
if err != nil {
  log.Fatal(err)
}
// Poll until the video file is completely processed (state becomes ACTIVE).
for file.State != genai.FileStateActive {
  fmt.Println("Processing video...")
  fmt.Println("File state:", file.State)
  time.Sleep(5 * time.Second)
  file, err = client.Files.Get(ctx, file.Name, nil)
  if err != nil {
    log.Fatal(err)
  }
}
parts := []*genai.Part{
  genai.NewPartFromText("Tell me about this video"),
  genai.NewPartFromURI(file.URI, file.MIMEType),
}
contents := []*genai.Content{
  genai.NewContentFromParts(parts, genai.RoleUser),
}
tokenResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
  log.Fatal(err)
}
fmt.Println("Multimodal video/audio token count:", tokenResp.TotalTokens)
response, err := client.Models.GenerateContent(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
  log.Fatal(err)
}
usageMetadata, err := json.MarshalIndent(response.UsageMetadata, "", "  ")
if err != nil {
  log.Fatal(err)
}
fmt.Println(string(usageMetadata))

Shell


MIME_TYPE=$(file -b --mime-type "${VIDEO_PATH}")
NUM_BYTES=$(wc -c < "${VIDEO_PATH}")
DISPLAY_NAME="${VIDEO_PATH}"
tmp_header_file=upload-header.tmp
# Initial resumable request defining metadata.
# The upload URL is in the response headers; dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
  -D "${tmp_header_file}" \
  -H "X-Goog-Upload-Protocol: resumable" \
  -H "X-Goog-Upload-Command: start" \
  -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
  -H "Content-Type: application/json" \
  -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2>/dev/null
upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm "${tmp_header_file}"
# Upload the actual bytes.
curl "${upload_url}" \
  -H "Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Offset: 0" \
  -H "X-Goog-Upload-Command: upload, finalize" \
  --data-binary "@${VIDEO_PATH}" 2>/dev/null > file_info.json
file_uri=$(jq -r ".file.uri" file_info.json)
state=$(jq -r ".file.state" file_info.json)
name=$(jq -r ".file.name" file_info.json)
# Poll until the file leaves the PROCESSING state.
while [[ "${state}" == *"PROCESSING"* ]];
do
  echo "Processing video..."
  sleep 5
  # Get the file of interest to check its state.
  # Note: files.get returns the File resource directly (not wrapped in "file").
  curl "https://generativelanguage.googleapis.com/v1beta/${name}?key=${GOOGLE_API_KEY}" > file_info.json
  state=$(jq -r ".state" file_info.json)
done
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:countTokens?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts": [
        {"text": "Describe this video clip"},
        {"file_data": {"mime_type": "video/mp4", "file_uri": "'$file_uri'"}}
      ]
    }]
  }'

PDF

Python

from google import genai
client = genai.Client()
sample_pdf = client.files.upload(file=media / "test.pdf")
token_count = client.models.count_tokens(
 model="gemini-2.0-flash",
 contents=["Give me a summary of this document.", sample_pdf],
)
print(f"{token_count=}")
response = client.models.generate_content(
 model="gemini-2.0-flash",
 contents=["Give me a summary of this document.", sample_pdf],
)
print(response.usage_metadata)

Cache

Python

from google import genai
from google.genai import types
client = genai.Client()
your_file = client.files.upload(file=media / "a11.txt")
cache = client.caches.create(
 model="gemini-1.5-flash-001",
 config={
 "contents": ["Here the Apollo 11 transcript:", your_file],
 "system_instruction": None,
 "tools": None,
 },
)
# Create a prompt.
prompt = "Please give a short summary of this file."
# Count tokens for the prompt (the cached content is not passed here).
print(client.models.count_tokens(model="gemini-2.0-flash", contents=prompt))
# ( e.g., total_tokens: 9 )
response = client.models.generate_content(
 model="gemini-1.5-flash-001",
 contents=prompt,
 config=types.GenerateContentConfig(
 cached_content=cache.name,
 ),
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: ..., cached_content_token_count: ..., candidates_token_count: ... )
client.caches.delete(name=cache.name)

Node.js

// Make sure to include the following imports:
// import {GoogleGenAI, createUserContent, createPartFromUri} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const textFile = await ai.files.upload({
  file: path.join(media, "a11.txt"),
  config: { mimeType: "text/plain" },
});
const cache = await ai.caches.create({
  model: "gemini-1.5-flash-001",
  config: {
    contents: createUserContent([
      "Here is the Apollo 11 transcript:",
      createPartFromUri(textFile.uri, textFile.mimeType),
    ]),
    systemInstruction: null,
    tools: null,
  },
});
const prompt = "Please give a short summary of this file.";
// Count tokens for the prompt alone (the cached content is not passed here).
const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: prompt,
});
console.log(countTokensResponse.totalTokens);
const generateResponse = await ai.models.generateContent({
  model: "gemini-1.5-flash-001",
  contents: prompt,
  config: { cachedContent: cache.name },
});
console.log(generateResponse.usageMetadata);
await ai.caches.delete({ name: cache.name });

Go

ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
  APIKey:  os.Getenv("GEMINI_API_KEY"),
  Backend: genai.BackendGeminiAPI,
})
if err != nil {
  log.Fatal(err)
}
file, err := client.Files.UploadFromPath(
  ctx,
  filepath.Join(getMedia(), "a11.txt"),
  &genai.UploadFileConfig{
    MIMEType: "text/plain",
  },
)
if err != nil {
  log.Fatal(err)
}
parts := []*genai.Part{
  genai.NewPartFromText("Here is the Apollo 11 transcript:"),
  genai.NewPartFromURI(file.URI, file.MIMEType),
}
contents := []*genai.Content{
  genai.NewContentFromParts(parts, genai.RoleUser),
}
// Create cached content using a simple slice with text and a file.
cache, err := client.Caches.Create(ctx, "gemini-1.5-flash-001", &genai.CreateCachedContentConfig{
  Contents: contents,
})
if err != nil {
  log.Fatal(err)
}
prompt := "Please give a short summary of this file."
countResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", []*genai.Content{
  genai.NewContentFromText(prompt, genai.RoleUser),
}, nil)
if err != nil {
  log.Fatal(err)
}
fmt.Printf("%d\n", countResp.TotalTokens)
response, err := client.Models.GenerateContent(ctx, "gemini-1.5-flash-001", []*genai.Content{
  genai.NewContentFromText(prompt, genai.RoleUser),
}, &genai.GenerateContentConfig{
  CachedContent: cache.Name,
})
if err != nil {
  log.Fatal(err)
}
usageMetadata, err := json.MarshalIndent(response.UsageMetadata, "", "  ")
if err != nil {
  log.Fatal(err)
}
// Note: this may print `null` if usage metadata is not populated.
fmt.Println(string(usageMetadata))
if _, err := client.Caches.Delete(ctx, cache.Name, &genai.DeleteCachedContentConfig{}); err != nil {
  log.Fatal(err)
}

System Instruction

Go

ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
  APIKey:  os.Getenv("GEMINI_API_KEY"),
  Backend: genai.BackendGeminiAPI,
})
if err != nil {
  log.Fatal(err)
}
// Construct the user message contents.
contents := []*genai.Content{
  genai.NewContentFromText("Good morning! How are you?", genai.RoleUser),
}
// Set the system instruction as a *genai.Content.
config := &genai.GenerateContentConfig{
  SystemInstruction: genai.NewContentFromText("You are a cat. Your name is Neko.", genai.RoleUser),
}
response, err := client.Models.GenerateContent(ctx, "gemini-2.0-flash", contents, config)
if err != nil {
  log.Fatal(err)
}
printResponse(response) // printResponse is a small helper that prints the response text.
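
Python

The Go sample above sends the system instruction with generateContent. For counting, here is a hedged Python sketch; it assumes the installed google-genai version supports a system_instruction field on types.CountTokensConfig (if it does not, the REST generateContentRequest shape described under Request body carries the same information):

from google import genai
from google.genai import types

client = genai.Client()
# Count tokens with a system instruction included alongside the prompt.
# The config route is an assumption about SDK support, not the only option.
response = client.models.count_tokens(
    model="gemini-2.0-flash",
    contents="Good morning! How are you?",
    config=types.CountTokensConfig(
        system_instruction="You are a cat. Your name is Neko.",
    ),
)
print(response.total_tokens)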

Tools

Java

FunctionDeclaration multiplyDefinition =
    defineFunction(
        /* name */ "multiply",
        /* description */ "returns a * b.",
        /* parameters */ Arrays.asList(
            Schema.numDouble("a", "First parameter"),
            Schema.numDouble("b", "Second parameter")),
        /* required */ Arrays.asList("a", "b"));
Tool tool = new Tool(Arrays.asList(multiplyDefinition), null);

// Specify a Gemini model appropriate for your use case
GenerativeModel gm =
    new GenerativeModel(
        /* modelName */ "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key"
        // above)
        /* apiKey */ BuildConfig.apiKey,
        /* generationConfig (optional) */ null,
        /* safetySettings (optional) */ null,
        /* requestOptions (optional) */ new RequestOptions(),
        /* tools (optional) */ Arrays.asList(tool));
GenerativeModelFutures model = GenerativeModelFutures.from(gm);

Content inputContent = new Content.Builder().addText("What's your name?").build();

// For illustrative purposes only. You should use an executor that fits your needs.
Executor executor = Executors.newSingleThreadExecutor();

// For text-only input
ListenableFuture<CountTokensResponse> countTokensResponse = model.countTokens(inputContent);
Futures.addCallback(
    countTokensResponse,
    new FutureCallback<CountTokensResponse>() {
      @Override
      public void onSuccess(CountTokensResponse result) {
        int totalTokens = result.getTotalTokens();
        System.out.println("TotalTokens = " + totalTokens);
      }

      @Override
      public void onFailure(Throwable t) {
        t.printStackTrace();
      }
    },
    executor);
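
Python

For comparison, a hedged Python sketch of the same idea: declare the multiply function with types.FunctionDeclaration and pass it to count_tokens through types.CountTokensConfig (this assumes the installed google-genai version accepts tools on count_tokens; otherwise the REST generateContentRequest shape under Request body can carry the function declarations):

from google import genai
from google.genai import types

client = genai.Client()
# Declare the same multiply(a, b) function as in the Java sample.
multiply = types.FunctionDeclaration(
    name="multiply",
    description="returns a * b.",
    parameters=types.Schema(
        type=types.Type.OBJECT,
        properties={
            "a": types.Schema(type=types.Type.NUMBER, description="First parameter"),
            "b": types.Schema(type=types.Type.NUMBER, description="Second parameter"),
        },
        required=["a", "b"],
    ),
)
tool = types.Tool(function_declarations=[multiply])
# Tool declarations add tokens to the prompt, so the count is higher
# than for the bare text.
response = client.models.count_tokens(
    model="gemini-2.0-flash",
    contents="What is 12 multiplied by 7?",
    config=types.CountTokensConfig(tools=[tool]),
)
print(response.total_tokens)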

Response body

A response from models.countTokens.

It returns the model's tokenCount for the prompt.

If successful, the response body contains data with the following structure:

Fields
totalTokens integer

The number of tokens that the Model tokenizes the prompt into. Always non-negative.

cachedContentTokenCount integer

Number of tokens in the cached part of the prompt (the cached content).

promptTokensDetails[] object (ModalityTokenCount)

Output only. List of modalities that were processed in the request input.

cacheTokensDetails[] object (ModalityTokenCount)

Output only. List of modalities that were processed in the cached content.

JSON representation
{
  "totalTokens": integer,
  "cachedContentTokenCount": integer,
  "promptTokensDetails": [
    {
      object (ModalityTokenCount)
    }
  ],
  "cacheTokensDetails": [
    {
      object (ModalityTokenCount)
    }
  ]
}
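
As a quick illustration of consuming this response, here is a hedged Python sketch at the REST layer (third-party requests package; only the field names documented above are assumed, and .get() guards the fields that are absent when, for example, no cached content is involved):

import os
import requests

url = (
    "https://generativelanguage.googleapis.com/v1beta/"
    "models/gemini-2.0-flash:countTokens"
)
resp = requests.post(
    url,
    params={"key": os.environ["GEMINI_API_KEY"]},
    json={"contents": [{"parts": [{"text": "The quick brown fox jumps over the lazy dog."}]}]},
)
resp.raise_for_status()
body = resp.json()

print("totalTokens:", body["totalTokens"])
# Absent unless the request involved cached content.
print("cachedContentTokenCount:", body.get("cachedContentTokenCount"))
# Each ModalityTokenCount pairs a modality with its token count.
for detail in body.get("promptTokensDetails", []):
    print(detail.get("modality"), detail.get("tokenCount"))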
