Counting tokens
For a detailed guide on counting tokens using the Gemini API, including how images, audio and video are counted, see the Token counting guide and accompanying Cookbook recipe.
Method: models.countTokens
Runs a model's tokenizer on input Content and returns the token count. Refer to the tokens guide to learn more about tokens.
Endpoint
POST https://generativelanguage.googleapis.com/v1beta/{model=models/*}:countTokens
Path parameters
model
string
Required. The model's resource name. This serves as an ID for the Model to use.
This name should match a model name returned by the models.list method.
Format: models/{model}. For example: models/gemini-2.0-flash.
Request body
The request body contains data with the following structure:
contents[]
object (Content)
Optional. The input given to the model as a prompt. This field is ignored when generateContentRequest is set.
generateContentRequest
object (GenerateContentRequest)
Optional. The overall input given to the Model. This includes the prompt as well as other model-steering information such as system instructions and function declarations for function calling. model + contents and generateContentRequest are mutuallyexclusive: send either model + contents or a generateContentRequest, never both. A sketch of the two request shapes follows.
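To make the mutual exclusivity concrete, here is a minimal sketch of the two request shapes against the endpoint above, using Python's requests library; the API-key handling via an environment variable is an assumption, and only one shape may be sent per call.
Python
import os
import requests

API_KEY = os.environ["GEMINI_API_KEY"]  # assumed env var
URL = ("https://generativelanguage.googleapis.com/v1beta/"
       f"models/gemini-2.0-flash:countTokens?key={API_KEY}")

prompt = {"parts": [{"text": "The quick brown fox jumps over the lazy dog."}]}

# Shape 1: bare contents.
body_contents = {"contents": [prompt]}

# Shape 2: a full generateContentRequest (note the nested model field,
# which is required inside generateContentRequest). Send one shape or
# the other, never both.
body_generate_content_request = {
    "generateContentRequest": {
        "model": "models/gemini-2.0-flash",
        "contents": [prompt],
    }
}

resp = requests.post(URL, json=body_contents)
print(resp.json()["totalTokens"])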
Example request
Text
Python
from google import genai

client = genai.Client()
prompt = "The quick brown fox jumps over the lazy dog."

# Count tokens using the client method.
total_tokens = client.models.count_tokens(
    model="gemini-2.0-flash", contents=prompt
)
print("total_tokens: ", total_tokens)
# ( e.g., total_tokens: 10 )

response = client.models.generate_content(
    model="gemini-2.0-flash", contents=prompt
)
# The usage_metadata provides detailed token counts.
print(response.usage_metadata)
# ( e.g., prompt_token_count: 11, candidates_token_count: 73, total_token_count: 84 )
Node.js
// Make sure to include the following import:
// import {GoogleGenAI} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const prompt = "The quick brown fox jumps over the lazy dog.";

const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: prompt,
});
console.log(countTokensResponse.totalTokens);

const generateResponse = await ai.models.generateContent({
  model: "gemini-2.0-flash",
  contents: prompt,
});
console.log(generateResponse.usageMetadata);
Go
ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
    APIKey:  os.Getenv("GEMINI_API_KEY"),
    Backend: genai.BackendGeminiAPI,
})
if err != nil {
    log.Fatal(err)
}

prompt := "The quick brown fox jumps over the lazy dog."
// Convert the prompt to a slice of *genai.Content using the helper.
contents := []*genai.Content{
    genai.NewContentFromText(prompt, genai.RoleUser),
}

countResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
    log.Fatal(err)
}
fmt.Println("total_tokens:", countResp.TotalTokens)

response, err := client.Models.GenerateContent(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
    log.Fatal(err)
}

usageMetadata, err := json.MarshalIndent(response.UsageMetadata, "", "  ")
if err != nil {
    log.Fatal(err)
}
fmt.Println(string(usageMetadata))
Shell
curl https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:countTokens?key=$GEMINI_API_KEY \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts": [{
        "text": "The quick brown fox jumps over the lazy dog."
      }]
    }]
  }'
Chat
Python
from google import genai
from google.genai import types

client = genai.Client()
chat = client.chats.create(
    model="gemini-2.0-flash",
    history=[
        types.Content(
            role="user", parts=[types.Part(text="Hi my name is Bob")]
        ),
        types.Content(role="model", parts=[types.Part(text="Hi Bob!")]),
    ],
)
# Count tokens for the chat history.
print(
    client.models.count_tokens(
        model="gemini-2.0-flash", contents=chat.get_history()
    )
)
# ( e.g., total_tokens: 10 )

response = chat.send_message(
    message="In one sentence, explain how a computer works to a young child."
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: 25, candidates_token_count: 21, total_token_count: 46 )

# You can count tokens for the combined history and a new message.
extra = types.UserContent(
    parts=[
        types.Part(
            text="What is the meaning of life?",
        )
    ]
)
history = chat.get_history()
history.append(extra)
print(client.models.count_tokens(model="gemini-2.0-flash", contents=history))
# ( e.g., total_tokens: 56 )
Node.js
// Make sure to include the following import:
// import {GoogleGenAI} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Initial chat history.
const history = [
  { role: "user", parts: [{ text: "Hi my name is Bob" }] },
  { role: "model", parts: [{ text: "Hi Bob!" }] },
];
const chat = ai.chats.create({
  model: "gemini-2.0-flash",
  history: history,
});

// Count tokens for the current chat history.
const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: chat.getHistory(),
});
console.log(countTokensResponse.totalTokens);

const chatResponse = await chat.sendMessage({
  message: "In one sentence, explain how a computer works to a young child.",
});
console.log(chatResponse.usageMetadata);

// Add an extra user message to the history.
const extraMessage = {
  role: "user",
  parts: [{ text: "What is the meaning of life?" }],
};
const combinedHistory = chat.getHistory();
combinedHistory.push(extraMessage);
const combinedCountTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: combinedHistory,
});
console.log(
  "Combined history token count:",
  combinedCountTokensResponse.totalTokens,
);
Go
ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
    APIKey:  os.Getenv("GEMINI_API_KEY"),
    Backend: genai.BackendGeminiAPI,
})
if err != nil {
    log.Fatal(err)
}

// Initialize chat with some history.
history := []*genai.Content{
    {Role: genai.RoleUser, Parts: []*genai.Part{{Text: "Hi my name is Bob"}}},
    {Role: genai.RoleModel, Parts: []*genai.Part{{Text: "Hi Bob!"}}},
}
chat, err := client.Chats.Create(ctx, "gemini-2.0-flash", nil, history)
if err != nil {
    log.Fatal(err)
}

firstTokenResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", chat.History(false), nil)
if err != nil {
    log.Fatal(err)
}
fmt.Println(firstTokenResp.TotalTokens)

resp, err := chat.SendMessage(ctx, genai.Part{
    Text: "In one sentence, explain how a computer works to a young child.",
})
if err != nil {
    log.Fatal(err)
}
fmt.Printf("%#v\n", resp.UsageMetadata)

// Append an extra user message and recount.
extra := genai.NewContentFromText("What is the meaning of life?", genai.RoleUser)
hist := chat.History(false)
hist = append(hist, extra)
secondTokenResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", hist, nil)
if err != nil {
    log.Fatal(err)
}
fmt.Println(secondTokenResp.TotalTokens)
Shell
curl https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:countTokens?key=$GEMINI_API_KEY \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [
      {"role": "user",
       "parts": [{"text": "Hi, my name is Bob."}]
      },
      {"role": "model",
       "parts": [{"text": "Hi Bob"}]
      }
    ]
  }'
Inline media
Python
from google import genai
import PIL.Image

client = genai.Client()
prompt = "Tell me about this image"
# `media` is a pathlib.Path pointing at your sample-media directory.
your_image_file = PIL.Image.open(media / "organ.jpg")

# Count tokens for combined text and inline image.
print(
    client.models.count_tokens(
        model="gemini-2.0-flash", contents=[prompt, your_image_file]
    )
)
# ( e.g., total_tokens: 263 )

response = client.models.generate_content(
    model="gemini-2.0-flash", contents=[prompt, your_image_file]
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: 264, candidates_token_count: 80, total_token_count: 345 )
Node.js
// Make sure to include the following import:
// import {GoogleGenAI} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const prompt = "Tell me about this image";
const imageBuffer = fs.readFileSync(path.join(media, "organ.jpg"));

// Convert the buffer to a base64 string.
const imageBase64 = imageBuffer.toString("base64");

// Build contents using createUserContent and createPartFromBase64.
const contents = createUserContent([
  prompt,
  createPartFromBase64(imageBase64, "image/jpeg"),
]);

const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: contents,
});
console.log(countTokensResponse.totalTokens);

const generateResponse = await ai.models.generateContent({
  model: "gemini-2.0-flash",
  contents: contents,
});
console.log(generateResponse.usageMetadata);
Go
// This sample uses the legacy Go SDK (github.com/google/generative-ai-go).
model := client.GenerativeModel("gemini-1.5-flash")
prompt := "Tell me about this image"
imageFile, err := os.ReadFile(filepath.Join(testDataDir, "personWorkingOnComputer.jpg"))
if err != nil {
    log.Fatal(err)
}

// Call `CountTokens` to get the input token count
// of the combined text and file (`total_tokens`).
// An image's display or file size does not affect its token count.
// Optionally, you can call `CountTokens` for the text and file separately.
tokResp, err := model.CountTokens(ctx, genai.Text(prompt), genai.ImageData("jpeg", imageFile))
if err != nil {
    log.Fatal(err)
}
fmt.Println("total_tokens:", tokResp.TotalTokens)
// ( total_tokens: 264 )

resp, err := model.GenerateContent(ctx, genai.Text(prompt), genai.ImageData("jpeg", imageFile))
if err != nil {
    log.Fatal(err)
}
fmt.Println("prompt_token_count:", resp.UsageMetadata.PromptTokenCount)
fmt.Println("candidates_token_count:", resp.UsageMetadata.CandidatesTokenCount)
fmt.Println("total_token_count:", resp.UsageMetadata.TotalTokenCount)
// ( prompt_token_count: 264, candidates_token_count: 100, total_token_count: 364 )
Shell
curl"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:countTokens?key=$GEMINI_API_KEY"\
-H'Content-Type: application/json'\
-XPOST\
-d'{
"contents": [{
"parts":[
{"text": "Tell me about this instrument"},
{
"inline_data": {
"mime_type":"image/jpeg",
"data": "'$(base64$B64FLAGS$IMG_PATH)'"
}
}
]
}]
}'2>/dev/nullVideo
Python
from google import genai
import time

client = genai.Client()
prompt = "Tell me about this video"
your_file = client.files.upload(file=media / "Big_Buck_Bunny.mp4")

# Poll until the video file is completely processed (state becomes ACTIVE).
while not your_file.state or your_file.state.name != "ACTIVE":
    print("Processing video...")
    print("File state:", your_file.state)
    time.sleep(5)
    your_file = client.files.get(name=your_file.name)

print(
    client.models.count_tokens(
        model="gemini-2.0-flash", contents=[prompt, your_file]
    )
)
# ( e.g., total_tokens: 300 )

response = client.models.generate_content(
    model="gemini-2.0-flash", contents=[prompt, your_file]
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: 301, candidates_token_count: 60, total_token_count: 361 )
Node.js
// Make sure to include the following import:
// import {GoogleGenAI} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const prompt = "Tell me about this video";
let videoFile = await ai.files.upload({
  file: path.join(media, "Big_Buck_Bunny.mp4"),
  config: { mimeType: "video/mp4" },
});

// Poll until the video file is completely processed (state becomes ACTIVE).
while (!videoFile.state || videoFile.state.toString() !== "ACTIVE") {
  console.log("Processing video...");
  console.log("File state: ", videoFile.state);
  await sleep(5000);
  videoFile = await ai.files.get({ name: videoFile.name });
}

const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: createUserContent([
    prompt,
    createPartFromUri(videoFile.uri, videoFile.mimeType),
  ]),
});
console.log(countTokensResponse.totalTokens);

const generateResponse = await ai.models.generateContent({
  model: "gemini-2.0-flash",
  contents: createUserContent([
    prompt,
    createPartFromUri(videoFile.uri, videoFile.mimeType),
  ]),
});
console.log(generateResponse.usageMetadata);
Go
ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
    APIKey:  os.Getenv("GEMINI_API_KEY"),
    Backend: genai.BackendGeminiAPI,
})
if err != nil {
    log.Fatal(err)
}

file, err := client.Files.UploadFromPath(
    ctx,
    filepath.Join(getMedia(), "Big_Buck_Bunny.mp4"),
    &genai.UploadFileConfig{
        MIMEType: "video/mp4",
    },
)
if err != nil {
    log.Fatal(err)
}

// Poll until the video file is completely processed (state becomes ACTIVE).
for file.State == genai.FileStateUnspecified || file.State != genai.FileStateActive {
    fmt.Println("Processing video...")
    fmt.Println("File state:", file.State)
    time.Sleep(5 * time.Second)

    file, err = client.Files.Get(ctx, file.Name, nil)
    if err != nil {
        log.Fatal(err)
    }
}

parts := []*genai.Part{
    genai.NewPartFromText("Tell me about this video"),
    genai.NewPartFromURI(file.URI, file.MIMEType),
}
contents := []*genai.Content{
    genai.NewContentFromParts(parts, genai.RoleUser),
}

tokenResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
    log.Fatal(err)
}
fmt.Println("Multimodal video/audio token count:", tokenResp.TotalTokens)

response, err := client.Models.GenerateContent(ctx, "gemini-2.0-flash", contents, nil)
if err != nil {
    log.Fatal(err)
}

usageMetadata, err := json.MarshalIndent(response.UsageMetadata, "", "  ")
if err != nil {
    log.Fatal(err)
}
fmt.Println(string(usageMetadata))
Shell
# BASE_URL and GOOGLE_API_KEY are assumed to be set in your environment.
MIME_TYPE=$(file -b --mime-type "${VIDEO_PATH}")
NUM_BYTES=$(wc -c < "${VIDEO_PATH}")
DISPLAY_NAME=VIDEO_PATH
tmp_header_file=upload-header.tmp

# Initial resumable request defining metadata.
# The upload URL is in the response headers; dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
  -D "${tmp_header_file}" \
  -H "X-Goog-Upload-Protocol: resumable" \
  -H "X-Goog-Upload-Command: start" \
  -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
  -H "Content-Type: application/json" \
  -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm "${tmp_header_file}"

# Upload the actual bytes.
curl "${upload_url}" \
  -H "Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Offset: 0" \
  -H "X-Goog-Upload-Command: upload, finalize" \
  --data-binary "@${VIDEO_PATH}" 2> /dev/null > file_info.json

file_uri=$(jq -r ".file.uri" file_info.json)
state=$(jq -r ".file.state" file_info.json)
name=$(jq -r ".file.name" file_info.json)

while [[ "${state}" == *"PROCESSING"* ]];
do
  echo "Processing video..."
  sleep 5
  # Get the file of interest to check its state. files.get returns the
  # File resource directly, so read .state (not .file.state) here.
  curl "https://generativelanguage.googleapis.com/v1beta/${name}?key=${GOOGLE_API_KEY}" > file_info.json
  state=$(jq -r ".state" file_info.json)
done

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:countTokens?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts": [
        {"text": "Describe this video clip"},
        {"file_data": {"mime_type": "video/mp4", "file_uri": "'$file_uri'"}}
      ]
    }]
  }'
PDF
Python
from google import genai

client = genai.Client()
sample_pdf = client.files.upload(file=media / "test.pdf")
token_count = client.models.count_tokens(
    model="gemini-2.0-flash",
    contents=["Give me a summary of this document.", sample_pdf],
)
print(f"{token_count=}")

response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=["Give me a summary of this document.", sample_pdf],
)
print(response.usage_metadata)
Cache
Python
from google import genai
from google.genai import types

client = genai.Client()
your_file = client.files.upload(file=media / "a11.txt")
cache = client.caches.create(
    model="gemini-1.5-flash-001",
    config={
        "contents": ["Here is the Apollo 11 transcript:", your_file],
        "system_instruction": None,
        "tools": None,
    },
)

# Create a prompt.
prompt = "Please give a short summary of this file."

# Count tokens for the prompt (the cached content is not passed here).
print(client.models.count_tokens(model="gemini-2.0-flash", contents=prompt))
# ( e.g., total_tokens: 9 )

response = client.models.generate_content(
    model="gemini-1.5-flash-001",
    contents=prompt,
    config=types.GenerateContentConfig(
        cached_content=cache.name,
    ),
)
print(response.usage_metadata)
# ( e.g., prompt_token_count: ..., cached_content_token_count: ..., candidates_token_count: ... )

client.caches.delete(name=cache.name)
Node.js
// Make sure to include the following import:
// import {GoogleGenAI} from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const textFile = await ai.files.upload({
  file: path.join(media, "a11.txt"),
  config: { mimeType: "text/plain" },
});

const cache = await ai.caches.create({
  model: "gemini-1.5-flash-001",
  config: {
    contents: createUserContent([
      "Here is the Apollo 11 transcript:",
      createPartFromUri(textFile.uri, textFile.mimeType),
    ]),
    systemInstruction: null,
    tools: null,
  },
});

const prompt = "Please give a short summary of this file.";
const countTokensResponse = await ai.models.countTokens({
  model: "gemini-2.0-flash",
  contents: prompt,
});
console.log(countTokensResponse.totalTokens);

const generateResponse = await ai.models.generateContent({
  model: "gemini-1.5-flash-001",
  contents: prompt,
  config: { cachedContent: cache.name },
});
console.log(generateResponse.usageMetadata);

await ai.caches.delete({ name: cache.name });
Go
ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
    APIKey:  os.Getenv("GEMINI_API_KEY"),
    Backend: genai.BackendGeminiAPI,
})
if err != nil {
    log.Fatal(err)
}

file, err := client.Files.UploadFromPath(
    ctx,
    filepath.Join(getMedia(), "a11.txt"),
    &genai.UploadFileConfig{
        MIMEType: "text/plain",
    },
)
if err != nil {
    log.Fatal(err)
}

parts := []*genai.Part{
    genai.NewPartFromText("Here is the Apollo 11 transcript:"),
    genai.NewPartFromURI(file.URI, file.MIMEType),
}
contents := []*genai.Content{
    genai.NewContentFromParts(parts, genai.RoleUser),
}

// Create cached content using a simple slice with text and a file.
cache, err := client.Caches.Create(ctx, "gemini-1.5-flash-001", &genai.CreateCachedContentConfig{
    Contents: contents,
})
if err != nil {
    log.Fatal(err)
}

prompt := "Please give a short summary of this file."
countResp, err := client.Models.CountTokens(ctx, "gemini-2.0-flash", []*genai.Content{
    genai.NewContentFromText(prompt, genai.RoleUser),
}, nil)
if err != nil {
    log.Fatal(err)
}
fmt.Printf("%d", countResp.TotalTokens)

response, err := client.Models.GenerateContent(ctx, "gemini-1.5-flash-001", []*genai.Content{
    genai.NewContentFromText(prompt, genai.RoleUser),
}, &genai.GenerateContentConfig{
    CachedContent: cache.Name,
})
if err != nil {
    log.Fatal(err)
}

usageMetadata, err := json.MarshalIndent(response.UsageMetadata, "", "  ")
if err != nil {
    log.Fatal(err)
}
fmt.Println(string(usageMetadata))

_, err = client.Caches.Delete(ctx, cache.Name, &genai.DeleteCachedContentConfig{})
if err != nil {
    log.Fatal(err)
}
System Instruction
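A system instruction contributes to the prompt token count. The request body section above notes that system instructions travel inside generateContentRequest, so a hedged Python sketch of counting tokens with the Go sample's instruction looks like this (the requests-based call and env-var key are assumptions, not part of the original page):
Python
import os
import requests

API_KEY = os.environ["GEMINI_API_KEY"]  # assumed env var
URL = ("https://generativelanguage.googleapis.com/v1beta/"
       f"models/gemini-2.0-flash:countTokens?key={API_KEY}")

# System instructions are part of the "overall input", so they are passed
# inside generateContentRequest rather than as a bare contents field.
body = {
    "generateContentRequest": {
        "model": "models/gemini-2.0-flash",
        "contents": [
            {"role": "user", "parts": [{"text": "Good morning! How are you?"}]}
        ],
        "systemInstruction": {
            "parts": [{"text": "You are a cat. Your name is Neko."}]
        },
    }
}

resp = requests.post(URL, json=body)
print(resp.json()["totalTokens"])  # includes tokens from the system instruction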
Go
ctx := context.Background()
client, err := genai.NewClient(ctx, &genai.ClientConfig{
    APIKey:  os.Getenv("GEMINI_API_KEY"),
    Backend: genai.BackendGeminiAPI,
})
if err != nil {
    log.Fatal(err)
}

// Construct the user message contents.
contents := []*genai.Content{
    genai.NewContentFromText("Good morning! How are you?", genai.RoleUser),
}

// Set the system instruction as a *genai.Content.
config := &genai.GenerateContentConfig{
    SystemInstruction: genai.NewContentFromText("You are a cat. Your name is Neko.", genai.RoleUser),
}

response, err := client.Models.GenerateContent(ctx, "gemini-2.0-flash", contents, config)
if err != nil {
    log.Fatal(err)
}
printResponse(response)
Tools
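Declared tools (function declarations) also count toward the prompt tokens. Before the Java sample, here is a hedged Python sketch that mirrors its multiply(a, b) declaration using the REST generateContentRequest shape documented above; the requests-based call is an assumption, not the page's own sample:
Python
import os
import requests

API_KEY = os.environ["GEMINI_API_KEY"]  # assumed env var
URL = ("https://generativelanguage.googleapis.com/v1beta/"
       f"models/gemini-2.0-flash:countTokens?key={API_KEY}")

# Mirror the Java sample's multiply(a, b) declaration as a REST tool.
multiply = {
    "name": "multiply",
    "description": "returns a * b.",
    "parameters": {
        "type": "OBJECT",
        "properties": {
            "a": {"type": "NUMBER", "description": "First parameter"},
            "b": {"type": "NUMBER", "description": "Second parameter"},
        },
        "required": ["a", "b"],
    },
}

body = {
    "generateContentRequest": {
        "model": "models/gemini-2.0-flash",
        "contents": [{"role": "user", "parts": [{"text": "What's your name?"}]}],
        "tools": [{"functionDeclarations": [multiply]}],
    }
}

resp = requests.post(URL, json=body)
print(resp.json()["totalTokens"])  # declared tools add to the count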
Java
FunctionDeclaration multiplyDefinition =
    defineFunction(
        /* name */ "multiply",
        /* description */ "returns a * b.",
        /* parameters */ Arrays.asList(
            Schema.numDouble("a", "First parameter"),
            Schema.numDouble("b", "Second parameter")),
        /* required */ Arrays.asList("a", "b"));
Tool tool = new Tool(Arrays.asList(multiplyDefinition), null);

// Specify a Gemini model appropriate for your use case.
GenerativeModel gm =
    new GenerativeModel(
        /* modelName */ "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable
        // (see "Set up your API key" above).
        /* apiKey */ BuildConfig.apiKey,
        /* generationConfig (optional) */ null,
        /* safetySettings (optional) */ null,
        /* requestOptions (optional) */ new RequestOptions(),
        /* tools (optional) */ Arrays.asList(tool));
GenerativeModelFutures model = GenerativeModelFutures.from(gm);

Content inputContent = new Content.Builder().addText("What's your name?").build();

// For illustrative purposes only. You should use an executor that fits your needs.
Executor executor = Executors.newSingleThreadExecutor();

// For text-only input
ListenableFuture<CountTokensResponse> countTokensResponse = model.countTokens(inputContent);
Futures.addCallback(
    countTokensResponse,
    new FutureCallback<CountTokensResponse>() {
      @Override
      public void onSuccess(CountTokensResponse result) {
        int totalTokens = result.getTotalTokens();
        System.out.println("TotalTokens = " + totalTokens);
      }

      @Override
      public void onFailure(Throwable t) {
        t.printStackTrace();
      }
    },
    executor);
Response body
A response from models.countTokens.
It returns the model's tokenCount for the prompt.
If successful, the response body contains data with the following structure:
totalTokens
integer
The number of tokens that the Model tokenizes the prompt into. Always non-negative.
cachedContentTokenCount
integer
Number of tokens in the cached part of the prompt (the cached content).
promptTokensDetails[]
object (ModalityTokenCount)
Output only. List of modalities that were processed in the request input.
cacheTokensDetails[]
object (ModalityTokenCount)
Output only. List of modalities that were processed in the cached content.
JSON representation

{
  "totalTokens": integer,
  "cachedContentTokenCount": integer,
  "promptTokensDetails": [
    {
      object (ModalityTokenCount)
    }
  ],
  "cacheTokensDetails": [
    {
      object (ModalityTokenCount)
    }
  ]
}