Class Evals (1.122.0)

Evals(api_client_: google.genai._api_client.BaseApiClient)

API documentation for Evals class.

Methods

batch_evaluate

batch_evaluate(
 *,
 dataset: typing.Union[
 vertexai._genai.types.EvaluationDataset,
 vertexai._genai.types.EvaluationDatasetDict,
 ],
 metrics: list[
 typing.Union[vertexai._genai.types.Metric, vertexai._genai.types.MetricDict]
 ],
 dest: str,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.EvaluateDatasetConfig,
 vertexai._genai.types.EvaluateDatasetConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluateDatasetOperation

Evaluates a dataset based on a set of given metrics.

create_evaluation_item

create_evaluation_item(
 *,
 evaluation_item_type: vertexai._genai.types.EvaluationItemType,
 gcs_uri: str,
 display_name: typing.Optional[str] = None,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.CreateEvaluationItemConfig,
 vertexai._genai.types.CreateEvaluationItemConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationItem

Creates an EvaluationItem.

create_evaluation_run

create_evaluation_run(
 *,
 name: str,
 dataset: typing.Union[
 vertexai._genai.types.EvaluationRunDataSource,
 vertexai._genai.types.EvaluationDataset,
 ],
 dest: str,
 display_name: typing.Optional[str] = None,
 metrics: typing.Optional[
 list[
 typing.Union[
 vertexai._genai.types.EvaluationRunMetric,
 vertexai._genai.types.EvaluationRunMetricDict,
 ]
 ]
 ] = None,
 agent_info: typing.Optional[vertexai._genai.types.AgentInfo] = None,
 labels: typing.Optional[dict[str, str]] = None,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.CreateEvaluationRunConfig,
 vertexai._genai.types.CreateEvaluationRunConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationRun

Creates an EvaluationRun.

create_evaluation_set

create_evaluation_set(
 *,
 evaluation_items: list[str],
 display_name: typing.Optional[str] = None,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.CreateEvaluationSetConfig,
 vertexai._genai.types.CreateEvaluationSetConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationSet

Creates an EvaluationSet.

evaluate

evaluate(
 *,
 dataset: typing.Union[
 vertexai._genai.types.EvaluationDataset,
 vertexai._genai.types.EvaluationDatasetDict,
 list[
 typing.Union[
 vertexai._genai.types.EvaluationDataset,
 vertexai._genai.types.EvaluationDatasetDict,
 ]
 ],
 ],
 metrics: typing.Optional[
 list[
 typing.Union[vertexai._genai.types.Metric, vertexai._genai.types.MetricDict]
 ]
 ] = None,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.EvaluateMethodConfig,
 vertexai._genai.types.EvaluateMethodConfigDict,
 ]
 ] = None,
 **kwargs
) -> vertexai._genai.types.EvaluationResult

Evaluates candidate responses in the provided dataset(s) using the specified metrics.

evaluate_instances

evaluate_instances(
 *, metric_config: vertexai._genai.types._EvaluateInstancesRequestParameters
) -> vertexai._genai.types.EvaluateInstancesResponse

Evaluates an instance of a model.

generate_rubrics

generate_rubrics(
 *,
 src: typing.Union[str, pd.DataFrame, vertexai._genai.types.EvaluationDataset],
 rubric_group_name: str,
 prompt_template: typing.Optional[str] = None,
 generator_model_config: typing.Optional[genai_types.AutoraterConfigOrDict] = None,
 rubric_content_type: typing.Optional[types.RubricContentType] = None,
 rubric_type_ontology: typing.Optional[list[str]] = None,
 predefined_spec_name: typing.Optional[
 typing.Union[str, types.PrebuiltMetric]
 ] = None,
 metric_spec_parameters: typing.Optional[dict[str, typing.Any]] = None,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.RubricGenerationConfig,
 vertexai._genai.types.RubricGenerationConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationDataset

Generates rubrics for each prompt in the source and adds them as a new column structured as a dictionary.

You can generate rubrics by providing either:

  1. A predefined_spec_name to use a Vertex AI backend recipe.
  2. A prompt_template along with other configuration parameters (generator_model_config, rubric_content_type, rubric_type_ontology) for custom rubric generation.

These two modes are mutually exclusive.

get_evaluation_item

get_evaluation_item(
 *,
 name: str,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.GetEvaluationItemConfig,
 vertexai._genai.types.GetEvaluationItemConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationItem

Retrieves an EvaluationItem by its resource name.

get_evaluation_run

get_evaluation_run(
 *,
 name: str,
 include_evaluation_items: bool = False,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.GetEvaluationRunConfig,
 vertexai._genai.types.GetEvaluationRunConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationRun

Retrieves an EvaluationRun by its resource name.

get_evaluation_set

get_evaluation_set(
 *,
 name: str,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.GetEvaluationSetConfig,
 vertexai._genai.types.GetEvaluationSetConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationSet

Retrieves an EvaluationSet by its resource name.

run

run() -> vertexai._genai.types.EvaluateInstancesResponse

Evaluates an instance of a model.

Note: This method is expected to delegate to ``_evaluate_instances()``.

run_inference

run_inference(
 *,
 src: typing.Union[
 str, pandas.core.frame.DataFrame, vertexai._genai.types.EvaluationDataset
 ],
 model: typing.Optional[
 typing.Union[str, typing.Callable[[typing.Any], typing.Any]]
 ] = None,
 agent: typing.Optional[typing.Union[str, vertexai._genai.types.AgentEngine]] = None,
 config: typing.Optional[
 typing.Union[
 vertexai._genai.types.EvalRunInferenceConfig,
 vertexai._genai.types.EvalRunInferenceConfigDict,
 ]
 ] = None
) -> vertexai._genai.types.EvaluationDataset

Runs inference on a dataset for evaluation.

Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.

Last updated 2025-10-30 UTC.