import hashlib
import json
from typing import Any, Optional, Union
from mlflow.entities._mlflow_object import _MlflowObject
from mlflow.entities.evaluation import Evaluation as EvaluationEntity
from mlflow.entities.evaluation_tag import EvaluationTag # Assuming EvaluationTag is in this module
from mlflow.entities.metric import Metric
from mlflow.evaluation.assessment import Assessment
from mlflow.tracing.utils import TraceJSONEncoder
from mlflow.utils.annotations import experimental
[docs]@experimental
class Evaluation(_MlflowObject):
"""
Evaluation result data.
"""
def __init__(
self,
inputs: dict[str, Any],
outputs: Optional[dict[str, Any]] = None,
inputs_id: Optional[str] = None,
request_id: Optional[str] = None,
targets: Optional[dict[str, Any]] = None,
error_code: Optional[str] = None,
error_message: Optional[str] = None,
assessments: Optional[list[Assessment]] = None,
metrics: Optional[Union[dict[str, float], list[Metric]]] = None,
tags: Optional[dict[str, str]] = None,
):
"""
Construct a new Evaluation instance.
Args:
inputs: Input names and values for evaluation.
outputs: Outputs obtained during inference.
inputs_id: A unique identifier for the input names and values for evaluation.
request_id: The ID of an MLflow Trace corresponding to the inputs and outputs.
targets: Expected values that the model should produce during inference.
error_code: An error code representing any issues encountered during the evaluation.
error_message: A descriptive error message representing any issues encountered during
the evaluation.
assessments: Assessments for the evaluation.
metrics: Objective numerical metrics for the evaluation, e.g., "number of input tokens",
"number of output tokens".
tags: Dictionary of tags associated with the evaluation.
"""
if isinstance(metrics, dict):
metrics = [
Metric(key=key, value=value, timestamp=0, step=0) for key, value in metrics.items()
]
if isinstance(tags, dict):
tags = [EvaluationTag(key=str(key), value=str(value)) for key, value in tags.items()]
self._inputs = inputs
self._outputs = outputs
self._inputs_id = inputs_id or _generate_inputs_id(inputs)
self._request_id = request_id
self._targets = targets
self._error_code = error_code
self._error_message = error_message
self._assessments = assessments
self._metrics = metrics
self._tags = tags
@property
def inputs_id(self) -> str:
"""The evaluation inputs ID."""
return self._inputs_id
@property
def inputs(self) -> dict[str, Any]:
"""The evaluation inputs."""
return self._inputs
@property
def outputs(self) -> Optional[dict[str, Any]]:
"""The evaluation outputs."""
return self._outputs
@property
def request_id(self) -> Optional[str]:
"""The evaluation request ID."""
return self._request_id
@property
def targets(self) -> Optional[dict[str, Any]]:
"""The evaluation targets."""
return self._targets
@property
def error_code(self) -> Optional[str]:
"""The evaluation error code."""
return self._error_code
@property
def error_message(self) -> Optional[str]:
"""The evaluation error message."""
return self._error_message
@property
def assessments(self) -> Optional[list[Assessment]]:
"""The evaluation assessments."""
return self._assessments
@property
def metrics(self) -> Optional[list[Metric]]:
"""The evaluation metrics."""
return self._metrics
@property
def tags(self) -> Optional[dict[str, str]]:
"""The evaluation tags."""
return self._tags
def __eq__(self, __o):
if isinstance(__o, self.__class__):
return self.to_dictionary() == __o.to_dictionary()
return False
def _to_entity(self, run_id: str, evaluation_id: str) -> EvaluationEntity:
"""
Convert the Evaluation object to an EvaluationEntity object.
Returns:
EvaluationEntity: An EvaluationEntity object.
"""
return EvaluationEntity(
evaluation_id=evaluation_id,
run_id=run_id,
inputs_id=self.inputs_id,
inputs=self.inputs,
outputs=self.outputs,
request_id=self.request_id,
targets=self.targets,
error_code=self.error_code,
error_message=self.error_message,
assessments=[assess._to_entity(evaluation_id) for assess in self.assessments]
if self.assessments
else None,
metrics=self.metrics,
tags=self.tags,
)
[docs] def to_dictionary(self) -> dict[str, Any]:
"""
Convert the Evaluation object to a dictionary.
Returns:
dict: The Evaluation object represented as a dictionary.
"""
evaluation_dict = {
"inputs_id": self.inputs_id,
"inputs": self.inputs,
"outputs": self.outputs,
"request_id": self.request_id,
"targets": self.targets,
"error_code": self.error_code,
"error_message": self.error_message,
"assessments": [assess.to_dictionary() for assess in self.assessments]
if self.assessments
else None,
"metrics": [metric.to_dictionary() for metric in self.metrics]
if self.metrics
else None,
"tags": [tag.to_dictionary() for tag in self.tags] if self.tags else None,
}
return {k: v for k, v in evaluation_dict.items() if v is not None}
[docs] @classmethod
def from_dictionary(cls, evaluation_dict: dict[str, Any]):
"""
Create an Evaluation object from a dictionary.
Args:
evaluation_dict (dict): Dictionary containing evaluation information.
Returns:
Evaluation: The Evaluation object created from the dictionary.
"""
assessments = None
if "assessments" in evaluation_dict:
assessments = [
Assessment.from_dictionary(assess) for assess in evaluation_dict["assessments"]
]
metrics = None
if "metrics" in evaluation_dict:
metrics = [Metric.from_dictionary(metric) for metric in evaluation_dict["metrics"]]
tags = None
if "tags" in evaluation_dict:
tags = [EvaluationTag(tag["key"], tag["value"]) for tag in evaluation_dict["tags"]]
return cls(
inputs_id=evaluation_dict["inputs_id"],
inputs=evaluation_dict["inputs"],
outputs=evaluation_dict.get("outputs"),
request_id=evaluation_dict.get("request_id"),
targets=evaluation_dict.get("targets"),
error_code=evaluation_dict.get("error_code"),
error_message=evaluation_dict.get("error_message"),
assessments=assessments,
metrics=metrics,
tags=tags,
)
def _generate_inputs_id(inputs: dict[str, Any]) -> str:
"""
Generates a unique identifier for the inputs.
Args:
inputs (Dict[str, Any]): Input fields used by the model to compute outputs.
Returns:
str: A unique identifier for the inputs.
"""
inputs_json = json.dumps(inputs, sort_keys=True, cls=TraceJSONEncoder)
return hashlib.sha256(inputs_json.encode("utf-8")).hexdigest()