# Source code for mlflow.metrics.genai.base

from dataclasses import dataclass
from typing import Dict, Optional, Union

from mlflow.metrics.genai.prompt_template import PromptTemplate
from mlflow.utils.annotations import experimental


@experimental
@dataclass
class EvaluationExample:
    """
    Stores a single graded example used for few-shot learning during LLM evaluation.

    Args:
        input: The input provided to the model. Optional; defaults to ``None``.
        output: The output generated by the model.
        score: The score given by the evaluator.
        justification: The justification given by the evaluator.
        grading_context: The grading context provided to the evaluator for evaluation.
            Either a dictionary of grading context column names and grading context
            strings or a single grading context string. Optional; defaults to ``None``.

    .. code-block:: python
        :caption: Example for creating an EvaluationExample

        from mlflow.metrics.genai.base import EvaluationExample

        example = EvaluationExample(
            input="What is MLflow?",
            output="MLflow is an open-source platform for managing machine "
            "learning workflows, including experiment tracking, model packaging, "
            "versioning, and deployment, simplifying the ML lifecycle.",
            score=4,
            justification="The definition effectively explains what MLflow is, "
            "its purpose, and its developer. It could be more concise for a 5-score.",
            grading_context={
                "ground_truth": "MLflow is an open-source platform for managing "
                "the end-to-end machine learning (ML) lifecycle. It was developed by Databricks, "
                "a company that specializes in big data and machine learning solutions. MLflow is "
                "designed to address the challenges that data scientists and machine learning "
                "engineers face when developing, training, and deploying machine learning models."
            },
        )
        print(str(example))

    The rendered text is produced by ``PromptTemplate``. The sample below shows the
    section labels and field values that ``__str__`` supplies; indentation is trimmed
    and long lines are wrapped here for readability.

    .. code-block:: text
        :caption: Output (whitespace trimmed, long lines wrapped)

        Example Input:
        What is MLflow?

        Example Output:
        MLflow is an open-source platform for managing machine learning workflows,
        including experiment tracking, model packaging, versioning, and deployment,
        simplifying the ML lifecycle.

        Additional information used by the model:
        key: ground_truth
        value:
        MLflow is an open-source platform for managing the end-to-end machine learning
        (ML) lifecycle. It was developed by Databricks, a company that specializes in big
        data and machine learning solutions. MLflow is designed to address the challenges
        that data scientists and machine learning engineers face when developing, training,
        and deploying machine learning models.

        Example score: 4
        Example justification: The definition effectively explains what MLflow is, its
        purpose, and its developer. It could be more concise for a 5-score.
    """

    # Dataclass fields without defaults must precede fields with defaults, which is
    # why the optional ``input`` is declared after the required fields.
    output: str
    score: float
    justification: str
    input: Optional[str] = None
    grading_context: Optional[Union[Dict[str, str], str]] = None

    def _format_grading_context(self) -> Optional[str]:
        """
        Render ``grading_context`` as text for the prompt.

        Returns:
            For a dictionary, one ``key: <name>`` / ``value:`` / ``<text>`` entry per
            item, joined by newlines. Any other value (a plain string or ``None``) is
            returned unchanged.
        """
        context = self.grading_context
        if not isinstance(context, dict):
            return context
        return "\n".join(f"key: {key}\nvalue:\n{value}" for key, value in context.items())

    def __str__(self) -> str:
        """
        Render the example as prompt text.

        Returns:
            The result of formatting a ``PromptTemplate`` built from four sections
            (input, output, grading context, score with justification) using this
            example's field values.
        """
        # Each section is a separate template string, and the whitespace inside the
        # literals is part of the rendered prompt.
        # NOTE(review): presumably the per-section split lets PromptTemplate leave out
        # a section whose value is None (``input`` and ``grading_context`` are
        # optional) -- confirm against PromptTemplate.format.
        sections = [
            """
        Example Input:
        {input}
        """,
            """
        Example Output:
        {output}
        """,
            """
        Additional information used by the model:
        {grading_context}
        """,
            """
        Example score: {score}
        Example justification: {justification}
        """,
        ]
        return PromptTemplate(sections).format(
            input=self.input,
            output=self.output,
            grading_context=self._format_grading_context(),
            score=self.score,
            justification=self.justification,
        )