import logging
import os
import tempfile
from typing import Any, Optional, Union
import yaml
import mlflow
from mlflow import pyfunc
from mlflow.exceptions import MlflowException
from mlflow.llama_index.pyfunc_wrapper import create_pyfunc_wrapper
from mlflow.models import Model, ModelInputExample, ModelSignature
from mlflow.models.model import MLMODEL_FILE_NAME, MODEL_CODE_PATH, MODEL_CONFIG
from mlflow.models.signature import _infer_signature_from_input_example
from mlflow.models.utils import (
_load_model_code_path,
_save_example,
_validate_and_get_model_code_path,
)
from mlflow.tracing.provider import trace_disabled
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.utils.annotations import experimental
from mlflow.utils.autologging_utils import autologging_integration
from mlflow.utils.docstring_utils import LOG_MODEL_PARAM_DOCS, format_docstring
from mlflow.utils.environment import (
_CONDA_ENV_FILE_NAME,
_CONSTRAINTS_FILE_NAME,
_PYTHON_ENV_FILE_NAME,
_REQUIREMENTS_FILE_NAME,
_mlflow_conda_env,
_process_conda_env,
_process_pip_requirements,
_PythonEnv,
_validate_env_arguments,
)
from mlflow.utils.file_utils import get_total_file_size, write_to
from mlflow.utils.model_utils import (
_add_code_from_conf_to_system_path,
_get_flavor_configuration,
_validate_and_copy_code_paths,
_validate_and_copy_file_to_directory,
_validate_and_get_model_config_from_file,
_validate_and_prepare_target_save_path,
)
from mlflow.utils.requirements_utils import _get_pinned_requirement
FLAVOR_NAME = "llama_index"
_INDEX_PERSIST_FOLDER = "index"
_SETTINGS_FILE = "settings.json"
_logger = logging.getLogger(__name__)
[docs]def get_default_pip_requirements():
"""
Returns:
A list of default pip requirements for MLflow Models produced by this flavor.
Calls to :func:`save_model()` and :func:`log_model()` produce a pip environment
that, at a minimum, contains these requirements.
"""
return [_get_pinned_requirement("llama-index")]
[docs]def get_default_conda_env():
"""
Returns:
The default Conda environment for MLflow Models produced by calls to
:func:`save_model()` and :func:`log_model()`.
"""
return _mlflow_conda_env(additional_pip_deps=get_default_pip_requirements())
def _validate_engine_type(engine_type: str):
from mlflow.llama_index.pyfunc_wrapper import SUPPORTED_ENGINES
if engine_type not in SUPPORTED_ENGINES:
raise ValueError(
f"Currently mlflow only supports the following engine types: "
f"{SUPPORTED_ENGINES}. {engine_type} is not supported, so please "
"use one of the above types."
)
def _get_llama_index_version() -> str:
try:
import llama_index.core
return llama_index.core.__version__
except ImportError:
raise MlflowException(
"The llama_index module is not installed. "
"Please install it via `pip install llama-index`."
)
def _supported_classes():
from llama_index.core.base.base_query_engine import BaseQueryEngine
from llama_index.core.chat_engine.types import BaseChatEngine
from llama_index.core.indices.base import BaseIndex
from llama_index.core.retrievers import BaseRetriever
supported = (BaseIndex, BaseChatEngine, BaseQueryEngine, BaseRetriever)
try:
from llama_index.core.workflow import Workflow
supported += (Workflow,)
except ImportError:
pass
return supported
[docs]@experimental
@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
@trace_disabled # Suppress traces while loading model
def save_model(
llama_index_model,
path: str,
engine_type: Optional[str] = None,
model_config: Optional[Union[str, dict[str, Any]]] = None,
code_paths=None,
mlflow_model: Optional[Model] = None,
signature: Optional[ModelSignature] = None,
input_example: Optional[ModelInputExample] = None,
pip_requirements: Optional[Union[list[str], str]] = None,
extra_pip_requirements: Optional[Union[list[str], str]] = None,
conda_env=None,
metadata: Optional[dict[str, Any]] = None,
) -> None:
"""
Save a LlamaIndex model to a path on the local file system.
.. attention::
Saving a non-index object is only supported in the 'Model-from-Code' saving mode.
Please refer to the `Models From Code Guide <https://www.mlflow.org/docs/latest/model/models-from-code.html>`_
for more information.
.. note::
When logging a model, MLflow will automatically save the state of the ``Settings``
object so that you can use the same settings at inference time. However, please
note that some information in the ``Settings`` object will not be saved, including:
- API keys for avoiding key leakage.
- Function objects which are not serializable.
Args:
llama_index_model: A LlamaIndex object to be saved. Supported model types are:
1. An Index object.
2. An Engine object e.g. ChatEngine, QueryEngine, Retriever.
3. A `Workflow <https://docs.llamaindex.ai/en/stable/module_guides/workflow/>`_ object.
4. A string representing the path to a script contains LlamaIndex model definition
of the one of the above types.
path: Local path where the serialized model (as YAML) is to be saved.
engine_type: Required when saving an Index object to determine the inference interface
for the index when loaded as a pyfunc model. This field is **not** required when
saving other LlamaIndex objects. The supported values are as follows:
- ``"chat"``: load the index as an instance of the LlamaIndex
`ChatEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/>`_.
- ``"query"``: load the index as an instance of the LlamaIndex
`QueryEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/>`_.
- ``"retriever"``: load the index as an instance of the LlamaIndex
`Retriever <https://docs.llamaindex.ai/en/stable/module_guides/querying/retriever/>`_.
model_config: The model configuration to apply when loading the model back with
``mlflow.pyfunc.load_model()``. It will be applied in a different way depending on the
model type and saving method. See the docstring of :func:`log_model` for more details
and usage examples.
code_paths: {{ code_paths }}
mlflow_model: An MLflow model object that specifies the flavor that this model is being
added to.
signature: A Model Signature object that describes the input and output Schema of the
model. The model signature can be inferred using ``infer_signature`` function
of ``mlflow.models.signature``.
input_example: {{ input_example }}
pip_requirements: {{ pip_requirements }}
extra_pip_requirements: {{ extra_pip_requirements }}
conda_env: {{ conda_env }}
metadata: {{ metadata }}
"""
from llama_index.core.indices.base import BaseIndex
from mlflow.llama_index.serialize_objects import serialize_settings
# TODO: make this logic cleaner and maybe a util
with tempfile.TemporaryDirectory() as temp_dir:
model_or_code_path = _validate_and_prepare_llama_index_model_or_path(
llama_index_model, temp_dir
)
_validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)
path = os.path.abspath(path)
_validate_and_prepare_target_save_path(path)
if isinstance(model_config, str):
model_config = _validate_and_get_model_config_from_file(model_config)
model_code_path = None
if isinstance(model_or_code_path, str):
model_code_path = model_or_code_path
llama_index_model = _load_model_code_path(model_code_path, model_config)
_validate_and_copy_file_to_directory(model_code_path, path, "code")
# Warn when user provides `engine_type` argument while saving an engine directly
if not isinstance(llama_index_model, BaseIndex) and engine_type is not None:
_logger.warning(
"The `engine_type` argument is ignored when saving a non-index object."
)
elif isinstance(model_or_code_path, BaseIndex):
_validate_engine_type(engine_type)
llama_index_model = model_or_code_path
elif isinstance(model_or_code_path, _supported_classes()):
raise MlflowException.invalid_parameter_value(
"Saving a non-index object is only supported in the 'Model-from-Code' saving mode. "
"The legacy serialization method is exclusively for saving index objects. Please "
"pass the path to the script containing the model definition to save a non-index "
"object. For more information, see "
"https://www.mlflow.org/docs/latest/model/models-from-code.html",
)
code_dir_subpath = _validate_and_copy_code_paths(code_paths, path)
if mlflow_model is None:
mlflow_model = Model()
saved_example = _save_example(mlflow_model, input_example, path)
if signature is None and saved_example is not None:
wrapped_model = create_pyfunc_wrapper(llama_index_model, engine_type, model_config)
signature = _infer_signature_from_input_example(saved_example, wrapped_model)
elif signature is False:
signature = None
if mlflow_model is None:
mlflow_model = Model()
if signature is not None:
mlflow_model.signature = signature
if metadata is not None:
mlflow_model.metadata = metadata
# NB: llama_index.core.Settings is a singleton that manages the storage/service context
# for a given llama_index application. Given it holds the required objects for most of
# the index's functionality, we look to serialize the entire object. For components of
# the object that are not serializable, we log a warning.
settings_path = os.path.join(path, _SETTINGS_FILE)
serialize_settings(settings_path)
# Do not save the index/engine object in model-from-code saving mode
if not isinstance(model_code_path, str) and isinstance(llama_index_model, BaseIndex):
_save_index(llama_index_model, path)
pyfunc.add_to_model(
mlflow_model,
loader_module="mlflow.llama_index",
conda_env=_CONDA_ENV_FILE_NAME,
python_env=_PYTHON_ENV_FILE_NAME,
code=code_dir_subpath,
model_code_path=model_code_path,
model_config=model_config,
)
mlflow_model.add_flavor(
FLAVOR_NAME,
llama_index_version=_get_llama_index_version(),
code=code_dir_subpath,
engine_type=engine_type,
)
if size := get_total_file_size(path):
mlflow_model.model_size_bytes = size
mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
if conda_env is None:
default_reqs = None
if pip_requirements is None:
default_reqs = get_default_pip_requirements()
inferred_reqs = mlflow.models.infer_pip_requirements(
str(path), FLAVOR_NAME, fallback=default_reqs
)
default_reqs = sorted(set(inferred_reqs).union(default_reqs))
else:
default_reqs = None
conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
default_reqs,
pip_requirements,
extra_pip_requirements,
)
else:
conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)
with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
yaml.safe_dump(conda_env, stream=f, default_flow_style=False)
if pip_constraints:
write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))
write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
_PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
[docs]@experimental
@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
@trace_disabled # Suppress traces while loading model
def log_model(
llama_index_model,
artifact_path: str,
engine_type: Optional[str] = None,
model_config: Optional[dict[str, Any]] = None,
code_paths: Optional[list[str]] = None,
registered_model_name: Optional[str] = None,
signature: Optional[ModelSignature] = None,
input_example: Optional[ModelInputExample] = None,
await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
pip_requirements: Optional[Union[list[str], str]] = None,
extra_pip_requirements: Optional[Union[list[str], str]] = None,
conda_env=None,
metadata: Optional[dict[str, Any]] = None,
**kwargs,
):
"""
Log a LlamaIndex model as an MLflow artifact for the current run.
.. attention::
Saving a non-index object is only supported in the 'Model-from-Code' saving mode.
Please refer to the `Models From Code Guide <https://www.mlflow.org/docs/latest/model/models-from-code.html>`_
for more information.
.. note::
When logging a model, MLflow will automatically save the state of the ``Settings``
object so that you can use the same settings at inference time. However, please
note that some information in the ``Settings`` object will not be saved, including:
- API keys for avoiding key leakage.
- Function objects which are not serializable.
Args:
llama_index_model: A LlamaIndex object to be saved. Supported model types are:
1. An Index object.
2. An Engine object e.g. ChatEngine, QueryEngine, Retriever.
3. A `Workflow <https://docs.llamaindex.ai/en/stable/module_guides/workflow/>`_ object.
4. A string representing the path to a script contains LlamaIndex model definition
of the one of the above types.
artifact_path: Local path where the serialized model (as YAML) is to be saved.
engine_type: Required when saving an Index object to determine the inference interface
for the index when loaded as a pyfunc model. This field is **not** required when
saving other LlamaIndex objects. The supported values are as follows:
- ``"chat"``: load the index as an instance of the LlamaIndex
`ChatEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/>`_.
- ``"query"``: load the index as an instance of the LlamaIndex
`QueryEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/>`_.
- ``"retriever"``: load the index as an instance of the LlamaIndex
`Retriever <https://docs.llamaindex.ai/en/stable/module_guides/querying/retriever/>`_.
model_config: The model configuration to apply when loading the model back with
``mlflow.pyfunc.load_model()``. It will be applied in a different way depending on the
model type and saving method:
For in-memory Index objects saved directly, it will be passed as keyword arguments to
instantiate the LlamaIndex engine with the specified engine type at logging.
.. code-block:: python
with mlflow.start_run() as run:
model_info = mlflow.llama_index.log_model(
index,
artifact_path="index",
engine_type="chat",
model_config={"top_k": 10},
)
# When loading back, MLflow will call ``index.as_chat_engine(top_k=10)``
engine = mlflow.pyfunc.load_model(model_info.model_uri)
For other model types saved with the `Model-from-Code <https://www.mlflow.org/docs/latest/model/models-from-code.html>`
method, the config will be accessed via the :py:class`~mlflow.models.ModelConfig`
object within your model code.
.. code-block:: python
with mlflow.start_run() as run:
model_info = mlflow.llama_index.log_model(
"model.py",
artifact_path="model",
model_config={"qdrant_host": "localhost", "qdrant_port": 6333},
)
model.py:
.. code-block:: python
import mlflow
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client
# The model configuration is accessible via the ModelConfig singleton
model_config = mlflow.models.ModelConfig()
qdrant_host = model_config.get("top_k", 5)
qdrant_port = model_config.get("qdrant_port", 6333)
client = qdrant_client.Client(host=qdrant_host, port=qdrant_port)
vectorstore = QdrantVectorStore(client)
# the rest of the model definition...
code_paths: {{ code_paths }}
registered_model_name: This argument may change or be removed in a
future release without warning. If given, create a model
version under ``registered_model_name``, also creating a
registered model if one with the given name does not exist.
signature: A Model Signature object that describes the input and output Schema of the
model. The model signature can be inferred using ``infer_signature`` function
of `mlflow.models.signature`.
input_example: {{ input_example }}
await_registration_for: Number of seconds to wait for the model version
to finish being created and is in ``READY`` status.
By default, the function waits for five minutes.
Specify 0 or None to skip waiting.
pip_requirements: {{ pip_requirements }}
extra_pip_requirements: {{ extra_pip_requirements }}
conda_env: {{ conda_env }}
metadata: {{ metadata }}
kwargs: Additional arguments for :py:class:`mlflow.models.model.Model`
"""
return Model.log(
artifact_path=artifact_path,
engine_type=engine_type,
model_config=model_config,
flavor=mlflow.llama_index,
registered_model_name=registered_model_name,
llama_index_model=llama_index_model,
conda_env=conda_env,
code_paths=code_paths,
signature=signature,
input_example=input_example,
await_registration_for=await_registration_for,
pip_requirements=pip_requirements,
extra_pip_requirements=extra_pip_requirements,
metadata=metadata,
**kwargs,
)
def _validate_and_prepare_llama_index_model_or_path(llama_index_model, temp_dir=None):
if isinstance(llama_index_model, str):
return _validate_and_get_model_code_path(llama_index_model, temp_dir)
if not isinstance(llama_index_model, _supported_classes()):
supported_cls_names = [cls.__name__ for cls in _supported_classes()]
raise MlflowException.invalid_parameter_value(
message=f"The provided object of type {type(llama_index_model).__name__} is not "
"supported. MLflow llama-index flavor only supports saving LlamaIndex objects "
f"subclassed from one of the following classes: {supported_cls_names}.",
)
return llama_index_model
def _save_index(index, path):
"""Serialize the index."""
index_path = os.path.join(path, _INDEX_PERSIST_FOLDER)
index.storage_context.persist(persist_dir=index_path)
def _load_llama_model(path, flavor_conf):
"""Load the LlamaIndex index/engine/workflow from either model code or serialized index."""
from llama_index.core import StorageContext, load_index_from_storage
_add_code_from_conf_to_system_path(path, flavor_conf)
# Handle model-from-code
pyfunc_flavor_conf = _get_flavor_configuration(model_path=path, flavor_name=pyfunc.FLAVOR_NAME)
if model_code_path := pyfunc_flavor_conf.get(MODEL_CODE_PATH):
# TODO: The code path saved in the MLModel file is the local absolute path to the code
# file when it is saved. We should update the relative path in artifact directory.
model_code_path = os.path.join(path, os.path.basename(model_code_path))
model_config = pyfunc_flavor_conf.get(MODEL_CONFIG) or flavor_conf.get(MODEL_CONFIG, {})
if isinstance(model_config, str):
config_path = os.path.join(path, os.path.basename(model_config))
model_config = _validate_and_get_model_config_from_file(config_path)
return _load_model_code_path(model_code_path, model_config)
else:
# Use default vector store when loading from the serialized index
index_path = os.path.join(path, _INDEX_PERSIST_FOLDER)
storage_context = StorageContext.from_defaults(persist_dir=index_path)
return load_index_from_storage(storage_context)
[docs]@experimental
@trace_disabled # Suppress traces while loading model
def load_model(model_uri, dst_path=None):
"""
Load a LlamaIndex index/engine/workflow from a local file or a run.
Args:
model_uri: The location, in URI format, of the MLflow model. For example:
- ``/Users/me/path/to/local/model``
- ``relative/path/to/local/model``
- ``s3://my_bucket/path/to/model``
- ``runs:/<mlflow_run_id>/run-relative/path/to/model``
- ``mlflow-artifacts:/path/to/model``
For more information about supported URI schemes, see
`Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
artifact-locations>`_.
dst_path: The local filesystem path to utilize for downloading the model artifact.
This directory must already exist if provided. If unspecified, a local output
path will be created.
Returns:
A LlamaIndex index object.
"""
from mlflow.llama_index.serialize_objects import deserialize_settings
local_model_path = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
settings_path = os.path.join(local_model_path, _SETTINGS_FILE)
# NB: Settings is a singleton and can be loaded via llama_index.core.Settings
deserialize_settings(settings_path)
return _load_llama_model(local_model_path, flavor_conf)
def _load_pyfunc(path, model_config: Optional[dict[str, Any]] = None):
from mlflow.llama_index.pyfunc_wrapper import create_pyfunc_wrapper
index = load_model(path)
flavor_conf = _get_flavor_configuration(model_path=path, flavor_name=FLAVOR_NAME)
engine_type = flavor_conf.pop(
"engine_type", None
) # Not present when saving an non-index object
return create_pyfunc_wrapper(index, engine_type, model_config)
[docs]@experimental
def autolog(
log_traces: bool = True,
disable: bool = False,
silent: bool = False,
):
"""
Enables (or disables) and configures autologging from LlamaIndex to MLflow. Currently, MLflow
only supports autologging for tracing.
Args:
log_traces: If ``True``, traces are logged for LlamaIndex models by using. If ``False``,
no traces are collected during inference. Default to ``True``.
disable: If ``True``, disables the LlamaIndex autologging integration. If ``False``,
enables the LlamaIndex autologging integration.
silent: If ``True``, suppress all event logs and warnings from MLflow during LlamaIndex
autologging. If ``False``, show all events and warnings.
"""
from mlflow.llama_index.tracer import remove_llama_index_tracer, set_llama_index_tracer
# NB: The @autologging_integration annotation is used for adding shared logic. However, one
# caveat is that the wrapped function is NOT executed when disable=True is passed. This prevents
# us from running cleaning up logging when autologging is turned off. To workaround this, we
# annotate _autolog() instead of this entrypoint, and define the cleanup logic outside it.
# TODO: since this implementation is inconsistent, explore a universal way to solve the issue.
if log_traces and not disable:
set_llama_index_tracer()
else:
remove_llama_index_tracer()
_autolog(log_traces=log_traces, disable=disable, silent=silent)
# This is required by mlflow.autolog()
autolog.integration_name = FLAVOR_NAME
@autologging_integration(FLAVOR_NAME)
def _autolog(
log_traces: bool,
disable: bool = False,
silent: bool = False,
):
"""
TODO: Implement patching logic for autologging models and artifacts.
"""