# Source code for mlflow.llama_index

import logging
import os
import tempfile
from typing import Any, Optional, Union

import yaml

import mlflow
from mlflow import pyfunc
from mlflow.exceptions import MlflowException
from mlflow.llama_index.pyfunc_wrapper import create_pyfunc_wrapper
from mlflow.models import Model, ModelInputExample, ModelSignature
from mlflow.models.model import MLMODEL_FILE_NAME, MODEL_CODE_PATH, MODEL_CONFIG
from mlflow.models.signature import _infer_signature_from_input_example
from mlflow.models.utils import (
    _load_model_code_path,
    _save_example,
    _validate_and_get_model_code_path,
)
from mlflow.tracing.provider import trace_disabled
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.utils.annotations import experimental
from mlflow.utils.autologging_utils import autologging_integration
from mlflow.utils.docstring_utils import LOG_MODEL_PARAM_DOCS, format_docstring
from mlflow.utils.environment import (
    _CONDA_ENV_FILE_NAME,
    _CONSTRAINTS_FILE_NAME,
    _PYTHON_ENV_FILE_NAME,
    _REQUIREMENTS_FILE_NAME,
    _mlflow_conda_env,
    _process_conda_env,
    _process_pip_requirements,
    _PythonEnv,
    _validate_env_arguments,
)
from mlflow.utils.file_utils import get_total_file_size, write_to
from mlflow.utils.model_utils import (
    _add_code_from_conf_to_system_path,
    _get_flavor_configuration,
    _validate_and_copy_code_paths,
    _validate_and_copy_file_to_directory,
    _validate_and_get_model_config_from_file,
    _validate_and_prepare_target_save_path,
)
from mlflow.utils.requirements_utils import _get_pinned_requirement

# Name under which this flavor is registered in the MLmodel file (see ``add_flavor``
# in ``save_model``) and used to look the flavor configuration back up at load time.
FLAVOR_NAME = "llama_index"
# Sub-directory of the model path where a serialized index's storage context is persisted.
_INDEX_PERSIST_FOLDER = "index"
# File, relative to the model path, that holds the serialized ``Settings`` state.
_SETTINGS_FILE = "settings.json"


# Module-level logger for this flavor.
_logger = logging.getLogger(__name__)


def get_default_pip_requirements():
    """
    Build the baseline pip requirements for models produced by this flavor.

    Returns:
        A single-element list holding the pinned ``llama-index`` requirement. Calls to
        :func:`save_model()` and :func:`log_model()` produce a pip environment that, at
        a minimum, contains these requirements.
    """
    pinned_llama_index = _get_pinned_requirement("llama-index")
    return [pinned_llama_index]
def get_default_conda_env():
    """
    Build the default Conda environment for models produced by this flavor.

    Returns:
        The default Conda environment for MLflow Models produced by calls to
        :func:`save_model()` and :func:`log_model()`, with the flavor's default pip
        requirements added as additional pip dependencies.
    """
    pip_deps = get_default_pip_requirements()
    return _mlflow_conda_env(additional_pip_deps=pip_deps)
def _validate_engine_type(engine_type: str):
    """
    Ensure ``engine_type`` is one of the engine types supported by the pyfunc wrapper.

    Args:
        engine_type: The engine type requested by the caller.

    Raises:
        ValueError: If ``engine_type`` is not a member of ``SUPPORTED_ENGINES``.
    """
    from mlflow.llama_index.pyfunc_wrapper import SUPPORTED_ENGINES

    if engine_type not in SUPPORTED_ENGINES:
        raise ValueError(
            "Currently mlflow only supports the following engine types: "
            f"{SUPPORTED_ENGINES}. {engine_type} is not supported, so please "
            "use one of the above types."
        )


def _get_llama_index_version() -> str:
    """
    Return the version string of the installed ``llama_index.core`` package.

    Raises:
        MlflowException: If ``llama_index.core`` cannot be imported. The original
            ``ImportError`` is attached as the cause.
    """
    # Keep the ``try`` body limited to the import, the only statement expected to fail.
    try:
        import llama_index.core
    except ImportError as e:
        raise MlflowException(
            "The llama_index module is not installed. "
            "Please install it via `pip install llama-index`."
        ) from e

    return llama_index.core.__version__


def _supported_classes():
    """
    Return the tuple of LlamaIndex base classes this flavor accepts.

    ``Workflow`` is appended only when ``llama_index.core.workflow`` can be imported.
    """
    from llama_index.core.base.base_query_engine import BaseQueryEngine
    from llama_index.core.chat_engine.types import BaseChatEngine
    from llama_index.core.indices.base import BaseIndex
    from llama_index.core.retrievers import BaseRetriever

    supported = (BaseIndex, BaseChatEngine, BaseQueryEngine, BaseRetriever)

    try:
        from llama_index.core.workflow import Workflow

        supported += (Workflow,)
    except ImportError:
        # Deliberately best-effort: the workflow module is treated as optional.
        pass

    return supported
@experimental
@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
@trace_disabled  # Suppress traces while loading model
def save_model(
    llama_index_model,
    path: str,
    engine_type: Optional[str] = None,
    model_config: Optional[Union[str, dict[str, Any]]] = None,
    code_paths=None,
    mlflow_model: Optional[Model] = None,
    signature: Optional[ModelSignature] = None,
    input_example: Optional[ModelInputExample] = None,
    pip_requirements: Optional[Union[list[str], str]] = None,
    extra_pip_requirements: Optional[Union[list[str], str]] = None,
    conda_env=None,
    metadata: Optional[dict[str, Any]] = None,
) -> None:
    """
    Save a LlamaIndex model to a path on the local file system.

    .. attention::

        Saving a non-index object is only supported in the 'Model-from-Code' saving mode.
        Please refer to the `Models From Code Guide
        <https://www.mlflow.org/docs/latest/model/models-from-code.html>`_
        for more information.

    .. note::

        When logging a model, MLflow will automatically save the state of the ``Settings``
        object so that you can use the same settings at inference time. However, please
        note that some information in the ``Settings`` object will not be saved, including:

            - API keys for avoiding key leakage.
            - Function objects which are not serializable.

    Args:
        llama_index_model: A LlamaIndex object to be saved. Supported model types are:

            1. An Index object.
            2. An Engine object e.g. ChatEngine, QueryEngine, Retriever.
            3. A `Workflow <https://docs.llamaindex.ai/en/stable/module_guides/workflow/>`_
               object.
            4. A string representing the path to a script that contains a LlamaIndex model
               definition of one of the above types.

        path: Local path where the serialized model (as YAML) is to be saved.
        engine_type: Required when saving an Index object to determine the inference interface
            for the index when loaded as a pyfunc model. This field is **not** required when
            saving other LlamaIndex objects. The supported values are as follows:

            - ``"chat"``: load the index as an instance of the LlamaIndex
              `ChatEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/>`_.
            - ``"query"``: load the index as an instance of the LlamaIndex
              `QueryEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/>`_.
            - ``"retriever"``: load the index as an instance of the LlamaIndex
              `Retriever <https://docs.llamaindex.ai/en/stable/module_guides/querying/retriever/>`_.

        model_config: The model configuration to apply when loading the model back with
            ``mlflow.pyfunc.load_model()``. It will be applied in a different way depending
            on the model type and saving method. See the docstring of :func:`log_model` for
            more details and usage examples.
        code_paths: {{ code_paths }}
        mlflow_model: An MLflow model object that specifies the flavor that this model is being
            added to.
        signature: A Model Signature object that describes the input and output Schema of the
            model. The model signature can be inferred using ``infer_signature`` function
            of ``mlflow.models.signature``.
        input_example: {{ input_example }}
        pip_requirements: {{ pip_requirements }}
        extra_pip_requirements: {{ extra_pip_requirements }}
        conda_env: {{ conda_env }}
        metadata: {{ metadata }}
    """
    from llama_index.core.indices.base import BaseIndex

    from mlflow.llama_index.serialize_objects import serialize_settings

    # TODO: make this logic cleaner and maybe a util
    # The temporary directory is handed to the code-path validation helper, so it is kept
    # alive until the model code has been loaded and copied into the target directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        model_or_code_path = _validate_and_prepare_llama_index_model_or_path(
            llama_index_model, temp_dir
        )

        _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

        path = os.path.abspath(path)
        _validate_and_prepare_target_save_path(path)

        # A string model_config is a path to a config file; resolve it to its contents.
        if isinstance(model_config, str):
            model_config = _validate_and_get_model_config_from_file(model_config)

        model_code_path = None
        if isinstance(model_or_code_path, str):
            # Model-from-code: load the object defined by the script and ship the script.
            model_code_path = model_or_code_path
            llama_index_model = _load_model_code_path(model_code_path, model_config)
            _validate_and_copy_file_to_directory(model_code_path, path, "code")

            # Warn when user provides `engine_type` argument while saving an engine directly
            if not isinstance(llama_index_model, BaseIndex) and engine_type is not None:
                _logger.warning(
                    "The `engine_type` argument is ignored when saving a non-index object."
                )
        elif isinstance(model_or_code_path, BaseIndex):
            _validate_engine_type(engine_type)
            llama_index_model = model_or_code_path
        elif isinstance(model_or_code_path, _supported_classes()):
            raise MlflowException.invalid_parameter_value(
                "Saving a non-index object is only supported in the 'Model-from-Code' saving mode. "
                "The legacy serialization method is exclusively for saving index objects. Please "
                "pass the path to the script containing the model definition to save a non-index "
                "object. For more information, see "
                "https://www.mlflow.org/docs/latest/model/models-from-code.html",
            )

    code_dir_subpath = _validate_and_copy_code_paths(code_paths, path)

    if mlflow_model is None:
        mlflow_model = Model()
    saved_example = _save_example(mlflow_model, input_example, path)

    if signature is None and saved_example is not None:
        # No explicit signature: infer one by running the example through the pyfunc wrapper.
        wrapped_model = create_pyfunc_wrapper(llama_index_model, engine_type, model_config)
        signature = _infer_signature_from_input_example(saved_example, wrapped_model)
    elif signature is False:
        # ``False`` explicitly opts out of attaching a signature.
        signature = None

    if signature is not None:
        mlflow_model.signature = signature
    if metadata is not None:
        mlflow_model.metadata = metadata

    # NB: llama_index.core.Settings is a singleton that manages the storage/service context
    # for a given llama_index application. Given it holds the required objects for most of
    # the index's functionality, we look to serialize the entire object. For components of
    # the object that are not serializable, we log a warning.
    settings_path = os.path.join(path, _SETTINGS_FILE)
    serialize_settings(settings_path)

    # Do not save the index/engine object in model-from-code saving mode
    if not isinstance(model_code_path, str) and isinstance(llama_index_model, BaseIndex):
        _save_index(llama_index_model, path)

    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.llama_index",
        conda_env=_CONDA_ENV_FILE_NAME,
        python_env=_PYTHON_ENV_FILE_NAME,
        code=code_dir_subpath,
        model_code_path=model_code_path,
        model_config=model_config,
    )
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        llama_index_version=_get_llama_index_version(),
        code=code_dir_subpath,
        engine_type=engine_type,
    )
    if size := get_total_file_size(path):
        mlflow_model.model_size_bytes = size
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            # Merge the inferred requirements with the flavor defaults.
            default_reqs = get_default_pip_requirements()
            inferred_reqs = mlflow.models.infer_pip_requirements(
                str(path), FLAVOR_NAME, fallback=default_reqs
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))

    _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
@experimental
@format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
@trace_disabled  # Suppress traces while loading model
def log_model(
    llama_index_model,
    artifact_path: str,
    engine_type: Optional[str] = None,
    model_config: Optional[dict[str, Any]] = None,
    code_paths: Optional[list[str]] = None,
    registered_model_name: Optional[str] = None,
    signature: Optional[ModelSignature] = None,
    input_example: Optional[ModelInputExample] = None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    pip_requirements: Optional[Union[list[str], str]] = None,
    extra_pip_requirements: Optional[Union[list[str], str]] = None,
    conda_env=None,
    metadata: Optional[dict[str, Any]] = None,
    **kwargs,
):
    """
    Log a LlamaIndex model as an MLflow artifact for the current run.

    .. attention::

        Saving a non-index object is only supported in the 'Model-from-Code' saving mode.
        Please refer to the `Models From Code Guide
        <https://www.mlflow.org/docs/latest/model/models-from-code.html>`_
        for more information.

    .. note::

        When logging a model, MLflow will automatically save the state of the ``Settings``
        object so that you can use the same settings at inference time. However, please
        note that some information in the ``Settings`` object will not be saved, including:

            - API keys for avoiding key leakage.
            - Function objects which are not serializable.

    Args:
        llama_index_model: A LlamaIndex object to be saved. Supported model types are:

            1. An Index object.
            2. An Engine object e.g. ChatEngine, QueryEngine, Retriever.
            3. A `Workflow <https://docs.llamaindex.ai/en/stable/module_guides/workflow/>`_
               object.
            4. A string representing the path to a script that contains a LlamaIndex model
               definition of one of the above types.

        artifact_path: The artifact path under which the model is logged for the current
            run (e.g. ``"index"`` or ``"model"`` in the examples below).
        engine_type: Required when saving an Index object to determine the inference interface
            for the index when loaded as a pyfunc model. This field is **not** required when
            saving other LlamaIndex objects. The supported values are as follows:

            - ``"chat"``: load the index as an instance of the LlamaIndex
              `ChatEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/>`_.
            - ``"query"``: load the index as an instance of the LlamaIndex
              `QueryEngine <https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/>`_.
            - ``"retriever"``: load the index as an instance of the LlamaIndex
              `Retriever <https://docs.llamaindex.ai/en/stable/module_guides/querying/retriever/>`_.

        model_config: The model configuration to apply when loading the model back with
            ``mlflow.pyfunc.load_model()``. It will be applied in a different way depending
            on the model type and saving method:

            For in-memory Index objects saved directly, it will be passed as keyword
            arguments to instantiate the LlamaIndex engine with the specified engine type
            at logging.

            .. code-block:: python

                with mlflow.start_run() as run:
                    model_info = mlflow.llama_index.log_model(
                        index,
                        artifact_path="index",
                        engine_type="chat",
                        model_config={"top_k": 10},
                    )

                # When loading back, MLflow will call ``index.as_chat_engine(top_k=10)``
                engine = mlflow.pyfunc.load_model(model_info.model_uri)

            For other model types saved with the `Model-from-Code
            <https://www.mlflow.org/docs/latest/model/models-from-code.html>`_
            method, the config will be accessed via the
            :py:class:`~mlflow.models.ModelConfig` object within your model code.

            .. code-block:: python

                with mlflow.start_run() as run:
                    model_info = mlflow.llama_index.log_model(
                        "model.py",
                        artifact_path="model",
                        model_config={"qdrant_host": "localhost", "qdrant_port": 6333},
                    )

            model.py:

            .. code-block:: python

                import mlflow
                from llama_index.vector_stores.qdrant import QdrantVectorStore
                import qdrant_client

                # The model configuration is accessible via the ModelConfig singleton
                model_config = mlflow.models.ModelConfig()
                qdrant_host = model_config.get("qdrant_host")
                qdrant_port = model_config.get("qdrant_port")

                client = qdrant_client.Client(host=qdrant_host, port=qdrant_port)
                vectorstore = QdrantVectorStore(client)

                # the rest of the model definition...

        code_paths: {{ code_paths }}
        registered_model_name: This argument may change or be removed in a
            future release without warning. If given, create a model
            version under ``registered_model_name``, also creating a
            registered model if one with the given name does not exist.
        signature: A Model Signature object that describes the input and output Schema of the
            model. The model signature can be inferred using ``infer_signature`` function
            of ``mlflow.models.signature``.
        input_example: {{ input_example }}
        await_registration_for: Number of seconds to wait for the model version
            to finish being created and is in ``READY`` status.
            By default, the function waits for five minutes.
            Specify 0 or None to skip waiting.
        pip_requirements: {{ pip_requirements }}
        extra_pip_requirements: {{ extra_pip_requirements }}
        conda_env: {{ conda_env }}
        metadata: {{ metadata }}
        kwargs: Additional arguments for :py:class:`mlflow.models.model.Model`
    """
    return Model.log(
        artifact_path=artifact_path,
        engine_type=engine_type,
        model_config=model_config,
        flavor=mlflow.llama_index,
        registered_model_name=registered_model_name,
        llama_index_model=llama_index_model,
        conda_env=conda_env,
        code_paths=code_paths,
        signature=signature,
        input_example=input_example,
        await_registration_for=await_registration_for,
        pip_requirements=pip_requirements,
        extra_pip_requirements=extra_pip_requirements,
        metadata=metadata,
        **kwargs,
    )
def _validate_and_prepare_llama_index_model_or_path(llama_index_model, temp_dir=None):
    """
    Normalize the user-supplied model argument.

    A string is handed to the model-code-path validator (together with ``temp_dir``) and
    its result is returned. Any other object is returned unchanged if it is an instance
    of one of the supported LlamaIndex classes; otherwise an ``MlflowException`` is raised.
    """
    if isinstance(llama_index_model, str):
        return _validate_and_get_model_code_path(llama_index_model, temp_dir)

    if isinstance(llama_index_model, _supported_classes()):
        return llama_index_model

    allowed_names = [cls.__name__ for cls in _supported_classes()]
    raise MlflowException.invalid_parameter_value(
        message=f"The provided object of type {type(llama_index_model).__name__} is not "
        "supported. MLflow llama-index flavor only supports saving LlamaIndex objects "
        f"subclassed from one of the following classes: {allowed_names}.",
    )


def _save_index(index, path):
    """Persist the index's storage context into the index folder under ``path``."""
    persist_dir = os.path.join(path, _INDEX_PERSIST_FOLDER)
    index.storage_context.persist(persist_dir=persist_dir)


def _load_llama_model(path, flavor_conf):
    """
    Load the LlamaIndex index/engine/workflow stored under ``path``.

    When the pyfunc flavor configuration records a model code path, the object is
    rebuilt by executing that script; otherwise the index is restored from the
    persisted storage folder.
    """
    from llama_index.core import StorageContext, load_index_from_storage

    _add_code_from_conf_to_system_path(path, flavor_conf)

    pyfunc_conf = _get_flavor_configuration(model_path=path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_code_path = pyfunc_conf.get(MODEL_CODE_PATH)

    if not saved_code_path:
        # Use default vector store when loading from the serialized index
        persist_dir = os.path.join(path, _INDEX_PERSIST_FOLDER)
        restored_context = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(restored_context)

    # Handle model-from-code
    # TODO: The code path saved in the MLModel file is the local absolute path to the code
    # file when it is saved. We should update the relative path in artifact directory.
    script_path = os.path.join(path, os.path.basename(saved_code_path))

    config = pyfunc_conf.get(MODEL_CONFIG) or flavor_conf.get(MODEL_CONFIG, {})
    if isinstance(config, str):
        # A string config is a file name; resolve it relative to the model directory.
        config_file = os.path.join(path, os.path.basename(config))
        config = _validate_and_get_model_config_from_file(config_file)

    return _load_model_code_path(script_path, config)
@experimental
@trace_disabled  # Suppress traces while loading model
def load_model(model_uri, dst_path=None):
    """
    Load a LlamaIndex index/engine/workflow from a local file or a run.

    The saved ``Settings`` state is restored before the model itself is loaded.

    Args:
        model_uri: The location, in URI format, of the MLflow model. For example:

            - ``/Users/me/path/to/local/model``
            - ``relative/path/to/local/model``
            - ``s3://my_bucket/path/to/model``
            - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
            - ``mlflow-artifacts:/path/to/model``

            For more information about supported URI schemes, see
            `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
            artifact-locations>`_.
        dst_path: The local filesystem path to utilize for downloading the model artifact.
            This directory must already exist if provided. If unspecified, a local output
            path will be created.

    Returns:
        A LlamaIndex index object.
    """
    from mlflow.llama_index.serialize_objects import deserialize_settings

    model_dir = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
    llama_index_conf = _get_flavor_configuration(model_path=model_dir, flavor_name=FLAVOR_NAME)

    # NB: Settings is a singleton and can be loaded via llama_index.core.Settings
    deserialize_settings(os.path.join(model_dir, _SETTINGS_FILE))

    return _load_llama_model(model_dir, llama_index_conf)
def _load_pyfunc(path, model_config: Optional[dict[str, Any]] = None):
    """
    Load the model stored at ``path`` and wrap it for the pyfunc interface.

    The engine type recorded in the flavor configuration (if any) and ``model_config``
    are forwarded to ``create_pyfunc_wrapper``.
    """
    from mlflow.llama_index.pyfunc_wrapper import create_pyfunc_wrapper

    loaded_model = load_model(path)
    llama_index_conf = _get_flavor_configuration(model_path=path, flavor_name=FLAVOR_NAME)

    # Not present when saving an non-index object
    saved_engine_type = llama_index_conf.pop("engine_type", None)

    return create_pyfunc_wrapper(loaded_model, saved_engine_type, model_config)
@experimental
def autolog(
    log_traces: bool = True,
    disable: bool = False,
    silent: bool = False,
):
    """
    Enables (or disables) and configures autologging from LlamaIndex to MLflow. Currently,
    MLflow only supports autologging for tracing.

    Args:
        log_traces: If ``True``, traces are logged for LlamaIndex models.
            If ``False``, no traces are collected during inference. Default to ``True``.
        disable: If ``True``, disables the LlamaIndex autologging integration. If ``False``,
            enables the LlamaIndex autologging integration.
        silent: If ``True``, suppress all event logs and warnings from MLflow during LlamaIndex
            autologging. If ``False``, show all events and warnings.
    """
    from mlflow.llama_index.tracer import remove_llama_index_tracer, set_llama_index_tracer

    # NB: The @autologging_integration annotation is used for adding shared logic. However, one
    # caveat is that the wrapped function is NOT executed when disable=True is passed. This prevents
    # us from running cleaning up logging when autologging is turned off. To workaround this, we
    # annotate _autolog() instead of this entrypoint, and define the cleanup logic outside it.
    # TODO: since this implementation is inconsistent, explore a universal way to solve the issue.
    if disable or not log_traces:
        remove_llama_index_tracer()
    else:
        set_llama_index_tracer()

    _autolog(log_traces=log_traces, disable=disable, silent=silent)
# This is required by mlflow.autolog() autolog.integration_name = FLAVOR_NAME @autologging_integration(FLAVOR_NAME) def _autolog( log_traces: bool, disable: bool = False, silent: bool = False, ): """ TODO: Implement patching logic for autologging models and artifacts. """