# Source code for langsmith._expect

"""Make approximate assertions as "expectations" on test results.

This module is designed to be used within test cases decorated with the
`@pytest.mark.decorator` decorator.
It allows you to log scores about a test case and optionally make assertions that log as
"expectation" feedback to LangSmith.

Example usage:
    .. code-block:: python

        import pytest
        from langsmith import expect

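        @pytest.mark.langsmith
        def test_output_semantically_close():
            response = oai_client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": "Say hello!"},
                ],
            )
            response_txt = response.choices[0].message.content
            # Intended usage
            expect.embedding_distance(
                prediction=response_txt,
                reference="Hello!",
            ).to_be_less_than(0.9)

            # Score the test case
            matcher = expect.edit_distance(
                prediction=response_txt,
                reference="Hello!",
            )
            # Apply an assertion and log 'expectation' feedback to LangSmith
            matcher.to_be_less_than(1)

            # You can also make assertions on values directly
            expect.value(response_txt).to_contain("Hello!")
            # Or using a custom check
            expect.value(response_txt).against(lambda x: "Hello" in x)

            # You can even use this for basic metric logging within tests

            expect.score(0.8)
            expect.score(0.7, key="similarity").to_be_greater_than(0.7)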
"""  # noqa: E501

from __future__ import annotations

import atexit
import inspect
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    Optional,
    Union,
    overload,
)

from langsmith import client as ls_client
from langsmith import run_helpers as rh
from langsmith import run_trees as rt
from langsmith import utils as ls_utils

if TYPE_CHECKING:
    from langsmith._internal._edit_distance import EditDistanceConfig
    from langsmith._internal._embedding_distance import EmbeddingConfig

# Sentinel class used until PEP 0661 is accepted
class _NULL_SENTRY:
    """A sentinel singleton class used to distinguish omitted keyword arguments
    from those passed in with the value None (which may have different behavior).
    """  # noqa: D205

    def __bool__(self) -> Literal[False]:
        # The sentinel is always falsy, so `if value:` treats it like "nothing given".
        return False

    def __repr__(self) -> str:
        # Shown in signatures and debug output instead of the default object repr.
        return "NOT_GIVEN"


# Shared instance; compare against it with `is` / `is not`.
NOT_GIVEN = _NULL_SENTRY()


class _Matcher:
    """A class for making assertions on expectation values."""

    def __init__(
        self,
        client: Optional[ls_client.Client],
        key: str,
        value: Any,
        _executor: Optional[ls_utils.ContextThreadPoolExecutor] = None,
        run_id: Optional[str] = None,
    ):
        """Bind a value to assert on, together with the feedback plumbing.

        Args:
            client: Client used to log feedback. If falsy, a cached client is
                fetched the first time feedback is submitted.
            key: Name of the value being checked; used in assertion messages.
            value: The value the assertion methods operate on.
            _executor: Executor used to submit feedback in the background. A new
                3-worker executor is created when omitted.
            run_id: Run to attach feedback to when there is no current run tree.
        """
        self._client = client
        self.key = key
        self.value = value
        self._executor = _executor or ls_utils.ContextThreadPoolExecutor(max_workers=3)
        # Named `current_run` (not `rt`) so the `run_trees` module alias is not shadowed.
        current_run = rh.get_current_run_tree()
        # The trace id of the active run tree takes precedence over `run_id`.
        self._run_id = current_run.trace_id if current_run else run_id

    def _submit_feedback(self, score: int, message: Optional[str] = None) -> None:
        """Queue the outcome of an assertion as feedback.

        Does nothing when test tracking is disabled.

        Args:
            score: 1 for a passed assertion, 0 for a failed one.
            message: Comment stored alongside the score.
        """
        if ls_utils.test_tracking_is_disabled():
            return
        if not self._client:
            self._client = rt.get_cached_client()
        # Logged under the "expectation" key, as described in the module docstring.
        self._executor.submit(
            self._client.create_feedback,
            run_id=self._run_id,
            key="expectation",
            score=score,
            comment=message,
        )

    def _assert(self, condition: bool, message: str, method_name: str) -> None:
        """Assert `condition`, logging feedback for either outcome.

        Args:
            condition: The result of the check.
            message: The assertion message used when the check fails.
            method_name: Name of the public assertion method, used in the
                success comment.

        Raises:
            AssertionError: If `condition` is falsy.
        """
        try:
            assert condition, message
            self._submit_feedback(1, message=f"Success: {self.key}.{method_name}")
        except AssertionError as e:
            self._submit_feedback(0, repr(e))
            # `from None` drops the implicit exception context from the traceback.
            raise e from None

    def to_be_less_than(self, value: float) -> None:
        """Assert that the expectation value is less than the given value.

        Args:
            value: The value to compare against.

        Raises:
            AssertionError: If the expectation value is not less than the given value.
        """
        self._assert(
            self.value < value,
            f"Expected {self.key} to be less than {value}, but got {self.value}",
            "to_be_less_than",
        )

    def to_be_greater_than(self, value: float) -> None:
        """Assert that the expectation value is greater than the given value.

        Args:
            value: The value to compare against.

        Raises:
            AssertionError: If the expectation value is not
            greater than the given value.
        """
        self._assert(
            self.value > value,
            f"Expected {self.key} to be greater than {value}, but got {self.value}",
            "to_be_greater_than",
        )

    def to_be_between(self, min_value: float, max_value: float) -> None:
        """Assert that the expectation value is between the given min and max values.

        Args:
            min_value: The minimum value (exclusive).
            max_value: The maximum value (exclusive).

        Raises:
            AssertionError: If the expectation value
                is not between the given min and max.
        """
        self._assert(
            min_value < self.value < max_value,
            f"Expected {self.key} to be between {min_value} and {max_value},"
            f" but got {self.value}",
            "to_be_between",
        )

    def to_be_approximately(self, value: float, precision: int = 2) -> None:
        """Assert that the expectation value is approximately equal to the given value.

        Args:
            value: The value to compare against.
            precision: The number of decimal places to round to for comparison.

        Raises:
            AssertionError: If the rounded expectation value
                does not equal the rounded given value.
        """
        self._assert(
            round(self.value, precision) == round(value, precision),
            f"Expected {self.key} to be approximately {value}, but got {self.value}",
            "to_be_approximately",
        )

    def to_equal(self, value: float) -> None:
        """Assert that the expectation value equals the given value.

        Args:
            value: The value to compare against.

        Raises:
            AssertionError: If the expectation value does
                not exactly equal the given value.
        """
        self._assert(
            self.value == value,
            f"Expected {self.key} to be equal to {value}, but got {self.value}",
            "to_equal",
        )

    def to_be_none(self) -> None:
        """Assert that the expectation value is None.

        Raises:
            AssertionError: If the expectation value is not None.
        """
        self._assert(
            self.value is None,
            f"Expected {self.key} to be None, but got {self.value}",
            "to_be_none",
        )

    def to_contain(self, value: Any) -> None:
        """Assert that the expectation value contains the given value.

        Args:
            value: The value to check for containment.

        Raises:
            AssertionError: If the expectation value does not contain the given value.
        """
        self._assert(
            value in self.value,
            f"Expected {self.key} to contain {value}, but it does not",
            "to_contain",
        )

    # Custom assertions
    def against(self, func: Callable, /) -> None:
        """Assert the expectation value against a custom function.

        Args:
            func: A custom function that takes the expectation value as input.

        Raises:
            AssertionError: If the custom function returns False.
        """
        # The signature is only used to label the failure message. Some callables
        # cannot be introspected; fall back to their repr rather than failing
        # before the check itself has run.
        try:
            func_signature: Any = inspect.signature(func)
        except (TypeError, ValueError):
            func_signature = repr(func)
        self._assert(
            func(self.value),
            f"Assertion {func_signature} failed for {self.key}",
            "against",
        )

class _Expect:
    """A class for setting expectations on test results."""

    def __init__(self, *, client: Optional[ls_client.Client] = None):
        """Create an expectation helper.

        Args:
            client: Optional client used to log feedback. When omitted, a cached
                client is fetched the first time feedback is submitted.
        """
        self._client = client
        self.executor = ls_utils.ContextThreadPoolExecutor(max_workers=3)
        # Let queued feedback submissions finish before the interpreter exits.
        atexit.register(self.executor.shutdown, wait=True)

    def embedding_distance(
        self,
        prediction: str,
        reference: str,
        *,
        config: Optional[EmbeddingConfig] = None,
    ) -> _Matcher:
        """Compute the embedding distance between the prediction and reference.

        This logs the embedding distance to LangSmith and returns a `_Matcher` instance
        for making assertions on the distance value.

        By default, this uses the OpenAI API for computing embeddings.

        Args:
            prediction: The predicted string to compare.
            reference: The reference string to compare against.
            config: Optional configuration for the embedding distance evaluator.
                Supported options:
                - `encoder`: A custom encoder function to encode the list of input
                     strings to embeddings. Defaults to the OpenAI API.
                - `metric`: The distance metric to use for comparison.
                    Supported values: "cosine", "euclidean", "manhattan",
                    "chebyshev", "hamming".

        Returns:
            A `_Matcher` instance for the embedding distance value.


        Examples:
            >>> expect.embedding_distance(
            ...     prediction="hello",
            ...     reference="hi",
            ... ).to_be_less_than(1.0)
        """  # noqa: E501
        # Imported lazily so the evaluator is only loaded when this method is used.
        from langsmith._internal._embedding_distance import EmbeddingDistance

        config = config or {}
        encoder_func = "custom" if config.get("encoder") else "openai"
        evaluator = EmbeddingDistance(config=config)
        score = evaluator.evaluate(prediction=prediction, reference=reference)
        src_info = {"encoder": encoder_func, "metric": evaluator.distance}
        self._submit_feedback(
            "embedding_distance",
            {
                "score": score,
                "source_info": src_info,
                "comment": f"Using {encoder_func}, Metric: {evaluator.distance}",
            },
        )
        return _Matcher(
            self._client, "embedding_distance", score, _executor=self.executor
        )

    def edit_distance(
        self,
        prediction: str,
        reference: str,
        *,
        config: Optional[EditDistanceConfig] = None,
    ) -> _Matcher:
        """Compute the string distance between the prediction and reference.

        This logs the string distance (Damerau-Levenshtein) to LangSmith and returns
        a `_Matcher` instance for making assertions on the distance value.

        This depends on the `rapidfuzz` package for string distance computation.

        Args:
            prediction: The predicted string to compare.
            reference: The reference string to compare against.
            config: Optional configuration for the string distance evaluator.
                Supported options:
                - `metric`: The distance metric to use for comparison.
                    Supported values: "damerau_levenshtein", "levenshtein",
                    "jaro", "jaro_winkler", "hamming", "indel".
                - `normalize_score`: Whether to normalize the score between 0 and 1.

        Returns:
            A `_Matcher` instance for the string distance value.


        Examples:
            >>> expect.edit_distance("hello", "helo").to_be_less_than(1)
        """
        # Imported lazily so the evaluator is only loaded when this method is used.
        from langsmith._internal._edit_distance import EditDistance

        config = config or {}
        metric = config.get("metric") or "damerau_levenshtein"
        normalize = config.get("normalize_score", True)
        evaluator = EditDistance(config=config)
        score = evaluator.evaluate(prediction=prediction, reference=reference)
        src_info = {"metric": metric, "normalize": normalize}
        self._submit_feedback(
            "edit_distance",
            {
                "score": score,
                "source_info": src_info,
                "comment": f"Using {metric}, Normalize: {normalize}",
            },
        )
        return _Matcher(
            self._client,
            "edit_distance",
            score,
            _executor=self.executor,
        )

    def value(self, value: Any) -> _Matcher:
        """Create a `_Matcher` instance for making assertions on the given value.

        Args:
            value: The value to make assertions on.

        Returns:
            A `_Matcher` instance for the given value.

        Examples:
            >>> expect.value(10).to_be_less_than(20)
        """
        return _Matcher(self._client, "value", value, _executor=self.executor)

    def score(
        self,
        score: Union[float, int, bool],
        *,
        key: str = "score",
        source_run_id: Optional[ls_client.ID_TYPE] = None,
        comment: Optional[str] = None,
    ) -> _Matcher:
        """Log a numeric score to LangSmith.

        Args:
            score: The score value to log.
            key: The key to use for logging the score. Defaults to "score".
            source_run_id: Optional run ID forwarded as `source_run_id` when
                the feedback is created.
            comment: Optional comment to store with the feedback.

        Returns:
            A `_Matcher` instance for the logged score.

        Examples:
            >>> expect.score(0.8)  # doctest: +ELLIPSIS
            <langsmith._expect._Matcher object at ...>

            >>> expect.score(0.8, key="similarity").to_be_greater_than(0.7)
        """
        self._submit_feedback(
            key,
            {
                "score": score,
                "source_info": {"method": "expect.score"},
                "source_run_id": source_run_id,
                "comment": comment,
            },
        )
        return _Matcher(self._client, key, score, _executor=self.executor)

    ## Private Methods

    @overload
    def __call__(self, value: Any, /) -> _Matcher: ...

    @overload
    def __call__(self, /, *, client: ls_client.Client) -> _Expect: ...

    def __call__(
        self,
        value: Optional[Any] = NOT_GIVEN,
        /,
        client: Optional[ls_client.Client] = None,
    ) -> Union[_Expect, _Matcher]:
        """Create a fresh `_Expect`, or a matcher when a value is supplied.

        Args:
            value: Optional value to make assertions on. `None` is a valid value;
                omission is detected with the `NOT_GIVEN` sentinel.
            client: Optional client for the new `_Expect` instance.

        Returns:
            A `_Matcher` for `value` if one was given, otherwise the new `_Expect`.
        """
        expected = _Expect(client=client)
        if value is not NOT_GIVEN:
            return expected.value(value)
        return expected

    def _submit_feedback(self, key: str, results: dict):
        """Queue feedback for the current trace unless test tracking is disabled.

        Args:
            key: The feedback key.
            results: Extra keyword arguments forwarded to `create_feedback`.
        """
        if ls_utils.test_tracking_is_disabled():
            return
        current_run = rh.get_current_run_tree()
        # With no active run tree the feedback is submitted with run_id=None.
        run_id = current_run.trace_id if current_run else None
        if not self._client:
            self._client = rt.get_cached_client()
        self.executor.submit(
            self._client.create_feedback, run_id=run_id, key=key, **results
        )
# Module-level default instance; the only name exported from this module.
expect = _Expect()

__all__ = ["expect"]