"""Make approximate assertions as "expectations" on test results.This module is designed to be used within test cases decorated with the`@pytest.mark.decorator` decoratorIt allows you to log scores about a test case and optionally make assertions that log as"expectation" feedback to LangSmith.Example usage: .. code-block:: python import pytest from langsmith import expect @pytest.mark.langsmith def test_output_semantically_close(): response = oai_client.chat.completions.create( model="gpt-3.5-turbo", messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Say hello!"}, ], ) response_txt = response.choices[0].message.content # Intended usage expect.embedding_distance( prediction=response_txt, reference="Hello!", ).to_be_less_than(0.9) # Score the test case matcher = expect.edit_distance( prediction=response_txt, reference="Hello!", ) # Apply an assertion and log 'expectation' feedback to LangSmith matcher.to_be_less_than(1) # You can also directly make assertions on values directly expect.value(response_txt).to_contain("Hello!") # Or using a custom check expect.value(response_txt).against(lambda x: "Hello" in x) # You can even use this for basic metric logging within tests expect.score(0.8) expect.score(0.7, key="similarity").to_be_greater_than(0.7)"""# noqa: E501from__future__importannotationsimportatexitimportinspectfromtypingimport(TYPE_CHECKING,Any,Callable,Literal,Optional,Union,overload,)fromlangsmithimportclientasls_clientfromlangsmithimportrun_helpersasrhfromlangsmithimportrun_treesasrtfromlangsmithimportutilsasls_utilsifTYPE_CHECKING:fromlangsmith._internal._edit_distanceimportEditDistanceConfigfromlangsmith._internal._embedding_distanceimportEmbeddingConfig# Sentinel class used until PEP 0661 is acceptedclass_NULL_SENTRY:"""A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior). """# noqa: D205def__bool__(self)->Literal[False]:returnFalsedef__repr__(self)->str:return"NOT_GIVEN"NOT_GIVEN=_NULL_SENTRY()class_Matcher:"""A class for making assertions on expectation values."""def__init__(self,client:Optional[ls_client.Client],key:str,value:Any,_executor:Optional[ls_utils.ContextThreadPoolExecutor]=None,run_id:Optional[str]=None,):self._client=clientself.key=keyself.value=valueself._executor=_executororls_utils.ContextThreadPoolExecutor(max_workers=3)rt=rh.get_current_run_tree()self._run_id=rt.trace_idifrtelserun_iddef_submit_feedback(self,score:int,message:Optional[str]=None)->None:ifnotls_utils.test_tracking_is_disabled():ifnotself._client:self._client=rt.get_cached_client()self._executor.submit(self._client.create_feedback,run_id=self._run_id,key="expectation",score=score,comment=message,)def_assert(self,condition:bool,message:str,method_name:str)->None:try:assertcondition,messageself._submit_feedback(1,message=f"Success: {self.key}.{method_name}")exceptAssertionErrorase:self._submit_feedback(0,repr(e))raiseefromNonedefto_be_less_than(self,value:float)->None:"""Assert that the expectation value is less than the given value. Args: value: The value to compare against. Raises: AssertionError: If the expectation value is not less than the given value. """self._assert(self.value<value,f"Expected {self.key} to be less than {value}, but got {self.value}","to_be_less_than",)defto_be_greater_than(self,value:float)->None:"""Assert that the expectation value is greater than the given value. Args: value: The value to compare against. 
Raises: AssertionError: If the expectation value is not greater than the given value. """self._assert(self.value>value,f"Expected {self.key} to be greater than {value}, but got {self.value}","to_be_greater_than",)defto_be_between(self,min_value:float,max_value:float)->None:"""Assert that the expectation value is between the given min and max values. Args: min_value: The minimum value (exclusive). max_value: The maximum value (exclusive). Raises: AssertionError: If the expectation value is not between the given min and max. """self._assert(min_value<self.value<max_value,f"Expected {self.key} to be between {min_value} and {max_value},"f" but got {self.value}","to_be_between",)defto_be_approximately(self,value:float,precision:int=2)->None:"""Assert that the expectation value is approximately equal to the given value. Args: value: The value to compare against. precision: The number of decimal places to round to for comparison. Raises: AssertionError: If the rounded expectation value does not equal the rounded given value. """self._assert(round(self.value,precision)==round(value,precision),f"Expected {self.key} to be approximately {value}, but got {self.value}","to_be_approximately",)defto_equal(self,value:float)->None:"""Assert that the expectation value equals the given value. Args: value: The value to compare against. Raises: AssertionError: If the expectation value does not exactly equal the given value. """self._assert(self.value==value,f"Expected {self.key} to be equal to {value}, but got {self.value}","to_equal",)defto_be_none(self)->None:"""Assert that the expectation value is None. Raises: AssertionError: If the expectation value is not None. """self._assert(self.valueisNone,f"Expected {self.key} to be None, but got {self.value}","to_be_none",)defto_contain(self,value:Any)->None:"""Assert that the expectation value contains the given value. Args: value: The value to check for containment. Raises: AssertionError: If the expectation value does not contain the given value. """self._assert(valueinself.value,f"Expected {self.key} to contain {value}, but it does not","to_contain",)# Custom assertionsdefagainst(self,func:Callable,/)->None:"""Assert the expectation value against a custom function. Args: func: A custom function that takes the expectation value as input. Raises: AssertionError: If the custom function returns False. """func_signature=inspect.signature(func)self._assert(func(self.value),f"Assertion {func_signature} failed for {self.key}","against",)
class _Expect:
    """A class for setting expectations on test results."""
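
    # NOTE: Minimal reconstruction of the constructor and feedback submitter
    # assumed by the methods below (which reference `self._client`,
    # `self.executor`, and `self._submit_feedback`) and by the otherwise
    # unused `atexit` import above.
    def __init__(self, *, client: Optional[ls_client.Client] = None):
        self._client = client
        self.executor = ls_utils.ContextThreadPoolExecutor(max_workers=3)
        # Flush any pending feedback submissions on interpreter exit.
        atexit.register(self.executor.shutdown, wait=True)

    def _submit_feedback(self, key: str, results: dict) -> None:
        # Attach feedback to the trace of the currently running test, if any.
        current_run = rh.get_current_run_tree()
        run_id = current_run.trace_id if current_run else None
        if not ls_utils.test_tracking_is_disabled():
            if not self._client:
                self._client = rt.get_cached_client()
            self.executor.submit(
                self._client.create_feedback, run_id=run_id, key=key, **results
            )
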
    def embedding_distance(
        self,
        prediction: str,
        reference: str,
        *,
        config: Optional[EmbeddingConfig] = None,
    ) -> _Matcher:
        """Compute the embedding distance between the prediction and reference.

        This logs the embedding distance to LangSmith and returns a `_Matcher`
        instance for making assertions on the distance value.

        By default, this uses the OpenAI API for computing embeddings.

        Args:
            prediction: The predicted string to compare.
            reference: The reference string to compare against.
            config: Optional configuration for the embedding distance evaluator.
                Supported options:
                - `encoder`: A custom encoder function to encode the list of
                    input strings to embeddings. Defaults to the OpenAI API.
                - `metric`: The distance metric to use for comparison. Supported
                    values: "cosine", "euclidean", "manhattan", "chebyshev",
                    "hamming".

        Returns:
            A `_Matcher` instance for the embedding distance value.

        Examples:
            >>> expect.embedding_distance(
            ...     prediction="hello",
            ...     reference="hi",
            ... ).to_be_less_than(1.0)
        """  # noqa: E501
        from langsmith._internal._embedding_distance import EmbeddingDistance

        config = config or {}
        encoder_func = "custom" if config.get("encoder") else "openai"
        evaluator = EmbeddingDistance(config=config)
        score = evaluator.evaluate(prediction=prediction, reference=reference)
        src_info = {"encoder": encoder_func, "metric": evaluator.distance}
        self._submit_feedback(
            "embedding_distance",
            {
                "score": score,
                "source_info": src_info,
                "comment": f"Using {encoder_func}, Metric: {evaluator.distance}",
            },
        )
        return _Matcher(
            self._client, "embedding_distance", score, _executor=self.executor
        )

    def edit_distance(
        self,
        prediction: str,
        reference: str,
        *,
        config: Optional[EditDistanceConfig] = None,
    ) -> _Matcher:
        """Compute the string distance between the prediction and reference.

        This logs the string distance (Damerau-Levenshtein) to LangSmith and
        returns a `_Matcher` instance for making assertions on the distance value.

        This depends on the `rapidfuzz` package for string distance computation.

        Args:
            prediction: The predicted string to compare.
            reference: The reference string to compare against.
            config: Optional configuration for the string distance evaluator.
                Supported options:
                - `metric`: The distance metric to use for comparison. Supported
                    values: "damerau_levenshtein", "levenshtein", "jaro",
                    "jaro_winkler", "hamming", "indel".
                - `normalize_score`: Whether to normalize the score between 0 and 1.

        Returns:
            A `_Matcher` instance for the string distance value.

        Examples:
            >>> expect.edit_distance("hello", "helo").to_be_less_than(1)
        """
        from langsmith._internal._edit_distance import EditDistance

        config = config or {}
        metric = config.get("metric") or "damerau_levenshtein"
        normalize = config.get("normalize_score", True)
        evaluator = EditDistance(config=config)
        score = evaluator.evaluate(prediction=prediction, reference=reference)
        src_info = {"metric": metric, "normalize": normalize}
        self._submit_feedback(
            "edit_distance",
            {
                "score": score,
                "source_info": src_info,
                "comment": f"Using {metric}, Normalize: {normalize}",
            },
        )
        return _Matcher(
            self._client,
            "edit_distance",
            score,
            _executor=self.executor,
        )

    def value(self, value: Any) -> _Matcher:
        """Create a `_Matcher` instance for making assertions on the given value.

        Args:
            value: The value to make assertions on.

        Returns:
            A `_Matcher` instance for the given value.

        Examples:
            >>> expect.value(10).to_be_less_than(20)
        """
        return _Matcher(self._client, "value", value, _executor=self.executor)

    def score(
        self,
        score: Union[float, int, bool],
        *,
        key: str = "score",
        source_run_id: Optional[ls_client.ID_TYPE] = None,
        comment: Optional[str] = None,
    ) -> _Matcher:
        """Log a numeric score to LangSmith.

        Args:
            score: The score value to log.
            key: The key to use for logging the score. Defaults to "score".
            source_run_id: Optional ID of the run that generated this score,
                recorded as the source of the feedback.
            comment: Optional comment to attach to the logged feedback.

        Examples:
            >>> expect.score(0.8)  # doctest: +ELLIPSIS
            <langsmith._expect._Matcher object at ...>

            >>> expect.score(0.8, key="similarity").to_be_greater_than(0.7)
        """
        self._submit_feedback(
            key,
            {
                "score": score,
                "source_info": {"method": "expect.score"},
                "source_run_id": source_run_id,
                "comment": comment,
            },
        )
        return _Matcher(self._client, key, score, _executor=self.executor)
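
    # NOTE: A sketch of the call interface implied by the `NOT_GIVEN` sentinel
    # and the otherwise unused `overload` import above: calling `expect(value)`
    # wraps the value in a `_Matcher`, while `expect(client=...)` rebinds
    # the client.
    @overload
    def __call__(self, value: Any, /) -> _Matcher: ...

    @overload
    def __call__(self, /, *, client: ls_client.Client) -> _Expect: ...

    def __call__(
        self,
        value: Optional[Any] = NOT_GIVEN,
        /,
        *,
        client: Optional[ls_client.Client] = None,
    ) -> Union[_Expect, _Matcher]:
        expected = _Expect(client=client)
        if value is not NOT_GIVEN:
            return expected.value(value)
        return expected


# Module-level singleton matching the `from langsmith import expect` usage
# shown in the module docstring.
expect = _Expect()

__all__ = ["expect"]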