Source code for langcheck.metrics.eval_clients._anthropic

from __future__ import annotations

import asyncio
import os
import warnings
from typing import Any

from anthropic import (
    Anthropic,
    AnthropicVertex,
    AsyncAnthropic,
    AsyncAnthropicVertex,
)

from langcheck.metrics.eval_clients.eval_response import (
    ResponsesWithMetadata,
)
from langcheck.utils.progress_bar import tqdm_wrapper

from ..prompts._utils import get_template
from ._base import EvalClient
from .extractor import Extractor


class AnthropicEvalClient(EvalClient):
    """EvalClient defined for Anthropic API."""

    def __init__(
        self,
        anthropic_client: Anthropic
        | AsyncAnthropic
        | AnthropicVertex
        | AsyncAnthropicVertex
        | None = None,
        anthropic_args: dict[str, Any] | None = None,
        *,
        use_async: bool = False,
        vertexai: bool = False,
        system_prompt: str | None = None,
        extractor: Extractor | None = None,
    ):
        """
        Initialize the Anthropic evaluation client. The authentication
        information is automatically read from the environment variables.

        If you want to use Anthropic API, please set `ANTHROPIC_API_KEY`.

        If you want to use Vertex AI API, set the `vertexai` argument to True,
        and please set the following environment variables:

        - ANTHROPIC_VERTEX_PROJECT_ID=<your-project-id>
        - CLOUD_ML_REGION=<region> (e.g. europe-west1)
        - GOOGLE_APPLICATION_CREDENTIALS=<path-to-credentials-file>

        References:
            - https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
            - https://cloud.google.com/docs/authentication/application-default-credentials

        Args:
            anthropic_client (Optional): The Anthropic client to use.
            anthropic_args (Optional): dict of additional args to pass in to
                the `client.messages.create` function
            use_async: If True, the async client will be used. Ignored when
                `anthropic_client` is provided. Defaults to False.
            vertexai: If True, the Vertex AI client will be used. Ignored when
                `anthropic_client` is provided. Defaults to False.
            system_prompt (Optional): The system prompt to use. If not
                provided, no system prompt will be used.
            extractor (Optional): The extractor to use. If not provided, the
                default extractor will be used.

        Raises:
            ValueError: If no client is provided and the environment variables
                required by the selected backend are not set.
        """
        warnings.warn(
            "AnthropicEvalClient will be deprecated in the next release. "
            "Please use LiteLLMEvalClient instead.",
            stacklevel=2,
        )

        if anthropic_client is None:
            if vertexai:
                # Vertex AI requires these environment variables
                for env_var in (
                    "ANTHROPIC_VERTEX_PROJECT_ID",
                    "CLOUD_ML_REGION",
                    "GOOGLE_APPLICATION_CREDENTIALS",
                ):
                    if not os.environ.get(env_var):
                        raise ValueError(
                            f"Environment variable '{env_var}' must be set when using Vertex AI."
                        )

                # Warn that `ANTHROPIC_API_KEY` is not used when using Vertex AI
                if os.environ.get("ANTHROPIC_API_KEY"):
                    warnings.warn(
                        "`ANTHROPIC_API_KEY` is set when using Vertex AI. "
                        "Vertex AI will take precedence over the API key from "
                        "the environment variable.",
                        stacklevel=2,
                    )

                if use_async:
                    self._client = AsyncAnthropicVertex()
                else:
                    self._client = AnthropicVertex()
            else:
                if os.environ.get("ANTHROPIC_API_KEY") is None:
                    raise ValueError(
                        "`ANTHROPIC_API_KEY` is not set when using Anthropic API. "
                        "Please set the `ANTHROPIC_API_KEY` environment variable."
                    )

                if use_async:
                    self._client = AsyncAnthropic()
                else:
                    self._client = Anthropic()

            self._vertexai = vertexai
            self._use_async = use_async
        else:
            self._client = anthropic_client
            self._vertexai = isinstance(
                anthropic_client, (AnthropicVertex, AsyncAnthropicVertex)
            )
            self._use_async = isinstance(
                anthropic_client, (AsyncAnthropic, AsyncAnthropicVertex)
            )

            # Client config will take precedence over the argument, and the
            # argument will be ignored.
            if self._vertexai and not vertexai:
                warnings.warn(
                    "The provided `anthropic_client` is a Vertex AI client, "
                    "so the `vertexai=False` argument will be ignored. The Vertex AI client will be used.",
                    stacklevel=2,
                )
            elif not self._vertexai and vertexai:
                warnings.warn(
                    "The provided `anthropic_client` is an Anthropic client, "
                    "so the `vertexai=True` argument will be ignored. The Anthropic client will be used.",
                    stacklevel=2,
                )

            if self._use_async and not use_async:
                warnings.warn(
                    "The provided `anthropic_client` is an async client, "
                    "so the `use_async=False` argument will be ignored. The async client will be used.",
                    stacklevel=2,
                )
            elif not self._use_async and use_async:
                warnings.warn(
                    "The provided `anthropic_client` is a synchronous client, "
                    "so the `use_async=True` argument will be ignored. The synchronous client will be used.",
                    stacklevel=2,
                )

        self._anthropic_args = anthropic_args or {}
        self._system_prompt = system_prompt

        if system_prompt and "system" in self._anthropic_args:
            warnings.warn(
                '"system" of anthropic_args will be ignored because '
                "system_prompt is provided.",
                stacklevel=2,
            )

        if extractor is None:
            # Reuse the resolved client so the default extractor talks to the
            # same backend (sync/async, Anthropic/Vertex AI) as this client.
            self._extractor = AnthropicExtractor(
                anthropic_client=self._client,
                use_async=self._use_async,
                vertexai=self._vertexai,
            )
        else:
            self._extractor = extractor

    @staticmethod
    def _first_text(response: Any) -> str | None:
        """Return the text of the first text-bearing content block, if any.

        Returns None for a missing/failed response, for a response without
        content blocks, and for a response whose blocks carry no `text`, so
        that one malformed response cannot abort the whole batch.
        """
        if not response:
            return None
        for block in getattr(response, "content", None) or []:
            text = getattr(block, "text", None)
            if text is not None:
                return text
        return None

    def get_text_responses(
        self,
        prompts: list[str],
        *,
        tqdm_description: str | None = None,
    ) -> ResponsesWithMetadata[str]:
        """The function that gets responses to the given prompt texts.
        We use Anthropic's 'claude-3-haiku-20240307' model by default, but you
        can configure it by passing the 'model' parameter in the
        anthropic_args.

        Args:
            prompts: The prompts you want to get the responses for.
            tqdm_description: The description to be shown in the tqdm bar.
                Defaults to "Intermediate assessments (1/2)".

        Returns:
            The responses to the prompts, wrapped in `ResponsesWithMetadata`
            (the metadata is always None). A response can be None if the
            evaluation fails.
        """
        config = {
            # The model names are slightly different for Anthropic API and Vertex AI API
            # Reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models
            "model": "claude-3-haiku@20240307"
            if self._vertexai
            else "claude-3-haiku-20240307",
            "max_tokens": 4096,
            "temperature": 0.0,
        }
        # User-supplied arguments override the defaults above.
        config.update(self._anthropic_args or {})

        tqdm_description = tqdm_description or "Intermediate assessments (1/2)"
        responses = _call_api(
            client=self._client,
            prompts=prompts,
            config=config,
            use_async=self._use_async,
            tqdm_description=tqdm_description,
            system_prompt=self._system_prompt,
        )
        response_texts = [self._first_text(response) for response in responses]

        # Token usage is not supported in AnthropicEvalClient
        # If you need token usage, please use LiteLLMEvalClient instead.
        return ResponsesWithMetadata(response_texts, None)

    def similarity_scorer(self):
        """Not supported: always raises NotImplementedError."""
        raise NotImplementedError(
            "Embedding-based metrics are not supported in AnthropicEvalClient. "
            "Use other EvalClients to get these metrics."
        )
class AnthropicExtractor(Extractor):
    """Score extractor for Anthropic API."""

    def __init__(
        self,
        anthropic_client: Anthropic
        | AsyncAnthropic
        | AnthropicVertex
        | AsyncAnthropicVertex
        | None = None,
        anthropic_args: dict[str, Any] | None = None,
        *,
        use_async: bool = False,
        vertexai: bool = False,
    ):
        """
        Initialize the Anthropic score extractor. The authentication
        information is automatically read from the environment variables.

        If you want to use Anthropic API, please set `ANTHROPIC_API_KEY`.

        If you want to use Vertex AI API, set the `vertexai` argument to True,
        and please set the following environment variables:

        - ANTHROPIC_VERTEX_PROJECT_ID=<your-project-id>
        - CLOUD_ML_REGION=<region> (e.g. europe-west1)
        - GOOGLE_APPLICATION_CREDENTIALS=<path-to-credentials-file>

        References:
            - https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
            - https://cloud.google.com/docs/authentication/application-default-credentials

        Args:
            anthropic_client (Optional): The Anthropic client to use.
            anthropic_args (Optional): dict of additional args to pass in to
                the `client.messages.create` function
            use_async: If True, the async client will be used. Ignored when
                `anthropic_client` is provided. Defaults to False.
            vertexai: If True, the Vertex AI client will be used. Ignored when
                `anthropic_client` is provided. Defaults to False.

        Raises:
            ValueError: If no client is provided and the environment variables
                required by the selected backend are not set.
        """
        warnings.warn(
            "AnthropicExtractor will be deprecated in the next release. "
            "Please use LiteLLMExtractor instead.",
            stacklevel=2,
        )

        if anthropic_client is None:
            if vertexai:
                # Vertex AI requires these environment variables
                for env_var in (
                    "ANTHROPIC_VERTEX_PROJECT_ID",
                    "CLOUD_ML_REGION",
                    "GOOGLE_APPLICATION_CREDENTIALS",
                ):
                    if not os.environ.get(env_var):
                        raise ValueError(
                            f"Environment variable '{env_var}' must be set when using Vertex AI."
                        )

                # Warn that `ANTHROPIC_API_KEY` is not used when using Vertex AI
                if os.environ.get("ANTHROPIC_API_KEY"):
                    warnings.warn(
                        "`ANTHROPIC_API_KEY` is set when using Vertex AI. "
                        "Vertex AI will take precedence over the API key from "
                        "the environment variable.",
                        stacklevel=2,
                    )

                if use_async:
                    self._client = AsyncAnthropicVertex()
                else:
                    self._client = AnthropicVertex()
            else:
                if os.environ.get("ANTHROPIC_API_KEY") is None:
                    raise ValueError(
                        "`ANTHROPIC_API_KEY` is not set when using Anthropic API. "
                        "Please set the `ANTHROPIC_API_KEY` environment variable."
                    )

                if use_async:
                    self._client = AsyncAnthropic()
                else:
                    self._client = Anthropic()

            self._use_async = use_async
            self._vertexai = vertexai
        else:
            self._client = anthropic_client
            self._use_async = isinstance(
                anthropic_client, (AsyncAnthropic, AsyncAnthropicVertex)
            )
            self._vertexai = isinstance(
                anthropic_client, (AnthropicVertex, AsyncAnthropicVertex)
            )

            # Client config will take precedence over the argument, and the
            # argument will be ignored.
            if self._vertexai and not vertexai:
                warnings.warn(
                    "The provided `anthropic_client` is a Vertex AI client, "
                    "so the `vertexai=False` argument will be ignored. The Vertex AI client will be used.",
                    stacklevel=2,
                )
            elif not self._vertexai and vertexai:
                warnings.warn(
                    "The provided `anthropic_client` is an Anthropic client, "
                    "so the `vertexai=True` argument will be ignored. The Anthropic client will be used.",
                    stacklevel=2,
                )

            if self._use_async and not use_async:
                warnings.warn(
                    "The provided `anthropic_client` is an async client, "
                    "so the `use_async=False` argument will be ignored. The async client will be used.",
                    stacklevel=2,
                )
            elif not self._use_async and use_async:
                warnings.warn(
                    "The provided `anthropic_client` is a synchronous client, "
                    "so the `use_async=True` argument will be ignored. The synchronous client will be used.",
                    stacklevel=2,
                )

        self._anthropic_args = anthropic_args or {}

    @staticmethod
    def _first_text(response: Any) -> str | None:
        """Return the text of the first text-bearing content block, if any.

        Returns None for a missing/failed response, for a response without
        content blocks, and for a response whose blocks carry no `text`, so
        that one malformed response cannot abort the whole batch.
        """
        if not response:
            return None
        for block in getattr(response, "content", None) or []:
            text = getattr(block, "text", None)
            if text is not None:
                return text
        return None

    def get_float_score(
        self,
        metric_name: str,
        language: str,
        unstructured_assessment_result: list[str | None],
        score_map: dict[str, float],
        *,
        tqdm_description: str | None = None,
    ) -> ResponsesWithMetadata[float]:
        """The function that transforms the unstructured assessments (i.e. long
        texts that describe the evaluation results) into scores.

        Args:
            metric_name: The name of the metric to be used. (e.g. "toxicity")
            language: The language of the prompts. (e.g. "en")
            unstructured_assessment_result: The unstructured assessment results
                for the given assessment prompts.
            score_map: The mapping from the short assessment results
                (e.g. "Good") to the scores.
            tqdm_description: The description to be shown in the tqdm bar.
                Defaults to "Scores (2/2)".

        Returns:
            The scores for the given assessments, wrapped in
            `ResponsesWithMetadata` (the metadata is always None). A score can
            be None if the evaluation fails.

        Raises:
            ValueError: If `language` is not one of "en", "ja" or "de".
        """
        if language not in ["en", "ja", "de"]:
            raise ValueError(f"Unsupported language: {language}")

        options = list(score_map.keys())
        get_score_template = get_template(f"{language}/get_score/plain_text.j2")
        # Missing/empty assessments stay None so positions line up with the
        # input list.
        get_score_prompts = [
            get_score_template.render(
                {
                    "metric": metric_name,
                    "unstructured_assessment": unstructured_assessment,
                    "options": options,
                }
            )
            if unstructured_assessment
            else None
            for unstructured_assessment in unstructured_assessment_result
        ]

        config = {
            # The model names are slightly different for Anthropic API and Vertex AI API
            # Reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models
            "model": "claude-3-haiku@20240307"
            if self._vertexai
            else "claude-3-haiku-20240307",
            "max_tokens": 1024,
        }
        # User-supplied arguments override the defaults above.
        config.update(self._anthropic_args or {})

        tqdm_description = tqdm_description or "Scores (2/2)"
        responses = _call_api(
            client=self._client,
            prompts=get_score_prompts,
            config=config,
            use_async=self._use_async,
            tqdm_description=tqdm_description,
        )
        raw_response_texts = [
            self._first_text(response) for response in responses
        ]

        def _turn_to_score(response: str | None) -> float | None:
            if response is None:
                return None
            option_found = [option for option in options if option in response]
            # Only an unambiguous answer is accepted: if the response contains
            # no option, or several options as substrings, return None.
            if len(option_found) != 1:
                return None
            return score_map[option_found[0]]

        # Token usage is not supported in AnthropicExtractor
        # If you need token usage, please use LiteLLMExtractor instead.
        return ResponsesWithMetadata(
            [_turn_to_score(response) for response in raw_response_texts],
            None,
        )
def _call_api( client: Anthropic | AsyncAnthropic | AnthropicVertex | AsyncAnthropicVertex, prompts: list[str] | list[str | None], config: dict[str, Any], *, use_async: bool = False, system_prompt: str | None = None, tqdm_description: str | None = None, ) -> list[Any]: """A helper function to call the Anthropic API.""" # A helper function to call the API with exception filter for alignment # of exception handling with the async version. def _call_api_with_exception_filter(model_input: dict[str, Any]) -> Any: if model_input is None: return None try: return client.messages.create(**model_input) except Exception as e: return e if system_prompt: config["system"] = system_prompt model_inputs = [ { "messages": [{"role": "user", "content": prompt}], **config, } for prompt in prompts ] if use_async: # A helper function to call the async API. async def _call_async_api() -> list[Any]: responses = await asyncio.gather( *map( lambda model_input: client.messages.create(**model_input), model_inputs, ), return_exceptions=True, ) return responses responses = asyncio.run(_call_async_api()) else: responses = [ _call_api_with_exception_filter(model_input) for model_input in tqdm_wrapper(model_inputs, desc=tqdm_description) ] # Filter out exceptions and print them out. for i, response in enumerate(responses): if not isinstance(response, Exception): continue print( "Anthropic failed to return an assessment corresponding to " f"{i}th prompt: {response}" ) responses[i] = None return responses