Source code for langcheck.metrics.eval_clients._anthropic
from __future__ import annotations
import asyncio
import os
import warnings
from typing import Any
from anthropic import (
Anthropic,
AnthropicVertex,
AsyncAnthropic,
AsyncAnthropicVertex,
)
from langcheck.metrics.eval_clients.eval_response import (
ResponsesWithMetadata,
)
from langcheck.utils.progress_bar import tqdm_wrapper
from ..prompts._utils import get_template
from ._base import EvalClient
from .extractor import Extractor
[docs]
class AnthropicEvalClient(EvalClient):
"""EvalClient defined for Anthropic API."""
def __init__(
self,
anthropic_client: Anthropic
| AsyncAnthropic
| AnthropicVertex
| AsyncAnthropicVertex
| None = None,
anthropic_args: dict[str, Any] | None = None,
*,
use_async: bool = False,
vertexai: bool = False,
system_prompt: str | None = None,
extractor: Extractor | None = None,
):
"""
Initialize the Anthropic evaluation client. The authentication
information is automatically read from the environment variables.
If you want to use Anthropic API, please set `ANTHROPIC_API_KEY`.
If you want to use Vertex AI API, set the `vertexai` argument to True,
and please set the following environment variables:
- ANTHROPIC_VERTEX_PROJECT_ID=<your-project-id>
- CLOUD_ML_REGION=<region> (e.g. europe-west1)
- GOOGLE_APPLICATION_CREDENTIALS=<path-to-credentials-file>
References:
- https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
- https://cloud.google.com/docs/authentication/application-default-credentials
Args:
anthropic_client (Optional): The Anthropic client to use.
anthropic_args (Optional): dict of additional args to pass in to
the `client.messages.create` function
use_async: If True, the async client will be used. Ignored when
`anthropic_client` is provided. Defaults to False.
vertexai: If True, the Vertex AI client will be used. Ignored when
`anthropic_client` is provided. Defaults to False.
system_prompt (Optional): The system prompt to use. If not provided,
no system prompt will be used.
extractor (Optional): The extractor to use. If not provided, the
default extractor will be used.
"""
warnings.warn(
"AnthropicEvalClient will be deprecated in the next release."
"Please use LiteLLMEvalClient instead."
)
if anthropic_client is None:
if vertexai:
# Vertex AI requires these environment variables
for env_var in [
"ANTHROPIC_VERTEX_PROJECT_ID",
"CLOUD_ML_REGION",
"GOOGLE_APPLICATION_CREDENTIALS",
]:
if not os.environ.get(env_var):
raise ValueError(
f"Environment variable '{env_var}' must be set when using Vertex AI."
)
if not os.environ.get("ANTHROPIC_VERTEX_PROJECT_ID"):
raise ValueError(
"`ANTHROPIC_VERTEX_PROJECT_ID` must be set when using Vertex AI."
)
# Warn that `ANTHROPIC_API_KEY` is not used when using Vertex AI
if os.environ.get("ANTHROPIC_API_KEY", None):
warnings.warn(
"`ANTHROPIC_API_KEY` is set when using Vertex AI. "
"Vertex AI will take precedence over the API key from "
"the environment variable."
)
if use_async:
self._client = AsyncAnthropicVertex()
else:
self._client = AnthropicVertex()
else:
if os.environ.get("ANTHROPIC_API_KEY", None) is None:
raise ValueError(
"`ANTHROPIC_API_KEY` is not set when using Anthropic API. "
"Please set the `ANTHROPIC_API_KEY` environment variable."
)
if use_async:
self._client = AsyncAnthropic()
else:
self._client = Anthropic()
self._vertexai = vertexai
self._use_async = use_async
else:
self._client = anthropic_client
self._vertexai = isinstance(
anthropic_client, (AnthropicVertex, AsyncAnthropicVertex)
)
self._use_async = isinstance(
anthropic_client, (AsyncAnthropic, AsyncAnthropicVertex)
)
# Client config will take precedence over the argument, and the
# argument will be ignored.
if self._vertexai and not vertexai:
warnings.warn(
"The provided `anthropic_client` is a Vertex AI client, "
"so the `vertexai=False` argument will be ignored. The Vertex AI client will be used."
)
elif not self._vertexai and vertexai:
warnings.warn(
"The provided `anthropic_client` is an Anthropic client, "
"so the `vertexai=True` argument will be ignored. The Anthropic client will be used."
)
if self._use_async and not use_async:
warnings.warn(
"The provided `anthropic_client` is an async client, "
"so the `use_async=False` argument will be ignored. The async client will be used."
)
elif not self._use_async and use_async:
warnings.warn(
"The provided `anthropic_client` is a synchronous client, "
"so the `use_async=True` argument will be ignored. The synchronous client will be used."
)
self._anthropic_args = anthropic_args or {}
self._system_prompt = system_prompt
if system_prompt and "system" in self._anthropic_args:
warnings.warn(
'"system" of anthropic_args will be ignored because '
"system_prompt is provided."
)
if extractor is None:
self._extractor = AnthropicExtractor(
anthropic_client=self._client,
use_async=self._use_async,
vertexai=self._vertexai,
)
else:
self._extractor = extractor
[docs]
def get_text_responses(
self,
prompts: list[str],
*,
tqdm_description: str | None = None,
) -> ResponsesWithMetadata[str]:
"""The function that gets responses to the given prompt texts.
We use Anthropic's 'claude-3-haiku-20240307' model by default, but you
can configure it by passing the 'model' parameter in the anthropic_args.
Args:
prompts: The prompts you want to get the responses for.
Returns:
A list of responses to the prompts. The responses can be None if the
evaluation fails.
"""
config = {
# The model names are slightly different for Anthropic API and Vertex AI API
# Reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models
"model": "claude-3-haiku@20240307"
if self._vertexai
else "claude-3-haiku-20240307",
"max_tokens": 4096,
"temperature": 0.0,
}
config.update(self._anthropic_args or {})
tqdm_description = tqdm_description or "Intermediate assessments (1/2)"
responses = _call_api(
client=self._client,
prompts=prompts,
config=config,
use_async=self._use_async,
tqdm_description=tqdm_description,
system_prompt=self._system_prompt,
)
response_texts = [
response.content[0].text if response else None
for response in responses
]
# Token usage is not supported in AnthropicEvalClient
# If you need token usage, please use LiteLLMEvalClient instead.
return ResponsesWithMetadata(response_texts, None)
[docs]
def similarity_scorer(self):
raise NotImplementedError(
"Embedding-based metrics are not supported in AnthropicEvalClient."
"Use other EvalClients to get these metrics."
)
[docs]
class AnthropicExtractor(Extractor):
"""Score extractor for Anthropic API."""
def __init__(
self,
anthropic_client: Anthropic
| AsyncAnthropic
| AnthropicVertex
| AsyncAnthropicVertex
| None = None,
anthropic_args: dict[str, Any] | None = None,
*,
use_async: bool = False,
vertexai: bool = False,
):
"""
Initialize the Anthropic score extractor. The authentication information
is automatically read from the environment variables.
If you want to use Anthropic API, please set `ANTHROPIC_API_KEY`.
If you want to use Vertex AI API, set the `vertexai` argument to True,
and please set the following environment variables:
- ANTHROPIC_VERTEX_PROJECT_ID=<your-project-id>
- CLOUD_ML_REGION=<region> (e.g. europe-west1)
- GOOGLE_APPLICATION_CREDENTIALS=<path-to-credentials-file>
References:
- https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
- https://cloud.google.com/docs/authentication/application-default-credentials
Args:
anthropic_client (Optional): The Anthropic client to use.
anthropic_args (Optional): dict of additional args to pass in to
the `client.messages.create` function
use_async: If True, the async client will be used. Ignored when
`anthropic_client` is provided. Defaults to False.
vertexai: If True, the Vertex AI client will be used. Ignored when
`anthropic_client` is provided. Defaults to False.
"""
warnings.warn(
"AnthropicExtractor will be deprecated in the next release."
"Please use LiteLLMExtractor instead."
)
if anthropic_client is None:
if vertexai:
# Vertex AI requires these environment variables
for env_var in [
"ANTHROPIC_VERTEX_PROJECT_ID",
"CLOUD_ML_REGION",
"GOOGLE_APPLICATION_CREDENTIALS",
]:
if not os.environ.get(env_var):
raise ValueError(
f"Environment variable '{env_var}' must be set when using Vertex AI."
)
if not os.environ.get("ANTHROPIC_VERTEX_PROJECT_ID"):
raise ValueError(
"`ANTHROPIC_VERTEX_PROJECT_ID` must be set when using Vertex AI."
)
# Warn that `ANTHROPIC_API_KEY` is not used when using Vertex AI
if os.environ.get("ANTHROPIC_API_KEY", None):
warnings.warn(
"`ANTHROPIC_API_KEY` is set when using Vertex AI. "
"Vertex AI will take precedence over the API key from "
"the environment variable."
)
if use_async:
self._client = AsyncAnthropicVertex()
else:
self._client = AnthropicVertex()
else:
if os.environ.get("ANTHROPIC_API_KEY", None) is None:
raise ValueError(
"`ANTHROPIC_API_KEY` is not set when using Anthropic API. "
"Please set the `ANTHROPIC_API_KEY` environment variable."
)
if use_async:
self._client = AsyncAnthropic()
else:
self._client = Anthropic()
self._use_async = use_async
self._vertexai = vertexai
else:
self._client = anthropic_client
self._use_async = isinstance(
anthropic_client, (AsyncAnthropic, AsyncAnthropicVertex)
)
self._vertexai = isinstance(
anthropic_client, (AnthropicVertex, AsyncAnthropicVertex)
)
# Client config will take precedence over the argument, and the
# argument will be ignored.
if self._vertexai and not vertexai:
warnings.warn(
"The provided `anthropic_client` is a Vertex AI client, "
"so the `vertexai=False` argument will be ignored. The Vertex AI client will be used."
)
elif not self._vertexai and vertexai:
warnings.warn(
"The provided `anthropic_client` is an Anthropic client, "
"so the `vertexai=True` argument will be ignored. The Anthropic client will be used."
)
if self._use_async and not use_async:
warnings.warn(
"The provided `anthropic_client` is an async client, "
"so the `use_async=False` argument will be ignored. The async client will be used."
)
elif not self._use_async and use_async:
warnings.warn(
"The provided `anthropic_client` is a synchronous client, "
"so the `use_async=True` argument will be ignored. The synchronous client will be used."
)
self._anthropic_args = anthropic_args or {}
[docs]
def get_float_score(
self,
metric_name: str,
language: str,
unstructured_assessment_result: list[str | None],
score_map: dict[str, float],
*,
tqdm_description: str | None = None,
) -> ResponsesWithMetadata[float]:
"""The function that transforms the unstructured assessments (i.e. long
texts that describe the evaluation results) into scores.
Args:
metric_name : The name of the metric to be used. (e.g. "toxicity")
language: The language of the prompts. (e.g. "en")
unstructured_assessment_result: The unstructured assessment results
for the given assessment prompts.
score_map: The mapping from the short assessment results
(e.g. "Good") to the scores.
tqdm_description: The description to be shown in the tqdm bar.
Returns:
A list of scores for the given prompts. The scores can be None if
the evaluation fails.
"""
if language not in ["en", "ja", "de"]:
raise ValueError(f"Unsupported language: {language}")
options = list(score_map.keys())
get_score_template = get_template(f"{language}/get_score/plain_text.j2")
get_score_prompts = [
get_score_template.render(
{
"metric": metric_name,
"unstructured_assessment": unstructured_assessment,
"options": options,
}
)
if unstructured_assessment
else None
for unstructured_assessment in unstructured_assessment_result
]
config = {
# The model names are slightly different for Anthropic API and Vertex AI API
# Reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models
"model": "claude-3-haiku@20240307"
if self._vertexai
else "claude-3-haiku-20240307",
"max_tokens": 1024,
}
config.update(self._anthropic_args or {})
tqdm_description = tqdm_description or "Scores (2/2)"
responses = _call_api(
client=self._client,
prompts=get_score_prompts,
config=config,
use_async=self._use_async,
tqdm_description=tqdm_description,
)
raw_response_texts = [
response.content[0].text if response else None
for response in responses
]
def _turn_to_score(response: str | None) -> float | None:
if response is None:
return None
option_found = [option for option in options if option in response]
# if response contains multiple options as substrings, return None
if len(option_found) != 1:
return None
return score_map[option_found[0]]
# Token usage is not supported in AnthropicExtractor
# If you need token usage, please use LiteLLMExtractor instead.
return ResponsesWithMetadata(
[_turn_to_score(response) for response in raw_response_texts],
None,
)
def _call_api(
client: Anthropic | AsyncAnthropic | AnthropicVertex | AsyncAnthropicVertex,
prompts: list[str] | list[str | None],
config: dict[str, Any],
*,
use_async: bool = False,
system_prompt: str | None = None,
tqdm_description: str | None = None,
) -> list[Any]:
"""A helper function to call the Anthropic API."""
# A helper function to call the API with exception filter for alignment
# of exception handling with the async version.
def _call_api_with_exception_filter(model_input: dict[str, Any]) -> Any:
if model_input is None:
return None
try:
return client.messages.create(**model_input)
except Exception as e:
return e
if system_prompt:
config["system"] = system_prompt
model_inputs = [
{
"messages": [{"role": "user", "content": prompt}],
**config,
}
for prompt in prompts
]
if use_async:
# A helper function to call the async API.
async def _call_async_api() -> list[Any]:
responses = await asyncio.gather(
*map(
lambda model_input: client.messages.create(**model_input),
model_inputs,
),
return_exceptions=True,
)
return responses
responses = asyncio.run(_call_async_api())
else:
responses = [
_call_api_with_exception_filter(model_input)
for model_input in tqdm_wrapper(model_inputs, desc=tqdm_description)
]
# Filter out exceptions and print them out.
for i, response in enumerate(responses):
if not isinstance(response, Exception):
continue
print(
"Anthropic failed to return an assessment corresponding to "
f"{i}th prompt: {response}"
)
responses[i] = None
return responses