Source code for langcheck.augment.en._rephrase

from __future__ import annotations

from langcheck.metrics.eval_clients import (
    EvalClient,
)


def rephrase(
    instances: list[str] | str,
    *,
    num_perturbations: int = 1,
    eval_client: EvalClient,
    eval_prompt_version: str = "v2",
) -> list[str | None]:
    """Rephrases each string in instances (usually a list of prompts) without
    changing their meaning. We use a modified version of the prompt presented
    in `"Rethinking Benchmark and Contamination for Language Models with
    Rephrased Samples" <https://arxiv.org/abs/2311.04850>`__ to make an LLM
    rephrase the given text.

    Args:
        instances: A single string or a list of strings to be augmented.
        num_perturbations: The number of perturbed instances to generate for
            each string in instances.
        eval_client: The EvalClient used to generate the rephrased instances.
        eval_prompt_version: The version of the eval prompt to use when the
            EvalClient is used. The default version is 'v2' (latest).

    Returns:
        A list of rephrased instances.
    """
    if eval_prompt_version not in ["v1", "v2"]:
        raise ValueError(
            f"Invalid eval_prompt_version: {eval_prompt_version}. "
            "The valid versions are 'v1' and 'v2'."
        )

    prompt_template = eval_client.load_prompt_template(
        language="en",
        metric_name="rephrase",
        eval_prompt_version=eval_prompt_version,
    )

    instances = [instances] if isinstance(instances, str) else instances
    prompt_template_inputs = [{"instance": instance} for instance in instances]

    return eval_client.repeat_requests_from_template(
        prompt_template_inputs=prompt_template_inputs,
        template=prompt_template,
        num_perturbations=num_perturbations,
    )
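

# Example usage (a minimal sketch, not part of the module): this assumes an
# `OpenAIEvalClient` implementation is available in
# langcheck.metrics.eval_clients and that its API credentials are configured;
# any other EvalClient implementation can be substituted.
#
#     from langcheck.augment.en import rephrase
#     from langcheck.metrics.eval_clients import OpenAIEvalClient
#
#     client = OpenAIEvalClient()
#     rephrased = rephrase(
#         ["What is the capital of France?"],
#         num_perturbations=2,
#         eval_client=client,
#     )
#     # `rephrased` contains up to 2 rephrasings per input prompt,
#     # with None entries for requests that failed.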