
Commit

fix context coverage metric input (#33)
* fix context coverage metric input
yisz committed Feb 17, 2024
1 parent 696f8d0 commit 4839e9e
Showing 2 changed files with 41 additions and 31 deletions.
60 changes: 34 additions & 26 deletions continuous_eval/metrics/retrieval_LLM_based_metrics.py
```diff
@@ -85,7 +85,7 @@ def __init__(self, model: Optional[LLMInterface] = None, use_few_shot: bool = True):
     def __str__(self):
         return f"LLMBasedContextCoverage(model={self.model}, use_few_shot={self.use_few_shot})"
 
-    def calculate(self, question, retrieved_contexts, answer, **kwargs):
+    def calculate(self, question, retrieved_contexts, ground_truths, **kwargs):
         """
         Calculate the context coverage score for the given datapoint.
         """
@@ -113,31 +113,39 @@ def calculate(self, question, retrieved_contexts, answer, **kwargs):
             else ""
         )
 
-        prompt = {
-            "system_prompt": (
-                """
-Given a question, context, and answer, analyze each statement in the answer and classify if the statement can be attributed to the given context or not. Output JSON strictly in the following format.
-"""
-                + few_shot_prompt
-            ),
-            "user_prompt": ("question: " + question + "\ncontext: " + context + "\nanswer: " + answer),
-        }
-
-        content = self._llm.run(prompt)
-
-        try:
-            coverage = self.extract_attributed_from_broken_json(content)
-        except Exception as e:
-            print(f"{type(e).__name__} Error: {content}, skipping")
-            return {
-                "LLM_based_context_coverage": None,
-                "LLM_based_context_statements": content,
-            }
-
-        return {
-            "LLM_based_context_coverage": coverage,
-            "LLM_based_context_statements": content,
-        }
+        scores = []
+        for gt in ground_truths:
+            prompt = {
+                "system_prompt": (
+                    """
+Given a question, context, and answer, analyze each statement in the answer and classify if the statement can be attributed to the given context or not. Output JSON strictly in the following format.
+"""
+                    + few_shot_prompt
+                ),
+                "user_prompt": ("question: " + question + "\ncontext: " + context + "\nanswer: " + gt),
+            }
+
+            content = self._llm.run(prompt)
+
+            try:
+                coverage = self.extract_attributed_from_broken_json(content)
+            except Exception as e:
+                print(f"{type(e).__name__} Error: {content}, skipping")
+                scores.append(
+                    {
+                        "LLM_based_context_coverage": -1.0,
+                        "LLM_based_context_statements": content,
+                    }
+                )
+            else:
+                scores.append(
+                    {
+                        "LLM_based_context_coverage": coverage,
+                        "LLM_based_context_statements": content,
+                    }
+                )
+
+        return max(scores, key=lambda x: x["LLM_based_context_coverage"])
 
     @staticmethod
     def extract_attributed_from_broken_json(statements):
@@ -147,6 +155,6 @@ def extract_attributed_from_broken_json(statements):
             attributed_numbers = [int(num) for group in attributed_numbers for num in group if num]
         except Exception as e:
             print(f"{type(e).__name__} Error: {attributed_numbers}, skipping")
-            return None
-        coverage = sum(attributed_numbers) / len(attributed_numbers) if attributed_numbers else None
+            return -1.0
+        coverage = sum(attributed_numbers) / len(attributed_numbers) if attributed_numbers else -1.0
         return coverage
```
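For reference, a minimal, hypothetical usage sketch of the updated `calculate` signature. Only the `calculate(question, retrieved_contexts, ground_truths)` signature, the max-over-ground-truths behavior, and the -1.0 fallback come from this commit; the import path and example data are assumptions.

```python
# Hypothetical sketch; the import path is inferred from the changed module
# (continuous_eval/metrics/retrieval_LLM_based_metrics.py) and may differ.
from continuous_eval.metrics.retrieval_LLM_based_metrics import LLMBasedContextCoverage

# Constructor arguments as in __init__(model=None, use_few_shot=True) shown above.
metric = LLMBasedContextCoverage(use_few_shot=True)

result = metric.calculate(
    question="What is the capital of France?",
    retrieved_contexts=["Paris is the capital and most populous city of France."],
    ground_truths=[
        "Paris is the capital of France.",
        "Paris is the capital of France and Lyon is its second largest city.",
    ],
)

# The metric is computed once per ground truth; the dict with the highest
# LLM_based_context_coverage is returned (failed JSON parses score -1.0).
print(result["LLM_based_context_coverage"])
print(result["LLM_based_context_statements"])
```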
````diff
@@ -16,6 +16,8 @@ $$
 }
 $$
 
+This metric requires the LLM evaluator to output relatively complex JSON correctly. If the JSON cannot be parsed, the metric returns a score of -1.0.
+
 
 ### Example Usage
 
@@ -41,9 +43,9 @@ print(metric.calculate(**datum))
 ### Sample Output
 
 ```JSON
-{
-    'LLM_based_context_coverage': 0.5,
-    'LLM_based_context_statements':
+{
+    'LLM_based_context_coverage': 0.5,
+    'LLM_based_context_statements':
 {
     "classification": [
         {
@@ -52,8 +54,8 @@ print(metric.calculate(**datum))
             "Attributed": 1
         },
         {
-            "statement_2": "Lyon is the second largest city in France.",
-            "reason": "The context does not provide information about the ranking of Lyon in terms of size compared to other French cities.",
+            "statement_2": "Marseille is the second largest city in France.",
+            "reason": "This information is not provided in the context, which only mentions Paris and Lyon.",
             "Attributed": 0
         }
     ]
````
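As a sanity check on the sample output, the 0.5 coverage is simply the mean of the extracted `"Attributed"` flags, matching the `sum(...) / len(...)` computation in `extract_attributed_from_broken_json`. A tiny illustration:

```python
# One of the two classified statements is attributed to the context (flags 1 and 0),
# so coverage = (1 + 0) / 2 = 0.5; an unparsable result would instead yield -1.0.
attributed_flags = [1, 0]
coverage = sum(attributed_flags) / len(attributed_flags) if attributed_flags else -1.0
print(coverage)  # 0.5
```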
