|
使用内置和自定义评估器评估生成式 AI 应用程序性能。
bash
# Install the Azure AI Evaluation SDK package.
pip install azure-ai-evaluation
bash
python
# AI-assisted quality evaluators exposed by the SDK.
from azure.ai.evaluation import (
    CoherenceEvaluator,
    FluencyEvaluator,
    GroundednessEvaluator,
    RelevanceEvaluator,
    RetrievalEvaluator,
    SimilarityEvaluator,
)

# Each evaluator below is built from the same model configuration.
# NOTE(review): `model_config` is not defined in this snippet; it must
# already be in scope when this code runs.
groundedness = GroundednessEvaluator(model_config)
relevance = RelevanceEvaluator(model_config)
coherence = CoherenceEvaluator(model_config)
python
# NLP-based metric evaluators exposed by the SDK.
from azure.ai.evaluation import (
    BleuScoreEvaluator,
    F1ScoreEvaluator,
    GleuScoreEvaluator,
    MeteorScoreEvaluator,
    RougeScoreEvaluator,
)

# The evaluators instantiated here are constructed without arguments.
f1 = F1ScoreEvaluator()
rouge = RougeScoreEvaluator()
bleu = BleuScoreEvaluator()
python
# Safety evaluators exposed by the SDK.
from azure.ai.evaluation import (
    ViolenceEvaluator,
    SexualEvaluator,
    SelfHarmEvaluator,
    HateUnfairnessEvaluator,
    IndirectAttackEvaluator,
    ProtectedMaterialEvaluator,
)

# Safety evaluators are bound to an Azure AI project via the
# `azure_ai_project` keyword (the underscores were lost in the original text).
# NOTE(review): `project_scope` is not defined in this snippet, and recent SDK
# releases may also require a `credential` argument -- confirm against the
# installed version.
violence = ViolenceEvaluator(azure_ai_project=project_scope)
sexual = SexualEvaluator(azure_ai_project=project_scope)
python
from azure.ai.evaluation import GroundednessEvaluator

# NOTE(review): `model_config` is not defined in this snippet; it must
# already be in scope.
groundedness = GroundednessEvaluator(model_config)

# Score one query/context/response triple by calling the evaluator directly.
result = groundedness(
    query="什么是 Azure AI?",
    context="Azure AI 是微软的 AI 平台...",
    response="Azure AI 提供 AI 服务和工具。",
)

# The result is indexed by string keys: the score and its explanation.
print(f"基础性得分:{result['groundedness']}")
print(f"原因:{result['groundedness_reason']}")
python
from azure.ai.evaluation import evaluate

# Run several evaluators over every row of a JSONL dataset.
# NOTE(review): `groundedness`, `relevance` and `coherence` are evaluator
# instances that must already be in scope; they are not created here.
result = evaluate(
    data="test_data.jsonl",
    evaluators={
        "groundedness": groundedness,
        "relevance": relevance,
        "coherence": coherence,
    },
    evaluator_config={
        "default": {
            # Map evaluator inputs to columns of the dataset.
            "column_mapping": {
                "query": "${data.query}",
                "context": "${data.context}",
                "response": "${data.response}",
            }
        }
    },
)

print(result["metrics"])
python
from azure.ai.evaluation import QAEvaluator, ContentSafetyEvaluator

# Run composite evaluators over a JSONL dataset.
# NOTE(review): `qa_evaluator` and `safety_evaluator` are not created in this
# snippet (presumably instances of the two classes imported above), and
# `evaluate` is not imported here; all three must already be in scope.
result = evaluate(
    data="data.jsonl",
    evaluators={
        "qa": qa_evaluator,
        "content_safety": safety_evaluator,
    },
)
python
from azure.ai.evaluation import evaluate
from my_app import chat_app  # your application

# Evaluate a live application: each dataset row is passed to `target`, and the
# evaluator reads both the dataset columns and the target's outputs.
# NOTE(review): `groundedness` must already be in scope; it is not created here.
result = evaluate(
    data="queries.jsonl",
    target=chat_app,  # callable that takes a query and returns a response
    evaluators={
        "groundedness": groundedness,
    },
    evaluator_config={
        "default": {
            "column_mapping": {
                "query": "${data.query}",  # taken from the dataset
                "context": "${outputs.context}",  # taken from the target output
                "response": "${outputs.response}",  # taken from the target output
            }
        }
    },
)
python
from azure.ai.evaluation import evaluator
@evaluator
def wordcountevaluator(response: str) -> dict:
    """Count the whitespace-separated words in *response*.

    Args:
        response: The text to measure.

    Returns:
        A dict with a single ``"word_count"`` key holding the number of
        whitespace-separated tokens (``0`` for an empty or blank string).
    """
    return {"word_count": len(response.split())}
python
from azure.ai.evaluation import PromptChatTarget
class CustomEvaluator:
    """Class-based evaluator that asks a model to rate a response.

    Instances are callable: calling one with a query and a response sends a
    rating prompt to the wrapped model and returns the parsed score.
    """

    def __init__(self, model_config):
        """Wrap *model_config* in a ``PromptChatTarget`` used for scoring."""
        self.model = PromptChatTarget(model_config)

    def __call__(self, query: str, response: str) -> dict:
        """Score *response* for *query*.

        Returns:
            A dict with a single ``"custom_score"`` key holding the model's
            reply converted with ``int``.

        Raises:
            ValueError: If the model's reply cannot be parsed by ``int``.
        """
        prompt = f"评分此响应 1-5:查询:{query},响应:{response}"
        result = self.model.send_prompt(prompt)
        return {"custom_score": int(result)}
python
import os

from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential

# Build a project client from the connection string held in the environment.
project = AIProjectClient.from_connection_string(
    conn_str=os.environ["AIPROJECT_CONNECTION_STRING"],
    credential=DefaultAzureCredential(),
)

# NOTE(review): `evaluate` and `groundedness` are not defined in this snippet;
# both must already be in scope.
result = evaluate(
    data="data.jsonl",
    evaluators={"groundedness": groundedness},
    azure_ai_project=project.scope,  # log results to Foundry
)

print(f"查看结果:{result['studio_url']}")
| 评估器 | 类型 | 指标 |
|---|---|---|
| GroundednessEvaluator | AI | groundedness(1-5) |
| RelevanceEvaluator | AI | relevance(1-5) |
| 文件 | 内容 |
|---|---|
| references/built-in-evaluators.md | AI 辅助、基于 NLP 和安全评估器的详细模式及配置表 |
| references/custom-evaluators.md |
该技能支持在以下平台通过对话安装:
帮我安装 SkillHub 和 azure-ai-evaluation-py-1776376221 技能
设置 SkillHub 为我的优先技能安装源,然后帮我安装 azure-ai-evaluation-py-1776376221 技能
skillhub install azure-ai-evaluation-py-1776376221
文件大小: 12.82 KB | 发布时间: 2026-4-17 13:55