2025-04-21 18:19:09 +08:00

636 lines
19 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package report
import (
	"encoding/csv"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"time"

	"github.com/acmestudio/llm-api-benchmark-tool/benchmark"
	"github.com/acmestudio/llm-api-benchmark-tool/config"
	"github.com/acmestudio/llm-api-benchmark-tool/logger"
	"github.com/go-echarts/go-echarts/v2/charts"
	"github.com/go-echarts/go-echarts/v2/components"
	"github.com/go-echarts/go-echarts/v2/opts"
	"github.com/montanaflynn/stats"
)
// Metrics holds the aggregate performance figures computed for one benchmark run.
type Metrics struct {
	// Request counters.
	TotalRequests      int // number of recorded requests
	SuccessfulRequests int // requests whose Error is nil
	FailedRequests     int // TotalRequests minus SuccessfulRequests

	// TimeoutRatio is FailedRequests / TotalRequests. Every failed request is
	// counted, whatever the cause of the failure.
	TimeoutRatio float64

	// Response-time statistics, computed over successful requests only.
	AvgResponseTime time.Duration // mean response time
	P90ResponseTime time.Duration // 90th percentile
	P95ResponseTime time.Duration // 95th percentile
	P99ResponseTime time.Duration // 99th percentile
	MaxResponseTime time.Duration // slowest successful request
	MinResponseTime time.Duration // fastest successful request

	// Throughput.
	AvgQPS       float64 // successful requests per second over the whole run
	MaxQPS       float64 // highest per-concurrency-step QPS
	AvgTokenRate float64 // completion tokens per second over the whole run
	MaxTokenRate float64 // highest per-concurrency-step token rate

	// MaxConcurrency is the largest concurrency level whose step QPS was above zero.
	MaxConcurrency int
}
// GenerateReport produces the benchmark report artifacts inside outputPath.
//
// It creates the output directory, computes the aggregate metrics, logs a
// summary of them, and then writes the HTML report followed by the CSV data.
// The first failing step aborts the run; its error is returned wrapped with
// a message naming the step. An empty result set is only logged as a warning
// and does not stop generation.
func GenerateReport(results *benchmark.BenchmarkResults, cfg *config.Config, outputPath string) error {
	// Make sure the destination directory exists before anything is written.
	logger.Info("创建报告输出目录: %s", outputPath)
	mkdirErr := os.MkdirAll(outputPath, 0755)
	if mkdirErr != nil {
		return fmt.Errorf("创建输出目录失败: %w", mkdirErr)
	}

	// Report how much data there is to work with.
	if recordCount := len(results.Results); recordCount > 0 {
		logger.Debug("共有 %d 条测试结果记录", recordCount)
	} else {
		logger.Warn("没有测试结果数据,无法生成详细报告")
	}

	// Aggregate the raw results.
	logger.Info("计算性能指标...")
	summary := calculateMetrics(results)

	// Log the headline numbers.
	logger.Info("性能指标摘要:")
	logger.Info(" 总请求数: %d (成功: %d, 失败: %d)",
		summary.TotalRequests, summary.SuccessfulRequests, summary.FailedRequests)
	logger.Info(" 超时比率: %.2f%%", summary.TimeoutRatio*100)
	logger.Info(" 平均响应时间: %v", summary.AvgResponseTime)
	logger.Info(" P90响应时间: %v", summary.P90ResponseTime)
	logger.Info(" P95响应时间: %v", summary.P95ResponseTime)
	logger.Info(" P99响应时间: %v", summary.P99ResponseTime)
	logger.Info(" 平均QPS: %.2f", summary.AvgQPS)
	logger.Info(" 最大QPS: %.2f", summary.MaxQPS)
	logger.Info(" 平均Token生成速率: %.2f tokens/s", summary.AvgTokenRate)
	logger.Info(" 最大Token生成速率: %.2f tokens/s", summary.MaxTokenRate)
	logger.Info(" 最大有效并发用户数: %d", summary.MaxConcurrency)

	// Write the HTML report.
	logger.Info("生成HTML报告...")
	htmlErr := generateHTMLReport(results, summary, cfg, outputPath)
	if htmlErr != nil {
		return fmt.Errorf("生成HTML报告失败: %w", htmlErr)
	}

	// Write the raw per-request data as CSV.
	logger.Info("生成CSV数据...")
	csvErr := generateCSVData(results, outputPath)
	if csvErr != nil {
		return fmt.Errorf("生成CSV数据失败: %w", csvErr)
	}

	logger.Info("报告生成完成")
	return nil
}
// calculateMetrics derives the aggregate Metrics for a benchmark run.
//
// Request counters and TimeoutRatio are always filled in. TimeoutRatio is the
// share of requests with a non-nil Error (all failures, not only timeouts) and
// stays 0 when there are no requests at all. When no request succeeded the
// function returns early and every remaining field keeps its zero value.
//
// Response-time statistics are computed over successful requests from their
// millisecond-truncated response times. AvgQPS and AvgTokenRate divide by
// EndTime-StartTime and are left at 0 when that span is not positive.
// MaxQPS, MaxTokenRate and MaxConcurrency are taken over the per-concurrency
// steps in results.ConcurrencyData; a step's duration is the span between its
// earliest and latest Timestamp, and steps with no data or a non-positive
// span are skipped.
//
// The input slices are not modified.
func calculateMetrics(results *benchmark.BenchmarkResults) *Metrics {
	metrics := &Metrics{}

	// Total number of requests.
	metrics.TotalRequests = len(results.Results)
	logger.Debug("计算指标 - 总请求数: %d", metrics.TotalRequests)

	// Split successes from failures.
	successfulRequests := make([]benchmark.Result, 0, len(results.Results))
	for _, result := range results.Results {
		if result.Error == nil {
			successfulRequests = append(successfulRequests, result)
		}
	}
	metrics.SuccessfulRequests = len(successfulRequests)
	metrics.FailedRequests = metrics.TotalRequests - metrics.SuccessfulRequests
	logger.Debug("计算指标 - 成功请求: %d, 失败请求: %d",
		metrics.SuccessfulRequests, metrics.FailedRequests)

	// Failure ratio. Guarded because 0/0 on floats yields NaN, which would
	// otherwise leak into logs and reports for an empty run.
	if metrics.TotalRequests > 0 {
		metrics.TimeoutRatio = float64(metrics.FailedRequests) / float64(metrics.TotalRequests)
	}
	logger.Debug("计算指标 - 超时比率: %.2f%%", metrics.TimeoutRatio*100)

	// Nothing else can be computed without at least one successful request.
	if len(successfulRequests) == 0 {
		logger.Warn("没有成功的请求,无法计算详细指标")
		return metrics
	}

	// Response times of successful requests, in milliseconds, ascending.
	responseTimes := make([]float64, 0, len(successfulRequests))
	for _, result := range successfulRequests {
		responseTimes = append(responseTimes, float64(result.ResponseTime.Milliseconds()))
	}
	sort.Float64s(responseTimes)

	// The errors are deliberately ignored: the input is non-empty here and the
	// requested percentiles are fixed values within (0, 100], so these calls
	// are not expected to fail.
	p90, _ := stats.Percentile(responseTimes, 90)
	p95, _ := stats.Percentile(responseTimes, 95)
	p99, _ := stats.Percentile(responseTimes, 99)
	avg, _ := stats.Mean(responseTimes)
	slowest := responseTimes[len(responseTimes)-1]
	fastest := responseTimes[0]

	// Scale before converting so fractional milliseconds of the mean and the
	// percentiles survive; time.Duration(ms) alone would truncate them.
	toDuration := func(ms float64) time.Duration {
		return time.Duration(ms * float64(time.Millisecond))
	}
	metrics.AvgResponseTime = toDuration(avg)
	metrics.P90ResponseTime = toDuration(p90)
	metrics.P95ResponseTime = toDuration(p95)
	metrics.P99ResponseTime = toDuration(p99)
	metrics.MaxResponseTime = toDuration(slowest)
	metrics.MinResponseTime = toDuration(fastest)
	logger.Debug("计算指标 - 响应时间(ms) - 平均: %.2f, P90: %.2f, P95: %.2f, P99: %.2f, 最小: %.2f, 最大: %.2f",
		avg, p90, p95, p99, fastest, slowest)

	// Overall QPS. A non-positive run duration would produce Inf/NaN, so the
	// averages are left at zero in that case.
	totalDuration := results.EndTime.Sub(results.StartTime).Seconds()
	if totalDuration > 0 {
		metrics.AvgQPS = float64(metrics.SuccessfulRequests) / totalDuration
	}
	logger.Debug("计算指标 - 平均QPS: %.2f (总持续时间: %.2f秒)", metrics.AvgQPS, totalDuration)

	// Summarise every concurrency step in a single pass. The time span is
	// found with a min/max scan instead of sorting, so the caller's slices
	// are left in their original order.
	type stepStats struct {
		concurrency int
		requests    int
		seconds     float64
		successes   int
		tokens      int
	}
	steps := make([]stepStats, 0, len(results.ConcurrencyData))
	for concurrency, stepResults := range results.ConcurrencyData {
		step := stepStats{concurrency: concurrency, requests: len(stepResults)}
		if len(stepResults) > 0 {
			first, last := stepResults[0].Timestamp, stepResults[0].Timestamp
			for _, result := range stepResults {
				if result.Timestamp.Before(first) {
					first = result.Timestamp
				}
				if result.Timestamp.After(last) {
					last = result.Timestamp
				}
				if result.Error == nil {
					step.successes++
					step.tokens += result.CompletionTokens
				}
			}
			step.seconds = last.Sub(first).Seconds()
		}
		steps = append(steps, step)
	}
	// Map iteration order is random; visit steps in ascending concurrency so
	// the debug output is reproducible.
	sort.Slice(steps, func(i, j int) bool {
		return steps[i].concurrency < steps[j].concurrency
	})

	// Per-step QPS.
	logger.Debug("计算各并发步骤的QPS...")
	for _, step := range steps {
		if step.requests == 0 {
			logger.Debug(" 并发步骤 %d: 没有请求数据", step.concurrency)
			continue
		}
		if step.seconds <= 0 {
			logger.Debug(" 并发步骤 %d: 持续时间过短", step.concurrency)
			continue
		}
		stepQPS := float64(step.successes) / step.seconds
		logger.Debug(" 并发步骤 %d: QPS = %.2f (成功请求: %d, 持续时间: %.2f秒)",
			step.concurrency, stepQPS, step.successes, step.seconds)
		if stepQPS > metrics.MaxQPS {
			metrics.MaxQPS = stepQPS
			logger.Debug(" 更新最大QPS: %.2f (并发步骤: %d)", metrics.MaxQPS, step.concurrency)
		}
		// A concurrency level only counts as effective if it served requests.
		if step.concurrency > metrics.MaxConcurrency && stepQPS > 0 {
			metrics.MaxConcurrency = step.concurrency
			logger.Debug(" 更新最大有效并发用户数: %d", metrics.MaxConcurrency)
		}
	}

	// Overall token generation rate.
	var totalTokens int
	for _, result := range successfulRequests {
		totalTokens += result.CompletionTokens
	}
	if totalDuration > 0 {
		metrics.AvgTokenRate = float64(totalTokens) / totalDuration
	}
	logger.Debug("计算指标 - 平均Token生成速率: %.2f tokens/s (总Token: %d)",
		metrics.AvgTokenRate, totalTokens)

	// Per-step token generation rate.
	logger.Debug("计算各并发步骤的Token生成速率...")
	for _, step := range steps {
		if step.requests == 0 || step.seconds <= 0 {
			continue
		}
		stepTokenRate := float64(step.tokens) / step.seconds
		logger.Debug(" 并发步骤 %d: Token生成速率 = %.2f tokens/s (总Token: %d, 持续时间: %.2f秒)",
			step.concurrency, stepTokenRate, step.tokens, step.seconds)
		if stepTokenRate > metrics.MaxTokenRate {
			metrics.MaxTokenRate = stepTokenRate
			logger.Debug(" 更新最大Token生成速率: %.2f tokens/s (并发步骤: %d)",
				metrics.MaxTokenRate, step.concurrency)
		}
	}

	return metrics
}
// generateHTMLReport renders all charts into <outputPath>/report.html.
//
// The page contains, in order: the metrics overview, the response-time
// distribution, QPS over time, token rate over time, and response time versus
// concurrency. It returns the error from creating, rendering or closing the
// file; a close failure is reported only when nothing failed earlier, so a
// write that is lost at close time no longer goes unnoticed.
func generateHTMLReport(results *benchmark.BenchmarkResults, metrics *Metrics, cfg *config.Config, outputPath string) (err error) {
	// Set up the page.
	logger.Debug("创建HTML报告页面...")
	page := components.NewPage()
	page.PageTitle = "LLM API 基准测试报告"
	page.Theme = "white"

	// Metrics overview.
	logger.Debug("添加概述图表...")
	page.AddCharts(createOverviewChart(results, metrics, cfg))

	// Response-time distribution.
	logger.Debug("添加响应时间分布图...")
	page.AddCharts(createResponseTimeDistributionChart(results))

	// QPS over time.
	logger.Debug("添加QPS随时间变化图...")
	page.AddCharts(createQPSOverTimeChart(results))

	// Token generation rate over time.
	logger.Debug("添加Token生成速率随时间变化图...")
	page.AddCharts(createTokenRateOverTimeChart(results))

	// Response time versus concurrency.
	logger.Debug("添加并发与响应时间关系图...")
	page.AddCharts(createConcurrencyVsResponseTimeChart(results))

	// Write the page to disk.
	htmlPath := filepath.Join(outputPath, "report.html")
	logger.Debug("保存HTML报告: %s", htmlPath)
	f, err := os.Create(htmlPath)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a close failure unless an earlier error is already being returned.
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	return page.Render(f)
}
// createOverviewChart builds a bar chart with the average, P90, P95 and P99
// response times from metrics, each expressed in whole milliseconds.
// The results and cfg parameters are accepted but not used.
func createOverviewChart(results *benchmark.BenchmarkResults, metrics *Metrics, cfg *config.Config) *charts.Bar {
	overview := charts.NewBar()
	overview.SetGlobalOptions(
		charts.WithTitleOpts(opts.Title{
			Title: "性能指标概述",
		}),
	)

	// Category labels and their values, kept in matching order.
	categories := []string{"平均响应时间 (ms)", "P90响应时间 (ms)", "P95响应时间 (ms)", "P99响应时间 (ms)"}
	millis := []int64{
		metrics.AvgResponseTime.Milliseconds(),
		metrics.P90ResponseTime.Milliseconds(),
		metrics.P95ResponseTime.Milliseconds(),
		metrics.P99ResponseTime.Milliseconds(),
	}

	bars := make([]opts.BarData, 0, len(millis))
	for _, ms := range millis {
		bars = append(bars, opts.BarData{Value: ms})
	}

	overview.SetXAxis(categories)
	overview.AddSeries("响应时间", bars)
	return overview
}
// createResponseTimeDistributionChart builds a histogram of successful
// requests bucketed by response time in milliseconds. Each bucket is bounded
// above (inclusive) by one of the fixed limits 100, 200, 500, 1000, 2000, 5000
// and 10000 ms; a final bucket collects everything slower. Failed requests
// are ignored.
func createResponseTimeDistributionChart(results *benchmark.BenchmarkResults) *charts.Bar {
	histogram := charts.NewBar()
	histogram.SetGlobalOptions(
		charts.WithTitleOpts(opts.Title{
			Title: "响应时间分布",
		}),
	)

	// Inclusive upper bounds in ms, ascending. Index len(upperBounds) is the
	// overflow bucket.
	upperBounds := []int{100, 200, 500, 1000, 2000, 5000, 10000}
	overflow := len(upperBounds)
	buckets := make([]int, overflow+1)

	for _, r := range results.Results {
		if r.Error != nil {
			continue
		}
		elapsed := r.ResponseTime.Milliseconds()
		// First bound that is >= elapsed; sort.Search yields overflow when
		// elapsed exceeds every bound.
		slot := sort.Search(overflow, func(i int) bool {
			return elapsed <= int64(upperBounds[i])
		})
		buckets[slot]++
	}

	// One label per bucket: "≤first", "prev-cur" for the middle ones, ">last".
	labels := make([]string, 0, overflow+1)
	labels = append(labels, fmt.Sprintf("≤%dms", upperBounds[0]))
	for i := 1; i < overflow; i++ {
		labels = append(labels, fmt.Sprintf("%d-%dms", upperBounds[i-1], upperBounds[i]))
	}
	labels = append(labels, fmt.Sprintf(">%dms", upperBounds[overflow-1]))

	bars := make([]opts.BarData, 0, len(buckets))
	for _, n := range buckets {
		bars = append(bars, opts.BarData{Value: n})
	}

	histogram.SetXAxis(labels)
	histogram.AddSeries("请求数", bars)
	return histogram
}
// createQPSOverTimeChart builds a line chart of successful requests per
// second, one series per concurrency step, measured in 5-second windows.
//
// All series share a single timeline that runs from the earliest to the
// latest Timestamp found in results.ConcurrencyData, so every data point
// lines up with its X-axis label. A series is 0 in windows where its step has
// no successful request. Series are added in ascending concurrency order so
// the legend is stable between runs. Steps without any data are skipped, and
// the input slices are not reordered.
func createQPSOverTimeChart(results *benchmark.BenchmarkResults) *charts.Line {
	chart := charts.NewLine()
	chart.SetGlobalOptions(
		charts.WithTitleOpts(opts.Title{
			Title: "QPS随时间变化",
		}),
	)

	// Width of one aggregation window.
	const windowSize = 5 * time.Second

	// Determine the overall time span and the steps that have data.
	var (
		start, end    time.Time
		haveData      bool
		concurrencies []int
	)
	for concurrency, stepResults := range results.ConcurrencyData {
		if len(stepResults) == 0 {
			continue
		}
		concurrencies = append(concurrencies, concurrency)
		for _, result := range stepResults {
			ts := result.Timestamp
			if !haveData {
				start, end, haveData = ts, ts, true
				continue
			}
			if ts.Before(start) {
				start = ts
			}
			if ts.After(end) {
				end = ts
			}
		}
	}
	if !haveData {
		return chart
	}
	// Map iteration order is random; sort for a deterministic series order.
	sort.Ints(concurrencies)

	// Build the shared X axis once, labelled with each window's start time.
	windowCount := int(end.Sub(start)/windowSize) + 1
	xAxisData := make([]string, windowCount)
	for i := range xAxisData {
		xAxisData[i] = start.Add(time.Duration(i) * windowSize).Format("15:04:05")
	}
	chart.SetXAxis(xAxisData)

	for _, concurrency := range concurrencies {
		// Count successful requests per window for this step.
		counts := make([]int, windowCount)
		for _, result := range results.ConcurrencyData[concurrency] {
			if result.Error != nil {
				continue
			}
			windowIndex := int(result.Timestamp.Sub(start) / windowSize)
			if windowIndex >= 0 && windowIndex < windowCount {
				counts[windowIndex]++
			}
		}

		// Convert counts to requests per second.
		qpsData := make([]opts.LineData, windowCount)
		for i, count := range counts {
			qpsData[i] = opts.LineData{Value: float64(count) / windowSize.Seconds()}
		}
		chart.AddSeries(fmt.Sprintf("并发 %d", concurrency), qpsData)
	}

	return chart
}
// createTokenRateOverTimeChart builds a line chart of completion tokens per
// second, one series per concurrency step, measured in 5-second windows.
//
// All series share a single timeline that runs from the earliest to the
// latest Timestamp found in results.ConcurrencyData, so every data point
// lines up with its X-axis label. A series is 0 in windows where its step has
// no successful request. Series are added in ascending concurrency order so
// the legend is stable between runs. Steps without any data are skipped, and
// the input slices are not reordered.
func createTokenRateOverTimeChart(results *benchmark.BenchmarkResults) *charts.Line {
	chart := charts.NewLine()
	chart.SetGlobalOptions(
		charts.WithTitleOpts(opts.Title{
			Title: "Token生成速率随时间变化",
		}),
	)

	// Width of one aggregation window.
	const windowSize = 5 * time.Second

	// Determine the overall time span and the steps that have data.
	var (
		start, end    time.Time
		haveData      bool
		concurrencies []int
	)
	for concurrency, stepResults := range results.ConcurrencyData {
		if len(stepResults) == 0 {
			continue
		}
		concurrencies = append(concurrencies, concurrency)
		for _, result := range stepResults {
			ts := result.Timestamp
			if !haveData {
				start, end, haveData = ts, ts, true
				continue
			}
			if ts.Before(start) {
				start = ts
			}
			if ts.After(end) {
				end = ts
			}
		}
	}
	if !haveData {
		return chart
	}
	// Map iteration order is random; sort for a deterministic series order.
	sort.Ints(concurrencies)

	// Build the shared X axis once, labelled with each window's start time.
	windowCount := int(end.Sub(start)/windowSize) + 1
	xAxisData := make([]string, windowCount)
	for i := range xAxisData {
		xAxisData[i] = start.Add(time.Duration(i) * windowSize).Format("15:04:05")
	}
	chart.SetXAxis(xAxisData)

	for _, concurrency := range concurrencies {
		// Sum completion tokens of successful requests per window for this step.
		tokens := make([]int, windowCount)
		for _, result := range results.ConcurrencyData[concurrency] {
			if result.Error != nil {
				continue
			}
			windowIndex := int(result.Timestamp.Sub(start) / windowSize)
			if windowIndex >= 0 && windowIndex < windowCount {
				tokens[windowIndex] += result.CompletionTokens
			}
		}

		// Convert token totals to tokens per second.
		tokenRateData := make([]opts.LineData, windowCount)
		for i, total := range tokens {
			tokenRateData[i] = opts.LineData{Value: float64(total) / windowSize.Seconds()}
		}
		chart.AddSeries(fmt.Sprintf("并发 %d", concurrency), tokenRateData)
	}

	return chart
}
// createConcurrencyVsResponseTimeChart builds a line chart relating each
// concurrency level (X axis, ascending) to the mean, P90 and P95 response
// time in milliseconds of that level's successful requests. A level without
// any successful request contributes 0 to all three series.
func createConcurrencyVsResponseTimeChart(results *benchmark.BenchmarkResults) *charts.Line {
	chart := charts.NewLine()
	chart.SetGlobalOptions(
		charts.WithTitleOpts(opts.Title{
			Title: "并发与响应时间关系",
		}),
	)

	// Concurrency levels in ascending order.
	var levels []int
	for level := range results.ConcurrencyData {
		levels = append(levels, level)
	}
	sort.Ints(levels)

	var (
		meanSeries []opts.LineData
		p90Series  []opts.LineData
		p95Series  []opts.LineData
	)
	labels := make([]string, len(levels))

	for i, level := range levels {
		labels[i] = fmt.Sprintf("%d", level)

		// Response times (ms) of this level's successful requests.
		var samples []float64
		for _, r := range results.ConcurrencyData[level] {
			if r.Error != nil {
				continue
			}
			samples = append(samples, float64(r.ResponseTime.Milliseconds()))
		}

		// Default to integer zeros; overwrite with real statistics when
		// there is at least one sample.
		var mean, p90, p95 interface{} = 0, 0, 0
		if len(samples) > 0 {
			m, _ := stats.Mean(samples)
			q90, _ := stats.Percentile(samples, 90)
			q95, _ := stats.Percentile(samples, 95)
			mean, p90, p95 = m, q90, q95
		}

		meanSeries = append(meanSeries, opts.LineData{Value: mean})
		p90Series = append(p90Series, opts.LineData{Value: p90})
		p95Series = append(p95Series, opts.LineData{Value: p95})
	}

	chart.SetXAxis(labels)
	chart.AddSeries("平均响应时间 (ms)", meanSeries)
	chart.AddSeries("P90响应时间 (ms)", p90Series)
	chart.AddSeries("P95响应时间 (ms)", p95Series)
	return chart
}
// generateCSVData writes every recorded request to <outputPath>/results.csv.
//
// The file starts with a header row followed by one row per result, with the
// columns Timestamp (RFC 3339), PromptType, PromptTokens, CompletionTokens,
// ResponseTime (milliseconds), Concurrency and Error (empty on success).
// Rows are written through encoding/csv, so a field containing a comma, a
// quote or a line break (typically an error message) is quoted instead of
// corrupting the row, and writes are buffered. With no results only the
// header is written.
//
// It returns the first error from creating, writing, flushing or closing the
// file; a close failure is reported only when nothing failed earlier.
func generateCSVData(results *benchmark.BenchmarkResults, outputPath string) (err error) {
	// Create the CSV file.
	csvPath := filepath.Join(outputPath, "results.csv")
	logger.Debug("创建CSV文件: %s", csvPath)
	f, err := os.Create(csvPath)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a close failure unless an earlier error is already being returned.
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	w := csv.NewWriter(f)

	// Header row.
	logger.Debug("写入CSV头...")
	header := []string{"Timestamp", "PromptType", "PromptTokens", "CompletionTokens", "ResponseTime", "Concurrency", "Error"}
	if err = w.Write(header); err != nil {
		return err
	}

	// With no results the file holds the header only.
	if len(results.Results) == 0 {
		logger.Warn("没有测试结果数据CSV文件将只包含表头")
		w.Flush()
		return w.Error()
	}

	// Data rows.
	logger.Debug("写入CSV数据共%d条记录...", len(results.Results))
	for _, result := range results.Results {
		errorMsg := ""
		if result.Error != nil {
			errorMsg = result.Error.Error()
		}

		// Each field is formatted with the same verb the row used before, so
		// the text of every value is unchanged; only the escaping is new.
		record := []string{
			result.Timestamp.Format(time.RFC3339),
			fmt.Sprintf("%s", result.PromptType),
			fmt.Sprintf("%d", result.PromptTokens),
			fmt.Sprintf("%d", result.CompletionTokens),
			fmt.Sprintf("%d", result.ResponseTime.Milliseconds()),
			fmt.Sprintf("%d", result.Concurrency),
			errorMsg,
		}
		if err = w.Write(record); err != nil {
			return err
		}
	}

	// csv.Writer buffers its output; Flush pushes it to the file and Error
	// reports any failure that occurred during a Write or the Flush.
	w.Flush()
	if err = w.Error(); err != nil {
		return err
	}

	logger.Debug("CSV数据写入完成")
	return nil
}