feat: enhance AI result handling with model name and usage statistics for query, action, and assertion types

This commit is contained in:
lilong.129
2025-06-24 15:06:58 +08:00
parent 8fc8d06604
commit b1719344c0
7 changed files with 130 additions and 49 deletions

View File

@@ -1 +1 @@
v5.0.0-beta-2506241342
v5.0.0-beta-2506241525

111
report.go
View File

@@ -417,16 +417,53 @@ func (g *HTMLReportGenerator) calculateTotalUsage() map[string]interface{} {
continue
}
for _, action := range step.Actions {
if action.Plannings == nil {
continue
}
for _, planning := range action.Plannings {
if planning.Usage == nil {
continue
// Calculate planning usage
if action.Plannings != nil {
for _, planning := range action.Plannings {
if planning.Usage != nil {
totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + planning.Usage.PromptTokens
totalUsage["completion_tokens"] = totalUsage["completion_tokens"].(int) + planning.Usage.CompletionTokens
totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + planning.Usage.TotalTokens
}
}
}
// Calculate AI operations usage (ai_query, ai_action, ai_assert)
if action.AIResult != nil {
var usage *map[string]interface{}
switch action.AIResult.Type {
case "query":
if action.AIResult.QueryResult != nil && action.AIResult.QueryResult.Usage != nil {
usage = &map[string]interface{}{
"prompt_tokens": action.AIResult.QueryResult.Usage.PromptTokens,
"completion_tokens": action.AIResult.QueryResult.Usage.CompletionTokens,
"total_tokens": action.AIResult.QueryResult.Usage.TotalTokens,
}
}
case "action":
if action.AIResult.PlanningResult != nil && action.AIResult.PlanningResult.Usage != nil {
usage = &map[string]interface{}{
"prompt_tokens": action.AIResult.PlanningResult.Usage.PromptTokens,
"completion_tokens": action.AIResult.PlanningResult.Usage.CompletionTokens,
"total_tokens": action.AIResult.PlanningResult.Usage.TotalTokens,
}
}
case "assert":
if action.AIResult.AssertionResult != nil && action.AIResult.AssertionResult.Usage != nil {
usage = &map[string]interface{}{
"prompt_tokens": action.AIResult.AssertionResult.Usage.PromptTokens,
"completion_tokens": action.AIResult.AssertionResult.Usage.CompletionTokens,
"total_tokens": action.AIResult.AssertionResult.Usage.TotalTokens,
}
}
}
if usage != nil {
totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + (*usage)["prompt_tokens"].(int)
totalUsage["completion_tokens"] = totalUsage["completion_tokens"].(int) + (*usage)["completion_tokens"].(int)
totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + (*usage)["total_tokens"].(int)
}
totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + planning.Usage.PromptTokens
totalUsage["completion_tokens"] = totalUsage["completion_tokens"].(int) + planning.Usage.CompletionTokens
totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + planning.Usage.TotalTokens
}
}
}
@@ -1318,6 +1355,21 @@ const htmlTemplate = `<!DOCTYPE html>
color: #495057;
}
.structured-data {
background: #f8f9fa;
border: 1px solid #28a745;
border-radius: 6px;
padding: 10px 12px;
margin: 8px 0;
font-size: 0.85em;
color: #495057;
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
white-space: pre-wrap;
word-wrap: break-word;
max-height: 200px;
overflow-y: auto;
}
@media screen and (max-width: 768px) {
.planning-three-columns {
flex-direction: column;
@@ -2431,15 +2483,37 @@ const htmlTemplate = `<!DOCTYPE html>
{{end}}
</div>
<div class="validator-ai-details">
{{if $action.AIResult.ModelName}}
<div class="model-info">🤖 Model: {{$action.AIResult.ModelName}}</div>
{{/* Model name and usage from specific result types */}}
{{if eq $action.AIResult.Type "query"}}
{{if $action.AIResult.QueryResult.ModelName}}
<div class="model-info">🤖 Model: {{$action.AIResult.QueryResult.ModelName}}</div>
{{end}}
{{if $action.AIResult.QueryResult.Usage}}
<div class="usage-info">📊 Tokens: {{$action.AIResult.QueryResult.Usage.PromptTokens}} in / {{$action.AIResult.QueryResult.Usage.CompletionTokens}} out / {{$action.AIResult.QueryResult.Usage.TotalTokens}} total</div>
{{end}}
{{/* Display structured data for query results */}}
{{if $action.AIResult.QueryResult.Data}}
<div class="model-info">📥 Structured Data:</div>
<div class="structured-data">{{safeHTML (toJSON $action.AIResult.QueryResult.Data)}}</div>
{{end}}
{{else if eq $action.AIResult.Type "action"}}
{{if $action.AIResult.PlanningResult.ModelName}}
<div class="model-info">🤖 Model: {{$action.AIResult.PlanningResult.ModelName}}</div>
{{end}}
{{if $action.AIResult.PlanningResult.Usage}}
<div class="usage-info">📊 Tokens: {{$action.AIResult.PlanningResult.Usage.PromptTokens}} in / {{$action.AIResult.PlanningResult.Usage.CompletionTokens}} out / {{$action.AIResult.PlanningResult.Usage.TotalTokens}} total</div>
{{end}}
{{else if eq $action.AIResult.Type "assert"}}
{{if $action.AIResult.AssertionResult.ModelName}}
<div class="model-info">🤖 Model: {{$action.AIResult.AssertionResult.ModelName}}</div>
{{end}}
{{if $action.AIResult.AssertionResult.Usage}}
<div class="usage-info">📊 Tokens: {{$action.AIResult.AssertionResult.Usage.PromptTokens}} in / {{$action.AIResult.AssertionResult.Usage.CompletionTokens}} out / {{$action.AIResult.AssertionResult.Usage.TotalTokens}} total</div>
{{end}}
{{end}}
{{if $action.AIResult.Resolution}}
<div class="model-info">📐 Resolution: {{$action.AIResult.Resolution.Width}}x{{$action.AIResult.Resolution.Height}}</div>
{{end}}
{{if $action.AIResult.Usage}}
<div class="usage-info">📊 Tokens: {{$action.AIResult.Usage.PromptTokens}} in / {{$action.AIResult.Usage.CompletionTokens}} out / {{$action.AIResult.Usage.TotalTokens}} total</div>
{{end}}
{{if $action.AIResult.Content}}
<div class="model-info">💬 {{title $action.AIResult.Type}} Result: {{$action.AIResult.Content}}</div>
{{end}}
@@ -2495,9 +2569,6 @@ const htmlTemplate = `<!DOCTYPE html>
<div class="screenshot-item small">
<div class="screenshot-info">
<span class="filename">{{base $screenshot.ImagePath}}</span>
{{if $screenshot.Resolution}}
<span class="resolution">{{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}</span>
{{end}}
</div>
<div class="screenshot-image">
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Screenshot" onclick="openImageModal(this.src)" />
@@ -2560,12 +2631,6 @@ const htmlTemplate = `<!DOCTYPE html>
<div class="screenshot-item">
<div class="screenshot-info">
<span class="filename">{{base $imagePath}}</span>
{{if $screenshot.Resolution}}
<span class="resolution">{{$screenshot.Resolution.Width}}x{{$screenshot.Resolution.Height}}</span>
{{else if index $screenshot "resolution"}}
{{$resolution := index $screenshot "resolution"}}
<span class="resolution">{{index $resolution "width"}}x{{index $resolution "height"}}</span>
{{end}}
</div>
<div class="screenshot-image">
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Screenshot" onclick="openImageModal(this.src)" />

View File

@@ -30,8 +30,10 @@ type AssertOptions struct {
// AssertionResult represents the response from an AI assertion
type AssertionResult struct {
Pass bool `json:"pass"`
Thought string `json:"thought"`
Pass bool `json:"pass"`
Thought string `json:"thought"`
ModelName string `json:"model_name"` // model name used for assertion
Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
}
// Asserter handles assertion using different AI models
@@ -85,7 +87,7 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro
}
// Assert performs the assertion check on the screenshot
func (a *Asserter) Assert(ctx context.Context, opts *AssertOptions) (*AssertionResult, error) {
func (a *Asserter) Assert(ctx context.Context, opts *AssertOptions) (result *AssertionResult, err error) {
// Validate input parameters
if err := validateAssertionInput(opts); err != nil {
return nil, errors.Wrap(err, "validate assertion parameters failed")
@@ -132,8 +134,15 @@ Here is the assertion. Please tell whether it is truthy according to the screens
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
}
defer func() {
	// Attach token usage to the result on the way out.
	// The named result is nil on every error return after this point
	// (e.g. `return nil, errors.Wrap(...)` when parsing fails), and the
	// return statement assigns it before this closure runs, so guard
	// against a nil result to avoid a nil-pointer panic.
	if result == nil {
		return
	}
	if message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
		result.Usage = message.ResponseMeta.Usage
	}
}()
// Parse result
result, err := parseAssertionResult(message.Content)
result, err = parseAssertionResult(message.Content, a.modelConfig.ModelType)
if err != nil {
return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
}
@@ -159,7 +168,7 @@ func validateAssertionInput(opts *AssertOptions) error {
}
// parseAssertionResult parses the model response into AssertionResult
func parseAssertionResult(content string) (*AssertionResult, error) {
func parseAssertionResult(content string, modelType option.LLMServiceType) (*AssertionResult, error) {
var result AssertionResult
// Use the generic structured response parser
@@ -170,5 +179,6 @@ func parseAssertionResult(content string) (*AssertionResult, error) {
return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
}
result.ModelName = string(modelType)
return &result, nil
}

View File

@@ -136,7 +136,7 @@ func TestParseAssertionResult(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := parseAssertionResult(tt.input)
result, err := parseAssertionResult(tt.input, option.DOUBAO_1_5_UI_TARS_250328)
if tt.shouldSucceed {
require.NoError(t, err)
assert.NotNil(t, result)

View File

@@ -32,9 +32,11 @@ type QueryOptions struct {
// QueryResult represents the response from an AI query
type QueryResult struct {
Content string `json:"content"` // The extracted content/information
Thought string `json:"thought"` // The reasoning process
Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided
Content string `json:"content"` // The extracted content/information
Thought string `json:"thought"` // The reasoning process
Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided
ModelName string `json:"model_name"` // model name used for query
Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
}
// Querier handles query operations using different AI models
@@ -89,7 +91,7 @@ func NewQuerier(ctx context.Context, modelConfig *ModelConfig) (*Querier, error)
// callModelWithLogging calls the model with automatic logging and timing
// Query performs the information extraction from the screenshot
func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) {
func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (result *QueryResult, err error) {
// Validate input parameters
if err := validateQueryInput(opts); err != nil {
return nil, errors.Wrap(err, "validate query parameters failed")
@@ -141,8 +143,15 @@ Here is the query. Please extract the requested information from the screenshot.
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
}
defer func() {
	// Attach token usage to the result on the way out.
	// The named result is nil on error returns after this point
	// (`return nil, errors.Wrap(...)`), and the return statement assigns
	// it before this closure runs, so guard against a nil result to
	// avoid a nil-pointer panic.
	if result == nil {
		return
	}
	if message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
		result.Usage = message.ResponseMeta.Usage
	}
}()
// Parse result
result, err := parseQueryResult(message.Content)
result, err = parseQueryResult(message.Content, q.modelConfig.ModelType)
if err != nil {
return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error())
}
@@ -168,18 +177,20 @@ func validateQueryInput(opts *QueryOptions) error {
}
// parseQueryResult parses the model response into QueryResult
func parseQueryResult(content string) (*QueryResult, error) {
func parseQueryResult(content string, modelType option.LLMServiceType) (*QueryResult, error) {
var result QueryResult
// Use the generic structured response parser with enhanced error recovery
if err := parseStructuredResponse(content, &result); err != nil {
// If parseStructuredResponse fails completely, treat content as plain text
return &QueryResult{
Content: content,
Thought: "Failed to parse response, returning raw content",
Content: content,
Thought: "Failed to parse response, returning raw content",
ModelName: string(modelType),
}, nil
}
result.ModelName = string(modelType)
return &result, nil
}

View File

@@ -87,7 +87,6 @@ func loadTestImage(t *testing.T, path string) (string, types.Size) {
}
// Test functions
func TestParseQueryResult(t *testing.T) {
tests := []struct {
name string
@@ -130,7 +129,7 @@ func TestParseQueryResult(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := parseQueryResult(tt.content)
result, err := parseQueryResult(tt.content, option.DOUBAO_1_5_UI_TARS_250328)
assert.NoError(t, err)
assert.Equal(t, tt.expected.Content, result.Content)
assert.Equal(t, tt.expected.Thought, result.Thought)

View File

@@ -155,8 +155,6 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
ScreenshotElapsed: screenshotElapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
ModelName: planningResult.ModelName,
Usage: planningResult.Usage,
PlanningResult: &planningResult.PlanningResult,
Thought: planningResult.Thought,
Content: planningResult.Content,
@@ -331,13 +329,11 @@ type PlanningExecutionResult struct {
// AIExecutionResult represents a unified result structure for all AI operations
type AIExecutionResult struct {
Type string `json:"type"` // operation type: "query", "action", "assert"
ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
ScreenshotElapsed int64 `json:"screenshot_elapsed"` // screenshot elapsed time in milliseconds
ImagePath string `json:"image_path"` // path to screenshot used for operation
Resolution *types.Size `json:"resolution"` // screen resolution
ModelName string `json:"model_name"` // model name used for operation
Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
Type string `json:"type"` // operation type: "query", "action", "assert"
ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
ScreenshotElapsed int64 `json:"screenshot_elapsed"` // screenshot elapsed time in milliseconds
ImagePath string `json:"image_path"` // path to screenshot used for operation
Resolution *types.Size `json:"resolution"` // screen resolution
// Operation-specific results (only one will be populated based on Type)
QueryResult *ai.QueryResult `json:"query_result,omitempty"` // for ai_query operations