feat: enhance AI query handling with detailed result structure and improved UI display

This commit is contained in:
lilong.129
2025-06-24 11:50:37 +08:00
parent b1b4e5c1dc
commit fc32b5d874
6 changed files with 93 additions and 53 deletions

View File

@@ -1 +1 @@
v5.0.0-beta-2506232232
v5.0.0-beta-2506241150

View File

@@ -2379,59 +2379,34 @@ const htmlTemplate = `<!DOCTYPE html>
{{end}}
{{end}}
{{/* Handle special case: ai_query needs enhanced display even when not in planning */}}
{{/* Enhanced AI Query Display - using QueryResult data structure */}}
{{if eq $action.Method "ai_query"}}
{{if $action.QueryResult}}
<div class="sub-action-item">
<!-- Enhanced AI Query Display -->
<div class="validator-ai-content">
<!-- Extract AI query details from step logs -->
{{$stepLogs := getStepLogs $step}}
{{$queryThought := ""}}
{{$queryModel := ""}}
{{$queryUsage := ""}}
{{$queryScreenshot := ""}}
{{$queryResult := ""}}
{{range $logEntry := $stepLogs}}
{{if and (eq $logEntry.Message "log response message") (index $logEntry.Fields "content")}}
{{$content := index $logEntry.Fields "content"}}
{{if $content}}
{{$queryResult = $content}}
{{end}}
{{end}}
{{if and (eq $logEntry.Message "call model service for query") (index $logEntry.Fields "model")}}
{{$queryModel = index $logEntry.Fields "model"}}
{{end}}
{{if and (eq $logEntry.Message "usage statistics") (index $logEntry.Fields "input_tokens")}}
{{$inputTokens := index $logEntry.Fields "input_tokens"}}
{{$outputTokens := index $logEntry.Fields "output_tokens"}}
{{$totalTokens := index $logEntry.Fields "total_tokens"}}
{{$queryUsage = printf "📊 Tokens: %v in / %v out / %v total" $inputTokens $outputTokens $totalTokens}}
{{end}}
{{if and (eq $logEntry.Message "log screenshot") (index $logEntry.Fields "imagePath")}}
{{$queryScreenshot = index $logEntry.Fields "imagePath"}}
{{end}}
<!-- Display AI Thought -->
{{if $action.QueryResult.Thought}}
<div class="thought">{{$action.QueryResult.Thought}}</div>
{{end}}
<!-- Display AI Query Result at the top -->
{{if $queryResult}}
<div class="thought">{{$queryResult}}</div>
{{end}}
<!-- AI Query Layout - similar to validator layout -->
<!-- AI Query Layout: Screenshot left, Analysis right -->
<div class="validator-ai-layout">
<!-- Left column: Screenshot -->
{{if $queryScreenshot}}
{{if $action.QueryResult.ImagePath}}
<div class="validator-column-screenshot">
<div class="validator-step-compact">
<div class="step-header-compact">
<span class="step-name">📸 Query Screenshot</span>
{{if $action.QueryResult.ScreenshotElapsed}}
<span class="duration">{{formatDuration $action.QueryResult.ScreenshotElapsed}}</span>
{{end}}
</div>
<div class="screenshot-display">
{{$base64Image := encodeImageBase64 $queryScreenshot}}
{{$base64Image := encodeImageBase64 $action.QueryResult.ImagePath}}
{{if $base64Image}}
<div class="screenshot-item-compact">
<div class="screenshot-image">
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Query Screenshot" onclick="openImageModal(this.src)" />
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="AI Query Screenshot" onclick="openImageModal(this.src)" />
</div>
</div>
{{end}}
@@ -2440,18 +2415,30 @@ const htmlTemplate = `<!DOCTYPE html>
</div>
{{end}}
<!-- Right column: AI Query -->
<!-- Right column: AI Query Analysis -->
<div class="validator-column-analysis">
<div class="validator-step-compact">
<div class="step-header-compact">
<span class="step-name">🤖 AI Query</span>
<span class="step-name">🤖 AI Query Analysis</span>
{{if $action.QueryResult.ModelCallElapsed}}
<span class="duration">{{formatDuration $action.QueryResult.ModelCallElapsed}}</span>
{{end}}
</div>
<div class="validator-ai-details">
{{if $queryModel}}
<div class="model-info">🤖 Model: {{$queryModel}}</div>
{{if $action.QueryResult.ModelName}}
<div class="model-info">🤖 Model: {{$action.QueryResult.ModelName}}</div>
{{end}}
{{if $queryUsage}}
<div class="usage-info">{{$queryUsage}}</div>
{{if $action.QueryResult.Resolution}}
<div class="model-info">📐 Resolution: {{$action.QueryResult.Resolution.Width}}x{{$action.QueryResult.Resolution.Height}}</div>
{{end}}
{{if $action.QueryResult.Usage}}
<div class="usage-info">📊 Tokens: {{$action.QueryResult.Usage.PromptTokens}} in / {{$action.QueryResult.Usage.CompletionTokens}} out / {{$action.QueryResult.Usage.TotalTokens}} total</div>
{{end}}
{{if $action.QueryResult.Content}}
<div class="model-info">💬 Query Result: {{$action.QueryResult.Content}}</div>
{{end}}
{{if $action.QueryResult.Error}}
<div class="model-info" style="color: #dc3545;">❌ Error: {{$action.QueryResult.Error}}</div>
{{end}}
</div>
</div>
@@ -2460,6 +2447,7 @@ const htmlTemplate = `<!DOCTYPE html>
</div>
</div>
{{end}}
{{end}}
{{/* Handle SessionData: display requests and screen results for non-planning actions */}}
{{if not $action.Plannings}}

11
step.go
View File

@@ -60,11 +60,12 @@ type TStep struct {
// one step contains one or multiple actions
type ActionResult struct {
option.MobileAction `json:",inline"`
StartTime int64 `json:"start_time"` // action start time in millisecond(ms)
Elapsed int64 `json:"elapsed_ms"` // action elapsed time(ms)
Error string `json:"error,omitempty"` // action execution result
Plannings []*uixt.PlanningExecutionResult `json:"plannings,omitempty"` // store planning results for start_to_goal actions, which contains multiple sub-actions
uixt.SessionData // store session data for other actions besides start_to_goal
StartTime int64 `json:"start_time"` // action start time in millisecond(ms)
Elapsed int64 `json:"elapsed_ms"` // action elapsed time(ms)
Error string `json:"error,omitempty"` // action execution result
Plannings []*uixt.PlanningExecutionResult `json:"plannings,omitempty"` // store planning results for start_to_goal actions, which contains multiple sub-actions
QueryResult *uixt.QueryExecutionResult `json:"query_result,omitempty"` // store query result for ai_query actions
uixt.SessionData // store session data for other actions besides start_to_goal & ai_query
}
// one testcase contains one or multiple steps

View File

@@ -943,6 +943,24 @@ func runStepMobileUI(s *SessionRunner, step IStep) (stepResult *StepResult, err
actionResult.Plannings = planningResults
stepResult.Actions = append(stepResult.Actions, actionResult)
if err != nil {
actionResult.Error = err.Error()
if !code.IsErrorPredefined(err) {
err = errors.Wrap(code.MobileUIDriverError, err.Error())
}
return stepResult, err
}
continue
}
// handle ai_query action
if action.Method == option.ACTION_Query {
queryResult, err := uiDriver.AIQuery(
action.Params.(string), action.GetOptions()...)
actionResult.Elapsed = time.Since(actionStartTime).Milliseconds()
actionResult.QueryResult = queryResult
stepResult.Actions = append(stepResult.Actions, actionResult)
if err != nil {
actionResult.Error = err.Error()
if !code.IsErrorPredefined(err) {
err = errors.Wrap(code.MobileUIDriverError, err.Error())
}

View File

@@ -301,6 +301,17 @@ type PlanningExecutionResult struct {
SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning
}
// QueryExecutionResult is the value returned by XTDriver.AIQuery. It embeds the
// model's ai.QueryResult and adds the timing and screenshot metadata recorded
// while the query ran.
type QueryExecutionResult struct {
ai.QueryResult // embedded model response; its fields are promoted onto this struct
ModelCallElapsed int64 `json:"model_call_elapsed"` // wall-clock duration of the LLMService.Query call, in milliseconds
ScreenshotElapsed int64 `json:"screenshot_elapsed"` // wall-clock duration of the screenshot capture, in milliseconds
ImagePath string `json:"image_path"` // path of the screenshot captured for this query
Resolution *types.Size `json:"resolution"` // screen resolution taken from the captured screenshot result
// NOTE(review): the AIQuery constructor does not assign ModelName or Usage;
// confirm where they are populated, otherwise they keep their zero values.
ModelName string `json:"model_name"` // model name used for query
Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
}
// SubActionResult represents a sub-action within a start_to_goal action
type SubActionResult struct {
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
@@ -316,11 +327,21 @@ type SessionData struct {
ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results
}
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*ai.QueryResult, error) {
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*QueryExecutionResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")
}
// Step 1: Take screenshot and measure time
screenshotStartTime := time.Now()
screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
)
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil {
return nil, err
}
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
if err != nil {
return nil, err
@@ -329,6 +350,9 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*ai.Que
// parse action options to extract OutputSchema
actionOptions := option.NewActionOptions(opts...)
// Step 2: Call model and measure time
modelCallStartTime := time.Now()
// execute query
queryOpts := &ai.QueryOptions{
Query: text,
@@ -337,11 +361,20 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*ai.Que
OutputSchema: actionOptions.OutputSchema,
}
result, err := dExt.LLMService.Query(context.Background(), queryOpts)
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
if err != nil {
return nil, errors.Wrap(err, "AI query failed")
}
return result, nil
// Create QueryExecutionResult with all timing and metadata
queryExecResult := &QueryExecutionResult{
QueryResult: *result, // inherit from ai.QueryResult
ModelCallElapsed: modelCallElapsed, // model call timing
ScreenshotElapsed: screenshotElapsed, // screenshot timing
ImagePath: screenResult.ImagePath, // screenshot path
Resolution: &screenResult.Resolution, // screen resolution
}
return queryExecResult, nil
}
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) error {

View File

@@ -166,7 +166,7 @@ func (t *ToolAIQuery) Implement() server.ToolHandlerFunc {
opts := unifiedReq.Options()
// AI query logic with options
result, err := driverExt.AIQuery(unifiedReq.Prompt, opts...)
queryResult, err := driverExt.AIQuery(unifiedReq.Prompt, opts...)
if err != nil {
return NewMCPErrorResponse(fmt.Sprintf("AI query failed: %s", err.Error())), nil
}
@@ -174,7 +174,7 @@ func (t *ToolAIQuery) Implement() server.ToolHandlerFunc {
message := fmt.Sprintf("Successfully queried information with prompt: %s", unifiedReq.Prompt)
returnData := ToolAIQuery{
Prompt: unifiedReq.Prompt,
Result: result.Content,
Result: queryResult.Content,
}
return NewMCPSuccessResponse(message, &returnData), nil