mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-12 02:21:29 +08:00
619 lines
22 KiB
Go
619 lines
22 KiB
Go
package uixt
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/cloudwego/eino/schema"
|
|
"github.com/pkg/errors"
|
|
"github.com/rs/zerolog/log"
|
|
|
|
"github.com/httprunner/httprunner/v5/code"
|
|
"github.com/httprunner/httprunner/v5/uixt/ai"
|
|
"github.com/httprunner/httprunner/v5/uixt/option"
|
|
"github.com/httprunner/httprunner/v5/uixt/types"
|
|
)
|
|
|
|
// StartToGoal (original implementation - preserved)
|
|
func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) ([]*PlanningExecutionResult, error) {
|
|
options := option.NewActionOptions(opts...)
|
|
logger := log.Info().Str("prompt", prompt)
|
|
if options.MaxRetryTimes > 0 {
|
|
logger = logger.Int("max_retry_times", options.MaxRetryTimes)
|
|
}
|
|
|
|
// Handle TimeLimit and Timeout with unified context mechanism
|
|
var isTimeLimitMode bool
|
|
if options.TimeLimit > 0 {
|
|
// TimeLimit takes precedence over Timeout
|
|
logger = logger.Int("time_limit_seconds", options.TimeLimit)
|
|
var cancel context.CancelFunc
|
|
ctx, cancel = context.WithTimeout(ctx, time.Duration(options.TimeLimit)*time.Second)
|
|
defer cancel()
|
|
isTimeLimitMode = true
|
|
} else if options.Timeout > 0 {
|
|
// Use Timeout only if TimeLimit is not set
|
|
logger = logger.Int("timeout_seconds", options.Timeout)
|
|
var cancel context.CancelFunc
|
|
ctx, cancel = context.WithTimeout(ctx, time.Duration(options.Timeout)*time.Second)
|
|
defer cancel()
|
|
}
|
|
logger.Msg("StartToGoal")
|
|
|
|
var allPlannings []*PlanningExecutionResult
|
|
var attempt int
|
|
for {
|
|
attempt++
|
|
log.Info().Int("attempt", attempt).Msg("planning attempt")
|
|
|
|
// Check for context cancellation (timeout, time limit, or interrupt)
|
|
select {
|
|
case <-ctx.Done():
|
|
cause := context.Cause(ctx)
|
|
// Handle TimeLimit timeout - return success
|
|
if isTimeLimitMode && errors.Is(cause, context.DeadlineExceeded) {
|
|
log.Info().
|
|
Int("attempt", attempt).
|
|
Int("completed_plannings", len(allPlannings)).
|
|
Int("time_limit_seconds", options.TimeLimit).
|
|
Msg("StartToGoal time limit reached, stopping gracefully")
|
|
return allPlannings, nil
|
|
}
|
|
|
|
// Handle other cancellations (Timeout, interrupt, external cancellation) - return error
|
|
log.Warn().
|
|
Int("attempt", attempt).
|
|
Int("completed_plannings", len(allPlannings)).
|
|
Err(cause).
|
|
Msg("StartToGoal cancelled")
|
|
return allPlannings, errors.Wrap(cause, "StartToGoal cancelled")
|
|
default:
|
|
}
|
|
|
|
// Plan next action with history reset on first attempt
|
|
planningStartTime := time.Now()
|
|
planningOpts := opts
|
|
if attempt == 1 {
|
|
// Add ResetHistory option for the first attempt
|
|
planningOpts = append(planningOpts, option.WithResetHistory(true))
|
|
}
|
|
|
|
planningResult, err := dExt.PlanNextAction(ctx, prompt, planningOpts...)
|
|
if err != nil {
|
|
// Check if this is a LLM service request error that should be retried
|
|
if errors.Is(err, code.LLMRequestServiceError) {
|
|
log.Warn().Err(err).Int("attempt", attempt).
|
|
Msg("LLM service request failed, retrying...")
|
|
time.Sleep(5 * time.Second)
|
|
continue
|
|
}
|
|
// Create planning result with error
|
|
errorResult := &PlanningExecutionResult{
|
|
PlanningResult: ai.PlanningResult{
|
|
Thought: "Planning failed",
|
|
ModelName: "",
|
|
Error: err.Error(),
|
|
},
|
|
StartTime: planningStartTime.UnixMilli(),
|
|
Elapsed: time.Since(planningStartTime).Milliseconds(),
|
|
}
|
|
allPlannings = append(allPlannings, errorResult)
|
|
return allPlannings, err
|
|
}
|
|
|
|
// Set planning execution timing
|
|
planningResult.StartTime = planningStartTime.UnixMilli()
|
|
planningResult.SubActions = []*SubActionResult{}
|
|
|
|
// Check if task is finished BEFORE executing actions
|
|
if dExt.isTaskFinished(planningResult) {
|
|
log.Info().Msg("task finished, stopping StartToGoal")
|
|
planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
|
|
allPlannings = append(allPlannings, planningResult)
|
|
return allPlannings, nil
|
|
}
|
|
|
|
// Invoke tool calls
|
|
for _, toolCall := range planningResult.ToolCalls {
|
|
// Check for context cancellation (timeout, time limit, or interrupt) before each action
|
|
select {
|
|
case <-ctx.Done():
|
|
cause := context.Cause(ctx)
|
|
// Handle TimeLimit timeout - return success
|
|
if isTimeLimitMode && errors.Is(cause, context.DeadlineExceeded) {
|
|
log.Info().
|
|
Int("attempt", attempt).
|
|
Int("completed_plannings", len(allPlannings)).
|
|
Int("completed_tool_calls", len(planningResult.SubActions)).
|
|
Int("total_tool_calls", len(planningResult.ToolCalls)).
|
|
Int("time_limit_seconds", options.TimeLimit).
|
|
Msg("StartToGoal time limit reached during tool call execution, stopping gracefully")
|
|
planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
|
|
allPlannings = append(allPlannings, planningResult)
|
|
return allPlannings, nil
|
|
}
|
|
|
|
// Handle other cancellations (Timeout, external cancellation) - return error
|
|
log.Warn().
|
|
Int("attempt", attempt).
|
|
Int("completed_plannings", len(allPlannings)).
|
|
Int("completed_tool_calls", len(planningResult.SubActions)).
|
|
Int("total_tool_calls", len(planningResult.ToolCalls)).
|
|
Err(cause).
|
|
Msg("invokeToolCalls cancelled")
|
|
planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
|
|
allPlannings = append(allPlannings, planningResult)
|
|
return allPlannings, errors.Wrap(cause, "invokeToolCalls cancelled")
|
|
default:
|
|
}
|
|
|
|
// Execute each tool call in a separate function to ensure proper defer execution
|
|
err := func() error {
|
|
subActionStartTime := time.Now()
|
|
subActionResult := &SubActionResult{
|
|
ActionName: toolCall.Function.Name,
|
|
Arguments: toolCall.Function.Arguments,
|
|
StartTime: subActionStartTime.UnixMilli(),
|
|
}
|
|
|
|
// Use defer to ensure sub-action is always processed and added to results
|
|
defer func() {
|
|
subActionResult.Elapsed = time.Since(subActionStartTime).Milliseconds()
|
|
subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data
|
|
planningResult.SubActions = append(planningResult.SubActions, subActionResult)
|
|
}()
|
|
|
|
if err := dExt.invokeToolCall(ctx, toolCall, opts...); err != nil {
|
|
log.Error().Err(err).
|
|
Str("action", toolCall.Function.Name).
|
|
Msg("invoke tool call failed")
|
|
subActionResult.Error = err.Error()
|
|
return err
|
|
}
|
|
return nil
|
|
}()
|
|
if err != nil {
|
|
planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
|
|
planningResult.Error = err.Error()
|
|
allPlannings = append(allPlannings, planningResult)
|
|
return allPlannings, err
|
|
}
|
|
}
|
|
|
|
// Complete this planning cycle
|
|
planningResult.Elapsed = time.Since(planningStartTime).Milliseconds()
|
|
allPlannings = append(allPlannings, planningResult)
|
|
|
|
if options.MaxRetryTimes > 0 && attempt > options.MaxRetryTimes {
|
|
return allPlannings, errors.New("reached max retry times")
|
|
}
|
|
|
|
// wait 3 seconds for tool calls to complete
|
|
time.Sleep(3 * time.Second)
|
|
}
|
|
}
|
|
|
|
// AIAction with WingsService priority support
|
|
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
|
log.Info().Str("prompt", prompt).Msg("performing AI action")
|
|
|
|
// Step 1: Take screenshot and convert to base64
|
|
screenResult, err := dExt.GetScreenResult(
|
|
option.WithScreenShotFileName("ai_action"),
|
|
option.WithScreenShotBase64(true),
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Step 2: Check if WingsService is available and prioritize it
|
|
if dExt.WingsService != nil {
|
|
log.Info().Msg("using Wings service for AI action")
|
|
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.WingsService, "wings", opts...)
|
|
}
|
|
|
|
// Step 3: Fallback to LLM service
|
|
if dExt.LLMService == nil {
|
|
return nil, errors.New("neither Wings service nor LLM service is initialized")
|
|
}
|
|
|
|
log.Info().Msg("using LLM service for AI action")
|
|
return dExt.executeAIAction(ctx, prompt, screenResult, dExt.LLMService, "llm", opts...)
|
|
}
|
|
|
|
// executeAIAction executes AIAction using any AI service (generic implementation)
|
|
func (dExt *XTDriver) executeAIAction(ctx context.Context, prompt string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
|
// Add device context for Wings service if needed
|
|
if serviceType == "wings" {
|
|
ctx = dExt.addDeviceContextForWings(ctx)
|
|
}
|
|
|
|
// Step 1: Plan next action and measure time
|
|
modelCallStartTime := time.Now()
|
|
|
|
var planningResult *ai.PlanningResult
|
|
var err error
|
|
|
|
if serviceType == "llm" {
|
|
// For LLM service, use PlanNextAction which includes additional processing
|
|
planningExecutionResult, planErr := dExt.PlanNextAction(ctx, prompt, opts...)
|
|
if planErr != nil {
|
|
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
|
|
return &AIExecutionResult{
|
|
Type: "action",
|
|
ModelCallElapsed: modelCallElapsed,
|
|
ScreenshotElapsed: screenResult.Elapsed,
|
|
ImagePath: screenResult.ImagePath,
|
|
Resolution: &screenResult.Resolution,
|
|
Error: planErr.Error(),
|
|
}, errors.Wrap(planErr, "get next action failed")
|
|
}
|
|
planningResult = &planningExecutionResult.PlanningResult
|
|
} else {
|
|
// For Wings service, call Plan directly
|
|
planningOpts := &ai.PlanningOptions{
|
|
UserInstruction: prompt,
|
|
Message: &schema.Message{
|
|
Role: schema.User,
|
|
MultiContent: []schema.ChatMessagePart{
|
|
{
|
|
Type: schema.ChatMessagePartTypeImageURL,
|
|
ImageURL: &schema.ChatMessageImageURL{
|
|
URL: screenResult.Base64,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
Size: screenResult.Resolution,
|
|
}
|
|
|
|
planningResult, err = service.Plan(ctx, planningOpts)
|
|
if err != nil {
|
|
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
|
|
return &AIExecutionResult{
|
|
Type: "action",
|
|
ModelCallElapsed: modelCallElapsed,
|
|
ScreenshotElapsed: screenResult.Elapsed,
|
|
ImagePath: screenResult.ImagePath,
|
|
Resolution: &screenResult.Resolution,
|
|
Error: err.Error(),
|
|
}, errors.Wrap(err, fmt.Sprintf("%s service planning failed", serviceType))
|
|
}
|
|
}
|
|
|
|
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
|
|
|
|
aiExecutionResult := &AIExecutionResult{
|
|
Type: "action",
|
|
ModelCallElapsed: modelCallElapsed,
|
|
ScreenshotElapsed: screenResult.Elapsed,
|
|
ImagePath: screenResult.ImagePath,
|
|
Resolution: &screenResult.Resolution,
|
|
PlanningResult: planningResult,
|
|
}
|
|
|
|
// Step 2: Execute tool calls
|
|
for _, toolCall := range planningResult.ToolCalls {
|
|
err = dExt.invokeToolCall(ctx, toolCall, opts...)
|
|
if err != nil {
|
|
log.Error().Err(err).
|
|
Str("action", toolCall.Function.Name).
|
|
Msg("invoke tool call failed")
|
|
aiExecutionResult.Error = err.Error()
|
|
return aiExecutionResult, errors.Wrap(err, "invoke tool call failed")
|
|
}
|
|
}
|
|
|
|
return aiExecutionResult, nil
|
|
}
|
|
|
|
// AIAssert with WingsService priority support
|
|
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
|
log.Info().Str("assertion", assertion).Msg("performing AI assertion")
|
|
|
|
// Step 1: Take screenshot and convert to base64
|
|
screenResult, err := dExt.GetScreenResult(
|
|
option.WithScreenShotFileName("ai_assert"),
|
|
option.WithScreenShotBase64(true),
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Step 2: Check if WingsService is available and prioritize it
|
|
if dExt.WingsService != nil {
|
|
log.Info().Msg("using Wings service for AI assertion")
|
|
return dExt.executeAIAssert(assertion, screenResult, dExt.WingsService, "wings", opts...)
|
|
}
|
|
|
|
// Step 3: Fallback to LLM service
|
|
if dExt.LLMService == nil {
|
|
return nil, errors.New("neither Wings service nor LLM service is initialized")
|
|
}
|
|
|
|
log.Info().Msg("using LLM service for AI assertion")
|
|
return dExt.executeAIAssert(assertion, screenResult, dExt.LLMService, "llm", opts...)
|
|
}
|
|
|
|
// executeAIAssert executes AIAssert using any AI service (generic implementation)
|
|
func (dExt *XTDriver) executeAIAssert(assertion string, screenResult *ScreenResult, service ai.ILLMService, serviceType string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
|
// Step 1: Prepare context and options
|
|
ctx := context.Background()
|
|
if serviceType == "wings" {
|
|
ctx = dExt.addDeviceContextForWings(ctx)
|
|
}
|
|
|
|
assertResult := &AIExecutionResult{
|
|
Type: "assert",
|
|
ScreenshotElapsed: screenResult.Elapsed,
|
|
ImagePath: screenResult.ImagePath,
|
|
Resolution: &screenResult.Resolution,
|
|
}
|
|
|
|
// Step 2: Call service and measure time
|
|
modelCallStartTime := time.Now()
|
|
assertOpts := &ai.AssertOptions{
|
|
Assertion: assertion,
|
|
Screenshot: screenResult.Base64,
|
|
Size: screenResult.Resolution,
|
|
}
|
|
|
|
result, err := service.Assert(ctx, assertOpts)
|
|
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()
|
|
assertResult.AssertionResult = result
|
|
|
|
if err != nil {
|
|
assertResult.Error = err.Error()
|
|
return assertResult, errors.Wrap(err, fmt.Sprintf("%s assertion failed", serviceType))
|
|
}
|
|
|
|
if !result.Pass {
|
|
assertResult.Error = result.Thought
|
|
}
|
|
|
|
return assertResult, nil
|
|
}
|
|
|
|
// addDeviceContextForWings adds device information to context for Wings service
|
|
func (dExt *XTDriver) addDeviceContextForWings(ctx context.Context) context.Context {
|
|
device := dExt.GetDevice()
|
|
if device == nil {
|
|
return ctx
|
|
}
|
|
|
|
// Add device ID to context
|
|
ctx = context.WithValue(ctx, "device_id", device.UUID())
|
|
|
|
// Add platform type to context
|
|
platformType := "android" // default
|
|
switch device.(type) {
|
|
case *AndroidDevice:
|
|
platformType = "android"
|
|
case *IOSDevice:
|
|
platformType = "ios"
|
|
case *HarmonyDevice:
|
|
platformType = "harmony"
|
|
}
|
|
ctx = context.WithValue(ctx, "platform_type", platformType)
|
|
|
|
return ctx
|
|
}
|
|
|
|
// PlanNextAction (original implementation - preserved)
|
|
func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*PlanningExecutionResult, error) {
|
|
if dExt.LLMService == nil {
|
|
return nil, errors.New("LLM service is not initialized")
|
|
}
|
|
|
|
// Parse action options to get ResetHistory setting
|
|
options := option.NewActionOptions(opts...)
|
|
resetHistory := options.ResetHistory
|
|
|
|
// Step 1: Take screenshot and convert to base64
|
|
screenResult, err := dExt.GetScreenResult(
|
|
option.WithScreenShotFileName("ai_planning"),
|
|
option.WithScreenShotBase64(true),
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Clear session data after planning screenshot to avoid including it in sub-actions
|
|
// The planning screenshot is already stored in planningResult.ScreenResult
|
|
dExt.GetSession().GetData(true) // reset session data to exclude planning screenshot from sub-actions
|
|
|
|
// Step 2: Call model
|
|
modelCallStartTime := time.Now()
|
|
planningOpts := &ai.PlanningOptions{
|
|
UserInstruction: prompt,
|
|
Message: &schema.Message{
|
|
Role: schema.User,
|
|
MultiContent: []schema.ChatMessagePart{
|
|
{
|
|
Type: schema.ChatMessagePartTypeImageURL,
|
|
ImageURL: &schema.ChatMessageImageURL{
|
|
URL: screenResult.Base64,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
Size: screenResult.Resolution,
|
|
ResetHistory: resetHistory,
|
|
}
|
|
|
|
result, err := dExt.LLMService.Plan(ctx, planningOpts)
|
|
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
|
|
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to get next action from planner")
|
|
}
|
|
|
|
// Step 3: Parse result (this is already done in LLMService.Call, but we record it separately)
|
|
actionNames := make([]string, len(result.ToolCalls))
|
|
for i, toolCall := range result.ToolCalls {
|
|
actionNames[i] = toolCall.Function.Name
|
|
}
|
|
|
|
// Create unified planning result that inherits from ai.PlanningResult
|
|
planningResult := &PlanningExecutionResult{
|
|
PlanningResult: *result, // Inherit all fields from ai.PlanningResult
|
|
// Planning process timing and metadata
|
|
ScreenshotElapsed: screenResult.Elapsed,
|
|
ImagePath: screenResult.ImagePath,
|
|
Resolution: &screenResult.Resolution,
|
|
ScreenResult: screenResult,
|
|
ModelCallElapsed: modelCallElapsed,
|
|
ToolCallsCount: len(result.ToolCalls),
|
|
ActionNames: actionNames,
|
|
// Execution timing (will be set by StartToGoal)
|
|
StartTime: 0, // Will be set by caller
|
|
Elapsed: 0, // Will be set by caller
|
|
SubActions: nil, // Will be populated during execution
|
|
}
|
|
|
|
return planningResult, nil
|
|
}
|
|
|
|
// isTaskFinished (original implementation - preserved)
|
|
func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bool {
|
|
// Check if there are no tool calls (no actions to execute)
|
|
if len(planningResult.ToolCalls) == 0 {
|
|
log.Info().Msg("no tool calls returned, task may be finished")
|
|
return true
|
|
}
|
|
|
|
// Check if any tool call is a "finished" action
|
|
for _, toolCall := range planningResult.ToolCalls {
|
|
if toolCall.Function.Name == "uixt__finished" {
|
|
log.Info().Str("reason", toolCall.Function.Arguments).Msg("finished action detected")
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// invokeToolCall (original implementation - preserved)
|
|
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
|
|
// Parse arguments
|
|
arguments := make(map[string]interface{})
|
|
err := json.Unmarshal([]byte(toolCall.Function.Arguments), &arguments)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Create a MobileAction with options to reuse BuildMCPCallToolRequest
|
|
action := option.MobileAction{
|
|
Options: option.NewActionOptions(opts...),
|
|
}
|
|
|
|
req := BuildMCPCallToolRequest(
|
|
option.ActionName(toolCall.Function.Name),
|
|
arguments,
|
|
action,
|
|
)
|
|
_, err = dExt.client.CallTool(ctx, req)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// PlanningExecutionResult (original implementation - preserved)
|
|
type PlanningExecutionResult struct {
|
|
ai.PlanningResult // Inherit all fields from ai.PlanningResult (ToolCalls, Thought, Content, Error, ModelName)
|
|
// Planning process information
|
|
ScreenshotElapsed int64 `json:"screenshot_elapsed_ms"` // screenshot elapsed time(ms)
|
|
ImagePath string `json:"image_path"` // screenshot image path
|
|
Resolution *types.Size `json:"resolution"` // image resolution
|
|
ScreenResult *ScreenResult `json:"screen_result"` // complete screen result data
|
|
ModelCallElapsed int64 `json:"model_call_elapsed_ms"` // model call elapsed time(ms)
|
|
ToolCallsCount int `json:"tool_calls_count"` // number of tool calls generated
|
|
ActionNames []string `json:"action_names"` // names of parsed actions
|
|
// Execution information
|
|
StartTime int64 `json:"start_time"` // planning start time
|
|
Elapsed int64 `json:"elapsed_ms"` // planning elapsed time(ms)
|
|
SubActions []*SubActionResult `json:"sub_actions,omitempty"` // sub-actions generated from this planning
|
|
}
|
|
|
|
// AIExecutionResult (original implementation - preserved)
|
|
type AIExecutionResult struct {
|
|
Type string `json:"type"` // operation type: "query", "action", "assert"
|
|
ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
|
|
ScreenshotElapsed int64 `json:"screenshot_elapsed"` // screenshot elapsed time in milliseconds
|
|
ImagePath string `json:"image_path"` // path to screenshot used for operation
|
|
Resolution *types.Size `json:"resolution"` // screen resolution
|
|
|
|
// Operation-specific results (only one will be populated based on Type)
|
|
QueryResult *ai.QueryResult `json:"query_result,omitempty"` // for ai_query operations
|
|
PlanningResult *ai.PlanningResult `json:"planning_result,omitempty"` // for ai_action operations
|
|
AssertionResult *ai.AssertionResult `json:"assertion_result,omitempty"` // for ai_assert operations
|
|
|
|
// Common fields
|
|
Error string `json:"error,omitempty"` // error message if operation failed
|
|
}
|
|
|
|
// SubActionResult (original implementation - preserved)
|
|
type SubActionResult struct {
|
|
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
|
|
Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
|
|
StartTime int64 `json:"start_time"` // sub-action start time
|
|
Elapsed int64 `json:"elapsed_ms"` // sub-action elapsed time(ms)
|
|
Error string `json:"error,omitempty"` // sub-action execution result
|
|
SessionData
|
|
}
|
|
|
|
type SessionData struct {
|
|
Requests []*DriverRequests `json:"requests,omitempty"` // store sub-action specific requests
|
|
ScreenResults []*ScreenResult `json:"screen_results,omitempty"` // store sub-action specific screen_results
|
|
}
|
|
|
|
// AIQuery (original implementation - preserved)
|
|
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
|
if dExt.LLMService == nil {
|
|
return nil, errors.New("LLM service is not initialized")
|
|
}
|
|
|
|
// Step 1: Take screenshot and convert to base64
|
|
screenResult, err := dExt.GetScreenResult(
|
|
option.WithScreenShotFileName("ai_query"),
|
|
option.WithScreenShotBase64(true),
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// parse action options to extract OutputSchema
|
|
actionOptions := option.NewActionOptions(opts...)
|
|
|
|
// Step 2: Call model and measure time
|
|
modelCallStartTime := time.Now()
|
|
|
|
// execute query
|
|
queryOpts := &ai.QueryOptions{
|
|
Query: text,
|
|
Screenshot: screenResult.Base64,
|
|
Size: screenResult.Resolution,
|
|
OutputSchema: actionOptions.OutputSchema,
|
|
}
|
|
result, err := dExt.LLMService.Query(context.Background(), queryOpts)
|
|
modelCallElapsed := time.Since(modelCallStartTime).Milliseconds()
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "AI query failed")
|
|
}
|
|
|
|
// Create AIExecutionResult with all timing and metadata
|
|
aiResult := &AIExecutionResult{
|
|
Type: "query",
|
|
ModelCallElapsed: modelCallElapsed, // model call timing
|
|
ScreenshotElapsed: screenResult.Elapsed, // screenshot timing
|
|
ImagePath: screenResult.ImagePath, // screenshot path
|
|
Resolution: &screenResult.Resolution, // screen resolution
|
|
QueryResult: result, // query-specific result
|
|
}
|
|
return aiResult, nil
|
|
}
|