diff --git a/internal/version/VERSION b/internal/version/VERSION index f5ecfe3a..2c3cf270 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506052317 +v5.0.0-beta-2506061529 diff --git a/uixt/ai/planner.go b/uixt/ai/planner.go index fd59f639..8ec0a42d 100644 --- a/uixt/ai/planner.go +++ b/uixt/ai/planner.go @@ -23,6 +23,7 @@ type PlanningOptions struct { UserInstruction string `json:"user_instruction"` // append to system prompt Message *schema.Message `json:"message"` Size types.Size `json:"size"` + ResetHistory bool `json:"reset_history"` // whether to reset conversation history before planning } // PlanningResult represents the result of planning @@ -85,6 +86,12 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes return nil, errors.Wrap(err, "validate planning parameters failed") } + // reset conversation history if requested + if opts.ResetHistory { + p.history.Clear() // Clear everything including system message for complete isolation + log.Info().Msg("conversation history reset for planner") + } + // prepare prompt if len(p.history) == 0 && opts.UserInstruction != "" { // add system message diff --git a/uixt/ai/session.go b/uixt/ai/session.go index 1c2fbaa7..c907336a 100644 --- a/uixt/ai/session.go +++ b/uixt/ai/session.go @@ -62,10 +62,15 @@ func (h *ConversationHistory) Append(msg *schema.Message) { } func (h *ConversationHistory) Clear() { - // Keep only the system message - systemMsg := (*h)[0] - *h = ConversationHistory{systemMsg} - log.Info().Msg("conversation history cleared") + // Check if history is empty + if len(*h) == 0 { + log.Info().Msg("conversation history is already empty") + return + } + + // Clear everything including system message + *h = ConversationHistory{} + log.Info().Msg("conversation history cleared completely") } func logRequest(messages ConversationHistory) { diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go index 28515708..eabc5bdc 100644 --- a/uixt/driver_ext_ai.go +++ b/uixt/driver_ext_ai.go @@ -21,6 +21,7 @@ import ( func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) error { options := option.NewActionOptions(opts...) log.Info().Int("max_retry_times", options.MaxRetryTimes).Msg("StartToGoal") + var attempt int for { attempt++ @@ -34,7 +35,14 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op default: } - if err := dExt.AIAction(ctx, prompt, opts...); err != nil { + // Plan next action with history reset on first attempt + planningOpts := opts + if attempt == 1 { + // Add ResetHistory option for the first attempt + planningOpts = append(planningOpts, option.WithResetHistory(true)) + } + result, err := dExt.PlanNextAction(ctx, prompt, planningOpts...) + if err != nil { // Check if this is a LLM service request error that should be retried if errors.Is(err, code.LLMRequestServiceError) { log.Warn().Err(err).Int("attempt", attempt). @@ -44,6 +52,17 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op return err } + // Check if task is finished BEFORE executing actions + if dExt.isTaskFinished(result) { + log.Info().Msg("task finished, stopping StartToGoal") + return nil + } + + // Execute actions only if task is not finished + if err := dExt.executeActions(ctx, result.ToolCalls); err != nil { + return err + } + if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes { return errors.New("reached max retry times") } @@ -59,42 +78,8 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio return err } - // do actions - for _, action := range result.ToolCalls { - // Check for context cancellation before each action - select { - case <-ctx.Done(): - log.Warn().Msg("interrupted in AIAction") - return errors.Wrap(code.InterruptError, "AIAction interrupted") - default: - } - - // call eino tool - arguments := make(map[string]interface{}) - err := json.Unmarshal([]byte(action.Function.Arguments), &arguments) - if err != nil { - return err - } - req := mcp.CallToolRequest{ - Params: struct { - Name string `json:"name"` - Arguments map[string]any `json:"arguments,omitempty"` - Meta *struct { - ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` - } `json:"_meta,omitempty"` - }{ - Name: action.Function.Name, - Arguments: arguments, - }, - } - - _, err = dExt.client.CallTool(ctx, req) - if err != nil { - return err - } - } - - return nil + // execute actions + return dExt.executeActions(ctx, result.ToolCalls) } func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*ai.PlanningResult, error) { @@ -128,6 +113,10 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. return nil, errors.Wrap(code.DeviceGetInfoError, err.Error()) } + // Parse action options to get ResetHistory setting + options := option.NewActionOptions(opts...) + resetHistory := options.ResetHistory + planningOpts := &ai.PlanningOptions{ UserInstruction: prompt, Message: &schema.Message{ @@ -141,7 +130,8 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. }, }, }, - Size: size, + Size: size, + ResetHistory: resetHistory, } result, err := dExt.LLMService.Call(ctx, planningOpts) @@ -151,6 +141,64 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts .. return result, nil } +// isTaskFinished checks if the task is completed based on the planning result +func (dExt *XTDriver) isTaskFinished(result *ai.PlanningResult) bool { + // Check if there are no tool calls (no actions to execute) + if len(result.ToolCalls) == 0 { + log.Info().Msg("no tool calls returned, task may be finished") + return true + } + + // Check if any tool call is a "finished" action + for _, toolCall := range result.ToolCalls { + if toolCall.Function.Name == "uixt__finished" { + log.Info().Str("reason", toolCall.Function.Arguments).Msg("finished action detected") + return true + } + } + + return false +} + +// executeActions executes the planned actions +func (dExt *XTDriver) executeActions(ctx context.Context, toolCalls []schema.ToolCall) error { + for _, action := range toolCalls { + // Check for context cancellation before each action + select { + case <-ctx.Done(): + log.Warn().Msg("interrupted in executeActions") + return errors.Wrap(code.InterruptError, "executeActions interrupted") + default: + } + + // call eino tool + arguments := make(map[string]interface{}) + err := json.Unmarshal([]byte(action.Function.Arguments), &arguments) + if err != nil { + return err + } + req := mcp.CallToolRequest{ + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments,omitempty"` + Meta *struct { + ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"` + } `json:"_meta,omitempty"` + }{ + Name: action.Function.Name, + Arguments: arguments, + }, + } + + _, err = dExt.client.CallTool(ctx, req) + if err != nil { + return err + } + } + + return nil +} + func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (string, error) { return "", nil } diff --git a/uixt/driver_ext_ai_test.go b/uixt/driver_ext_ai_test.go new file mode 100644 index 00000000..4c0ff4f3 --- /dev/null +++ b/uixt/driver_ext_ai_test.go @@ -0,0 +1,191 @@ +package uixt + +import ( + "context" + "testing" + + "github.com/cloudwego/eino/schema" + "github.com/httprunner/httprunner/v5/uixt/ai" + "github.com/httprunner/httprunner/v5/uixt/option" + "github.com/httprunner/httprunner/v5/uixt/types" + "github.com/stretchr/testify/assert" +) + +func TestXTDriver_isTaskFinished(t *testing.T) { + driver := &XTDriver{} + + tests := []struct { + name string + result *ai.PlanningResult + expected bool + }{ + { + name: "no tool calls - task finished", + result: &ai.PlanningResult{ + ToolCalls: []schema.ToolCall{}, + Thought: "No actions needed", + }, + expected: true, + }, + { + name: "finished action - task finished", + result: &ai.PlanningResult{ + ToolCalls: []schema.ToolCall{ + { + Function: schema.FunctionCall{ + Name: "uixt__finished", + Arguments: `{"content": "Task completed successfully"}`, + }, + }, + }, + Thought: "Task completed", + }, + expected: true, + }, + { + name: "regular action - task not finished", + result: &ai.PlanningResult{ + ToolCalls: []schema.ToolCall{ + { + Function: schema.FunctionCall{ + Name: string(option.ACTION_TapXY), + Arguments: `{"x": 100, "y": 200}`, + }, + }, + }, + Thought: "Click on button", + }, + expected: false, + }, + { + name: "multiple actions with finished - task finished", + result: &ai.PlanningResult{ + ToolCalls: []schema.ToolCall{ + { + Function: schema.FunctionCall{ + Name: string(option.ACTION_TapXY), + Arguments: `{"x": 100, "y": 200}`, + }, + }, + { + Function: schema.FunctionCall{ + Name: "uixt__finished", + Arguments: `{"content": "All tasks completed"}`, + }, + }, + }, + Thought: "Complete all actions", + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := driver.isTaskFinished(tt.result) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestActionOptions_WithResetHistory(t *testing.T) { + // Test WithResetHistory option function + opts := option.NewActionOptions(option.WithResetHistory(true)) + assert.True(t, opts.ResetHistory) + + opts2 := option.NewActionOptions(option.WithResetHistory(false)) + assert.False(t, opts2.ResetHistory) + + // Test default value + opts3 := option.NewActionOptions() + assert.False(t, opts3.ResetHistory) // Default should be false +} + +func TestXTDriver_PlanNextAction_WithResetHistory(t *testing.T) { + // Create a minimal XTDriver for testing + driver := &XTDriver{} + + // Test with nil LLMService (should return error) + driver.LLMService = nil + + _, err := driver.PlanNextAction(context.Background(), "test prompt", option.WithResetHistory(true)) + assert.Error(t, err) + assert.Contains(t, err.Error(), "LLM service is not initialized") + + // Test that PlanNextAction accepts ResetHistory option + _, err = driver.PlanNextAction(context.Background(), "test prompt", option.WithResetHistory(false)) + assert.Error(t, err) // Should still error due to nil service + assert.Contains(t, err.Error(), "LLM service is not initialized") +} + +func TestStartToGoal_HistoryResetLogic(t *testing.T) { + // Test the logic for when history should be reset + tests := []struct { + name string + attempt int + expected bool + }{ + {"first attempt", 1, true}, + {"second attempt", 2, false}, + {"third attempt", 3, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Simulate the logic from StartToGoal + resetHistory := tt.attempt == 1 + assert.Equal(t, tt.expected, resetHistory) + + // Test that the option is correctly created + if resetHistory { + opts := option.NewActionOptions(option.WithResetHistory(true)) + assert.True(t, opts.ResetHistory) + } + }) + } +} + +func TestConversationHistory_Clear(t *testing.T) { + // Test Clear method - should clear everything including system message + history := ai.ConversationHistory{ + { + Role: schema.System, + Content: "System prompt with user instruction", + }, + { + Role: schema.User, + Content: "User message", + }, + { + Role: schema.Assistant, + Content: "Assistant response", + }, + } + + // Test clearing everything including system message + historyCopy := make(ai.ConversationHistory, len(history)) + copy(historyCopy, history) + historyCopy.Clear() + assert.Len(t, historyCopy, 0) + + // Test clearing empty history + emptyHistory := ai.ConversationHistory{} + emptyHistory.Clear() + assert.Len(t, emptyHistory, 0) +} + +func TestPlanningOptions_ResetHistory(t *testing.T) { + // Test that PlanningOptions includes ResetHistory field + opts := &ai.PlanningOptions{ + UserInstruction: "test instruction", + Message: &schema.Message{ + Role: schema.User, + Content: "test message", + }, + Size: types.Size{Width: 100, Height: 200}, + ResetHistory: true, + } + + assert.True(t, opts.ResetHistory) + assert.Equal(t, "test instruction", opts.UserInstruction) +} diff --git a/uixt/option/action.go b/uixt/option/action.go index fa87ad7d..65502028 100644 --- a/uixt/option/action.go +++ b/uixt/option/action.go @@ -53,6 +53,7 @@ const ( ACTION_SetIme ActionName = "set_ime" ACTION_GetSource ActionName = "get_source" ACTION_GetForegroundApp ActionName = "get_foreground_app" + ACTION_AppInfo ActionName = "app_info" // get app info action // UI handling ACTION_Home ActionName = "home" @@ -85,7 +86,6 @@ const ( ACTION_Upload ActionName = "upload" // upload action ACTION_PushMedia ActionName = "push_media" // push media action ACTION_CreateBrowser ActionName = "create_browser" // create browser action - ACTION_AppInfo ActionName = "app_info" // get app info action // device actions ACTION_ListAvailableDevices ActionName = "list_available_devices" @@ -183,10 +183,11 @@ type ActionOptions struct { Params []float64 `json:"params,omitempty" yaml:"params,omitempty" desc:"Generic parameter array"` // AI related - Prompt string `json:"prompt,omitempty" yaml:"prompt,omitempty" desc:"AI action prompt"` - Content string `json:"content,omitempty" yaml:"content,omitempty" desc:"Content for finished action"` - LLMService string `json:"llm_service,omitempty" yaml:"llm_service,omitempty" desc:"LLM service type for AI actions"` - CVService string `json:"cv_service,omitempty" yaml:"cv_service,omitempty" desc:"Computer vision service type for AI actions"` + Prompt string `json:"prompt,omitempty" yaml:"prompt,omitempty" desc:"AI action prompt"` + Content string `json:"content,omitempty" yaml:"content,omitempty" desc:"Content for finished action"` + LLMService string `json:"llm_service,omitempty" yaml:"llm_service,omitempty" desc:"LLM service type for AI actions"` + CVService string `json:"cv_service,omitempty" yaml:"cv_service,omitempty" desc:"Computer vision service type for AI actions"` + ResetHistory bool `json:"reset_history,omitempty" yaml:"reset_history,omitempty" desc:"Whether to reset conversation history before AI planning"` // Time related Seconds float64 `json:"seconds,omitempty" yaml:"seconds,omitempty" desc:"Sleep duration in seconds"` @@ -550,6 +551,12 @@ func WithAntiRisk(antiRisk bool) ActionOption { } } +func WithResetHistory(resetHistory bool) ActionOption { + return func(o *ActionOptions) { + o.ResetHistory = resetHistory + } +} + // HTTP API direct usage methods // ValidateForHTTPAPI validates the request for HTTP API usage