feat: implement UI automation test history isolation

- Add ResetHistory option to PlanningOptions and ActionOptions
- Implement task completion detection with isTaskFinished() method
- Add executeActions() method to separate action execution logic
- Modify ConversationHistory.Clear() to completely clear all messages including system message
- Refactor StartToGoal() to automatically reset history on first attempt
- Add WithResetHistory() option function for consistent API
- Consolidate test files into driver_ext_ai_test.go with comprehensive test coverage
2026-06-03 06:49:38 +08:00 · 2025-06-06 15:26:25 +08:00
parent 6e1bd5bbe2
commit b642ea004e
6 changed files with 306 additions and 48 deletions
--- a/uixt/ai/planner.go
+++ b/uixt/ai/planner.go
@@ -23,6 +23,7 @@ type PlanningOptions struct {
 	UserInstruction string          `json:"user_instruction"` // append to system prompt
 	Message         *schema.Message `json:"message"`
 	Size            types.Size      `json:"size"`
+	ResetHistory    bool            `json:"reset_history"` // whether to reset conversation history before planning
 }

 // PlanningResult represents the result of planning
@@ -85,6 +86,12 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes
 		return nil, errors.Wrap(err, "validate planning parameters failed")
 	}

+	// reset conversation history if requested
+	if opts.ResetHistory {
+		p.history.Clear() // Clear everything including system message for complete isolation
+		log.Info().Msg("conversation history reset for planner")
+	}
+
 	// prepare prompt
 	if len(p.history) == 0 && opts.UserInstruction != "" {
 		// add system message
--- a/uixt/ai/session.go
+++ b/uixt/ai/session.go
@@ -62,10 +62,15 @@ func (h *ConversationHistory) Append(msg *schema.Message) {
 }

 func (h *ConversationHistory) Clear() {
-	// Keep only the system message
-	systemMsg := (*h)[0]
-	*h = ConversationHistory{systemMsg}
-	log.Info().Msg("conversation history cleared")
+	// Drop every stored message, the system message included. An empty
+	// history is left untouched and only logged.
+	if len(*h) > 0 {
+		*h = ConversationHistory{}
+		log.Info().Msg("conversation history cleared completely")
+		return
+	}
+
+	log.Info().Msg("conversation history is already empty")
 }

 func logRequest(messages ConversationHistory) {
--- a/uixt/driver_ext_ai.go
+++ b/uixt/driver_ext_ai.go
@@ -21,6 +21,7 @@ import (
 func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) error {
 	options := option.NewActionOptions(opts...)
 	log.Info().Int("max_retry_times", options.MaxRetryTimes).Msg("StartToGoal")
+
 	var attempt int
 	for {
 		attempt++
@@ -34,7 +35,14 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 		default:
 		}

-		if err := dExt.AIAction(ctx, prompt, opts...); err != nil {
+		// Plan next action with history reset on first attempt
+		planningOpts := opts
+		if attempt == 1 {
+			// Add ResetHistory option for the first attempt
+			planningOpts = append(planningOpts, option.WithResetHistory(true))
+		}
+		result, err := dExt.PlanNextAction(ctx, prompt, planningOpts...)
+		if err != nil {
 			// Check if this is a LLM service request error that should be retried
 			if errors.Is(err, code.LLMRequestServiceError) {
 				log.Warn().Err(err).Int("attempt", attempt).
@@ -44,6 +52,17 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
 			return err
 		}

+		// Check if task is finished BEFORE executing actions
+		if dExt.isTaskFinished(result) {
+			log.Info().Msg("task finished, stopping StartToGoal")
+			return nil
+		}
+
+		// Execute actions only if task is not finished
+		if err := dExt.executeActions(ctx, result.ToolCalls); err != nil {
+			return err
+		}
+
 		if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes {
 			return errors.New("reached max retry times")
 		}
@@ -59,42 +78,8 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
 		return err
 	}

-	// do actions
-	for _, action := range result.ToolCalls {
-		// Check for context cancellation before each action
-		select {
-		case <-ctx.Done():
-			log.Warn().Msg("interrupted in AIAction")
-			return errors.Wrap(code.InterruptError, "AIAction interrupted")
-		default:
-		}
-
-		// call eino tool
-		arguments := make(map[string]interface{})
-		err := json.Unmarshal([]byte(action.Function.Arguments), &arguments)
-		if err != nil {
-			return err
-		}
-		req := mcp.CallToolRequest{
-			Params: struct {
-				Name      string         `json:"name"`
-				Arguments map[string]any `json:"arguments,omitempty"`
-				Meta      *struct {
-					ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"`
-				} `json:"_meta,omitempty"`
-			}{
-				Name:      action.Function.Name,
-				Arguments: arguments,
-			},
-		}
-
-		_, err = dExt.client.CallTool(ctx, req)
-		if err != nil {
-			return err
-		}
-	}
-
-	return nil
+	// execute actions
+	return dExt.executeActions(ctx, result.ToolCalls)
 }

 func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*ai.PlanningResult, error) {
@@ -128,6 +113,10 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
 		return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
 	}

+	// Parse action options to get ResetHistory setting
+	options := option.NewActionOptions(opts...)
+	resetHistory := options.ResetHistory
+
 	planningOpts := &ai.PlanningOptions{
 		UserInstruction: prompt,
 		Message: &schema.Message{
@@ -141,7 +130,8 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
 				},
 			},
 		},
-		Size: size,
+		Size:         size,
+		ResetHistory: resetHistory,
 	}

 	result, err := dExt.LLMService.Call(ctx, planningOpts)
@@ -151,6 +141,64 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
 	return result, nil
 }

+// isTaskFinished reports whether the planning result means there is nothing
+// left to do: either one of its tool calls is named "uixt__finished", or it
+// carries no tool calls at all.
+func (dExt *XTDriver) isTaskFinished(result *ai.PlanningResult) bool {
+	calls := result.ToolCalls
+
+	// An explicit "finished" action anywhere in the plan ends the task.
+	for i := range calls {
+		fn := calls[i].Function
+		if fn.Name != "uixt__finished" {
+			continue
+		}
+		log.Info().Str("reason", fn.Arguments).Msg("finished action detected")
+		return true
+	}
+
+	if len(calls) > 0 {
+		return false
+	}
+
+	// A plan without any action is treated as completion as well.
+	log.Info().Msg("no tool calls returned, task may be finished")
+	return true
+}
+
+// executeActions runs the planned tool calls one after another through the
+// driver's client and stops at the first failure.
+//
+// Before each call it checks ctx; once ctx is done it returns
+// code.InterruptError wrapped with "executeActions interrupted". A tool call
+// whose argument string is empty is sent without arguments instead of being
+// rejected by the JSON decoder. Decode and call errors are wrapped with the
+// tool name so the failing action can be identified.
+func (dExt *XTDriver) executeActions(ctx context.Context, toolCalls []schema.ToolCall) error {
+	for _, action := range toolCalls {
+		// Check for context cancellation before each action
+		select {
+		case <-ctx.Done():
+			log.Warn().Msg("interrupted in executeActions")
+			return errors.Wrap(code.InterruptError, "executeActions interrupted")
+		default:
+		}
+
+		name := action.Function.Name
+
+		// Decode the raw JSON arguments. json.Unmarshal fails on empty
+		// input, so an empty string is treated as "no arguments".
+		arguments := make(map[string]any)
+		if raw := action.Function.Arguments; raw != "" {
+			if err := json.Unmarshal([]byte(raw), &arguments); err != nil {
+				return errors.Wrapf(err, "parse arguments of tool %s", name)
+			}
+		}
+
+		// call eino tool
+		req := mcp.CallToolRequest{
+			Params: struct {
+				Name      string         `json:"name"`
+				Arguments map[string]any `json:"arguments,omitempty"`
+				Meta      *struct {
+					ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"`
+				} `json:"_meta,omitempty"`
+			}{
+				Name:      name,
+				Arguments: arguments,
+			},
+		}
+
+		if _, err := dExt.client.CallTool(ctx, req); err != nil {
+			return errors.Wrapf(err, "call tool %s", name)
+		}
+	}
+
+	return nil
+}
+
 func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (string, error) {
 	return "", nil
 }
--- a/uixt/driver_ext_ai_test.go
+++ b/uixt/driver_ext_ai_test.go
@@ -0,0 +1,191 @@
+package uixt
+
+import (
+	"context"
+	"testing"
+
+	"github.com/cloudwego/eino/schema"
+	"github.com/httprunner/httprunner/v5/uixt/ai"
+	"github.com/httprunner/httprunner/v5/uixt/option"
+	"github.com/httprunner/httprunner/v5/uixt/types"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestXTDriver_isTaskFinished covers the outcomes of isTaskFinished: an empty
+// plan, a plan holding only the "uixt__finished" action, a plan holding only
+// a regular action, and a plan mixing both.
+func TestXTDriver_isTaskFinished(t *testing.T) {
+	// toolCall builds a tool call that carries only a function name and its
+	// raw JSON arguments.
+	toolCall := func(name, args string) schema.ToolCall {
+		return schema.ToolCall{
+			Function: schema.FunctionCall{
+				Name:      name,
+				Arguments: args,
+			},
+		}
+	}
+	tapXY := toolCall(string(option.ACTION_TapXY), `{"x": 100, "y": 200}`)
+
+	cases := []struct {
+		name string
+		plan *ai.PlanningResult
+		want bool
+	}{
+		{
+			name: "no tool calls - task finished",
+			plan: &ai.PlanningResult{
+				ToolCalls: []schema.ToolCall{},
+				Thought:   "No actions needed",
+			},
+			want: true,
+		},
+		{
+			name: "finished action - task finished",
+			plan: &ai.PlanningResult{
+				ToolCalls: []schema.ToolCall{
+					toolCall("uixt__finished", `{"content": "Task completed successfully"}`),
+				},
+				Thought: "Task completed",
+			},
+			want: true,
+		},
+		{
+			name: "regular action - task not finished",
+			plan: &ai.PlanningResult{
+				ToolCalls: []schema.ToolCall{tapXY},
+				Thought:   "Click on button",
+			},
+			want: false,
+		},
+		{
+			name: "multiple actions with finished - task finished",
+			plan: &ai.PlanningResult{
+				ToolCalls: []schema.ToolCall{
+					tapXY,
+					toolCall("uixt__finished", `{"content": "All tasks completed"}`),
+				},
+				Thought: "Complete all actions",
+			},
+			want: true,
+		},
+	}
+
+	dExt := &XTDriver{}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, dExt.isTaskFinished(tc.plan))
+		})
+	}
+}
+
+// TestActionOptions_WithResetHistory verifies that WithResetHistory stores
+// the given flag in ActionOptions.ResetHistory and that the flag is false
+// when the option is not supplied.
+func TestActionOptions_WithResetHistory(t *testing.T) {
+	// Explicit values are carried through unchanged.
+	enabled := option.NewActionOptions(option.WithResetHistory(true))
+	assert.True(t, enabled.ResetHistory)
+
+	disabled := option.NewActionOptions(option.WithResetHistory(false))
+	assert.False(t, disabled.ResetHistory)
+
+	// Without the option the flag keeps its default, which should be false.
+	defaults := option.NewActionOptions()
+	assert.False(t, defaults.ResetHistory)
+}
+
+// TestXTDriver_PlanNextAction_WithResetHistory calls PlanNextAction on a
+// driver whose LLMService is nil, once with ResetHistory enabled and once
+// with it disabled, and expects the "LLM service is not initialized" error
+// both times.
+func TestXTDriver_PlanNextAction_WithResetHistory(t *testing.T) {
+	// Minimal driver with no LLM service configured.
+	dExt := &XTDriver{}
+	dExt.LLMService = nil
+
+	for _, reset := range []bool{true, false} {
+		_, err := dExt.PlanNextAction(context.Background(), "test prompt", option.WithResetHistory(reset))
+		assert.Error(t, err)
+		assert.Contains(t, err.Error(), "LLM service is not initialized")
+	}
+}
+
+// TestStartToGoal_HistoryResetLogic checks the "reset only on the first
+// attempt" rule by re-evaluating the attempt == 1 condition locally; it does
+// not call StartToGoal itself.
+func TestStartToGoal_HistoryResetLogic(t *testing.T) {
+	cases := []struct {
+		name      string
+		attempt   int
+		wantReset bool
+	}{
+		{name: "first attempt", attempt: 1, wantReset: true},
+		{name: "second attempt", attempt: 2, wantReset: false},
+		{name: "third attempt", attempt: 3, wantReset: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Same condition as the one simulated from StartToGoal.
+			shouldReset := tc.attempt == 1
+			assert.Equal(t, tc.wantReset, shouldReset)
+
+			if !shouldReset {
+				return
+			}
+
+			// When a reset is due, the option must set the flag.
+			built := option.NewActionOptions(option.WithResetHistory(true))
+			assert.True(t, built.ResetHistory)
+		})
+	}
+}
+
+// TestConversationHistory_Clear verifies that Clear leaves a populated
+// history with zero messages (the system message included) and that calling
+// it on an empty history still yields an empty history.
+func TestConversationHistory_Clear(t *testing.T) {
+	seed := ai.ConversationHistory{
+		{
+			Role:    schema.System,
+			Content: "System prompt with user instruction",
+		},
+		{
+			Role:    schema.User,
+			Content: "User message",
+		},
+		{
+			Role:    schema.Assistant,
+			Content: "Assistant response",
+		},
+	}
+
+	// Work on a copy so the seed slice itself is never reassigned.
+	populated := append(ai.ConversationHistory(nil), seed...)
+	populated.Clear()
+	assert.Len(t, populated, 0)
+
+	// Clearing an already empty history is a no-op.
+	blank := ai.ConversationHistory{}
+	blank.Clear()
+	assert.Len(t, blank, 0)
+}
+
+// TestPlanningOptions_ResetHistory builds a PlanningOptions value with
+// ResetHistory set and reads the ResetHistory and UserInstruction fields
+// back.
+func TestPlanningOptions_ResetHistory(t *testing.T) {
+	userMsg := &schema.Message{
+		Role:    schema.User,
+		Content: "test message",
+	}
+	screen := types.Size{Width: 100, Height: 200}
+
+	planning := &ai.PlanningOptions{
+		UserInstruction: "test instruction",
+		Message:         userMsg,
+		Size:            screen,
+		ResetHistory:    true,
+	}
+
+	assert.True(t, planning.ResetHistory)
+	assert.Equal(t, "test instruction", planning.UserInstruction)
+}
--- a/uixt/option/action.go
+++ b/uixt/option/action.go
@@ -53,6 +53,7 @@ const (
 	ACTION_SetIme           ActionName = "set_ime"
 	ACTION_GetSource        ActionName = "get_source"
 	ACTION_GetForegroundApp ActionName = "get_foreground_app"
+	ACTION_AppInfo          ActionName = "app_info" // get app info action

 	// UI handling
 	ACTION_Home                     ActionName = "home"
@@ -85,7 +86,6 @@ const (
 	ACTION_Upload                   ActionName = "upload"         // upload action
 	ACTION_PushMedia                ActionName = "push_media"     // push media action
 	ACTION_CreateBrowser            ActionName = "create_browser" // create browser action
-	ACTION_AppInfo                  ActionName = "app_info"       // get app info action

 	// device actions
 	ACTION_ListAvailableDevices ActionName = "list_available_devices"
@@ -183,10 +183,11 @@ type ActionOptions struct {
 	Params []float64 `json:"params,omitempty" yaml:"params,omitempty" desc:"Generic parameter array"`

 	// AI related
-	Prompt     string `json:"prompt,omitempty" yaml:"prompt,omitempty" desc:"AI action prompt"`
-	Content    string `json:"content,omitempty" yaml:"content,omitempty" desc:"Content for finished action"`
-	LLMService string `json:"llm_service,omitempty" yaml:"llm_service,omitempty" desc:"LLM service type for AI actions"`
-	CVService  string `json:"cv_service,omitempty" yaml:"cv_service,omitempty" desc:"Computer vision service type for AI actions"`
+	Prompt       string `json:"prompt,omitempty" yaml:"prompt,omitempty" desc:"AI action prompt"`
+	Content      string `json:"content,omitempty" yaml:"content,omitempty" desc:"Content for finished action"`
+	LLMService   string `json:"llm_service,omitempty" yaml:"llm_service,omitempty" desc:"LLM service type for AI actions"`
+	CVService    string `json:"cv_service,omitempty" yaml:"cv_service,omitempty" desc:"Computer vision service type for AI actions"`
+	ResetHistory bool   `json:"reset_history,omitempty" yaml:"reset_history,omitempty" desc:"Whether to reset conversation history before AI planning"`

 	// Time related
 	Seconds      float64 `json:"seconds,omitempty" yaml:"seconds,omitempty" desc:"Sleep duration in seconds"`
@@ -550,6 +551,12 @@ func WithAntiRisk(antiRisk bool) ActionOption {
 	}
 }

+// WithResetHistory returns an ActionOption that stores resetHistory in
+// ActionOptions.ResetHistory.
+func WithResetHistory(resetHistory bool) ActionOption {
+	apply := func(target *ActionOptions) {
+		target.ResetHistory = resetHistory
+	}
+	return apply
+}
+
 // HTTP API direct usage methods

 // ValidateForHTTPAPI validates the request for HTTP API usage