feat: implement UI automation test history isolation

- Add ResetHistory option to PlanningOptions and ActionOptions
- Implement task completion detection with isTaskFinished() method
- Add executeActions() method to separate action execution logic
- Modify ConversationHistory.Clear() to completely clear all messages including system message
- Refactor StartToGoal() to automatically reset history on first attempt
- Add WithResetHistory() option function for consistent API
- Consolidate test files into driver_ext_ai_test.go with comprehensive test coverage
This commit is contained in:
lilong.129
2025-06-06 15:26:25 +08:00
parent 6e1bd5bbe2
commit b642ea004e
6 changed files with 306 additions and 48 deletions

View File

@@ -23,6 +23,7 @@ type PlanningOptions struct {
UserInstruction string `json:"user_instruction"` // append to system prompt
Message *schema.Message `json:"message"`
Size types.Size `json:"size"`
ResetHistory bool `json:"reset_history"` // whether to reset conversation history before planning
}
// PlanningResult represents the result of planning
@@ -85,6 +86,12 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes
return nil, errors.Wrap(err, "validate planning parameters failed")
}
// reset conversation history if requested
if opts.ResetHistory {
p.history.Clear() // Clear everything including system message for complete isolation
log.Info().Msg("conversation history reset for planner")
}
// prepare prompt
if len(p.history) == 0 && opts.UserInstruction != "" {
// add system message

View File

@@ -62,10 +62,15 @@ func (h *ConversationHistory) Append(msg *schema.Message) {
}
// Clear removes every message from the conversation history, including the
// system message, leaving the history empty.
//
// Calling Clear on an already empty history only logs a message and returns.
func (h *ConversationHistory) Clear() {
	// Guard first: there is nothing to drop, and indexing into an empty
	// history would panic.
	if len(*h) == 0 {
		log.Info().Msg("conversation history is already empty")
		return
	}

	// Drop everything, system message included.
	*h = ConversationHistory{}
	log.Info().Msg("conversation history cleared completely")
}
func logRequest(messages ConversationHistory) {

View File

@@ -21,6 +21,7 @@ import (
func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...option.ActionOption) error {
options := option.NewActionOptions(opts...)
log.Info().Int("max_retry_times", options.MaxRetryTimes).Msg("StartToGoal")
var attempt int
for {
attempt++
@@ -34,7 +35,14 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
default:
}
if err := dExt.AIAction(ctx, prompt, opts...); err != nil {
// Plan next action with history reset on first attempt
planningOpts := opts
if attempt == 1 {
// Add ResetHistory option for the first attempt
planningOpts = append(planningOpts, option.WithResetHistory(true))
}
result, err := dExt.PlanNextAction(ctx, prompt, planningOpts...)
if err != nil {
// Check if this is a LLM service request error that should be retried
if errors.Is(err, code.LLMRequestServiceError) {
log.Warn().Err(err).Int("attempt", attempt).
@@ -44,6 +52,17 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
return err
}
// Check if task is finished BEFORE executing actions
if dExt.isTaskFinished(result) {
log.Info().Msg("task finished, stopping StartToGoal")
return nil
}
// Execute actions only if task is not finished
if err := dExt.executeActions(ctx, result.ToolCalls); err != nil {
return err
}
if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes {
return errors.New("reached max retry times")
}
@@ -59,42 +78,8 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
return err
}
// do actions
for _, action := range result.ToolCalls {
// Check for context cancellation before each action
select {
case <-ctx.Done():
log.Warn().Msg("interrupted in AIAction")
return errors.Wrap(code.InterruptError, "AIAction interrupted")
default:
}
// call eino tool
arguments := make(map[string]interface{})
err := json.Unmarshal([]byte(action.Function.Arguments), &arguments)
if err != nil {
return err
}
req := mcp.CallToolRequest{
Params: struct {
Name string `json:"name"`
Arguments map[string]any `json:"arguments,omitempty"`
Meta *struct {
ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"`
} `json:"_meta,omitempty"`
}{
Name: action.Function.Name,
Arguments: arguments,
},
}
_, err = dExt.client.CallTool(ctx, req)
if err != nil {
return err
}
}
return nil
// execute actions
return dExt.executeActions(ctx, result.ToolCalls)
}
func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*ai.PlanningResult, error) {
@@ -128,6 +113,10 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
}
// Parse action options to get ResetHistory setting
options := option.NewActionOptions(opts...)
resetHistory := options.ResetHistory
planningOpts := &ai.PlanningOptions{
UserInstruction: prompt,
Message: &schema.Message{
@@ -141,7 +130,8 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
},
},
},
Size: size,
Size: size,
ResetHistory: resetHistory,
}
result, err := dExt.LLMService.Call(ctx, planningOpts)
@@ -151,6 +141,64 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
return result, nil
}
// isTaskFinished reports whether the planning result signals that the task is
// done. The task counts as finished when the result carries no tool calls at
// all, or when any of its tool calls is named "uixt__finished".
func (dExt *XTDriver) isTaskFinished(result *ai.PlanningResult) bool {
	calls := result.ToolCalls
	if len(calls) == 0 {
		// Nothing left to execute.
		log.Info().Msg("no tool calls returned, task may be finished")
		return true
	}

	for i := range calls {
		fn := calls[i].Function
		if fn.Name != "uixt__finished" {
			continue
		}
		// The raw arguments of the finished call are logged as the reason.
		log.Info().Str("reason", fn.Arguments).Msg("finished action detected")
		return true
	}
	return false
}
// executeActions runs the planned tool calls one by one, in order, through the
// driver's client.
//
// Before each call the context is checked; if it is already done the function
// stops and returns code.InterruptError wrapped with a short message. Each tool
// call's JSON argument string is decoded into a map and sent as an
// mcp.CallToolRequest. A tool call whose argument string is empty is invoked
// with no arguments instead of failing on JSON decoding.
//
// The first decoding error or tool error aborts the remaining calls and is
// returned; nil is returned when every call succeeded.
func (dExt *XTDriver) executeActions(ctx context.Context, toolCalls []schema.ToolCall) error {
	for _, action := range toolCalls {
		// Check for context cancellation before each action.
		select {
		case <-ctx.Done():
			log.Warn().Msg("interrupted in executeActions")
			return errors.Wrap(code.InterruptError, "executeActions interrupted")
		default:
		}

		// Decode the arguments; an empty string means "no arguments" and must
		// not be handed to json.Unmarshal, which rejects empty input.
		arguments := make(map[string]any)
		if raw := action.Function.Arguments; raw != "" {
			if err := json.Unmarshal([]byte(raw), &arguments); err != nil {
				return errors.Wrapf(err, "unmarshal arguments of tool call %s failed", action.Function.Name)
			}
		}

		req := mcp.CallToolRequest{
			Params: struct {
				Name      string         `json:"name"`
				Arguments map[string]any `json:"arguments,omitempty"`
				Meta      *struct {
					ProgressToken mcp.ProgressToken `json:"progressToken,omitempty"`
				} `json:"_meta,omitempty"`
			}{
				Name:      action.Function.Name,
				Arguments: arguments,
			},
		}

		// The tool's result is not used; only its error matters here.
		if _, err := dExt.client.CallTool(ctx, req); err != nil {
			return err
		}
	}
	return nil
}
// AIQuery is currently a stub: it ignores text and opts and always returns an
// empty string with a nil error.
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (string, error) {
return "", nil
}

191
uixt/driver_ext_ai_test.go Normal file
View File

@@ -0,0 +1,191 @@
package uixt
import (
"context"
"testing"
"github.com/cloudwego/eino/schema"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/stretchr/testify/assert"
)
func TestXTDriver_isTaskFinished(t *testing.T) {
driver := &XTDriver{}
tests := []struct {
name string
result *ai.PlanningResult
expected bool
}{
{
name: "no tool calls - task finished",
result: &ai.PlanningResult{
ToolCalls: []schema.ToolCall{},
Thought: "No actions needed",
},
expected: true,
},
{
name: "finished action - task finished",
result: &ai.PlanningResult{
ToolCalls: []schema.ToolCall{
{
Function: schema.FunctionCall{
Name: "uixt__finished",
Arguments: `{"content": "Task completed successfully"}`,
},
},
},
Thought: "Task completed",
},
expected: true,
},
{
name: "regular action - task not finished",
result: &ai.PlanningResult{
ToolCalls: []schema.ToolCall{
{
Function: schema.FunctionCall{
Name: string(option.ACTION_TapXY),
Arguments: `{"x": 100, "y": 200}`,
},
},
},
Thought: "Click on button",
},
expected: false,
},
{
name: "multiple actions with finished - task finished",
result: &ai.PlanningResult{
ToolCalls: []schema.ToolCall{
{
Function: schema.FunctionCall{
Name: string(option.ACTION_TapXY),
Arguments: `{"x": 100, "y": 200}`,
},
},
{
Function: schema.FunctionCall{
Name: "uixt__finished",
Arguments: `{"content": "All tasks completed"}`,
},
},
},
Thought: "Complete all actions",
},
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := driver.isTaskFinished(tt.result)
assert.Equal(t, tt.expected, result)
})
}
}
// TestActionOptions_WithResetHistory checks that WithResetHistory stores the
// given value in ActionOptions.ResetHistory and that the field is false when
// the option is not supplied.
func TestActionOptions_WithResetHistory(t *testing.T) {
	// Explicitly enabled.
	enabled := option.NewActionOptions(option.WithResetHistory(true))
	assert.True(t, enabled.ResetHistory)

	// Explicitly disabled.
	disabled := option.NewActionOptions(option.WithResetHistory(false))
	assert.False(t, disabled.ResetHistory)

	// Not supplied: the default should be false.
	defaults := option.NewActionOptions()
	assert.False(t, defaults.ResetHistory)
}
// TestXTDriver_PlanNextAction_WithResetHistory checks that PlanNextAction
// accepts the ResetHistory option with either value and, on a driver whose
// LLMService is nil, fails with the "LLM service is not initialized" error.
func TestXTDriver_PlanNextAction_WithResetHistory(t *testing.T) {
	// Minimal driver with no LLM service configured.
	dExt := &XTDriver{}
	dExt.LLMService = nil

	ctx := context.Background()
	for _, reset := range []bool{true, false} {
		_, err := dExt.PlanNextAction(ctx, "test prompt", option.WithResetHistory(reset))
		// Both values must hit the nil-service error.
		assert.Error(t, err)
		assert.Contains(t, err.Error(), "LLM service is not initialized")
	}
}
// TestStartToGoal_HistoryResetLogic checks the "reset only on the first
// attempt" rule. It re-evaluates the attempt == 1 condition locally rather
// than calling StartToGoal, and confirms the matching option can be built.
func TestStartToGoal_HistoryResetLogic(t *testing.T) {
	cases := []struct {
		name     string
		attempt  int
		expected bool
	}{
		{name: "first attempt", attempt: 1, expected: true},
		{name: "second attempt", attempt: 2, expected: false},
		{name: "third attempt", attempt: 3, expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Same condition as the one simulated from StartToGoal.
			shouldReset := tc.attempt == 1
			assert.Equal(t, tc.expected, shouldReset)
			if !shouldReset {
				return
			}

			// When a reset is due, the option must produce ResetHistory=true.
			actionOpts := option.NewActionOptions(option.WithResetHistory(true))
			assert.True(t, actionOpts.ResetHistory)
		})
	}
}
// TestConversationHistory_Clear checks that Clear empties a populated history,
// system message included, and that calling it on an empty history leaves the
// history empty.
func TestConversationHistory_Clear(t *testing.T) {
	seed := ai.ConversationHistory{
		{Role: schema.System, Content: "System prompt with user instruction"},
		{Role: schema.User, Content: "User message"},
		{Role: schema.Assistant, Content: "Assistant response"},
	}

	// Work on a copy so the seed slice header itself is not reassigned.
	working := make(ai.ConversationHistory, len(seed))
	copy(working, seed)
	working.Clear()
	assert.Len(t, working, 0)

	// Clearing an already empty history.
	blank := ai.ConversationHistory{}
	blank.Clear()
	assert.Len(t, blank, 0)
}
func TestPlanningOptions_ResetHistory(t *testing.T) {
// Test that PlanningOptions includes ResetHistory field
opts := &ai.PlanningOptions{
UserInstruction: "test instruction",
Message: &schema.Message{
Role: schema.User,
Content: "test message",
},
Size: types.Size{Width: 100, Height: 200},
ResetHistory: true,
}
assert.True(t, opts.ResetHistory)
assert.Equal(t, "test instruction", opts.UserInstruction)
}

View File

@@ -53,6 +53,7 @@ const (
ACTION_SetIme ActionName = "set_ime"
ACTION_GetSource ActionName = "get_source"
ACTION_GetForegroundApp ActionName = "get_foreground_app"
ACTION_AppInfo ActionName = "app_info" // get app info action
// UI handling
ACTION_Home ActionName = "home"
@@ -85,7 +86,6 @@ const (
ACTION_Upload ActionName = "upload" // upload action
ACTION_PushMedia ActionName = "push_media" // push media action
ACTION_CreateBrowser ActionName = "create_browser" // create browser action
ACTION_AppInfo ActionName = "app_info" // get app info action
// device actions
ACTION_ListAvailableDevices ActionName = "list_available_devices"
@@ -183,10 +183,11 @@ type ActionOptions struct {
Params []float64 `json:"params,omitempty" yaml:"params,omitempty" desc:"Generic parameter array"`
// AI related
Prompt string `json:"prompt,omitempty" yaml:"prompt,omitempty" desc:"AI action prompt"`
Content string `json:"content,omitempty" yaml:"content,omitempty" desc:"Content for finished action"`
LLMService string `json:"llm_service,omitempty" yaml:"llm_service,omitempty" desc:"LLM service type for AI actions"`
CVService string `json:"cv_service,omitempty" yaml:"cv_service,omitempty" desc:"Computer vision service type for AI actions"`
Prompt string `json:"prompt,omitempty" yaml:"prompt,omitempty" desc:"AI action prompt"`
Content string `json:"content,omitempty" yaml:"content,omitempty" desc:"Content for finished action"`
LLMService string `json:"llm_service,omitempty" yaml:"llm_service,omitempty" desc:"LLM service type for AI actions"`
CVService string `json:"cv_service,omitempty" yaml:"cv_service,omitempty" desc:"Computer vision service type for AI actions"`
ResetHistory bool `json:"reset_history,omitempty" yaml:"reset_history,omitempty" desc:"Whether to reset conversation history before AI planning"`
// Time related
Seconds float64 `json:"seconds,omitempty" yaml:"seconds,omitempty" desc:"Sleep duration in seconds"`
@@ -550,6 +551,12 @@ func WithAntiRisk(antiRisk bool) ActionOption {
}
}
// WithResetHistory returns an ActionOption that sets ActionOptions.ResetHistory
// to the given value.
func WithResetHistory(resetHistory bool) ActionOption {
	apply := func(opts *ActionOptions) {
		opts.ResetHistory = resetHistory
	}
	return apply
}
// HTTP API direct usage methods
// ValidateForHTTPAPI validates the request for HTTP API usage