mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-06 20:32:44 +08:00
fix: add direction parameter support for scroll operations in UI-TARS parser
- Handle the direction parameter in convertProcessedArgs for scroll actions.
- Ensure scroll operations map to swipe with both coordinates and direction.
- Add comprehensive test coverage for scroll action parsing.
- Fix an issue where the scroll direction was missing from tool call arguments.
This commit is contained in:
@@ -127,6 +127,7 @@ var (
|
||||
LLMRequestServiceError = errors.New("request LLM service error") // 112
|
||||
LLMParsePlanningResponseError = errors.New("parse LLM planning response error") // 113
|
||||
LLMParseAssertionResponseError = errors.New("parse LLM assertion response error") // 114
|
||||
LLMParseQueryResponseError = errors.New("parse LLM query response error") // 115
|
||||
)
|
||||
|
||||
var errorsMap = map[error]int{
|
||||
@@ -217,6 +218,7 @@ var errorsMap = map[error]int{
|
||||
LLMRequestServiceError: 112,
|
||||
LLMParsePlanningResponseError: 113,
|
||||
LLMParseAssertionResponseError: 114,
|
||||
LLMParseQueryResponseError: 115,
|
||||
|
||||
// trackings related
|
||||
TrackingGetError: 90,
|
||||
|
||||
@@ -1 +1 @@
|
||||
v5.0.0-beta-2506101609
|
||||
v5.0.0-beta-2506101640
|
||||
|
||||
@@ -1276,3 +1276,83 @@ func TestNormalizeActionCoordinates_StringArray(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUITARSContentParser_ParseScrollAction(t *testing.T) {
|
||||
parser := &UITARSContentParser{
|
||||
modelType: option.DOUBAO_1_5_UI_TARS_250328,
|
||||
systemPrompt: doubao_1_5_ui_tars_planning_prompt,
|
||||
actionMapping: doubao_1_5_ui_tars_action_mapping,
|
||||
}
|
||||
|
||||
size := types.Size{Width: 1080, Height: 1920}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
expectedDirection string
|
||||
}{
|
||||
{
|
||||
name: "scroll left with bbox format",
|
||||
content: `Thought: 我需要向左滑动
|
||||
Action: scroll(direction='left', start_box='<bbox>850 500 850 500</bbox>')`,
|
||||
expectedDirection: "left",
|
||||
},
|
||||
{
|
||||
name: "scroll up with array format",
|
||||
content: `Thought: 我需要向上滑动
|
||||
Action: scroll(direction='up', start_box='[400, 600]')`,
|
||||
expectedDirection: "up",
|
||||
},
|
||||
{
|
||||
name: "scroll down with array format",
|
||||
content: `Thought: 我需要向下滑动
|
||||
Action: scroll(direction='down', start_box='[500, 800]')`,
|
||||
expectedDirection: "down",
|
||||
},
|
||||
{
|
||||
name: "real log example - scroll left",
|
||||
content: `Thought: 我仔细观察了当前的游戏局面,发现两个2分别位于右下角和右中位置。之前尝试了几次滑动都没有成功,现在我需要重新思考策略。既然向上滑动没有效果,那我决定换个方向,尝试向左滑动看看。这样应该能让这两个2相遇并合并,为后续的游戏进展打下基础。
|
||||
Action: scroll(direction='left', start_box='<bbox>850 500 850 500</bbox>')`,
|
||||
expectedDirection: "left",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := parser.Parse(tt.content, size)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.Len(t, result.ToolCalls, 1)
|
||||
|
||||
toolCall := result.ToolCalls[0]
|
||||
|
||||
// Verify tool call structure
|
||||
assert.Equal(t, "uixt__swipe", toolCall.Function.Name)
|
||||
assert.Equal(t, "function", toolCall.Type)
|
||||
assert.NotEmpty(t, toolCall.ID)
|
||||
|
||||
// Parse and verify arguments
|
||||
var args map[string]interface{}
|
||||
err = json.Unmarshal([]byte(toolCall.Function.Arguments), &args)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify direction parameter is present and correct
|
||||
assert.Contains(t, args, "direction")
|
||||
assert.Equal(t, tt.expectedDirection, args["direction"])
|
||||
|
||||
// Verify coordinates are present and reasonable
|
||||
assert.Contains(t, args, "x")
|
||||
assert.Contains(t, args, "y")
|
||||
assert.IsType(t, float64(0), args["x"])
|
||||
assert.IsType(t, float64(0), args["y"])
|
||||
|
||||
// Verify coordinates are within screen bounds
|
||||
x := args["x"].(float64)
|
||||
y := args["y"].(float64)
|
||||
assert.Greater(t, x, 0.0)
|
||||
assert.Less(t, x, float64(size.Width))
|
||||
assert.Greater(t, y, 0.0)
|
||||
assert.Less(t, y, float64(size.Height))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -290,6 +290,26 @@ func convertProcessedArgs(processedArgs map[string]interface{}, actionType strin
|
||||
return options.ToMap(), nil
|
||||
}
|
||||
|
||||
// For scroll operations, handle both coordinates and direction
|
||||
if actionType == "scroll" && hasStartBox {
|
||||
startCoords, ok := startBox.([]float64)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid coordinate format for scroll operation")
|
||||
}
|
||||
|
||||
options := option.ActionOptions{
|
||||
X: builtin.RoundToOneDecimal(startCoords[0]),
|
||||
Y: builtin.RoundToOneDecimal(startCoords[1]),
|
||||
}
|
||||
|
||||
// Add direction parameter if present
|
||||
if direction, hasDirection := processedArgs["direction"]; hasDirection {
|
||||
options.Direction = direction.(string)
|
||||
}
|
||||
|
||||
return options.ToMap(), nil
|
||||
}
|
||||
|
||||
// For single coordinate operations, return the coordinate array directly
|
||||
if hasStartBox {
|
||||
startCoords, ok := startBox.([]float64)
|
||||
|
||||
@@ -39,7 +39,7 @@ var doubao_1_5_ui_tars_action_mapping = map[string]option.ActionName{
|
||||
"drag": option.ACTION_Drag,
|
||||
"hotkey": option.ACTION_KeyCode,
|
||||
"type": option.ACTION_Input,
|
||||
"scroll": option.ACTION_Scroll,
|
||||
"scroll": option.ACTION_Swipe, // swipe up/down/left/right
|
||||
"wait": option.ACTION_Sleep,
|
||||
"finished": option.ACTION_Finished,
|
||||
}
|
||||
@@ -136,7 +136,7 @@ var doubao_1_5_thinking_vision_pro_action_mapping = map[string]option.ActionName
|
||||
"drag": option.ACTION_Drag,
|
||||
"hotkey": option.ACTION_KeyCode,
|
||||
"type": option.ACTION_Input,
|
||||
"scroll": option.ACTION_Scroll,
|
||||
"scroll": option.ACTION_Swipe, // swipe up/down/left/right
|
||||
"wait": option.ACTION_Sleep,
|
||||
"finished": option.ACTION_Finished,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user