fix: update invokeToolCall to accept options and refactor action type handling in MarkUIOperation

This commit is contained in:
lilong.129
2025-06-29 15:59:48 +08:00
parent 9794852c1a
commit 5baabee89c
3 changed files with 11 additions and 6 deletions

View File

@@ -1 +1 @@
v5.0.0-250628
v5.0.0-250629

View File

@@ -115,7 +115,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}()
// Execute the tool call
if err := dExt.invokeToolCall(ctx, toolCall); err != nil {
if err := dExt.invokeToolCall(ctx, toolCall, opts...); err != nil {
subActionResult.Error = err
return err
}
@@ -173,7 +173,7 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
// Step 3: Execute tool calls
for _, toolCall := range planningResult.ToolCalls {
err = dExt.invokeToolCall(ctx, toolCall)
err = dExt.invokeToolCall(ctx, toolCall, opts...)
if err != nil {
aiExecutionResult.Error = err.Error()
return aiExecutionResult, errors.Wrap(err, "invoke tool call failed")
@@ -286,7 +286,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo
}
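// invokeToolCall parses the tool call's JSON arguments, merges any
// caller-supplied action options into them, and then executes the action.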
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall) error {
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
// Parse arguments
arguments := make(map[string]interface{})
err := json.Unmarshal([]byte(toolCall.Function.Arguments), &arguments)
@@ -294,6 +294,10 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
return err
}
// Merge the caller's action options (forwarded by both StartToGoal and AIAction)
// into the tool call arguments, so that options such as PreMarkOperation
// are passed to specific tool implementations.
extractActionOptionsToArguments(opts, arguments)
// Execute the action
req := mcp.CallToolRequest{
Params: struct {

View File

@@ -470,14 +470,15 @@ func MarkUIOperation(driver IDriver, actionType option.ActionName, actionCoordin
fmt.Sprintf("action_%s_pre_%s.png", timestamp, actionType),
)
if actionType == option.ACTION_TapAbsXY || actionType == option.ACTION_DoubleTapXY {
switch actionType {
case option.ACTION_TapAbsXY, option.ACTION_DoubleTapXY:
if len(actionCoordinates) != 2 {
return fmt.Errorf("invalid tap action coordinates: %v", actionCoordinates)
}
x, y := actionCoordinates[0], actionCoordinates[1]
point := image.Point{X: int(x), Y: int(y)}
err = SaveImageWithCircleMarker(compressedBufSource, point, imagePath)
} else if actionType == option.ACTION_SwipeDirection || actionType == option.ACTION_SwipeCoordinate || actionType == option.ACTION_Drag {
case option.ACTION_SwipeDirection, option.ACTION_SwipeCoordinate, option.ACTION_Drag:
if len(actionCoordinates) != 4 {
return fmt.Errorf("invalid swipe action coordinates: %v", actionCoordinates)
}