package uixt

import (
	"context"
	"encoding/base64"
	"fmt"
	"path/filepath"

	"github.com/cloudwego/eino/schema"
	"github.com/httprunner/httprunner/v5/code"
	"github.com/httprunner/httprunner/v5/internal/builtin"
	"github.com/httprunner/httprunner/v5/internal/config"
	"github.com/httprunner/httprunner/v5/internal/json"
	"github.com/httprunner/httprunner/v5/uixt/ai"
	"github.com/httprunner/httprunner/v5/uixt/option"
	"github.com/mark3labs/mcp-go/mcp"
	"github.com/pkg/errors"
	"github.com/rs/zerolog/log"
)

// StartToGoal calls AIAction with the given instruction over and over.
//
// It returns the first error reported by AIAction. When the MaxRetryTimes
// action option is greater than 1, it returns a "reached max retry times"
// error once that many attempts have completed without an error.
//
// NOTE(review): with MaxRetryTimes <= 1 the loop only ends when AIAction
// returns an error — confirm this is the intended contract.
func (dExt *XTDriver) StartToGoal(text string, opts ...option.ActionOption) error {
	maxAttempts := option.NewActionOptions(opts...).MaxRetryTimes
	limited := maxAttempts > 1

	for attempt := 1; ; attempt++ {
		log.Info().Int("attempt", attempt).Msg("planning attempt")

		if err := dExt.AIAction(text, opts...); err != nil {
			return err
		}
		if limited && attempt >= maxAttempts {
			return errors.New("reached max retry times")
		}
	}
}

// AIAction plans the next step for the instruction via PlanNextAction and
// then executes every tool call of the plan, in order, through dExt.Client.
//
// The first failure (planning, argument decoding, or tool invocation) aborts
// the run and is returned unchanged.
func (dExt *XTDriver) AIAction(text string, opts ...option.ActionOption) error {
	plan, err := dExt.PlanNextAction(text, opts...)
	if err != nil {
		return err
	}

	for _, toolCall := range plan.ToolCalls {
		// The planner delivers the arguments as a JSON string; decode them
		// into a generic map before handing them to the tool.
		args := make(map[string]any)
		if err := json.Unmarshal([]byte(toolCall.Function.Arguments), &args); err != nil {
			return err
		}

		var req mcp.CallToolRequest
		req.Params.Name = toolCall.Function.Name
		req.Params.Arguments = args

		if _, err := dExt.Client.CallTool(context.Background(), req); err != nil {
			return err
		}
	}
	return nil
}

// PlanNextAction takes a screenshot of the current device screen and asks the
// LLM service which action(s) to perform next for the given instruction.
//
// The screenshot is sent to the LLM as a base64 JPEG data URL together with
// the window size. The same buffer is also handed to saveScreenShot in a
// background goroutine; a failure there is only logged. The opts parameter is
// currently unused.
//
// It returns an error when the LLM service is not initialized, when the
// screenshot or window size cannot be obtained, or when the LLM call fails.
func (dExt *XTDriver) PlanNextAction(text string, opts ...option.ActionOption) (*ai.PlanningResult, error) {
	if dExt.LLMService == nil {
		return nil, errors.New("LLM service is not initialized")
	}

	shotBuf, err := getScreenShotBuffer(dExt.IDriver)
	if err != nil {
		return nil, err
	}

	// Encode the screenshot as a data URL for the chat message.
	encoded := base64.StdEncoding.EncodeToString(shotBuf.Bytes())
	dataURL := "data:image/jpeg;base64," + encoded

	// Persist the screenshot without blocking the planning request.
	fileName := fmt.Sprintf("%s.jpeg", builtin.GenNameWithTimestamp("%d_screenshot"))
	imagePath := filepath.Join(config.GetConfig().ScreenShotsPath, fileName)
	go func() {
		if saveErr := saveScreenShot(shotBuf, imagePath); saveErr != nil {
			log.Error().Err(saveErr).Msg("save screenshot file failed")
		}
	}()

	size, err := dExt.IDriver.WindowSize()
	if err != nil {
		return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
	}

	imagePart := schema.ChatMessagePart{
		Type:     schema.ChatMessagePartTypeImageURL,
		ImageURL: &schema.ChatMessageImageURL{URL: dataURL},
	}
	userMessage := &schema.Message{
		Role:         schema.User,
		MultiContent: []schema.ChatMessagePart{imagePart},
	}
	planningOpts := &ai.PlanningOptions{
		UserInstruction: text,
		Message:         userMessage,
		Size:            size,
	}

	result, err := dExt.LLMService.Call(context.Background(), planningOpts)
	if err != nil {
		return nil, errors.Wrap(err, "failed to get next action from planner")
	}
	return result, nil
}

// AIQuery is a placeholder that is not implemented yet: it ignores its
// arguments and always returns an empty string and a nil error.
func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (string, error) {
	return "", nil
}

// AIAssert asks the LLM service to check the assertion against a screenshot
// of the current screen.
//
// It returns nil when the service reports the assertion as passed. When the
// assertion does not pass, the returned error carries the service's Thought
// text. Errors are also returned when the LLM service is not initialized or
// when the screenshot, window size, or LLM call fails. The opts parameter is
// currently unused.
func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) error {
	if dExt.LLMService == nil {
		return errors.New("LLM service is not initialized")
	}

	screenshot, err := GetScreenShotBufferBase64(dExt.IDriver)
	if err != nil {
		return err
	}

	windowSize, err := dExt.IDriver.WindowSize()
	if err != nil {
		return errors.Wrap(err, "get window size for AI assertion failed")
	}

	verdict, err := dExt.LLMService.Assert(context.Background(), &ai.AssertOptions{
		Assertion:  assertion,
		Screenshot: screenshot,
		Size:       windowSize,
	})
	if err != nil {
		return errors.Wrap(err, "AI assertion failed")
	}

	if verdict.Pass {
		return nil
	}
	return errors.New(verdict.Thought)
}