package ai import ( "context" "time" "github.com/cloudwego/eino-ext/components/model/openai" "github.com/cloudwego/eino/components/model" "github.com/cloudwego/eino/schema" "github.com/httprunner/httprunner/v5/code" "github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/types" "github.com/pkg/errors" "github.com/rs/zerolog/log" ) type IPlanner interface { Call(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error) } // PlanningOptions represents the input options for planning type PlanningOptions struct { UserInstruction string `json:"user_instruction"` // append to system prompt Message *schema.Message `json:"message"` Size types.Size `json:"size"` } // PlanningResult represents the result of planning type PlanningResult struct { ToolCalls []schema.ToolCall `json:"tool_calls"` // TODO: merge to NextActions NextActions []ParsedAction `json:"actions"` ActionSummary string `json:"summary"` Error string `json:"error,omitempty"` } func NewPlanner(ctx context.Context, modelConfig *ModelConfig) (*Planner, error) { planner := &Planner{ modelConfig: modelConfig, parser: NewLLMContentParser(modelConfig.ModelType), } var err error planner.model, err = openai.NewChatModel(ctx, modelConfig.ChatModelConfig) if err != nil { return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error()) } return planner, nil } type Planner struct { modelConfig *ModelConfig model model.ToolCallingChatModel parser LLMContentParser history ConversationHistory tools []*schema.ToolInfo } func (p *Planner) SystemPrompt() string { return p.parser.SystemPrompt() } func (p *Planner) History() *ConversationHistory { return &p.history } func (p *Planner) RegisterTools(tools []*schema.ToolInfo) error { if p.modelConfig.ModelType == option.LLMServiceTypeUITARS { // tools have been registered in ui-tars system prompt return nil } // register tools for models with function calling toolCallingModel, err := p.model.WithTools(tools) if err != nil { return errors.Wrap(err, "failed to register tools") } p.tools = tools p.model = toolCallingModel return nil } // Call performs UI planning using Vision Language Model func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error) { // validate input parameters if err := validatePlanningInput(opts); err != nil { return nil, errors.Wrap(err, "validate planning parameters failed") } // prepare prompt if len(p.history) == 0 && opts.UserInstruction != "" { // add system message p.history = ConversationHistory{ { Role: schema.System, Content: p.parser.SystemPrompt() + opts.UserInstruction, }, } } // append user image message p.history.Append(opts.Message) // call model service, generate response logRequest(p.history) startTime := time.Now() message, err := p.model.Generate(ctx, p.history) log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()). Str("model", string(p.modelConfig.ModelType)).Msg("call model service") if err != nil { return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) } logResponse(message) // handle tool calls if len(message.ToolCalls) > 0 { // history will be appended with tool calls execution result result := &PlanningResult{ ToolCalls: message.ToolCalls, ActionSummary: message.Content, } return result, nil } // parse message content to actions (tool calls) result, err := p.parser.Parse(message.Content, opts.Size) if err != nil { result = &PlanningResult{ ActionSummary: message.Content, Error: err.Error(), } log.Debug().Str("reason", err.Error()).Msg("parse content to actions failed") // append assistant message p.history.Append(&schema.Message{ Role: schema.Assistant, Content: message.Content, }) } log.Info(). Interface("summary", result.ActionSummary). Interface("actions", result.NextActions). Msg("get VLM planning result") return result, nil } func validatePlanningInput(opts *PlanningOptions) error { if opts.UserInstruction == "" { return errors.Wrap(code.LLMPrepareRequestError, "user instruction is empty") } if opts.Message == nil || opts.Message.Role == "" { return errors.Wrap(code.LLMPrepareRequestError, "user message is empty") } if opts.Message.Role == schema.User { // check MultiContent if len(opts.Message.MultiContent) > 0 { for _, content := range opts.Message.MultiContent { if content.Type == schema.ChatMessagePartTypeImageURL && content.ImageURL == nil { return errors.Wrap(code.LLMPrepareRequestError, "invalid image data") } } } } return nil }