Files
httprunner/uixt/ai/planner.go
lilong.129 81a92ae155 docs: update AI module README with latest features
- Add comprehensive documentation for the new Query functionality
- Update interface method names from Call to Plan for consistency
- Add OpenAI GPT-4O model support documentation
- Include detailed usage examples for basic and custom schema queries
- Add configuration examples for multiple model services
- Document new features like ResetHistory, Usage statistics, and automatic type conversion
- Expand advanced features section with custom output format examples
- Update all code examples to reflect the latest API changes

The documentation now reflects the current state of the AI module with all three core capabilities:
- Planning (renamed from Call)
- Assertion
- Query (new feature)

All examples and configurations are updated to match the latest implementation.
2025-06-10 20:52:44 +08:00

198 lines
5.7 KiB
Go

package ai
import (
"context"
"github.com/cloudwego/eino-ext/components/model/openai"
"github.com/cloudwego/eino/components/model"
"github.com/cloudwego/eino/schema"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/httprunner/httprunner/v5/uixt/types"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
)
type IPlanner interface {
Plan(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error)
}
// PlanningOptions represents the input options for planning
type PlanningOptions struct {
UserInstruction string `json:"user_instruction"` // append to system prompt
Message *schema.Message `json:"message"`
Size types.Size `json:"size"`
ResetHistory bool `json:"reset_history"` // whether to reset conversation history before planning
}
// PlanningResult represents the result of planning
type PlanningResult struct {
ToolCalls []schema.ToolCall `json:"tool_calls"`
Thought string `json:"thought"`
Content string `json:"content"` // original content from model
Error string `json:"error,omitempty"`
ModelName string `json:"model_name"` // model name used for planning
Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
}
func NewPlanner(ctx context.Context, modelConfig *ModelConfig) (*Planner, error) {
planner := &Planner{
modelConfig: modelConfig,
parser: NewLLMContentParser(modelConfig.ModelType),
}
var err error
planner.model, err = openai.NewChatModel(ctx, modelConfig.ChatModelConfig)
if err != nil {
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
}
return planner, nil
}
type Planner struct {
modelConfig *ModelConfig
model model.ToolCallingChatModel
parser LLMContentParser
history ConversationHistory
}
func (p *Planner) SystemPrompt() string {
return p.parser.SystemPrompt()
}
func (p *Planner) History() *ConversationHistory {
return &p.history
}
func (p *Planner) RegisterTools(tools []*schema.ToolInfo) error {
if option.IS_UI_TARS(p.modelConfig.ModelType) {
// tools have been registered in ui-tars system prompt
return nil
}
// register tools for models with function calling
toolCallingModel, err := p.model.WithTools(tools)
if err != nil {
return errors.Wrap(err, "failed to register tools")
}
var toolNames []string
for _, tool := range tools {
toolNames = append(toolNames, tool.Name)
}
log.Debug().Strs("tools", toolNames).
Str("model", string(p.modelConfig.ModelType)).
Msg("registered tools to model")
p.model = toolCallingModel
return nil
}
// Plan performs UI planning using Vision Language Model
func (p *Planner) Plan(ctx context.Context, opts *PlanningOptions) (result *PlanningResult, err error) {
// validate input parameters
if err := validatePlanningInput(opts); err != nil {
return nil, errors.Wrap(err, "validate planning parameters failed")
}
// reset conversation history if requested
if opts.ResetHistory {
p.history.Clear() // Clear everything including system message for complete isolation
}
// prepare prompt
if len(p.history) == 0 && opts.UserInstruction != "" {
// add system message
p.history = ConversationHistory{
{
Role: schema.System,
Content: p.parser.SystemPrompt() + opts.UserInstruction,
},
}
}
// append user image message
p.history.Append(opts.Message)
// call model service, generate response
message, err := callModelWithLogging(ctx, p.model, p.history,
p.modelConfig.ModelType, "planning")
if err != nil {
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
}
defer func() {
// Extract usage information if available
if message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
result.Usage = message.ResponseMeta.Usage
}
}()
// handle tool calls
if len(message.ToolCalls) > 0 {
// append tool call message
toolCallID := ""
for _, toolCall := range message.ToolCalls {
toolCallID += toolCall.ID
}
p.history.Append(&schema.Message{
Role: schema.Tool,
Content: message.Content,
ToolCalls: message.ToolCalls,
ToolCallID: toolCallID,
})
// history will be appended with tool calls execution result
result = &PlanningResult{
ToolCalls: message.ToolCalls,
Thought: message.Content,
ModelName: string(p.modelConfig.ModelType),
}
return result, nil
}
// parse message content to actions (tool calls)
result, err = p.parser.Parse(message.Content, opts.Size)
if err != nil {
result = &PlanningResult{
Thought: message.Content,
Error: err.Error(),
ModelName: string(p.modelConfig.ModelType),
}
log.Debug().Str("reason", err.Error()).Msg("parse content to actions failed")
}
// append assistant message (since we're parsing content, not using native function calling)
p.history.Append(&schema.Message{
Role: schema.Assistant,
Content: message.Content,
})
log.Info().
Interface("thought", result.Thought).
Interface("tool_calls", result.ToolCalls).
Msg("get VLM planning result")
return result, nil
}
func validatePlanningInput(opts *PlanningOptions) error {
if opts.UserInstruction == "" {
return errors.Wrap(code.LLMPrepareRequestError, "user instruction is empty")
}
if opts.Message == nil || opts.Message.Role == "" {
return errors.Wrap(code.LLMPrepareRequestError, "user message is empty")
}
if opts.Message.Role == schema.User {
// check MultiContent
if len(opts.Message.MultiContent) > 0 {
for _, content := range opts.Message.MultiContent {
if content.Type == schema.ChatMessagePartTypeImageURL && content.ImageURL == nil {
return errors.Wrap(code.LLMPrepareRequestError, "invalid image data")
}
}
}
}
return nil
}