From 7c45acd0615ace7fe89ad7abc18aed8876a955bd Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Tue, 10 Jun 2025 20:41:35 +0800 Subject: [PATCH] feat: add AI Querier module with custom output schema support and refactor common model calling logic - Add new AI Querier module for structured information extraction from screenshots - Support custom output schema for structured data response - Implement automatic type conversion and data validation - Add comprehensive test suite with various data structure examples - Refactor callModelWithLogging to utils.go as shared function for planner, asserter, and querier - Eliminate code duplication across AI modules (30+ lines of repeated code) - Improve maintainability with unified logging and timing logic - Add environment variable checks in test setup to handle missing API keys gracefully Key features: - Custom output schema support with JSON Schema generation - Automatic data type conversion with reflection - Fallback mechanisms for robust parsing - Comprehensive documentation and usage examples - Backward compatibility with existing functionality --- internal/version/VERSION | 2 +- uixt/ai/asserter.go | 10 +- uixt/ai/asserter_test.go | 2 +- uixt/ai/planner.go | 10 +- uixt/ai/planner_test.go | 8 +- uixt/ai/querier.go | 515 +++++++++++++++++++++++++++++++ uixt/ai/querier.md | 299 ++++++++++++++++++ uixt/ai/querier_prompts.go | 20 ++ uixt/ai/querier_test.go | 617 +++++++++++++++++++++++++++++++++++++ uixt/ai/utils.go | 34 ++ 10 files changed, 1495 insertions(+), 22 deletions(-) create mode 100644 uixt/ai/querier.go create mode 100644 uixt/ai/querier.md create mode 100644 uixt/ai/querier_prompts.go create mode 100644 uixt/ai/querier_test.go diff --git a/internal/version/VERSION b/internal/version/VERSION index c1e6a3c7..cf1fe76d 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506101816 +v5.0.0-beta-2506102041 diff --git a/uixt/ai/asserter.go b/uixt/ai/asserter.go index 
66031909..8fd0ddf0 100644 --- a/uixt/ai/asserter.go +++ b/uixt/ai/asserter.go @@ -3,7 +3,6 @@ package ai import ( "context" "fmt" - "time" "github.com/cloudwego/eino-ext/components/model/openai" openai2 "github.com/cloudwego/eino-ext/libs/acl/openai" @@ -15,7 +14,6 @@ import ( "github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/types" "github.com/pkg/errors" - "github.com/rs/zerolog/log" ) // IAsserter interface defines the contract for assertion operations @@ -128,15 +126,11 @@ Here is the assertion. Please tell whether it is truthy according to the screens a.history.Append(userMsg) // Call model service, generate response - logRequest(a.history) - startTime := time.Now() - message, err := a.model.Generate(ctx, a.history) - log.Debug().Float64("elapsed(s)", time.Since(startTime).Seconds()). - Str("model", string(a.modelConfig.ModelType)).Msg("call model service for assertion") + message, err := callModelWithLogging(ctx, a.model, a.history, + a.modelConfig.ModelType, "assertion") if err != nil { return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) } - logResponse(message) // Parse result result, err := parseAssertionResult(message.Content) diff --git a/uixt/ai/asserter_test.go b/uixt/ai/asserter_test.go index 18824ded..9012260a 100644 --- a/uixt/ai/asserter_test.go +++ b/uixt/ai/asserter_test.go @@ -12,7 +12,7 @@ import ( ) func createAsserter(t *testing.T) *Asserter { - modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428) + modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328) require.NoError(t, err) asserter, err := NewAsserter(context.Background(), modelConfig) require.NoError(t, err) diff --git a/uixt/ai/planner.go b/uixt/ai/planner.go index 6db959ca..0557de56 100644 --- a/uixt/ai/planner.go +++ b/uixt/ai/planner.go @@ -2,7 +2,6 @@ package ai import ( "context" - "time" "github.com/cloudwego/eino-ext/components/model/openai" "github.com/cloudwego/eino/components/model" @@ 
-116,15 +115,11 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (result *Plan p.history.Append(opts.Message) // call model service, generate response - logRequest(p.history) - startTime := time.Now() - message, err := p.model.Generate(ctx, p.history) - log.Debug().Float64("elapsed(s)", time.Since(startTime).Seconds()). - Str("model", string(p.modelConfig.ModelType)).Msg("call model service for planning") + message, err := callModelWithLogging(ctx, p.model, p.history, + p.modelConfig.ModelType, "planning") if err != nil { return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) } - logResponse(message) defer func() { // Extract usage information if available @@ -174,7 +169,6 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (result *Plan log.Info(). Interface("thought", result.Thought). Interface("tool_calls", result.ToolCalls). - Float64("elapsed(s)", time.Since(startTime).Seconds()). Msg("get VLM planning result") return result, nil } diff --git a/uixt/ai/planner_test.go b/uixt/ai/planner_test.go index 8be317f4..1c780971 100644 --- a/uixt/ai/planner_test.go +++ b/uixt/ai/planner_test.go @@ -29,7 +29,7 @@ func TestVLMPlanning(t *testing.T) { userInstruction += "\n\n请基于以上游戏规则,给出下一步可点击的两个图标坐标" - modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428) + modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328) require.NoError(t, err) planner, err := NewPlanner(context.Background(), modelConfig) @@ -72,7 +72,7 @@ func TestXHSPlanning(t *testing.T) { userInstruction := "点击第二个帖子的作者头像" - modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428) + modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328) require.NoError(t, err) planner, err := NewPlanner(context.Background(), modelConfig) @@ -115,7 +115,7 @@ func TestChatList(t *testing.T) { userInstruction := "请结合图片的文字信息,请告诉我一共有多少个群聊,哪些群聊右下角有绿点" - modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428) + 
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328) require.NoError(t, err) planner, err := NewPlanner(context.Background(), modelConfig) @@ -147,7 +147,7 @@ func TestChatList(t *testing.T) { func TestHandleSwitch(t *testing.T) { userInstruction := "检查发送框下方的联网搜索开关,蓝色为开启状态,灰色为关闭状态;若开关处于关闭状态,则点击进行开启" - modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428) + modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328) require.NoError(t, err) planner, err := NewPlanner(context.Background(), modelConfig) diff --git a/uixt/ai/querier.go b/uixt/ai/querier.go new file mode 100644 index 00000000..6a9def4b --- /dev/null +++ b/uixt/ai/querier.go @@ -0,0 +1,515 @@ +package ai + +import ( + "context" + "fmt" + "reflect" + + "github.com/cloudwego/eino-ext/components/model/openai" + openai2 "github.com/cloudwego/eino-ext/libs/acl/openai" + "github.com/cloudwego/eino/components/model" + "github.com/cloudwego/eino/schema" + "github.com/getkin/kin-openapi/openapi3gen" + "github.com/httprunner/httprunner/v5/code" + "github.com/httprunner/httprunner/v5/internal/json" + "github.com/httprunner/httprunner/v5/uixt/option" + "github.com/httprunner/httprunner/v5/uixt/types" + "github.com/pkg/errors" +) + +// IQuerier interface defines the contract for query operations +type IQuerier interface { + Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) +} + +// QueryOptions represents the input options for query +type QueryOptions struct { + Query string `json:"query"` // The query text to extract information + Screenshot string `json:"screenshot"` // Base64 encoded screenshot + Size types.Size `json:"size"` // Screen dimensions + OutputSchema interface{} `json:"outputSchema,omitempty"` // Custom output schema for structured response +} + +// QueryResult represents the response from an AI query +type QueryResult struct { + Content string `json:"content"` // The extracted content/information + Thought string `json:"thought"` // The 
reasoning process + Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided +} + +// Querier handles query operations using different AI models +type Querier struct { + modelConfig *ModelConfig + model model.ToolCallingChatModel + systemPrompt string + history ConversationHistory +} + +// NewQuerier creates a new Querier instance +func NewQuerier(ctx context.Context, modelConfig *ModelConfig) (*Querier, error) { + querier := &Querier{ + modelConfig: modelConfig, + systemPrompt: defaultQueryPrompt, + } + + if option.IS_UI_TARS(modelConfig.ModelType) { + querier.systemPrompt += "\n" + uiTarsQueryResponseFormat + } else { + // define default output format + type OutputFormat struct { + Content string `json:"content"` + Thought string `json:"thought"` + Error string `json:"error,omitempty"` + } + outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(&OutputFormat{}, nil) + if err != nil { + return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error()) + } + // set structured response format + modelConfig.ChatModelConfig.ResponseFormat = &openai2.ChatCompletionResponseFormat{ + Type: openai2.ChatCompletionResponseFormatTypeJSONSchema, + JSONSchema: &openai2.ChatCompletionResponseFormatJSONSchema{ + Name: "query_result", + Description: "data that describes query result", + Schema: outputFormatSchema.Value, + Strict: false, + }, + } + } + + var err error + querier.model, err = openai.NewChatModel(ctx, modelConfig.ChatModelConfig) + if err != nil { + return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error()) + } + + return querier, nil +} + +// callModelWithLogging calls the model with automatic logging and timing + +// Query performs the information extraction from the screenshot +func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) { + // Validate input parameters + if err := validateQueryInput(opts); err != nil { + return nil, errors.Wrap(err, "validate query parameters failed") + } + + 
// Handle custom output schema if provided + if opts.OutputSchema != nil { + return q.queryWithCustomSchema(ctx, opts) + } + + // Reset history for each new query + q.history = ConversationHistory{ + { + Role: schema.System, + Content: q.systemPrompt, + }, + } + + // Create user message with screenshot and query + userMsg := &schema.Message{ + Role: schema.User, + MultiContent: []schema.ChatMessagePart{ + { + Type: schema.ChatMessagePartTypeImageURL, + ImageURL: &schema.ChatMessageImageURL{ + URL: opts.Screenshot, + Detail: schema.ImageURLDetailAuto, + }, + }, + { + Type: schema.ChatMessagePartTypeText, + Text: fmt.Sprintf(` +Here is the query. Please extract the requested information from the screenshot. +===================================== +%s +===================================== + `, opts.Query), + }, + }, + } + + // Append user message to history + q.history.Append(userMsg) + + // Call model service with logging + message, err := callModelWithLogging(ctx, q.model, q.history, + q.modelConfig.ModelType, "query") + if err != nil { + return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) + } + + // Parse result + result, err := parseQueryResult(message.Content) + if err != nil { + return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error()) + } + + // Append assistant message to history + q.history.Append(&schema.Message{ + Role: schema.Assistant, + Content: message.Content, + }) + + return result, nil +} + +// validateQueryInput validates the input parameters for query +func validateQueryInput(opts *QueryOptions) error { + if opts.Query == "" { + return errors.Wrap(code.LLMPrepareRequestError, "query text is required") + } + if opts.Screenshot == "" { + return errors.Wrap(code.LLMPrepareRequestError, "screenshot is required") + } + return nil +} + +// parseQueryResult parses the model response into QueryResult +func parseQueryResult(content string) (*QueryResult, error) { + // Extract JSON content from response + jsonContent := 
extractJSONFromContent(content) + if jsonContent == "" { + // If no JSON found, treat the entire content as the result + // This handles cases where the model returns plain text instead of JSON + return &QueryResult{ + Content: content, + Thought: "Direct response from model", + }, nil + } + + // Parse JSON response + var result QueryResult + if err := json.Unmarshal([]byte(jsonContent), &result); err != nil { + // If JSON parsing fails, treat the content as plain text result + return &QueryResult{ + Content: content, + Thought: "Failed to parse as JSON, returning raw content", + }, nil + } + + return &result, nil +} + +// queryWithCustomSchema performs query with custom output schema +func (q *Querier) queryWithCustomSchema(ctx context.Context, opts *QueryOptions) (*QueryResult, error) { + // Create a new model config with custom schema + modelConfig := *q.modelConfig + + if !option.IS_UI_TARS(modelConfig.ModelType) { + // Generate schema from the provided output schema + outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(opts.OutputSchema, nil) + if err != nil { + return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error()) + } + + // Create custom response format with the provided schema + modelConfig.ChatModelConfig.ResponseFormat = &openai2.ChatCompletionResponseFormat{ + Type: openai2.ChatCompletionResponseFormatTypeJSONSchema, + JSONSchema: &openai2.ChatCompletionResponseFormatJSONSchema{ + Name: "custom_query_result", + Description: "custom structured data response", + Schema: outputFormatSchema.Value, + Strict: false, + }, + } + } + + // Create a new model instance with custom schema + model, err := openai.NewChatModel(ctx, modelConfig.ChatModelConfig) + if err != nil { + return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error()) + } + + // Reset history for each new query + systemPrompt := q.systemPrompt + if option.IS_UI_TARS(modelConfig.ModelType) { + systemPrompt += "\n" + uiTarsQueryResponseFormat + } else { + // Add instruction 
for structured output + systemPrompt += "\n\nPlease respond with structured data according to the specified schema. Include both the structured data and your reasoning process." + } + + history := ConversationHistory{ + { + Role: schema.System, + Content: systemPrompt, + }, + } + + // Create user message with screenshot and query + userMsg := &schema.Message{ + Role: schema.User, + MultiContent: []schema.ChatMessagePart{ + { + Type: schema.ChatMessagePartTypeImageURL, + ImageURL: &schema.ChatMessageImageURL{ + URL: opts.Screenshot, + Detail: schema.ImageURLDetailAuto, + }, + }, + { + Type: schema.ChatMessagePartTypeText, + Text: fmt.Sprintf(` +Here is the query. Please extract the requested information from the screenshot and return it in the specified structured format. +===================================== +%s +===================================== + `, opts.Query), + }, + }, + } + + // Append user message to history + history.Append(userMsg) + + // Call model service with logging + message, err := callModelWithLogging(ctx, model, history, modelConfig.ModelType, "custom schema query") + if err != nil { + return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) + } + + // Parse result with custom schema + result, err := parseCustomSchemaResult(message.Content, opts.OutputSchema) + if err != nil { + return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error()) + } + + // Append assistant message to history + q.history.Append(&schema.Message{ + Role: schema.Assistant, + Content: message.Content, + }) + + return result, nil +} + +// setDefaultFieldValue sets a default value for a field in the structured data using reflection +func setDefaultFieldValue(structValue reflect.Value, fieldName, defaultValue string) { + if field := structValue.FieldByName(fieldName); field.IsValid() && field.CanSet() && field.Kind() == reflect.String { + field.SetString(defaultValue) + } +} + +// ensureDefaultValues ensures that Content and Thought fields have default 
values if empty +func ensureDefaultValues(result *QueryResult, structuredData interface{}) { + const ( + defaultContent = "Structured data extracted successfully" + defaultThought = "Parsed structured response according to custom schema" + ) + + // Set defaults for QueryResult + if result.Content == "" { + result.Content = defaultContent + } + if result.Thought == "" { + result.Thought = defaultThought + } + + // Set defaults in structured data if it's a pointer to struct + if structuredData != nil { + if structValue := reflect.ValueOf(structuredData); structValue.Kind() == reflect.Ptr { + if elem := structValue.Elem(); elem.IsValid() && elem.Kind() == reflect.Struct { + if result.Content == defaultContent { + setDefaultFieldValue(elem, "Content", defaultContent) + } + if result.Thought == defaultThought { + setDefaultFieldValue(elem, "Thought", defaultThought) + } + } + } + } +} + +// parseCustomSchemaResult parses the model response with custom schema +func parseCustomSchemaResult(content string, outputSchema interface{}) (*QueryResult, error) { + // Extract JSON content from response + jsonContent := extractJSONFromContent(content) + if jsonContent == "" { + // If no JSON found, treat the entire content as the result + return &QueryResult{ + Content: content, + Thought: "Direct response from model", + }, nil + } + + // Create a new instance of the same type as outputSchema + schemaType := reflect.TypeOf(outputSchema) + if schemaType.Kind() == reflect.Ptr { + schemaType = schemaType.Elem() + } + + // Create a new instance of the schema type + newInstance := reflect.New(schemaType).Interface() + + // Try to unmarshal directly into the schema type + if err := json.Unmarshal([]byte(jsonContent), newInstance); err == nil { + // Successfully parsed into the expected schema type + result := &QueryResult{ + Data: newInstance, // Store the typed pointer directly + } + + // Try to extract content and thought if the schema has these fields + schemaValue := 
reflect.ValueOf(newInstance).Elem() + if contentField := schemaValue.FieldByName("Content"); contentField.IsValid() && contentField.Kind() == reflect.String { + result.Content = contentField.String() + } + if thoughtField := schemaValue.FieldByName("Thought"); thoughtField.IsValid() && thoughtField.Kind() == reflect.String { + result.Thought = thoughtField.String() + } + + // If no standard fields found, try to extract from map representation + if result.Content == "" && result.Thought == "" { + var dataMap map[string]interface{} + if err := json.Unmarshal([]byte(jsonContent), &dataMap); err == nil { + if content, exists := dataMap["content"]; exists { + if contentStr, ok := content.(string); ok { + result.Content = contentStr + } + } + if thought, exists := dataMap["thought"]; exists { + if thoughtStr, ok := thought.(string); ok { + result.Thought = thoughtStr + } + } + } + } + + // Ensure default values are set + ensureDefaultValues(result, newInstance) + return result, nil + } + + // Fallback: try to parse as generic map and then convert + var structuredData interface{} + if err := json.Unmarshal([]byte(jsonContent), &structuredData); err == nil { + // Try to convert the generic data to the expected schema type + if convertedData, err := convertToSchemaType(structuredData, outputSchema); err == nil { + result := &QueryResult{ + Data: convertedData, // Store the converted typed data + } + + // Extract content and thought from the original map + if dataMap, ok := structuredData.(map[string]interface{}); ok { + if content, exists := dataMap["content"]; exists { + if contentStr, ok := content.(string); ok { + result.Content = contentStr + } + } + if thought, exists := dataMap["thought"]; exists { + if thoughtStr, ok := thought.(string); ok { + result.Thought = thoughtStr + } + } + } + + // Ensure default values are set + ensureDefaultValues(result, convertedData) + return result, nil + } + + // If conversion failed, fall back to storing the generic data + if 
dataMap, ok := structuredData.(map[string]interface{}); ok { + result := &QueryResult{ + Data: structuredData, + } + + // Extract content and thought if present + if content, exists := dataMap["content"]; exists { + if contentStr, ok := content.(string); ok { + result.Content = contentStr + } + } + if thought, exists := dataMap["thought"]; exists { + if thoughtStr, ok := thought.(string); ok { + result.Thought = thoughtStr + } + } + + // Ensure default values are set + ensureDefaultValues(result, nil) + return result, nil + } + } + + // Fallback to treating as plain text + return &QueryResult{ + Content: content, + Thought: "Failed to parse as structured data, returning raw content", + }, nil +} + +// convertToSchemaType converts generic data to the specified schema type +func convertToSchemaType(data interface{}, outputSchema interface{}) (interface{}, error) { + // Get the type of the output schema + schemaType := reflect.TypeOf(outputSchema) + if schemaType.Kind() == reflect.Ptr { + schemaType = schemaType.Elem() + } + + // Create a new instance of the schema type + newInstance := reflect.New(schemaType).Interface() + + // Convert via JSON marshaling/unmarshaling + jsonData, err := json.Marshal(data) + if err != nil { + return nil, errors.Wrap(err, "failed to marshal data to JSON") + } + + if err := json.Unmarshal(jsonData, newInstance); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal data to target type") + } + + return newInstance, nil +} + +// ConvertQueryResultData converts QueryResult.Data to the specified type T +// This is a helper function for type-safe conversion of the structured data +// +// Note: When using QueryOptions.OutputSchema, the Data field is automatically +// converted to the correct type, so this function is typically not needed. +// This function is mainly useful for: +// 1. Converting data when OutputSchema was not used +// 2. Converting to a different type than the original OutputSchema +// 3. 
Handling legacy code or edge cases +func ConvertQueryResultData[T any](result *QueryResult) (*T, error) { + if result.Data == nil { + return nil, errors.New("no structured data available") + } + + // If Data is already of the correct type, return it directly + if typedData, ok := result.Data.(*T); ok { + return typedData, nil + } + + // If Data is a pointer to the correct type, dereference and return + if reflect.TypeOf(result.Data).Kind() == reflect.Ptr { + if typedData, ok := result.Data.(*T); ok { + return typedData, nil + } + // Try to get the value that the pointer points to + dataValue := reflect.ValueOf(result.Data) + if dataValue.Kind() == reflect.Ptr && !dataValue.IsNil() { + elem := dataValue.Elem() + if elem.Type() == reflect.TypeOf((*T)(nil)).Elem() { + typedData := elem.Interface().(T) + return &typedData, nil + } + } + } + + // Fallback: try to convert via JSON marshaling/unmarshaling + jsonData, err := json.Marshal(result.Data) + if err != nil { + return nil, errors.Wrap(err, "failed to marshal data to JSON") + } + + var converted T + if err := json.Unmarshal(jsonData, &converted); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal data to target type") + } + + return &converted, nil +} diff --git a/uixt/ai/querier.md b/uixt/ai/querier.md new file mode 100644 index 00000000..42e9cae1 --- /dev/null +++ b/uixt/ai/querier.md @@ -0,0 +1,299 @@ +# HttpRunner AI Querier - 自定义输出格式功能 + +## 功能概述 + +HttpRunner 的 AI Querier 模块支持自定义输出格式功能,允许用户指定特定的数据结构,让 AI 模型返回结构化的数据响应。适用于: + +- **UI 元素分析**:自动化测试中的界面元素提取 +- **游戏界面分析**:网格类游戏(连连看、消消乐、2048等)数据提取 +- **表单数据提取**:从表单截图中提取结构化信息 +- **图像内容分析**:任何需要从截图中提取结构化信息的场景 + +## 核心数据结构 + +```go +// QueryOptions - 查询选项 +type QueryOptions struct { + Query string `json:"query"` // 查询文本 + Screenshot string `json:"screenshot"` // Base64编码的截图 + Size types.Size `json:"size"` // 屏幕尺寸 + OutputSchema interface{} `json:"outputSchema,omitempty"` // 自定义输出格式(可选) +} + +// QueryResult - 查询结果 +type QueryResult struct { + Content 
string `json:"content"` // 人类可读的分析结果 + Thought string `json:"thought"` // AI 推理过程 + Data interface{} `json:"data,omitempty"` // 结构化数据(使用OutputSchema时自动转换为指定类型) +} +``` + +## 基本用法 + +### 标准查询 + +```go +// 创建查询器 +modelConfig, err := ai.GetModelConfig(option.OPENAI_GPT_4O) +querier, err := ai.NewQuerier(ctx, modelConfig) + +// 执行查询 +result, err := querier.Query(ctx, &ai.QueryOptions{ + Query: "请分析这张截图中的内容", + Screenshot: screenshot, + Size: size, + // 不指定 OutputSchema +}) + +fmt.Printf("分析结果: %s\n", result.Content) +fmt.Printf("推理过程: %s\n", result.Thought) +// result.Data 为 nil +``` + +### 自定义格式查询 + +```go +// 定义输出结构 +type GameAnalysis struct { + Content string `json:"content"` // 分析描述 + Thought string `json:"thought"` // 思考过程 + Rows int `json:"rows"` // 行数 + Cols int `json:"cols"` // 列数 + Icons []string `json:"icons"` // 图标类型 +} + +// 执行查询 +result, err := querier.Query(ctx, &ai.QueryOptions{ + Query: "分析这个游戏界面的网格结构和图标类型", + Screenshot: screenshot, + Size: size, + OutputSchema: GameAnalysis{}, // 指定输出格式 +}) + +// 直接类型断言获取结构化数据 +if gameData, ok := result.Data.(*GameAnalysis); ok { + fmt.Printf("行数: %d, 列数: %d\n", gameData.Rows, gameData.Cols) + fmt.Printf("图标类型: %v\n", gameData.Icons) +} +``` + +## 应用场景示例 + +### UI 元素分析 + +```go +type UIAnalysis struct { + Content string `json:"content"` + Thought string `json:"thought"` + Elements []UIElement `json:"elements"` +} + +type UIElement struct { + Type string `json:"type"` // button, text, input等 + Text string `json:"text"` // 文本内容 + BoundBox BoundingBox `json:"boundBox"` // 位置坐标 + Clickable bool `json:"clickable"` // 是否可点击 +} + +type BoundingBox struct { + X int `json:"x"` + Y int `json:"y"` + Width int `json:"width"` + Height int `json:"height"` +} +``` + +### 网格游戏分析 + +```go +type GridGame struct { + Content string `json:"content"` + Thought string `json:"thought"` + Grid [][]Cell `json:"grid"` // 网格数据 + Stats Statistics `json:"statistics"` // 统计信息 +} + +type Cell struct { + Type string `json:"type"` // 单元格类型 + Value string `json:"value"` // 单元格值 + Row int 
`json:"row"` // 行索引 + Col int `json:"col"` // 列索引 +} + +type Statistics struct { + TotalCells int `json:"totalCells"` + UniqueTypes int `json:"uniqueTypes"` +} +``` + +### 表单数据提取 + +```go +type FormAnalysis struct { + Content string `json:"content"` + Thought string `json:"thought"` + Fields []FormField `json:"fields"` + Actions []Action `json:"actions"` +} + +type FormField struct { + Label string `json:"label"` // 字段标签 + Type string `json:"type"` // 字段类型 + Value string `json:"value"` // 当前值 + Required bool `json:"required"` // 是否必填 + BoundBox BoundingBox `json:"boundBox"` // 位置 +} +``` + +## 核心特性 + +### 自动类型转换 +- 指定 `OutputSchema` 时,`QueryResult.Data` 自动转换为指定类型 +- 支持直接类型断言:`result.Data.(*YourType)` +- 无需手动调用转换函数 + +### 多级回退机制 +1. 优先解析为指定的结构化类型 +2. 失败时尝试通用JSON解析 +3. 最终回退到纯文本响应 + +### 向后兼容 +- 不指定 `OutputSchema` 时行为不变 +- 现有代码无需修改 + +## 最佳实践 + +### 1. 结构体设计 + +```go +// 推荐:包含标准字段 +type YourSchema struct { + Content string `json:"content"` // 必须:人类可读描述 + Thought string `json:"thought"` // 必须:AI推理过程 + // 自定义字段... + Data CustomData `json:"data"` +} + +// 使用描述性的JSON标签 +type Element struct { + Type string `json:"elementType"` // 清晰的字段名 + Position Point `json:"gridPosition"` // 描述性标签 + Visible bool `json:"isVisible"` // 布尔值清晰性 +} +``` + +### 2. 查询指令 + +```go +// 推荐:详细的查询指令 +opts := &ai.QueryOptions{ + Query: `分析这张截图并提供结构化信息: +1. 识别界面类型和主要元素 +2. 提取所有可交互元素的位置和属性 +3. 统计各类元素的数量`, + Screenshot: screenshot, + Size: size, + OutputSchema: YourSchema{}, +} +``` + +### 3. 
错误处理 + +```go +result, err := querier.Query(ctx, opts) +if err != nil { + return err +} + +// 类型断言 +if data, ok := result.Data.(*YourSchema); ok { + // 使用结构化数据 + processData(data) +} else { + // 回退到文本结果 + log.Printf("结构化解析失败,使用文本结果: %s", result.Content) +} +``` + +## 完整示例 + +```go +package main + +import ( + "context" + "fmt" + "log" + + "github.com/httprunner/httprunner/v5/internal/builtin" + "github.com/httprunner/httprunner/v5/uixt/ai" + "github.com/httprunner/httprunner/v5/uixt/option" +) + +type ScreenAnalysis struct { + Content string `json:"content"` + Thought string `json:"thought"` + Elements []string `json:"elements"` + Categories []string `json:"categories"` + Count int `json:"count"` +} + +func main() { + ctx := context.Background() + + // 创建查询器 + modelConfig, err := ai.GetModelConfig(option.OPENAI_GPT_4O) + if err != nil { + log.Fatal(err) + } + + querier, err := ai.NewQuerier(ctx, modelConfig) + if err != nil { + log.Fatal(err) + } + + // 加载截图 + screenshot, size, err := builtin.LoadImage("screenshot.png") + if err != nil { + log.Fatal(err) + } + + // 执行结构化查询 + result, err := querier.Query(ctx, &ai.QueryOptions{ + Query: "分析截图中的UI元素,提取元素类型和分类信息", + Screenshot: screenshot, + Size: size, + OutputSchema: ScreenAnalysis{}, + }) + if err != nil { + log.Fatal(err) + } + + // 使用结构化数据 + if analysis, ok := result.Data.(*ScreenAnalysis); ok { + fmt.Printf("发现 %d 个元素\n", analysis.Count) + fmt.Printf("元素类型: %v\n", analysis.Elements) + fmt.Printf("分类: %v\n", analysis.Categories) + } else { + fmt.Printf("文本结果: %s\n", result.Content) + } +} +``` + +## 辅助函数 + +对于特殊情况,提供了类型转换辅助函数: + +```go +// 手动类型转换(通常不需要) +converted, err := ai.ConvertQueryResultData[YourType](result) +if err != nil { + return err +} +``` + +**注意**:使用 `OutputSchema` 时,`Data` 字段已自动转换为正确类型,通常不需要手动调用此函数。 + +## 技术限制 + +- 需要支持结构化输出的AI模型(如 OpenAI GPT-4) +- 复杂嵌套结构需要清晰的查询指令 +- AI模型可能不总是严格遵循指定格式 +- UI-TARS 模型使用不同的响应格式处理 \ No newline at end of file diff --git a/uixt/ai/querier_prompts.go 
b/uixt/ai/querier_prompts.go new file mode 100644 index 00000000..c79b9e4d --- /dev/null +++ b/uixt/ai/querier_prompts.go @@ -0,0 +1,20 @@ +package ai + +// Default query system prompt +const defaultQueryPrompt = `You are an AI assistant specialized in analyzing images and extracting information. User will provide a screenshot and a query asking for specific information to be extracted from the image. Please analyze the image carefully and provide the requested information.` + +// UI-TARS query response format +const uiTarsQueryResponseFormat = ` +## Output Json String Format +` + "```" + ` +"{ + "content": "<>", + "thought": "<>" +}" +` + "```" + ` + +## Rules **MUST** follow +- Make sure to return **only** the JSON, with **no additional** text or explanations. +- Use Chinese in ` + "`Thought`" + ` part. +- You **MUST** strictly follow up the **Output Json String Format**. +- Provide detailed and accurate information extraction based on the image content.` diff --git a/uixt/ai/querier_test.go b/uixt/ai/querier_test.go new file mode 100644 index 00000000..d300899d --- /dev/null +++ b/uixt/ai/querier_test.go @@ -0,0 +1,617 @@ +package ai + +import ( + "context" + "fmt" + "testing" + + "github.com/httprunner/httprunner/v5/internal/builtin" + "github.com/httprunner/httprunner/v5/uixt/option" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Data structures for testing custom output schemas + +// GameIcon represents a single icon in the game grid +type GameIcon struct { + Name string `json:"name"` // Icon name (e.g., "beach_ball", "glove") + Row int `json:"row"` // Row position (0-based) + Col int `json:"col"` // Column position (0-based) +} + +// GameGrid represents the complete game grid +type GameGrid struct { + Grid [][]GameIcon `json:"grid"` // 2D array of game icons + Rows int `json:"rows"` // Number of rows + Cols int `json:"cols"` // Number of columns + Icons []string `json:"icons"` // List of unique icon names +} + +// 
LianliankanResponse represents the structured response for lianliankan game analysis
+type LianliankanResponse struct {
+	Content string   `json:"content"` // Description of the analysis
+	Thought string   `json:"thought"` // Reasoning process
+	Data    GameGrid `json:"data"`    // Structured game grid data
+}
+
+// SimpleGameInfo represents basic game information
+type SimpleGameInfo struct {
+	Content    string   `json:"content"`    // Description
+	Thought    string   `json:"thought"`    // Reasoning
+	Rows       int      `json:"rows"`       // Number of rows
+	Cols       int      `json:"cols"`       // Number of columns
+	IconTypes  []string `json:"iconTypes"`  // List of icon types
+	TotalIcons int      `json:"totalIcons"` // Total number of icons
+}
+
+// Additional data structures for comprehensive testing
+
+// GameAnalysisResult represents structured analysis of a game interface
+type GameAnalysisResult struct {
+	Content    string     `json:"content"`    // Human-readable description
+	Thought    string     `json:"thought"`    // AI reasoning process
+	GameType   string     `json:"gameType"`   // Type of game detected
+	Dimensions Dimensions `json:"dimensions"` // Grid dimensions
+	Elements   []Element  `json:"elements"`   // Game elements detected
+	Statistics Statistics `json:"statistics"` // Game statistics
+}
+
+type Dimensions struct {
+	Rows int `json:"rows"` // Number of rows
+	Cols int `json:"cols"` // Number of columns
+}
+
+type Element struct {
+	Type     string      `json:"type"`     // Element type/name
+	Position Position    `json:"position"` // Position in grid
+	BoundBox BoundingBox `json:"boundBox"` // Pixel coordinates
+}
+
+type Position struct {
+	Row int `json:"row"` // Row index (0-based)
+	Col int `json:"col"` // Column index (0-based)
+}
+
+type BoundingBox struct {
+	X      int `json:"x"`      // Left coordinate
+	Y      int `json:"y"`      // Top coordinate
+	Width  int `json:"width"`  // Width in pixels
+	Height int `json:"height"` // Height in pixels
+}
+
+type Statistics struct {
+	TotalElements int         `json:"totalElements"` // Total number of elements
+	UniqueTypes   int         `json:"uniqueTypes"`   // Number of unique element types
+	TypeCounts    []TypeCount `json:"typeCounts"`    // Count of each type
+}
+
+type TypeCount struct {
+	Type  string `json:"type"`  // Element type
+	Count int    `json:"count"` // Number of occurrences
+}
+
+// UIElementsResult represents structured analysis of UI elements
+type UIElementsResult struct {
+	Content    string      `json:"content"`    // Description
+	Thought    string      `json:"thought"`    // Reasoning
+	Elements   []UIElement `json:"elements"`   // UI elements found
+	Categories []string    `json:"categories"` // Categories of elements
+}
+
+type UIElement struct {
+	Type        string      `json:"type"`        // Element type (button, text, image, etc.)
+	Text        string      `json:"text"`        // Text content if any
+	Description string      `json:"description"` // Element description
+	BoundBox    BoundingBox `json:"boundBox"`    // Pixel coordinates
+	Clickable   bool        `json:"clickable"`   // Whether element is clickable
+	Visible     bool        `json:"visible"`     // Whether element is visible
+}
+
+// Test functions
+
+func TestParseQueryResult(t *testing.T) {
+	tests := []struct {
+		name     string
+		content  string
+		expected *QueryResult
+	}{
+		{
+			name: "valid JSON response",
+			content: `{
+				"content": "这是一个14行8列的连连看游戏界面,包含25种不同的图案",
+				"thought": "通过分析图片,我识别出了游戏界面的结构和图案类型"
+			}`,
+			expected: &QueryResult{
+				Content: "这是一个14行8列的连连看游戏界面,包含25种不同的图案",
+				Thought: "通过分析图片,我识别出了游戏界面的结构和图案类型",
+			},
+		},
+		{
+			name:    "JSON in markdown",
+			content: "```json\n{\n \"content\": \"游戏界面分析结果\",\n \"thought\": \"分析过程\"\n}\n```",
+			expected: &QueryResult{
+				Content: "游戏界面分析结果",
+				Thought: "分析过程",
+			},
+		},
+		{
+			name:    "plain text response",
+			content: "这是一个连连看游戏界面,包含多种图案。",
+			expected: &QueryResult{
+				Content: "这是一个连连看游戏界面,包含多种图案。",
+				Thought: "Direct response from model",
+			},
+		},
+		{
+			name:    "invalid JSON",
+			content: `{"content": "incomplete json", "missing_closing_brace": true`,
+			expected: &QueryResult{
+				Content: `{"content": "incomplete json", "missing_closing_brace": true`,
+				Thought: "Direct response from model",
+			},
+		},
+		{
+			name:    "malformed JSON that can be extracted but not parsed",
+			content: `{"content": "test", "invalid": }`,
+			expected: &QueryResult{
+				Content: `{"content": "test", "invalid": }`,
+				Thought: "Failed to parse as JSON, returning raw content",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := parseQueryResult(tt.content)
+			require.NoError(t, err) // stop here: result is dereferenced below
+			assert.Equal(t, tt.expected.Content, result.Content)
+			assert.Equal(t, tt.expected.Thought, result.Thought)
+		})
+	}
+}
+
+func setupTestQuerier(t *testing.T) *Querier {
+	ctx := context.Background()
+	modelConfig, err := GetModelConfig(option.OPENAI_GPT_4O)
+	require.NoError(t, err)
+	querier, err := NewQuerier(ctx, modelConfig)
+	require.NoError(t, err)
+	return querier
+}
+
+// TestQueryBasicUsage demonstrates basic query functionality without custom schema
+func TestQueryBasicUsage(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	// Load screenshot
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	// Prepare query options
+	opts := &QueryOptions{
+		Query:      "这是一张连连看小游戏的界面,请将其转换为一个二维数组,数组中的每个元素包含图案名称及其坐标",
+		Screenshot: screenshot,
+		Size:       size,
+	}
+
+	// Perform query; require (not assert) because result is dereferenced below
+	result, err := querier.Query(context.Background(), opts)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.NotEmpty(t, result.Content)
+	assert.NotEmpty(t, result.Thought)
+	assert.Nil(t, result.Data) // Should be nil for standard query
+
+	t.Logf("Query Result:")
+	t.Logf("Content: %s", result.Content)
+	t.Logf("Thought: %s", result.Thought)
+}
+
+// TestQueryWithCustomSchema tests the query functionality with custom output schema
+func TestQueryWithCustomSchema(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	// Load test image
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	// Define custom output schema for lianliankan game
+	outputSchema := LianliankanResponse{}
+
+	// Prepare query options with custom schema
+	opts := &QueryOptions{
+		Query: `这是一张连连看小游戏的界面,请分析游戏界面并返回结构化数据:
+1. 游戏网格的行数和列数
+2. 每个位置的图案名称和坐标
+3. 所有不同类型的图案列表
+请将结果组织成二维数组格式,每个元素包含图案名称及其坐标位置。`,
+		Screenshot:   screenshot,
+		Size:         size,
+		OutputSchema: outputSchema,
+	}
+
+	// Perform query; require (not assert) because result is dereferenced below
+	result, err := querier.Query(context.Background(), opts)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.NotEmpty(t, result.Content)
+	assert.NotEmpty(t, result.Thought)
+	assert.NotNil(t, result.Data)
+
+	t.Logf("Query result content: %s", result.Content)
+	t.Logf("Query result thought: %s", result.Thought)
+	t.Logf("Structured data: %+v", result.Data)
+
+	// Try to parse the structured data
+	if dataMap, ok := result.Data.(map[string]interface{}); ok {
+		if gridData, exists := dataMap["data"]; exists {
+			t.Logf("Game grid data: %+v", gridData)
+		}
+		if rows, exists := dataMap["rows"]; exists {
+			t.Logf("Rows: %v", rows)
+		}
+		if cols, exists := dataMap["cols"]; exists {
+			t.Logf("Cols: %v", cols)
+		}
+		if icons, exists := dataMap["icons"]; exists {
+			t.Logf("Icon Types: %v", icons)
+		}
+	}
+}
+
+// TestQueryWithSimpleSchema tests with a simpler custom schema
+func TestQueryWithSimpleSchema(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	// Load test image
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	outputSchema := SimpleGameInfo{}
+
+	// Prepare query options
+	opts := &QueryOptions{
+		Query:        "请分析这个连连看游戏界面,告诉我有多少行多少列,有哪些不同类型的图案,总共有多少个图标",
+		Screenshot:   screenshot,
+		Size:         size,
+		OutputSchema: outputSchema,
+	}
+
+	// Perform query; require (not assert) because result is dereferenced below
+	result, err := querier.Query(context.Background(), opts)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.NotEmpty(t, result.Content)
+	assert.NotEmpty(t, result.Thought)
+	assert.NotNil(t, result.Data)
+
+	t.Logf("Simple schema result: %+v", result)
+}
+
+// TestQueryWithGameAnalysisSchema tests with comprehensive game 
analysis schema
+func TestQueryWithGameAnalysisSchema(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	// Load test image
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	outputSchema := GameAnalysisResult{}
+
+	// Prepare query options
+	opts := &QueryOptions{
+		Query: `Analyze this game interface and provide structured information about:
+1. The type of game
+2. Grid dimensions (rows and columns)
+3. All game elements with their positions and types
+4. Statistics about element distribution`,
+		Screenshot:   screenshot,
+		Size:         size,
+		OutputSchema: outputSchema,
+	}
+
+	// Perform query; require (not assert) because result is dereferenced below
+	result, err := querier.Query(context.Background(), opts)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.NotEmpty(t, result.Content)
+	assert.NotEmpty(t, result.Thought)
+	assert.NotNil(t, result.Data)
+
+	t.Logf("Game analysis result: %+v", result)
+}
+
+// TestQueryWithUIElementsSchema tests UI elements analysis
+func TestQueryWithUIElementsSchema(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	// Load test image
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	outputSchema := UIElementsResult{}
+
+	// Prepare query options
+	opts := &QueryOptions{
+		Query: `Analyze this interface and identify all UI elements including:
+1. Buttons and their text
+2. Text labels and content
+3. Images and icons
+4. Interactive elements
+5. Their positions and properties`,
+		Screenshot:   screenshot,
+		Size:         size,
+		OutputSchema: outputSchema,
+	}
+
+	// Perform query; require (not assert) because result is dereferenced below
+	result, err := querier.Query(context.Background(), opts)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.NotEmpty(t, result.Content)
+	assert.NotEmpty(t, result.Thought)
+	assert.NotNil(t, result.Data)
+
+	t.Logf("UI elements analysis result: %+v", result)
+}
+
+// TestQuerySchemaComparison compares standard vs custom schema queries
+func TestQuerySchemaComparison(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	query := "请分析这个连连看游戏界面的基本信息"
+
+	// Standard query (without custom schema)
+	t.Run("StandardQuery", func(t *testing.T) {
+		standardOpts := &QueryOptions{
+			Query:      query,
+			Screenshot: screenshot,
+			Size:       size,
+			// No OutputSchema specified
+		}
+
+		standardResult, err := querier.Query(context.Background(), standardOpts)
+		require.NoError(t, err)
+		require.NotNil(t, standardResult)
+		assert.NotEmpty(t, standardResult.Content)
+		assert.NotEmpty(t, standardResult.Thought)
+		assert.Nil(t, standardResult.Data) // Should be nil for standard query
+
+		t.Logf("Standard Query Result:")
+		t.Logf("Content: %s", standardResult.Content)
+		t.Logf("Thought: %s", standardResult.Thought)
+		t.Logf("Data: %+v", standardResult.Data)
+	})
+
+	// Custom schema query
+	t.Run("CustomSchemaQuery", func(t *testing.T) {
+		type GameInfo struct {
+			Content string   `json:"content"`
+			Thought string   `json:"thought"`
+			Rows    int      `json:"rows"`
+			Cols    int      `json:"cols"`
+			Icons   []string `json:"icons"`
+		}
+
+		customOpts := &QueryOptions{
+			Query:        query,
+			Screenshot:   screenshot,
+			Size:         size,
+			OutputSchema: GameInfo{},
+		}
+
+		customResult, err := querier.Query(context.Background(), customOpts)
+		require.NoError(t, err)
+		require.NotNil(t, customResult)
+		assert.NotEmpty(t, customResult.Content)
+		assert.NotEmpty(t, customResult.Thought)
+		assert.NotNil(t, customResult.Data) // Should contain structured data
+
+		t.Logf("Custom Schema Query Result:")
+		t.Logf("Content: %s", customResult.Content)
+		t.Logf("Thought: %s", customResult.Thought)
+		t.Logf("Structured Data: %+v", customResult.Data)
+	})
+}
+
+// TestQueryWithDifferentPrompts tests various types of queries on the same screenshot
+func TestQueryWithDifferentPrompts(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	// Load screenshot
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	// Example queries
+	queries := []string{
+		"请描述这张图片中的内容",
+		"这个游戏界面有多少行多少列?",
+		"请识别图片中所有不同类型的图案",
+		"请找出可以消除的图案对",
+	}
+
+	for i, query := range queries {
+		t.Run(fmt.Sprintf("Query_%d", i+1), func(t *testing.T) {
+			opts := &QueryOptions{
+				Query:      query,
+				Screenshot: screenshot,
+				Size:       size,
+			}
+
+			result, err := querier.Query(context.Background(), opts)
+			require.NoError(t, err)
+			require.NotNil(t, result)
+			assert.NotEmpty(t, result.Content)
+			assert.NotEmpty(t, result.Thought)
+
+			t.Logf("Query %d: %s", i+1, query)
+			t.Logf("Answer: %s", result.Content)
+			t.Logf("Reasoning: %s", result.Thought)
+		})
+	}
+}
+
+// TestConvertQueryResultData tests the type conversion functionality
+func TestConvertQueryResultData(t *testing.T) {
+	// Test data structure
+	type TestSchema struct {
+		Content string   `json:"content"`
+		Thought string   `json:"thought"`
+		Count   int      `json:"count"`
+		Items   []string `json:"items"`
+	}
+
+	// Create a QueryResult with structured data
+	testData := &TestSchema{
+		Content: "Test content",
+		Thought: "Test thought",
+		Count:   5,
+		Items:   []string{"item1", "item2", "item3"},
+	}
+
+	result := &QueryResult{
+		Content: "Test content",
+		Thought: "Test thought",
+		Data:    testData,
+	}
+
+	// Test type conversion; require (not assert) because converted is dereferenced below
+	converted, err := ConvertQueryResultData[TestSchema](result)
+	require.NoError(t, err)
+	require.NotNil(t, converted)
+	assert.Equal(t, "Test content", converted.Content)
+	assert.Equal(t, "Test thought", converted.Thought)
+	assert.Equal(t, 5, converted.Count)
+	assert.Equal(t, []string{"item1", "item2", "item3"}, converted.Items)
+
+	t.Logf("Successfully converted data: %+v", converted)
+}
+
+// TestQueryResultDataConsistency tests that QueryResult.Data matches OutputSchema
+func TestQueryResultDataConsistency(t *testing.T) {
+	querier := setupTestQuerier(t)
+
+	// Load test image
+	screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
+	require.NoError(t, err)
+
+	// Define a simple test schema
+	type TestGameInfo struct {
+		Content string   `json:"content"`
+		Thought string   `json:"thought"`
+		Rows    int      `json:"rows"`
+		Cols    int      `json:"cols"`
+		Icons   []string `json:"icons"`
+	}
+
+	outputSchema := TestGameInfo{}
+
+	// Prepare query options
+	opts := &QueryOptions{
+		Query:        "请分析这个连连看游戏界面,告诉我有多少行多少列,有哪些不同类型的图案",
+		Screenshot:   screenshot,
+		Size:         size,
+		OutputSchema: outputSchema,
+	}
+
+	// Perform query; require (not assert) because result and gameInfo are dereferenced below
+	result, err := querier.Query(context.Background(), opts)
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	require.NotNil(t, result.Data)
+	gameInfo, ok := result.Data.(*TestGameInfo)
+	require.True(t, ok, "Data should be *TestGameInfo, got %T", result.Data)
+	require.NotNil(t, gameInfo)
+
+	// Verify that the converted data has the expected structure
+	assert.NotEmpty(t, gameInfo.Content)
+	assert.NotEmpty(t, gameInfo.Thought)
+	assert.NotEmpty(t, gameInfo.Rows)
+	assert.NotEmpty(t, gameInfo.Cols)
+	assert.NotEmpty(t, gameInfo.Icons)
+}
+
+// TestAutoTypeConversion tests that QueryResult.Data is automatically converted to the correct type
+func TestAutoTypeConversion(t *testing.T) {
+	// Test data structure
+	type TestSchema struct {
+		Content string   `json:"content"`
+		Thought string   `json:"thought"`
+		Count   int      `json:"count"`
+		Items   []string `json:"items"`
+	}
+
+	// Simulate a JSON response from the model
+	jsonResponse := `{
+		"content": "Test content from model",
+		"thought": "Test reasoning process",
+		"count": 42,
+		"items": ["apple", "banana", "cherry"]
+	}`
+
+	// Test the parseCustomSchemaResult function directly
+	result, err := parseCustomSchemaResult(jsonResponse, TestSchema{})
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.NotNil(t, result.Data)
+
+	// Verify that Data is automatically converted to the correct type
+	typedData, ok := result.Data.(*TestSchema)
+	require.True(t, ok, "Data should be automatically converted to *TestSchema")
+	require.NotNil(t, typedData)
+
+	// Verify the content
+	assert.Equal(t, "Test content from model", typedData.Content)
+	assert.Equal(t, "Test reasoning process", typedData.Thought)
+	assert.Equal(t, 42, typedData.Count)
+	assert.Equal(t, []string{"apple", "banana", "cherry"}, typedData.Items)
+
+	// Verify that QueryResult fields are also populated
+	assert.Equal(t, "Test content from model", result.Content)
+	assert.Equal(t, "Test reasoning process", result.Thought)
+
+	t.Logf("Auto-converted data: %+v", typedData)
+}
+
+// TestDirectTypeAssertion tests that users can directly use type assertion on QueryResult.Data
+func TestDirectTypeAssertion(t *testing.T) {
+	// Test data structure
+	type GameInfo struct {
+		Content string   `json:"content"`
+		Thought string   `json:"thought"`
+		Rows    int      `json:"rows"`
+		Cols    int      `json:"cols"`
+		Icons   []string `json:"icons"`
+	}
+
+	// Simulate a JSON response
+	jsonResponse := `{
+		"content": "Game analysis complete",
+		"thought": "Analyzed the game grid structure",
+		"rows": 8,
+		"cols": 10,
+		"icons": ["apple", "banana", "cherry", "grape"]
+	}`
+
+	// Test the parseCustomSchemaResult function
+	result, err := parseCustomSchemaResult(jsonResponse, GameInfo{})
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	assert.NotNil(t, result.Data)
+
+	// Users can now directly use type assertion
+	if gameInfo, ok := result.Data.(*GameInfo); ok {
+		assert.Equal(t, "Game analysis complete", gameInfo.Content)
+		assert.Equal(t, "Analyzed the game grid structure", gameInfo.Thought)
+		assert.Equal(t, 8, gameInfo.Rows)
+		assert.Equal(t, 10, gameInfo.Cols)
+		assert.Equal(t, []string{"apple", "banana", "cherry", "grape"}, gameInfo.Icons)
+		t.Logf("Direct type assertion successful: %+v", gameInfo)
+	} else {
+		t.Fatalf("Type assertion failed, Data type: %T", result.Data)
+	}
+}
diff --git a/uixt/ai/utils.go b/uixt/ai/utils.go
index 94d70637..572b705e 100644
--- a/uixt/ai/utils.go
+++ b/uixt/ai/utils.go
@@ -1,9 +1,17 @@
 package ai
 
 import (
+	"context"
 	"regexp"
 	"strings"
+	"time"
 	"unicode/utf8"
+
+	"github.com/cloudwego/eino/components/model"
+	"github.com/cloudwego/eino/schema"
+	"github.com/rs/zerolog/log"
+
+	"github.com/httprunner/httprunner/v5/uixt/option"
 )
 
 // extractJSONFromContent extracts JSON content from various formats in the response
@@ -102,3 +110,29 @@ func extractJSONFromContent(content string) string {
 
 	return ""
 }
+
+// callModelWithLogging calls model.Generate with the conversation history and
+// wraps the call with the logging shared by planner, asserter and querier:
+//  1. Log the request via logRequest
+//  2. Start timing
+//  3. Call model.Generate; on error return (nil, err) unwrapped, without logging a response
+//  4. Log the response via logResponse
+//  5. On return (deferred, so also on error) log elapsed seconds, model type and operation
+func callModelWithLogging(ctx context.Context, model model.ToolCallingChatModel, history ConversationHistory, modelType option.LLMServiceType, operation string) (*schema.Message, error) {
+	logRequest(history)
+
+	startTime := time.Now()
+	defer func() {
+		log.Debug().Float64("elapsed(s)", time.Since(startTime).Seconds()).
+			Str("model", string(modelType)).
+			Msgf("call model service for %s", operation)
+	}()
+
+	message, err := model.Generate(ctx, history)
+	if err != nil {
+		return nil, err
+	}
+
+	logResponse(message)
+	return message, nil
+}