mirror of
https://github.com/httprunner/httprunner.git
synced 2026-06-03 06:49:38 +08:00
feat: add AI Querier module with custom output schema support and refactor common model calling logic
- Add new AI Querier module for structured information extraction from screenshots - Support custom output schema for structured data response - Implement automatic type conversion and data validation - Add comprehensive test suite with various data structure examples - Refactor callModelWithLogging to utils.go as shared function for planner, asserter, and querier - Eliminate code duplication across AI modules (30+ lines of repeated code) - Improve maintainability with unified logging and timing logic - Add environment variable checks in test setup to handle missing API keys gracefully Key features: - Custom output schema support with JSON Schema generation - Automatic data type conversion with reflection - Fallback mechanisms for robust parsing - Comprehensive documentation and usage examples - Backward compatibility with existing functionality
This commit is contained in:
@@ -3,7 +3,6 @@ package ai
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/openai"
|
||||
openai2 "github.com/cloudwego/eino-ext/libs/acl/openai"
|
||||
@@ -15,7 +14,6 @@ import (
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
"github.com/httprunner/httprunner/v5/uixt/types"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// IAsserter interface defines the contract for assertion operations
|
||||
@@ -128,15 +126,11 @@ Here is the assertion. Please tell whether it is truthy according to the screens
|
||||
a.history.Append(userMsg)
|
||||
|
||||
// Call model service, generate response
|
||||
logRequest(a.history)
|
||||
startTime := time.Now()
|
||||
message, err := a.model.Generate(ctx, a.history)
|
||||
log.Debug().Float64("elapsed(s)", time.Since(startTime).Seconds()).
|
||||
Str("model", string(a.modelConfig.ModelType)).Msg("call model service for assertion")
|
||||
message, err := callModelWithLogging(ctx, a.model, a.history,
|
||||
a.modelConfig.ModelType, "assertion")
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
|
||||
}
|
||||
logResponse(message)
|
||||
|
||||
// Parse result
|
||||
result, err := parseAssertionResult(message.Content)
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
)
|
||||
|
||||
func createAsserter(t *testing.T) *Asserter {
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428)
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328)
|
||||
require.NoError(t, err)
|
||||
asserter, err := NewAsserter(context.Background(), modelConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -2,7 +2,6 @@ package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/openai"
|
||||
"github.com/cloudwego/eino/components/model"
|
||||
@@ -116,15 +115,11 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (result *Plan
|
||||
p.history.Append(opts.Message)
|
||||
|
||||
// call model service, generate response
|
||||
logRequest(p.history)
|
||||
startTime := time.Now()
|
||||
message, err := p.model.Generate(ctx, p.history)
|
||||
log.Debug().Float64("elapsed(s)", time.Since(startTime).Seconds()).
|
||||
Str("model", string(p.modelConfig.ModelType)).Msg("call model service for planning")
|
||||
message, err := callModelWithLogging(ctx, p.model, p.history,
|
||||
p.modelConfig.ModelType, "planning")
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
|
||||
}
|
||||
logResponse(message)
|
||||
|
||||
defer func() {
|
||||
// Extract usage information if available
|
||||
@@ -174,7 +169,6 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (result *Plan
|
||||
log.Info().
|
||||
Interface("thought", result.Thought).
|
||||
Interface("tool_calls", result.ToolCalls).
|
||||
Float64("elapsed(s)", time.Since(startTime).Seconds()).
|
||||
Msg("get VLM planning result")
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ func TestVLMPlanning(t *testing.T) {
|
||||
|
||||
userInstruction += "\n\n请基于以上游戏规则,给出下一步可点击的两个图标坐标"
|
||||
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428)
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328)
|
||||
require.NoError(t, err)
|
||||
|
||||
planner, err := NewPlanner(context.Background(), modelConfig)
|
||||
@@ -72,7 +72,7 @@ func TestXHSPlanning(t *testing.T) {
|
||||
|
||||
userInstruction := "点击第二个帖子的作者头像"
|
||||
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428)
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328)
|
||||
require.NoError(t, err)
|
||||
|
||||
planner, err := NewPlanner(context.Background(), modelConfig)
|
||||
@@ -115,7 +115,7 @@ func TestChatList(t *testing.T) {
|
||||
|
||||
userInstruction := "请结合图片的文字信息,请告诉我一共有多少个群聊,哪些群聊右下角有绿点"
|
||||
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428)
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328)
|
||||
require.NoError(t, err)
|
||||
|
||||
planner, err := NewPlanner(context.Background(), modelConfig)
|
||||
@@ -147,7 +147,7 @@ func TestChatList(t *testing.T) {
|
||||
|
||||
func TestHandleSwitch(t *testing.T) {
|
||||
userInstruction := "检查发送框下方的联网搜索开关,蓝色为开启状态,灰色为关闭状态;若开关处于关闭状态,则点击进行开启"
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250428)
|
||||
modelConfig, err := GetModelConfig(option.DOUBAO_1_5_UI_TARS_250328)
|
||||
require.NoError(t, err)
|
||||
|
||||
planner, err := NewPlanner(context.Background(), modelConfig)
|
||||
|
||||
515
uixt/ai/querier.go
Normal file
515
uixt/ai/querier.go
Normal file
@@ -0,0 +1,515 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/openai"
|
||||
openai2 "github.com/cloudwego/eino-ext/libs/acl/openai"
|
||||
"github.com/cloudwego/eino/components/model"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/getkin/kin-openapi/openapi3gen"
|
||||
"github.com/httprunner/httprunner/v5/code"
|
||||
"github.com/httprunner/httprunner/v5/internal/json"
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
"github.com/httprunner/httprunner/v5/uixt/types"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// IQuerier interface defines the contract for query operations
|
||||
type IQuerier interface {
|
||||
Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error)
|
||||
}
|
||||
|
||||
// QueryOptions represents the input options for query
|
||||
type QueryOptions struct {
|
||||
Query string `json:"query"` // The query text to extract information
|
||||
Screenshot string `json:"screenshot"` // Base64 encoded screenshot
|
||||
Size types.Size `json:"size"` // Screen dimensions
|
||||
OutputSchema interface{} `json:"outputSchema,omitempty"` // Custom output schema for structured response
|
||||
}
|
||||
|
||||
// QueryResult is the answer produced by an AI query.
type QueryResult struct {
	// Content is the extracted content/information.
	Content string `json:"content"`

	// Thought is the reasoning process reported alongside the content.
	Thought string `json:"thought"`

	// Data holds the structured data when an OutputSchema was provided.
	Data any `json:"data,omitempty"`
}
|
||||
|
||||
// Querier handles query operations using different AI models
|
||||
type Querier struct {
|
||||
modelConfig *ModelConfig
|
||||
model model.ToolCallingChatModel
|
||||
systemPrompt string
|
||||
history ConversationHistory
|
||||
}
|
||||
|
||||
// NewQuerier creates a new Querier instance
|
||||
func NewQuerier(ctx context.Context, modelConfig *ModelConfig) (*Querier, error) {
|
||||
querier := &Querier{
|
||||
modelConfig: modelConfig,
|
||||
systemPrompt: defaultQueryPrompt,
|
||||
}
|
||||
|
||||
if option.IS_UI_TARS(modelConfig.ModelType) {
|
||||
querier.systemPrompt += "\n" + uiTarsQueryResponseFormat
|
||||
} else {
|
||||
// define default output format
|
||||
type OutputFormat struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(&OutputFormat{}, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
||||
}
|
||||
// set structured response format
|
||||
modelConfig.ChatModelConfig.ResponseFormat = &openai2.ChatCompletionResponseFormat{
|
||||
Type: openai2.ChatCompletionResponseFormatTypeJSONSchema,
|
||||
JSONSchema: &openai2.ChatCompletionResponseFormatJSONSchema{
|
||||
Name: "query_result",
|
||||
Description: "data that describes query result",
|
||||
Schema: outputFormatSchema.Value,
|
||||
Strict: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
querier.model, err = openai.NewChatModel(ctx, modelConfig.ChatModelConfig)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
||||
}
|
||||
|
||||
return querier, nil
|
||||
}
|
||||
|
||||
// NOTE: callModelWithLogging, the shared helper that calls the model with logging and timing, is defined in utils.go.
|
||||
|
||||
// Query performs the information extraction from the screenshot
|
||||
func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) {
|
||||
// Validate input parameters
|
||||
if err := validateQueryInput(opts); err != nil {
|
||||
return nil, errors.Wrap(err, "validate query parameters failed")
|
||||
}
|
||||
|
||||
// Handle custom output schema if provided
|
||||
if opts.OutputSchema != nil {
|
||||
return q.queryWithCustomSchema(ctx, opts)
|
||||
}
|
||||
|
||||
// Reset history for each new query
|
||||
q.history = ConversationHistory{
|
||||
{
|
||||
Role: schema.System,
|
||||
Content: q.systemPrompt,
|
||||
},
|
||||
}
|
||||
|
||||
// Create user message with screenshot and query
|
||||
userMsg := &schema.Message{
|
||||
Role: schema.User,
|
||||
MultiContent: []schema.ChatMessagePart{
|
||||
{
|
||||
Type: schema.ChatMessagePartTypeImageURL,
|
||||
ImageURL: &schema.ChatMessageImageURL{
|
||||
URL: opts.Screenshot,
|
||||
Detail: schema.ImageURLDetailAuto,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: schema.ChatMessagePartTypeText,
|
||||
Text: fmt.Sprintf(`
|
||||
Here is the query. Please extract the requested information from the screenshot.
|
||||
=====================================
|
||||
%s
|
||||
=====================================
|
||||
`, opts.Query),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Append user message to history
|
||||
q.history.Append(userMsg)
|
||||
|
||||
// Call model service with logging
|
||||
message, err := callModelWithLogging(ctx, q.model, q.history,
|
||||
q.modelConfig.ModelType, "query")
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
|
||||
}
|
||||
|
||||
// Parse result
|
||||
result, err := parseQueryResult(message.Content)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error())
|
||||
}
|
||||
|
||||
// Append assistant message to history
|
||||
q.history.Append(&schema.Message{
|
||||
Role: schema.Assistant,
|
||||
Content: message.Content,
|
||||
})
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// validateQueryInput validates the input parameters for query
|
||||
func validateQueryInput(opts *QueryOptions) error {
|
||||
if opts.Query == "" {
|
||||
return errors.Wrap(code.LLMPrepareRequestError, "query text is required")
|
||||
}
|
||||
if opts.Screenshot == "" {
|
||||
return errors.Wrap(code.LLMPrepareRequestError, "screenshot is required")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseQueryResult parses the model response into QueryResult
|
||||
func parseQueryResult(content string) (*QueryResult, error) {
|
||||
// Extract JSON content from response
|
||||
jsonContent := extractJSONFromContent(content)
|
||||
if jsonContent == "" {
|
||||
// If no JSON found, treat the entire content as the result
|
||||
// This handles cases where the model returns plain text instead of JSON
|
||||
return &QueryResult{
|
||||
Content: content,
|
||||
Thought: "Direct response from model",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Parse JSON response
|
||||
var result QueryResult
|
||||
if err := json.Unmarshal([]byte(jsonContent), &result); err != nil {
|
||||
// If JSON parsing fails, treat the content as plain text result
|
||||
return &QueryResult{
|
||||
Content: content,
|
||||
Thought: "Failed to parse as JSON, returning raw content",
|
||||
}, nil
|
||||
}
|
||||
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
// queryWithCustomSchema performs query with custom output schema
|
||||
func (q *Querier) queryWithCustomSchema(ctx context.Context, opts *QueryOptions) (*QueryResult, error) {
|
||||
// Create a new model config with custom schema
|
||||
modelConfig := *q.modelConfig
|
||||
|
||||
if !option.IS_UI_TARS(modelConfig.ModelType) {
|
||||
// Generate schema from the provided output schema
|
||||
outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(opts.OutputSchema, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
||||
}
|
||||
|
||||
// Create custom response format with the provided schema
|
||||
modelConfig.ChatModelConfig.ResponseFormat = &openai2.ChatCompletionResponseFormat{
|
||||
Type: openai2.ChatCompletionResponseFormatTypeJSONSchema,
|
||||
JSONSchema: &openai2.ChatCompletionResponseFormatJSONSchema{
|
||||
Name: "custom_query_result",
|
||||
Description: "custom structured data response",
|
||||
Schema: outputFormatSchema.Value,
|
||||
Strict: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Create a new model instance with custom schema
|
||||
model, err := openai.NewChatModel(ctx, modelConfig.ChatModelConfig)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
||||
}
|
||||
|
||||
// Reset history for each new query
|
||||
systemPrompt := q.systemPrompt
|
||||
if option.IS_UI_TARS(modelConfig.ModelType) {
|
||||
systemPrompt += "\n" + uiTarsQueryResponseFormat
|
||||
} else {
|
||||
// Add instruction for structured output
|
||||
systemPrompt += "\n\nPlease respond with structured data according to the specified schema. Include both the structured data and your reasoning process."
|
||||
}
|
||||
|
||||
history := ConversationHistory{
|
||||
{
|
||||
Role: schema.System,
|
||||
Content: systemPrompt,
|
||||
},
|
||||
}
|
||||
|
||||
// Create user message with screenshot and query
|
||||
userMsg := &schema.Message{
|
||||
Role: schema.User,
|
||||
MultiContent: []schema.ChatMessagePart{
|
||||
{
|
||||
Type: schema.ChatMessagePartTypeImageURL,
|
||||
ImageURL: &schema.ChatMessageImageURL{
|
||||
URL: opts.Screenshot,
|
||||
Detail: schema.ImageURLDetailAuto,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: schema.ChatMessagePartTypeText,
|
||||
Text: fmt.Sprintf(`
|
||||
Here is the query. Please extract the requested information from the screenshot and return it in the specified structured format.
|
||||
=====================================
|
||||
%s
|
||||
=====================================
|
||||
`, opts.Query),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Append user message to history
|
||||
history.Append(userMsg)
|
||||
|
||||
// Call model service with logging
|
||||
message, err := callModelWithLogging(ctx, model, history, modelConfig.ModelType, "custom schema query")
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
|
||||
}
|
||||
|
||||
// Parse result with custom schema
|
||||
result, err := parseCustomSchemaResult(message.Content, opts.OutputSchema)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error())
|
||||
}
|
||||
|
||||
// Append assistant message to history
|
||||
q.history.Append(&schema.Message{
|
||||
Role: schema.Assistant,
|
||||
Content: message.Content,
|
||||
})
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// setDefaultFieldValue fills the string field fieldName of structValue with
// defaultValue when that field is currently empty.
//
// The call is a no-op when structValue is not a struct, when the field does
// not exist, cannot be set (for example because it is unexported), is not of
// string kind, or already holds a non-empty value.
func setDefaultFieldValue(structValue reflect.Value, fieldName, defaultValue string) {
	// FieldByName panics on anything that is not a struct.
	if structValue.Kind() != reflect.Struct {
		return
	}

	field := structValue.FieldByName(fieldName)
	if !field.IsValid() || !field.CanSet() || field.Kind() != reflect.String {
		return
	}

	// A default must never replace data that is already present.
	if field.String() != "" {
		return
	}
	field.SetString(defaultValue)
}
|
||||
|
||||
// ensureDefaultValues ensures that Content and Thought fields have default values if empty
|
||||
func ensureDefaultValues(result *QueryResult, structuredData interface{}) {
|
||||
const (
|
||||
defaultContent = "Structured data extracted successfully"
|
||||
defaultThought = "Parsed structured response according to custom schema"
|
||||
)
|
||||
|
||||
// Set defaults for QueryResult
|
||||
if result.Content == "" {
|
||||
result.Content = defaultContent
|
||||
}
|
||||
if result.Thought == "" {
|
||||
result.Thought = defaultThought
|
||||
}
|
||||
|
||||
// Set defaults in structured data if it's a pointer to struct
|
||||
if structuredData != nil {
|
||||
if structValue := reflect.ValueOf(structuredData); structValue.Kind() == reflect.Ptr {
|
||||
if elem := structValue.Elem(); elem.IsValid() && elem.Kind() == reflect.Struct {
|
||||
if result.Content == defaultContent {
|
||||
setDefaultFieldValue(elem, "Content", defaultContent)
|
||||
}
|
||||
if result.Thought == defaultThought {
|
||||
setDefaultFieldValue(elem, "Thought", defaultThought)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseCustomSchemaResult parses the model response with custom schema
|
||||
func parseCustomSchemaResult(content string, outputSchema interface{}) (*QueryResult, error) {
|
||||
// Extract JSON content from response
|
||||
jsonContent := extractJSONFromContent(content)
|
||||
if jsonContent == "" {
|
||||
// If no JSON found, treat the entire content as the result
|
||||
return &QueryResult{
|
||||
Content: content,
|
||||
Thought: "Direct response from model",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Create a new instance of the same type as outputSchema
|
||||
schemaType := reflect.TypeOf(outputSchema)
|
||||
if schemaType.Kind() == reflect.Ptr {
|
||||
schemaType = schemaType.Elem()
|
||||
}
|
||||
|
||||
// Create a new instance of the schema type
|
||||
newInstance := reflect.New(schemaType).Interface()
|
||||
|
||||
// Try to unmarshal directly into the schema type
|
||||
if err := json.Unmarshal([]byte(jsonContent), newInstance); err == nil {
|
||||
// Successfully parsed into the expected schema type
|
||||
result := &QueryResult{
|
||||
Data: newInstance, // Store the typed pointer directly
|
||||
}
|
||||
|
||||
// Try to extract content and thought if the schema has these fields
|
||||
schemaValue := reflect.ValueOf(newInstance).Elem()
|
||||
if contentField := schemaValue.FieldByName("Content"); contentField.IsValid() && contentField.Kind() == reflect.String {
|
||||
result.Content = contentField.String()
|
||||
}
|
||||
if thoughtField := schemaValue.FieldByName("Thought"); thoughtField.IsValid() && thoughtField.Kind() == reflect.String {
|
||||
result.Thought = thoughtField.String()
|
||||
}
|
||||
|
||||
// If no standard fields found, try to extract from map representation
|
||||
if result.Content == "" && result.Thought == "" {
|
||||
var dataMap map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(jsonContent), &dataMap); err == nil {
|
||||
if content, exists := dataMap["content"]; exists {
|
||||
if contentStr, ok := content.(string); ok {
|
||||
result.Content = contentStr
|
||||
}
|
||||
}
|
||||
if thought, exists := dataMap["thought"]; exists {
|
||||
if thoughtStr, ok := thought.(string); ok {
|
||||
result.Thought = thoughtStr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure default values are set
|
||||
ensureDefaultValues(result, newInstance)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Fallback: try to parse as generic map and then convert
|
||||
var structuredData interface{}
|
||||
if err := json.Unmarshal([]byte(jsonContent), &structuredData); err == nil {
|
||||
// Try to convert the generic data to the expected schema type
|
||||
if convertedData, err := convertToSchemaType(structuredData, outputSchema); err == nil {
|
||||
result := &QueryResult{
|
||||
Data: convertedData, // Store the converted typed data
|
||||
}
|
||||
|
||||
// Extract content and thought from the original map
|
||||
if dataMap, ok := structuredData.(map[string]interface{}); ok {
|
||||
if content, exists := dataMap["content"]; exists {
|
||||
if contentStr, ok := content.(string); ok {
|
||||
result.Content = contentStr
|
||||
}
|
||||
}
|
||||
if thought, exists := dataMap["thought"]; exists {
|
||||
if thoughtStr, ok := thought.(string); ok {
|
||||
result.Thought = thoughtStr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure default values are set
|
||||
ensureDefaultValues(result, convertedData)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// If conversion failed, fall back to storing the generic data
|
||||
if dataMap, ok := structuredData.(map[string]interface{}); ok {
|
||||
result := &QueryResult{
|
||||
Data: structuredData,
|
||||
}
|
||||
|
||||
// Extract content and thought if present
|
||||
if content, exists := dataMap["content"]; exists {
|
||||
if contentStr, ok := content.(string); ok {
|
||||
result.Content = contentStr
|
||||
}
|
||||
}
|
||||
if thought, exists := dataMap["thought"]; exists {
|
||||
if thoughtStr, ok := thought.(string); ok {
|
||||
result.Thought = thoughtStr
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure default values are set
|
||||
ensureDefaultValues(result, nil)
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to treating as plain text
|
||||
return &QueryResult{
|
||||
Content: content,
|
||||
Thought: "Failed to parse as structured data, returning raw content",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// convertToSchemaType converts generic data to the specified schema type
|
||||
func convertToSchemaType(data interface{}, outputSchema interface{}) (interface{}, error) {
|
||||
// Get the type of the output schema
|
||||
schemaType := reflect.TypeOf(outputSchema)
|
||||
if schemaType.Kind() == reflect.Ptr {
|
||||
schemaType = schemaType.Elem()
|
||||
}
|
||||
|
||||
// Create a new instance of the schema type
|
||||
newInstance := reflect.New(schemaType).Interface()
|
||||
|
||||
// Convert via JSON marshaling/unmarshaling
|
||||
jsonData, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to marshal data to JSON")
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(jsonData, newInstance); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to unmarshal data to target type")
|
||||
}
|
||||
|
||||
return newInstance, nil
|
||||
}
|
||||
|
||||
// ConvertQueryResultData converts QueryResult.Data to the specified type T
|
||||
// This is a helper function for type-safe conversion of the structured data
|
||||
//
|
||||
// Note: When using QueryOptions.OutputSchema, the Data field is automatically
|
||||
// converted to the correct type, so this function is typically not needed.
|
||||
// This function is mainly useful for:
|
||||
// 1. Converting data when OutputSchema was not used
|
||||
// 2. Converting to a different type than the original OutputSchema
|
||||
// 3. Handling legacy code or edge cases
|
||||
func ConvertQueryResultData[T any](result *QueryResult) (*T, error) {
|
||||
if result.Data == nil {
|
||||
return nil, errors.New("no structured data available")
|
||||
}
|
||||
|
||||
// If Data is already of the correct type, return it directly
|
||||
if typedData, ok := result.Data.(*T); ok {
|
||||
return typedData, nil
|
||||
}
|
||||
|
||||
// If Data is a pointer to the correct type, dereference and return
|
||||
if reflect.TypeOf(result.Data).Kind() == reflect.Ptr {
|
||||
if typedData, ok := result.Data.(*T); ok {
|
||||
return typedData, nil
|
||||
}
|
||||
// Try to get the value that the pointer points to
|
||||
dataValue := reflect.ValueOf(result.Data)
|
||||
if dataValue.Kind() == reflect.Ptr && !dataValue.IsNil() {
|
||||
elem := dataValue.Elem()
|
||||
if elem.Type() == reflect.TypeOf((*T)(nil)).Elem() {
|
||||
typedData := elem.Interface().(T)
|
||||
return &typedData, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: try to convert via JSON marshaling/unmarshaling
|
||||
jsonData, err := json.Marshal(result.Data)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to marshal data to JSON")
|
||||
}
|
||||
|
||||
var converted T
|
||||
if err := json.Unmarshal(jsonData, &converted); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to unmarshal data to target type")
|
||||
}
|
||||
|
||||
return &converted, nil
|
||||
}
|
||||
299
uixt/ai/querier.md
Normal file
299
uixt/ai/querier.md
Normal file
@@ -0,0 +1,299 @@
|
||||
# HttpRunner AI Querier - 自定义输出格式功能
|
||||
|
||||
## 功能概述
|
||||
|
||||
HttpRunner 的 AI Querier 模块支持自定义输出格式功能,允许用户指定特定的数据结构,让 AI 模型返回结构化的数据响应。适用于:
|
||||
|
||||
- **UI 元素分析**:自动化测试中的界面元素提取
|
||||
- **游戏界面分析**:网格类游戏(连连看、消消乐、2048等)数据提取
|
||||
- **表单数据提取**:从表单截图中提取结构化信息
|
||||
- **图像内容分析**:任何需要从截图中提取结构化信息的场景
|
||||
|
||||
## 核心数据结构
|
||||
|
||||
```go
|
||||
// QueryOptions - 查询选项
|
||||
type QueryOptions struct {
|
||||
Query string `json:"query"` // 查询文本
|
||||
Screenshot string `json:"screenshot"` // Base64编码的截图
|
||||
Size types.Size `json:"size"` // 屏幕尺寸
|
||||
OutputSchema interface{} `json:"outputSchema,omitempty"` // 自定义输出格式(可选)
|
||||
}
|
||||
|
||||
// QueryResult - 查询结果
|
||||
type QueryResult struct {
|
||||
Content string `json:"content"` // 人类可读的分析结果
|
||||
Thought string `json:"thought"` // AI 推理过程
|
||||
Data interface{} `json:"data,omitempty"` // 结构化数据(使用OutputSchema时自动转换为指定类型)
|
||||
}
|
||||
```
|
||||
|
||||
## 基本用法
|
||||
|
||||
### 标准查询
|
||||
|
||||
```go
|
||||
// 创建查询器
|
||||
modelConfig, err := ai.GetModelConfig(option.OPENAI_GPT_4O)
|
||||
querier, err := ai.NewQuerier(ctx, modelConfig)
|
||||
|
||||
// 执行查询
|
||||
result, err := querier.Query(ctx, &ai.QueryOptions{
|
||||
Query: "请分析这张截图中的内容",
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
// 不指定 OutputSchema
|
||||
})
|
||||
|
||||
fmt.Printf("分析结果: %s\n", result.Content)
|
||||
fmt.Printf("推理过程: %s\n", result.Thought)
|
||||
// result.Data 为 nil
|
||||
```
|
||||
|
||||
### 自定义格式查询
|
||||
|
||||
```go
|
||||
// 定义输出结构
|
||||
type GameAnalysis struct {
|
||||
Content string `json:"content"` // 分析描述
|
||||
Thought string `json:"thought"` // 思考过程
|
||||
Rows int `json:"rows"` // 行数
|
||||
Cols int `json:"cols"` // 列数
|
||||
Icons []string `json:"icons"` // 图标类型
|
||||
}
|
||||
|
||||
// 执行查询
|
||||
result, err := querier.Query(ctx, &ai.QueryOptions{
|
||||
Query: "分析这个游戏界面的网格结构和图标类型",
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: GameAnalysis{}, // 指定输出格式
|
||||
})
|
||||
|
||||
// 直接类型断言获取结构化数据
|
||||
if gameData, ok := result.Data.(*GameAnalysis); ok {
|
||||
fmt.Printf("行数: %d, 列数: %d\n", gameData.Rows, gameData.Cols)
|
||||
fmt.Printf("图标类型: %v\n", gameData.Icons)
|
||||
}
|
||||
```
|
||||
|
||||
## 应用场景示例
|
||||
|
||||
### UI 元素分析
|
||||
|
||||
```go
|
||||
type UIAnalysis struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Elements []UIElement `json:"elements"`
|
||||
}
|
||||
|
||||
type UIElement struct {
|
||||
Type string `json:"type"` // button, text, input等
|
||||
Text string `json:"text"` // 文本内容
|
||||
BoundBox BoundingBox `json:"boundBox"` // 位置坐标
|
||||
Clickable bool `json:"clickable"` // 是否可点击
|
||||
}
|
||||
|
||||
// BoundingBox is the position and size of an element. Each field carries its
// own JSON tag: a single tag on a multi-name field declaration would give
// every field the same JSON name.
type BoundingBox struct {
	X      int `json:"x"`
	Y      int `json:"y"`
	Width  int `json:"width"`
	Height int `json:"height"`
}
|
||||
```
|
||||
|
||||
### 网格游戏分析
|
||||
|
||||
```go
|
||||
type GridGame struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Grid [][]Cell `json:"grid"` // 网格数据
|
||||
Stats Statistics `json:"statistics"` // 统计信息
|
||||
}
|
||||
|
||||
type Cell struct {
|
||||
Type string `json:"type"` // 单元格类型
|
||||
Value string `json:"value"` // 单元格值
|
||||
Row int `json:"row"` // 行索引
|
||||
Col int `json:"col"` // 列索引
|
||||
}
|
||||
|
||||
type Statistics struct {
|
||||
TotalCells int `json:"totalCells"`
|
||||
UniqueTypes int `json:"uniqueTypes"`
|
||||
}
|
||||
```
|
||||
|
||||
### 表单数据提取
|
||||
|
||||
```go
|
||||
type FormAnalysis struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Fields []FormField `json:"fields"`
|
||||
Actions []Action `json:"actions"`
|
||||
}
|
||||
|
||||
type FormField struct {
|
||||
Label string `json:"label"` // 字段标签
|
||||
Type string `json:"type"` // 字段类型
|
||||
Value string `json:"value"` // 当前值
|
||||
Required bool `json:"required"` // 是否必填
|
||||
BoundBox BoundingBox `json:"boundBox"` // 位置
|
||||
}
|
||||
```
|
||||
|
||||
## 核心特性
|
||||
|
||||
### 自动类型转换
|
||||
- 指定 `OutputSchema` 时,`QueryResult.Data` 自动转换为指定类型
|
||||
- 支持直接类型断言:`result.Data.(*YourType)`
|
||||
- 无需手动调用转换函数
|
||||
|
||||
### 多级回退机制
|
||||
1. 优先解析为指定的结构化类型
|
||||
2. 失败时尝试通用JSON解析
|
||||
3. 最终回退到纯文本响应
|
||||
|
||||
### 向后兼容
|
||||
- 不指定 `OutputSchema` 时行为不变
|
||||
- 现有代码无需修改
|
||||
|
||||
## 最佳实践
|
||||
|
||||
### 1. 结构体设计
|
||||
|
||||
```go
|
||||
// 推荐:包含标准字段
|
||||
type YourSchema struct {
|
||||
Content string `json:"content"` // 必须:人类可读描述
|
||||
Thought string `json:"thought"` // 必须:AI推理过程
|
||||
// 自定义字段...
|
||||
Data CustomData `json:"data"`
|
||||
}
|
||||
|
||||
// 使用描述性的JSON标签
|
||||
type Element struct {
|
||||
Type string `json:"elementType"` // 清晰的字段名
|
||||
Position Point `json:"gridPosition"` // 描述性标签
|
||||
Visible bool `json:"isVisible"` // 布尔值清晰性
|
||||
}
|
||||
```
|
||||
|
||||
### 2. 查询指令
|
||||
|
||||
```go
|
||||
// 推荐:详细的查询指令
|
||||
opts := &ai.QueryOptions{
|
||||
Query: `分析这张截图并提供结构化信息:
|
||||
1. 识别界面类型和主要元素
|
||||
2. 提取所有可交互元素的位置和属性
|
||||
3. 统计各类元素的数量`,
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: YourSchema{},
|
||||
}
|
||||
```
|
||||
|
||||
### 3. 错误处理
|
||||
|
||||
```go
|
||||
result, err := querier.Query(ctx, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 类型断言
|
||||
if data, ok := result.Data.(*YourSchema); ok {
|
||||
// 使用结构化数据
|
||||
processData(data)
|
||||
} else {
|
||||
// 回退到文本结果
|
||||
log.Printf("结构化解析失败,使用文本结果: %s", result.Content)
|
||||
}
|
||||
```
|
||||
|
||||
## 完整示例
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/internal/builtin"
|
||||
"github.com/httprunner/httprunner/v5/uixt/ai"
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
)
|
||||
|
||||
type ScreenAnalysis struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Elements []string `json:"elements"`
|
||||
Categories []string `json:"categories"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
ctx := context.Background()
|
||||
|
||||
// 创建查询器
|
||||
modelConfig, err := ai.GetModelConfig(option.OPENAI_GPT_4O)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
querier, err := ai.NewQuerier(ctx, modelConfig)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// 加载截图
|
||||
screenshot, size, err := builtin.LoadImage("screenshot.png")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// 执行结构化查询
|
||||
result, err := querier.Query(ctx, &ai.QueryOptions{
|
||||
Query: "分析截图中的UI元素,提取元素类型和分类信息",
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: ScreenAnalysis{},
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// 使用结构化数据
|
||||
if analysis, ok := result.Data.(*ScreenAnalysis); ok {
|
||||
fmt.Printf("发现 %d 个元素\n", analysis.Count)
|
||||
fmt.Printf("元素类型: %v\n", analysis.Elements)
|
||||
fmt.Printf("分类: %v\n", analysis.Categories)
|
||||
} else {
|
||||
fmt.Printf("文本结果: %s\n", result.Content)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 辅助函数
|
||||
|
||||
对于特殊情况,提供了类型转换辅助函数:
|
||||
|
||||
```go
|
||||
// 手动类型转换(通常不需要)
|
||||
converted, err := ai.ConvertQueryResultData[YourType](result)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
**注意**:使用 `OutputSchema` 时,`Data` 字段已自动转换为正确类型,通常不需要手动调用此函数。
|
||||
|
||||
## 技术限制
|
||||
|
||||
- 需要使用支持结构化输出的 AI 模型(如上文示例中使用的 OpenAI GPT-4o)
|
||||
- 复杂嵌套结构需要清晰的查询指令
|
||||
- AI模型可能不总是严格遵循指定格式
|
||||
- UI-TARS 模型使用不同的响应格式处理
|
||||
20
uixt/ai/querier_prompts.go
Normal file
20
uixt/ai/querier_prompts.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package ai
|
||||
|
||||
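// defaultQueryPrompt is the default system prompt for queries: it tells the model to analyze the provided screenshot and extract the information requested by the user's query.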
|
||||
const defaultQueryPrompt = `You are an AI assistant specialized in analyzing images and extracting information. User will provide a screenshot and a query asking for specific information to be extracted from the image. Please analyze the image carefully and provide the requested information.`
|
||||
|
||||
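// uiTarsQueryResponseFormat describes the JSON output format (a "content" and a "thought" field) and the rules the model must follow when answering a query in the UI-TARS response format.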
|
||||
const uiTarsQueryResponseFormat = `
|
||||
## Output Json String Format
|
||||
` + "```" + `
|
||||
"{
|
||||
"content": "<<is a string containing the extracted information or analysis result>>",
|
||||
"thought": "<<is a string explaining your analysis process and reasoning. Use Chinese.>>"
|
||||
}"
|
||||
` + "```" + `
|
||||
|
||||
## Rules **MUST** follow
|
||||
- Make sure to return **only** the JSON, with **no additional** text or explanations.
|
||||
- Use Chinese in ` + "`Thought`" + ` part.
|
||||
- You **MUST** strictly follow up the **Output Json String Format**.
|
||||
- Provide detailed and accurate information extraction based on the image content.`
|
||||
617
uixt/ai/querier_test.go
Normal file
617
uixt/ai/querier_test.go
Normal file
@@ -0,0 +1,617 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/internal/builtin"
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// Data structures for testing custom output schemas
|
||||
|
||||
// GameIcon represents a single icon in the game grid.
// It is the element type of GameGrid.Grid and carries the icon name together
// with its zero-based row/column position.
type GameIcon struct {
	Name string `json:"name"` // Icon name (e.g., "beach_ball", "glove")
	Row  int    `json:"row"`  // Row position (0-based)
	Col  int    `json:"col"`  // Column position (0-based)
}
|
||||
|
||||
// GameGrid represents the complete game grid.
// It is embedded as the "data" field of LianliankanResponse.
type GameGrid struct {
	Grid  [][]GameIcon `json:"grid"`  // 2D array of game icons
	Rows  int          `json:"rows"`  // Number of rows
	Cols  int          `json:"cols"`  // Number of columns
	Icons []string     `json:"icons"` // List of unique icon names
}
|
||||
|
||||
// LianliankanResponse represents the structured response for lianliankan game analysis.
// It is used as the OutputSchema in TestQueryWithCustomSchema; the grid details
// are nested under the "data" key rather than placed at the top level.
type LianliankanResponse struct {
	Content string   `json:"content"` // Description of the analysis
	Thought string   `json:"thought"` // Reasoning process
	Data    GameGrid `json:"data"`    // Structured game grid data
}
|
||||
|
||||
// SimpleGameInfo represents basic game information.
// It is a flat schema (no nested structs) used as the OutputSchema in
// TestQueryWithSimpleSchema.
type SimpleGameInfo struct {
	Content    string   `json:"content"`    // Description
	Thought    string   `json:"thought"`    // Reasoning
	Rows       int      `json:"rows"`       // Number of rows
	Cols       int      `json:"cols"`       // Number of columns
	IconTypes  []string `json:"iconTypes"`  // List of icon types
	TotalIcons int      `json:"totalIcons"` // Total number of icons
}
|
||||
|
||||
// Additional data structures for comprehensive testing
|
||||
|
||||
// GameAnalysisResult represents structured analysis of a game interface.
// It combines nested structs (Dimensions, Statistics) with a slice of structs
// (Elements) and is used as the OutputSchema in TestQueryWithGameAnalysisSchema.
type GameAnalysisResult struct {
	Content    string     `json:"content"`    // Human-readable description
	Thought    string     `json:"thought"`    // AI reasoning process
	GameType   string     `json:"gameType"`   // Type of game detected
	Dimensions Dimensions `json:"dimensions"` // Grid dimensions
	Elements   []Element  `json:"elements"`   // Game elements detected
	Statistics Statistics `json:"statistics"` // Game statistics
}
|
||||
|
||||
// Dimensions holds the size of a game grid as a row and column count.
// It is the type of GameAnalysisResult.Dimensions.
type Dimensions struct {
	Rows int `json:"rows"` // Number of rows
	Cols int `json:"cols"` // Number of columns
}
|
||||
|
||||
// Element describes one detected game element: its type, its grid position
// and its bounding box. It is the element type of GameAnalysisResult.Elements.
type Element struct {
	Type     string      `json:"type"`     // Element type/name
	Position Position    `json:"position"` // Position in grid
	BoundBox BoundingBox `json:"boundBox"` // Pixel coordinates
}
|
||||
|
||||
// Position is a zero-based row/column location inside a grid.
// It is the type of Element.Position.
type Position struct {
	Row int `json:"row"` // Row index (0-based)
	Col int `json:"col"` // Column index (0-based)
}
|
||||
|
||||
// BoundingBox is a rectangle given by its left/top corner plus width and
// height. It is shared by Element.BoundBox and UIElement.BoundBox.
type BoundingBox struct {
	X      int `json:"x"`      // Left coordinate
	Y      int `json:"y"`      // Top coordinate
	Width  int `json:"width"`  // Width in pixels
	Height int `json:"height"` // Height in pixels
}
|
||||
|
||||
// Statistics summarizes the detected elements: the overall total, the number
// of distinct types, and a per-type count list.
// It is the type of GameAnalysisResult.Statistics.
type Statistics struct {
	TotalElements int         `json:"totalElements"` // Total number of elements
	UniqueTypes   int         `json:"uniqueTypes"`   // Number of unique element types
	TypeCounts    []TypeCount `json:"typeCounts"`    // Count of each type
}
|
||||
|
||||
// TypeCount pairs an element type with the number of times it occurs.
// It is the element type of Statistics.TypeCounts.
type TypeCount struct {
	Type  string `json:"type"`  // Element type
	Count int    `json:"count"` // Number of occurrences
}
|
||||
|
||||
// UIElementsResult represents structured analysis of UI elements.
// It is used as the OutputSchema in TestQueryWithUIElementsSchema.
type UIElementsResult struct {
	Content    string      `json:"content"`    // Description
	Thought    string      `json:"thought"`    // Reasoning
	Elements   []UIElement `json:"elements"`   // UI elements found
	Categories []string    `json:"categories"` // Categories of elements
}
|
||||
|
||||
// UIElement describes a single UI element: its type, text, description,
// bounding box and the clickable/visible flags.
// It is the element type of UIElementsResult.Elements.
type UIElement struct {
	Type        string      `json:"type"`        // Element type (button, text, image, etc.)
	Text        string      `json:"text"`        // Text content if any
	Description string      `json:"description"` // Element description
	BoundBox    BoundingBox `json:"boundBox"`    // Pixel coordinates
	Clickable   bool        `json:"clickable"`   // Whether element is clickable
	Visible     bool        `json:"visible"`     // Whether element is visible
}
|
||||
|
||||
// Test functions
|
||||
|
||||
func TestParseQueryResult(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
expected *QueryResult
|
||||
}{
|
||||
{
|
||||
name: "valid JSON response",
|
||||
content: `{
|
||||
"content": "这是一个14行8列的连连看游戏界面,包含25种不同的图案",
|
||||
"thought": "通过分析图片,我识别出了游戏界面的结构和图案类型"
|
||||
}`,
|
||||
expected: &QueryResult{
|
||||
Content: "这是一个14行8列的连连看游戏界面,包含25种不同的图案",
|
||||
Thought: "通过分析图片,我识别出了游戏界面的结构和图案类型",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "JSON in markdown",
|
||||
content: "```json\n{\n \"content\": \"游戏界面分析结果\",\n \"thought\": \"分析过程\"\n}\n```",
|
||||
expected: &QueryResult{
|
||||
Content: "游戏界面分析结果",
|
||||
Thought: "分析过程",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "plain text response",
|
||||
content: "这是一个连连看游戏界面,包含多种图案。",
|
||||
expected: &QueryResult{
|
||||
Content: "这是一个连连看游戏界面,包含多种图案。",
|
||||
Thought: "Direct response from model",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid JSON",
|
||||
content: `{"content": "incomplete json", "missing_closing_brace": true`,
|
||||
expected: &QueryResult{
|
||||
Content: `{"content": "incomplete json", "missing_closing_brace": true`,
|
||||
Thought: "Direct response from model",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "malformed JSON that can be extracted but not parsed",
|
||||
content: `{"content": "test", "invalid": }`,
|
||||
expected: &QueryResult{
|
||||
Content: `{"content": "test", "invalid": }`,
|
||||
Thought: "Failed to parse as JSON, returning raw content",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := parseQueryResult(tt.content)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.expected.Content, result.Content)
|
||||
assert.Equal(t, tt.expected.Thought, result.Thought)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func setupTestQuerier(t *testing.T) *Querier {
|
||||
ctx := context.Background()
|
||||
modelConfig, err := GetModelConfig(option.OPENAI_GPT_4O)
|
||||
require.NoError(t, err)
|
||||
querier, err := NewQuerier(ctx, modelConfig)
|
||||
require.NoError(t, err)
|
||||
return querier
|
||||
}
|
||||
|
||||
// TestQueryBasicUsage demonstrates basic query functionality without custom schema
|
||||
func TestQueryBasicUsage(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
// Load screenshot
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Prepare query options
|
||||
opts := &QueryOptions{
|
||||
Query: "这是一张连连看小游戏的界面,请将其转换为一个二维数组,数组中的每个元素包含图案名称及其坐标",
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
}
|
||||
|
||||
// Perform query
|
||||
result, err := querier.Query(context.Background(), opts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Content)
|
||||
assert.NotEmpty(t, result.Thought)
|
||||
assert.Nil(t, result.Data) // Should be nil for standard query
|
||||
|
||||
t.Logf("Query Result:")
|
||||
t.Logf("Content: %s", result.Content)
|
||||
t.Logf("Thought: %s", result.Thought)
|
||||
}
|
||||
|
||||
// TestQueryWithCustomSchema tests the query functionality with custom output schema
|
||||
func TestQueryWithCustomSchema(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
// Load test image
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Define custom output schema for lianliankan game
|
||||
outputSchema := LianliankanResponse{}
|
||||
|
||||
// Prepare query options with custom schema
|
||||
opts := &QueryOptions{
|
||||
Query: `这是一张连连看小游戏的界面,请分析游戏界面并返回结构化数据:
|
||||
1. 游戏网格的行数和列数
|
||||
2. 每个位置的图案名称和坐标
|
||||
3. 所有不同类型的图案列表
|
||||
请将结果组织成二维数组格式,每个元素包含图案名称及其坐标位置。`,
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: outputSchema,
|
||||
}
|
||||
|
||||
// Perform query
|
||||
result, err := querier.Query(context.Background(), opts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Content)
|
||||
assert.NotEmpty(t, result.Thought)
|
||||
assert.NotNil(t, result.Data)
|
||||
|
||||
t.Logf("Query result content: %s", result.Content)
|
||||
t.Logf("Query result thought: %s", result.Thought)
|
||||
t.Logf("Structured data: %+v", result.Data)
|
||||
|
||||
// Try to parse the structured data
|
||||
if dataMap, ok := result.Data.(map[string]interface{}); ok {
|
||||
if gridData, exists := dataMap["data"]; exists {
|
||||
t.Logf("Game grid data: %+v", gridData)
|
||||
}
|
||||
if rows, exists := dataMap["rows"]; exists {
|
||||
t.Logf("Rows: %v", rows)
|
||||
}
|
||||
if cols, exists := dataMap["cols"]; exists {
|
||||
t.Logf("Cols: %v", cols)
|
||||
}
|
||||
if icons, exists := dataMap["icons"]; exists {
|
||||
t.Logf("Icon Types: %v", icons)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestQueryWithSimpleSchema tests with a simpler custom schema
|
||||
func TestQueryWithSimpleSchema(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
// Load test image
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
outputSchema := SimpleGameInfo{}
|
||||
|
||||
// Prepare query options
|
||||
opts := &QueryOptions{
|
||||
Query: "请分析这个连连看游戏界面,告诉我有多少行多少列,有哪些不同类型的图案,总共有多少个图标",
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: outputSchema,
|
||||
}
|
||||
|
||||
// Perform query
|
||||
result, err := querier.Query(context.Background(), opts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Content)
|
||||
assert.NotEmpty(t, result.Thought)
|
||||
assert.NotNil(t, result.Data)
|
||||
|
||||
t.Logf("Simple schema result: %+v", result)
|
||||
}
|
||||
|
||||
// TestQueryWithGameAnalysisSchema tests with comprehensive game analysis schema
|
||||
func TestQueryWithGameAnalysisSchema(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
// Load test image
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
outputSchema := GameAnalysisResult{}
|
||||
|
||||
// Prepare query options
|
||||
opts := &QueryOptions{
|
||||
Query: `Analyze this game interface and provide structured information about:
|
||||
1. The type of game
|
||||
2. Grid dimensions (rows and columns)
|
||||
3. All game elements with their positions and types
|
||||
4. Statistics about element distribution`,
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: outputSchema,
|
||||
}
|
||||
|
||||
// Perform query
|
||||
result, err := querier.Query(context.Background(), opts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Content)
|
||||
assert.NotEmpty(t, result.Thought)
|
||||
assert.NotNil(t, result.Data)
|
||||
|
||||
t.Logf("Game analysis result: %+v", result)
|
||||
}
|
||||
|
||||
// TestQueryWithUIElementsSchema tests UI elements analysis
|
||||
func TestQueryWithUIElementsSchema(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
// Load test image
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
outputSchema := UIElementsResult{}
|
||||
|
||||
// Prepare query options
|
||||
opts := &QueryOptions{
|
||||
Query: `Analyze this interface and identify all UI elements including:
|
||||
1. Buttons and their text
|
||||
2. Text labels and content
|
||||
3. Images and icons
|
||||
4. Interactive elements
|
||||
5. Their positions and properties`,
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: outputSchema,
|
||||
}
|
||||
|
||||
// Perform query
|
||||
result, err := querier.Query(context.Background(), opts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Content)
|
||||
assert.NotEmpty(t, result.Thought)
|
||||
assert.NotNil(t, result.Data)
|
||||
|
||||
t.Logf("UI elements analysis result: %+v", result)
|
||||
}
|
||||
|
||||
// TestQuerySchemaComparison compares standard vs custom schema queries
|
||||
func TestQuerySchemaComparison(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
query := "请分析这个连连看游戏界面的基本信息"
|
||||
|
||||
// Standard query (without custom schema)
|
||||
t.Run("StandardQuery", func(t *testing.T) {
|
||||
standardOpts := &QueryOptions{
|
||||
Query: query,
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
// No OutputSchema specified
|
||||
}
|
||||
|
||||
standardResult, err := querier.Query(context.Background(), standardOpts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, standardResult)
|
||||
assert.NotEmpty(t, standardResult.Content)
|
||||
assert.NotEmpty(t, standardResult.Thought)
|
||||
assert.Nil(t, standardResult.Data) // Should be nil for standard query
|
||||
|
||||
t.Logf("Standard Query Result:")
|
||||
t.Logf("Content: %s", standardResult.Content)
|
||||
t.Logf("Thought: %s", standardResult.Thought)
|
||||
t.Logf("Data: %+v", standardResult.Data)
|
||||
})
|
||||
|
||||
// Custom schema query
|
||||
t.Run("CustomSchemaQuery", func(t *testing.T) {
|
||||
type GameInfo struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Rows int `json:"rows"`
|
||||
Cols int `json:"cols"`
|
||||
Icons []string `json:"icons"`
|
||||
}
|
||||
|
||||
customOpts := &QueryOptions{
|
||||
Query: query,
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: GameInfo{},
|
||||
}
|
||||
|
||||
customResult, err := querier.Query(context.Background(), customOpts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, customResult)
|
||||
assert.NotEmpty(t, customResult.Content)
|
||||
assert.NotEmpty(t, customResult.Thought)
|
||||
assert.NotNil(t, customResult.Data) // Should contain structured data
|
||||
|
||||
t.Logf("Custom Schema Query Result:")
|
||||
t.Logf("Content: %s", customResult.Content)
|
||||
t.Logf("Thought: %s", customResult.Thought)
|
||||
t.Logf("Structured Data: %+v", customResult.Data)
|
||||
})
|
||||
}
|
||||
|
||||
// TestQueryWithDifferentPrompts tests various types of queries on the same screenshot
|
||||
func TestQueryWithDifferentPrompts(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
// Load screenshot
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Example queries
|
||||
queries := []string{
|
||||
"请描述这张图片中的内容",
|
||||
"这个游戏界面有多少行多少列?",
|
||||
"请识别图片中所有不同类型的图案",
|
||||
"请找出可以消除的图案对",
|
||||
}
|
||||
|
||||
for i, query := range queries {
|
||||
t.Run(fmt.Sprintf("Query_%d", i+1), func(t *testing.T) {
|
||||
opts := &QueryOptions{
|
||||
Query: query,
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
}
|
||||
|
||||
result, err := querier.Query(context.Background(), opts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Content)
|
||||
assert.NotEmpty(t, result.Thought)
|
||||
|
||||
t.Logf("Query %d: %s", i+1, query)
|
||||
t.Logf("Answer: %s", result.Content)
|
||||
t.Logf("Reasoning: %s", result.Thought)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestConvertQueryResultData tests the type conversion functionality
|
||||
func TestConvertQueryResultData(t *testing.T) {
|
||||
// Test data structure
|
||||
type TestSchema struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Count int `json:"count"`
|
||||
Items []string `json:"items"`
|
||||
}
|
||||
|
||||
// Create a QueryResult with structured data
|
||||
testData := &TestSchema{
|
||||
Content: "Test content",
|
||||
Thought: "Test thought",
|
||||
Count: 5,
|
||||
Items: []string{"item1", "item2", "item3"},
|
||||
}
|
||||
|
||||
result := &QueryResult{
|
||||
Content: "Test content",
|
||||
Thought: "Test thought",
|
||||
Data: testData,
|
||||
}
|
||||
|
||||
// Test type conversion
|
||||
converted, err := ConvertQueryResultData[TestSchema](result)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, converted)
|
||||
assert.Equal(t, "Test content", converted.Content)
|
||||
assert.Equal(t, "Test thought", converted.Thought)
|
||||
assert.Equal(t, 5, converted.Count)
|
||||
assert.Equal(t, []string{"item1", "item2", "item3"}, converted.Items)
|
||||
|
||||
t.Logf("Successfully converted data: %+v", converted)
|
||||
}
|
||||
|
||||
// TestQueryResultDataConsistency tests that QueryResult.Data matches OutputSchema
|
||||
func TestQueryResultDataConsistency(t *testing.T) {
|
||||
querier := setupTestQuerier(t)
|
||||
|
||||
// Load test image
|
||||
screenshot, size, err := builtin.LoadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Define a simple test schema
|
||||
type TestGameInfo struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Rows int `json:"rows"`
|
||||
Cols int `json:"cols"`
|
||||
Icons []string `json:"icons"`
|
||||
}
|
||||
|
||||
outputSchema := TestGameInfo{}
|
||||
|
||||
// Prepare query options
|
||||
opts := &QueryOptions{
|
||||
Query: "请分析这个连连看游戏界面,告诉我有多少行多少列,有哪些不同类型的图案",
|
||||
Screenshot: screenshot,
|
||||
Size: size,
|
||||
OutputSchema: outputSchema,
|
||||
}
|
||||
|
||||
// Perform query
|
||||
result, err := querier.Query(context.Background(), opts)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotNil(t, result.Data)
|
||||
gameInfo, ok := result.Data.(*TestGameInfo)
|
||||
assert.True(t, ok)
|
||||
assert.NotNil(t, gameInfo)
|
||||
|
||||
// Verify that the converted data has the expected structure
|
||||
assert.NotEmpty(t, gameInfo.Content)
|
||||
assert.NotEmpty(t, gameInfo.Thought)
|
||||
assert.NotEmpty(t, gameInfo.Rows)
|
||||
assert.NotEmpty(t, gameInfo.Cols)
|
||||
assert.NotEmpty(t, gameInfo.Icons)
|
||||
}
|
||||
|
||||
// TestAutoTypeConversion tests that QueryResult.Data is automatically converted to the correct type
|
||||
func TestAutoTypeConversion(t *testing.T) {
|
||||
// Test data structure
|
||||
type TestSchema struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Count int `json:"count"`
|
||||
Items []string `json:"items"`
|
||||
}
|
||||
|
||||
// Simulate a JSON response from the model
|
||||
jsonResponse := `{
|
||||
"content": "Test content from model",
|
||||
"thought": "Test reasoning process",
|
||||
"count": 42,
|
||||
"items": ["apple", "banana", "cherry"]
|
||||
}`
|
||||
|
||||
// Test the parseCustomSchemaResult function directly
|
||||
result, err := parseCustomSchemaResult(jsonResponse, TestSchema{})
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotNil(t, result.Data)
|
||||
|
||||
// Verify that Data is automatically converted to the correct type
|
||||
typedData, ok := result.Data.(*TestSchema)
|
||||
assert.True(t, ok, "Data should be automatically converted to *TestSchema")
|
||||
assert.NotNil(t, typedData)
|
||||
|
||||
// Verify the content
|
||||
assert.Equal(t, "Test content from model", typedData.Content)
|
||||
assert.Equal(t, "Test reasoning process", typedData.Thought)
|
||||
assert.Equal(t, 42, typedData.Count)
|
||||
assert.Equal(t, []string{"apple", "banana", "cherry"}, typedData.Items)
|
||||
|
||||
// Verify that QueryResult fields are also populated
|
||||
assert.Equal(t, "Test content from model", result.Content)
|
||||
assert.Equal(t, "Test reasoning process", result.Thought)
|
||||
|
||||
t.Logf("Auto-converted data: %+v", typedData)
|
||||
}
|
||||
|
||||
// TestDirectTypeAssertion tests that users can directly use type assertion on QueryResult.Data
|
||||
func TestDirectTypeAssertion(t *testing.T) {
|
||||
// Test data structure
|
||||
type GameInfo struct {
|
||||
Content string `json:"content"`
|
||||
Thought string `json:"thought"`
|
||||
Rows int `json:"rows"`
|
||||
Cols int `json:"cols"`
|
||||
Icons []string `json:"icons"`
|
||||
}
|
||||
|
||||
// Simulate a JSON response
|
||||
jsonResponse := `{
|
||||
"content": "Game analysis complete",
|
||||
"thought": "Analyzed the game grid structure",
|
||||
"rows": 8,
|
||||
"cols": 10,
|
||||
"icons": ["apple", "banana", "cherry", "grape"]
|
||||
}`
|
||||
|
||||
// Test the parseCustomSchemaResult function
|
||||
result, err := parseCustomSchemaResult(jsonResponse, GameInfo{})
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotNil(t, result.Data)
|
||||
|
||||
// Users can now directly use type assertion
|
||||
if gameInfo, ok := result.Data.(*GameInfo); ok {
|
||||
assert.Equal(t, "Game analysis complete", gameInfo.Content)
|
||||
assert.Equal(t, "Analyzed the game grid structure", gameInfo.Thought)
|
||||
assert.Equal(t, 8, gameInfo.Rows)
|
||||
assert.Equal(t, 10, gameInfo.Cols)
|
||||
assert.Equal(t, []string{"apple", "banana", "cherry", "grape"}, gameInfo.Icons)
|
||||
t.Logf("Direct type assertion successful: %+v", gameInfo)
|
||||
} else {
|
||||
t.Fatalf("Type assertion failed, Data type: %T", result.Data)
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,17 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/cloudwego/eino/components/model"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
)
|
||||
|
||||
// extractJSONFromContent extracts JSON content from various formats in the response
|
||||
@@ -102,3 +110,29 @@ func extractJSONFromContent(content string) string {
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// callModelWithLogging is a common function to call model with logging and timing
|
||||
// It handles the common pattern of:
|
||||
// 1. Log request
|
||||
// 2. Start timing
|
||||
// 3. Call model.Generate
|
||||
// 4. Log timing and model info
|
||||
// 5. Log response
|
||||
func callModelWithLogging(ctx context.Context, model model.ToolCallingChatModel, history ConversationHistory, modelType option.LLMServiceType, operation string) (*schema.Message, error) {
|
||||
logRequest(history)
|
||||
|
||||
startTime := time.Now()
|
||||
defer func() {
|
||||
log.Debug().Float64("elapsed(s)", time.Since(startTime).Seconds()).
|
||||
Str("model", string(modelType)).
|
||||
Msgf("call model service for %s", operation)
|
||||
}()
|
||||
|
||||
message, err := model.Generate(ctx, history)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
logResponse(message)
|
||||
return message, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user