mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-11 18:11:21 +08:00
541 lines
17 KiB
Go
541 lines
17 KiB
Go
package ai
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"reflect"
|
|
|
|
"github.com/cloudwego/eino-ext/components/model/ark"
|
|
"github.com/cloudwego/eino/components/model"
|
|
"github.com/cloudwego/eino/schema"
|
|
"github.com/getkin/kin-openapi/openapi3gen"
|
|
"github.com/pkg/errors"
|
|
arkModel "github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
|
|
|
|
"github.com/httprunner/httprunner/v5/code"
|
|
"github.com/httprunner/httprunner/v5/internal/json"
|
|
"github.com/httprunner/httprunner/v5/uixt/option"
|
|
"github.com/httprunner/httprunner/v5/uixt/types"
|
|
)
|
|
|
|
// IQuerier interface defines the contract for query operations
|
|
type IQuerier interface {
|
|
Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error)
|
|
}
|
|
|
|
// QueryOptions represents the input options for query
|
|
type QueryOptions struct {
|
|
Query string `json:"query"` // The query text to extract information
|
|
Screenshot string `json:"screenshot"` // Base64 encoded screenshot
|
|
Size types.Size `json:"size"` // Screen dimensions
|
|
OutputSchema interface{} `json:"outputSchema,omitempty"` // Custom output schema for structured response
|
|
}
|
|
|
|
// QueryResult represents the response from an AI query
|
|
type QueryResult struct {
|
|
Content string `json:"content"` // The extracted content/information
|
|
Thought string `json:"thought"` // The reasoning process
|
|
Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided
|
|
ModelName string `json:"model_name"` // model name used for query
|
|
Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
|
|
}
|
|
|
|
// Querier handles query operations using different AI models
|
|
type Querier struct {
|
|
modelConfig *ModelConfig
|
|
model model.ToolCallingChatModel
|
|
systemPrompt string
|
|
history ConversationHistory
|
|
}
|
|
|
|
// NewQuerier creates a new Querier instance
|
|
func NewQuerier(ctx context.Context, modelConfig *ModelConfig) (*Querier, error) {
|
|
querier := &Querier{
|
|
modelConfig: modelConfig,
|
|
systemPrompt: defaultQueryPrompt,
|
|
}
|
|
|
|
if option.IS_UI_TARS(modelConfig.ModelType) {
|
|
querier.systemPrompt += "\n" + uiTarsQueryResponseFormat
|
|
} else {
|
|
// define default output format
|
|
type OutputFormat struct {
|
|
Content string `json:"content"`
|
|
Thought string `json:"thought"`
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(&OutputFormat{}, nil)
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
|
}
|
|
// set structured response format
|
|
modelConfig.ChatModelConfig.ResponseFormat = &ark.ResponseFormat{
|
|
Type: arkModel.ResponseFormatJSONSchema,
|
|
JSONSchema: &arkModel.ResponseFormatJSONSchemaJSONSchemaParam{
|
|
Name: "query_result",
|
|
Description: "data that describes query result",
|
|
Schema: outputFormatSchema.Value,
|
|
Strict: false,
|
|
},
|
|
}
|
|
}
|
|
|
|
var err error
|
|
querier.model, err = ark.NewChatModel(ctx, modelConfig.ChatModelConfig)
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
|
}
|
|
|
|
return querier, nil
|
|
}
|
|
|
|
// NOTE: callModelWithLogging (calls the model with automatic logging and timing) is not defined in this file; it is expected to be defined elsewhere in this package.
|
|
|
|
// Query performs the information extraction from the screenshot
|
|
func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (result *QueryResult, err error) {
|
|
// Validate input parameters
|
|
if err := validateQueryInput(opts); err != nil {
|
|
return nil, errors.Wrap(err, "validate query parameters failed")
|
|
}
|
|
|
|
// Handle custom output schema if provided
|
|
if opts.OutputSchema != nil {
|
|
return q.queryWithCustomSchema(ctx, opts)
|
|
}
|
|
|
|
// Reset history for each new query
|
|
q.history = ConversationHistory{
|
|
{
|
|
Role: schema.System,
|
|
Content: q.systemPrompt,
|
|
},
|
|
}
|
|
|
|
// Create user message with screenshot and query
|
|
userMsg := &schema.Message{
|
|
Role: schema.User,
|
|
MultiContent: []schema.ChatMessagePart{
|
|
{
|
|
Type: schema.ChatMessagePartTypeImageURL,
|
|
ImageURL: &schema.ChatMessageImageURL{
|
|
URL: opts.Screenshot,
|
|
Detail: schema.ImageURLDetailAuto,
|
|
},
|
|
},
|
|
{
|
|
Type: schema.ChatMessagePartTypeText,
|
|
Text: fmt.Sprintf(`
|
|
Here is the query. Please extract the requested information from the screenshot.
|
|
=====================================
|
|
%s
|
|
=====================================
|
|
`, opts.Query),
|
|
},
|
|
},
|
|
}
|
|
|
|
// Append user message to history
|
|
q.history.Append(userMsg)
|
|
|
|
// Call model service with logging
|
|
message, err := callModelWithLogging(ctx, q.model, q.history,
|
|
q.modelConfig.ModelType, "query")
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
|
|
}
|
|
|
|
defer func() {
|
|
// Extract usage information if available
|
|
if message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
|
|
result.Usage = message.ResponseMeta.Usage
|
|
}
|
|
}()
|
|
|
|
// Parse result
|
|
result, err = parseQueryResult(message.Content, q.modelConfig.ModelType)
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error())
|
|
}
|
|
|
|
// Append assistant message to history
|
|
q.history.Append(&schema.Message{
|
|
Role: schema.Assistant,
|
|
Content: message.Content,
|
|
})
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// validateQueryInput validates the input parameters for query
|
|
func validateQueryInput(opts *QueryOptions) error {
|
|
if opts.Query == "" {
|
|
return errors.Wrap(code.LLMPrepareRequestError, "query text is required")
|
|
}
|
|
if opts.Screenshot == "" {
|
|
return errors.Wrap(code.LLMPrepareRequestError, "screenshot is required")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// parseQueryResult parses the model response into QueryResult
|
|
func parseQueryResult(content string, modelType option.LLMServiceType) (*QueryResult, error) {
|
|
var result QueryResult
|
|
|
|
// Use the generic structured response parser with enhanced error recovery
|
|
if err := parseStructuredResponse(content, &result); err != nil {
|
|
// If parseStructuredResponse fails completely, treat content as plain text
|
|
return &QueryResult{
|
|
Content: content,
|
|
Thought: "Failed to parse response, returning raw content",
|
|
ModelName: string(modelType),
|
|
}, nil
|
|
}
|
|
|
|
result.ModelName = string(modelType)
|
|
return &result, nil
|
|
}
|
|
|
|
// queryWithCustomSchema performs query with custom output schema
|
|
func (q *Querier) queryWithCustomSchema(ctx context.Context, opts *QueryOptions) (*QueryResult, error) {
|
|
// Create a new model config with custom schema
|
|
modelConfig := *q.modelConfig
|
|
|
|
if !option.IS_UI_TARS(modelConfig.ModelType) {
|
|
// Generate schema from the provided output schema
|
|
outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(opts.OutputSchema, nil)
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
|
}
|
|
|
|
// Create custom response format with the provided schema
|
|
modelConfig.ChatModelConfig.ResponseFormat = &ark.ResponseFormat{
|
|
Type: arkModel.ResponseFormatJSONSchema,
|
|
JSONSchema: &arkModel.ResponseFormatJSONSchemaJSONSchemaParam{
|
|
Name: "custom_query_result",
|
|
Description: "custom structured data response",
|
|
Schema: outputFormatSchema.Value,
|
|
Strict: false,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Create a new model instance with custom schema
|
|
model, err := ark.NewChatModel(ctx, modelConfig.ChatModelConfig)
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMPrepareRequestError, err.Error())
|
|
}
|
|
|
|
// Reset history for each new query
|
|
systemPrompt := q.systemPrompt
|
|
if option.IS_UI_TARS(modelConfig.ModelType) {
|
|
systemPrompt += "\n" + uiTarsQueryResponseFormat
|
|
} else {
|
|
// Add instruction for structured output
|
|
systemPrompt += "\n\nPlease respond with structured data according to the specified schema. Include both the structured data and your reasoning process."
|
|
}
|
|
|
|
history := ConversationHistory{
|
|
{
|
|
Role: schema.System,
|
|
Content: systemPrompt,
|
|
},
|
|
}
|
|
|
|
// Create user message with screenshot and query
|
|
userMsg := &schema.Message{
|
|
Role: schema.User,
|
|
MultiContent: []schema.ChatMessagePart{
|
|
{
|
|
Type: schema.ChatMessagePartTypeImageURL,
|
|
ImageURL: &schema.ChatMessageImageURL{
|
|
URL: opts.Screenshot,
|
|
Detail: schema.ImageURLDetailAuto,
|
|
},
|
|
},
|
|
{
|
|
Type: schema.ChatMessagePartTypeText,
|
|
Text: fmt.Sprintf(`
|
|
Here is the query. Please extract the requested information from the screenshot and return it in the specified structured format.
|
|
=====================================
|
|
%s
|
|
=====================================
|
|
`, opts.Query),
|
|
},
|
|
},
|
|
}
|
|
|
|
// Append user message to history
|
|
history.Append(userMsg)
|
|
|
|
// Call model service with logging
|
|
message, err := callModelWithLogging(ctx, model, history, modelConfig.ModelType, "custom schema query")
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
|
|
}
|
|
|
|
// Parse result with custom schema
|
|
result, err := parseCustomSchemaResult(message.Content, opts.OutputSchema)
|
|
if err != nil {
|
|
return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error())
|
|
}
|
|
|
|
// Append assistant message to history
|
|
q.history.Append(&schema.Message{
|
|
Role: schema.Assistant,
|
|
Content: message.Content,
|
|
})
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// setDefaultFieldValue writes defaultValue into the field called fieldName of
// structValue via reflection.
//
// The write only happens when the field exists, is settable and is of string
// kind; in every other case the call is a no-op. structValue must be a struct
// value, since it is queried with FieldByName.
func setDefaultFieldValue(structValue reflect.Value, fieldName, defaultValue string) {
	target := structValue.FieldByName(fieldName)
	if !target.IsValid() || !target.CanSet() {
		return
	}
	if target.Kind() != reflect.String {
		return
	}
	target.SetString(defaultValue)
}
|
|
|
|
// ensureDefaultValues ensures that Content and Thought fields have default values if empty
|
|
func ensureDefaultValues(result *QueryResult, structuredData interface{}) {
|
|
const (
|
|
defaultContent = "Structured data extracted successfully"
|
|
defaultThought = "Parsed structured response according to custom schema"
|
|
)
|
|
|
|
// Set defaults for QueryResult
|
|
if result.Content == "" {
|
|
result.Content = defaultContent
|
|
}
|
|
if result.Thought == "" {
|
|
result.Thought = defaultThought
|
|
}
|
|
|
|
// Set defaults in structured data if it's a pointer to struct
|
|
if structuredData != nil {
|
|
if structValue := reflect.ValueOf(structuredData); structValue.Kind() == reflect.Ptr {
|
|
if elem := structValue.Elem(); elem.IsValid() && elem.Kind() == reflect.Struct {
|
|
if result.Content == defaultContent {
|
|
setDefaultFieldValue(elem, "Content", defaultContent)
|
|
}
|
|
if result.Thought == defaultThought {
|
|
setDefaultFieldValue(elem, "Thought", defaultThought)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// parseCustomSchemaResult parses the model response with custom schema
|
|
func parseCustomSchemaResult(content string, outputSchema interface{}) (*QueryResult, error) {
|
|
// Extract JSON content from response
|
|
jsonContent := extractJSONFromContent(content)
|
|
if jsonContent == "" {
|
|
// If no JSON found, treat the entire content as the result
|
|
return &QueryResult{
|
|
Content: content,
|
|
Thought: "Direct response from model",
|
|
}, nil
|
|
}
|
|
|
|
// Handle OpenAI structured output properties wrapper
|
|
actualJSONContent := unwrapPropertiesIfNeeded(jsonContent)
|
|
|
|
// Try direct unmarshaling first (most efficient)
|
|
if result, err := tryDirectUnmarshal(actualJSONContent, outputSchema); err == nil {
|
|
return result, nil
|
|
}
|
|
|
|
// Fallback: try generic parsing and conversion
|
|
if result, err := tryGenericParsingAndConversion(actualJSONContent, outputSchema); err == nil {
|
|
return result, nil
|
|
}
|
|
|
|
// Final fallback: treat as plain text
|
|
return &QueryResult{
|
|
Content: content,
|
|
Thought: "Failed to parse as structured data, returning raw content",
|
|
}, nil
|
|
}
|
|
|
|
// unwrapPropertiesIfNeeded handles the OpenAI structured output "properties"
// wrapper.
//
// When jsonContent is a JSON object whose "properties" member is itself a
// JSON object, the re-encoded inner object is returned. In every other case
// (invalid JSON, no "properties" member, or a "properties" member that is not
// an object, e.g. an array, string or null) jsonContent is returned unchanged,
// so payloads that merely have a field named "properties" are not destroyed.
func unwrapPropertiesIfNeeded(jsonContent string) string {
	var wrapper map[string]interface{}
	if err := json.Unmarshal([]byte(jsonContent), &wrapper); err != nil {
		return jsonContent
	}

	// Only an object can be a wrapper around the real payload.
	properties, isObject := wrapper["properties"].(map[string]interface{})
	if !isObject {
		return jsonContent
	}

	unwrapped, err := json.Marshal(properties)
	if err != nil {
		return jsonContent
	}
	return string(unwrapped)
}
|
|
|
|
// tryDirectUnmarshal attempts to unmarshal directly into the schema type
|
|
func tryDirectUnmarshal(jsonContent string, outputSchema interface{}) (*QueryResult, error) {
|
|
// Create a new instance of the schema type
|
|
newInstance := createSchemaInstance(outputSchema)
|
|
|
|
// Try to unmarshal directly into the schema type
|
|
if err := json.Unmarshal([]byte(jsonContent), newInstance); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Create result with the typed data
|
|
result := &QueryResult{Data: newInstance}
|
|
|
|
// Extract content and thought fields
|
|
extractContentAndThoughtFromStruct(result, newInstance)
|
|
if result.Content == "" && result.Thought == "" {
|
|
extractContentAndThoughtFromJSON(result, jsonContent)
|
|
}
|
|
|
|
// Ensure default values are set
|
|
ensureDefaultValues(result, newInstance)
|
|
return result, nil
|
|
}
|
|
|
|
// tryGenericParsingAndConversion attempts generic parsing and type conversion
|
|
func tryGenericParsingAndConversion(jsonContent string, outputSchema interface{}) (*QueryResult, error) {
|
|
var structuredData interface{}
|
|
if err := json.Unmarshal([]byte(jsonContent), &structuredData); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Try to convert to the expected schema type
|
|
if convertedData, err := convertToSchemaType(structuredData, outputSchema); err == nil {
|
|
result := &QueryResult{Data: convertedData}
|
|
extractContentAndThoughtFromMap(result, structuredData)
|
|
ensureDefaultValues(result, convertedData)
|
|
return result, nil
|
|
}
|
|
|
|
// If conversion failed, store the generic data
|
|
if dataMap, ok := structuredData.(map[string]interface{}); ok {
|
|
result := &QueryResult{Data: structuredData}
|
|
extractContentAndThoughtFromMap(result, dataMap)
|
|
ensureDefaultValues(result, nil)
|
|
return result, nil
|
|
}
|
|
|
|
return nil, errors.New("failed to parse structured data")
|
|
}
|
|
|
|
// createSchemaInstance allocates a new zero value of the schema's type and
// returns a pointer to it.
//
// When outputSchema is itself a pointer, the pointed-to type is used, so both
// T and *T schemas yield a *T. outputSchema must not be an untyped nil.
func createSchemaInstance(outputSchema interface{}) interface{} {
	target := reflect.TypeOf(outputSchema)
	if target.Kind() != reflect.Ptr {
		return reflect.New(target).Interface()
	}
	// One level of pointer is stripped.
	return reflect.New(target.Elem()).Interface()
}
|
|
|
|
// extractContentAndThoughtFromStruct extracts content and thought from struct fields using reflection
|
|
func extractContentAndThoughtFromStruct(result *QueryResult, structData interface{}) {
|
|
schemaValue := reflect.ValueOf(structData).Elem()
|
|
|
|
if contentField := schemaValue.FieldByName("Content"); contentField.IsValid() && contentField.Kind() == reflect.String {
|
|
result.Content = contentField.String()
|
|
}
|
|
|
|
if thoughtField := schemaValue.FieldByName("Thought"); thoughtField.IsValid() && thoughtField.Kind() == reflect.String {
|
|
result.Thought = thoughtField.String()
|
|
}
|
|
}
|
|
|
|
// extractContentAndThoughtFromJSON extracts content and thought from JSON map
|
|
func extractContentAndThoughtFromJSON(result *QueryResult, jsonContent string) {
|
|
var dataMap map[string]interface{}
|
|
if err := json.Unmarshal([]byte(jsonContent), &dataMap); err == nil {
|
|
extractContentAndThoughtFromMap(result, dataMap)
|
|
}
|
|
}
|
|
|
|
// extractContentAndThoughtFromMap extracts content and thought from a map
|
|
func extractContentAndThoughtFromMap(result *QueryResult, dataMap interface{}) {
|
|
if mapData, ok := dataMap.(map[string]interface{}); ok {
|
|
if content, exists := mapData["content"]; exists {
|
|
if contentStr, ok := content.(string); ok {
|
|
result.Content = contentStr
|
|
}
|
|
}
|
|
if thought, exists := mapData["thought"]; exists {
|
|
if thoughtStr, ok := thought.(string); ok {
|
|
result.Thought = thoughtStr
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// convertToSchemaType converts generic data to the specified schema type
|
|
func convertToSchemaType(data interface{}, outputSchema interface{}) (interface{}, error) {
|
|
// Get the type of the output schema
|
|
schemaType := reflect.TypeOf(outputSchema)
|
|
if schemaType.Kind() == reflect.Ptr {
|
|
schemaType = schemaType.Elem()
|
|
}
|
|
|
|
// Create a new instance of the schema type
|
|
newInstance := reflect.New(schemaType).Interface()
|
|
|
|
// Convert via JSON marshaling/unmarshaling
|
|
jsonData, err := json.Marshal(data)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to marshal data to JSON")
|
|
}
|
|
|
|
if err := json.Unmarshal(jsonData, newInstance); err != nil {
|
|
return nil, errors.Wrap(err, "failed to unmarshal data to target type")
|
|
}
|
|
|
|
return newInstance, nil
|
|
}
|
|
|
|
// ConvertQueryResultData converts QueryResult.Data to the specified type T
|
|
// This is a helper function for type-safe conversion of the structured data
|
|
//
|
|
// Note: When using QueryOptions.OutputSchema, the Data field is automatically
|
|
// converted to the correct type, so this function is typically not needed.
|
|
// This function is mainly useful for:
|
|
// 1. Converting data when OutputSchema was not used
|
|
// 2. Converting to a different type than the original OutputSchema
|
|
// 3. Handling legacy code or edge cases
|
|
func ConvertQueryResultData[T any](result *QueryResult) (*T, error) {
|
|
if result.Data == nil {
|
|
return nil, errors.New("no structured data available")
|
|
}
|
|
|
|
// If Data is already of the correct type, return it directly
|
|
if typedData, ok := result.Data.(*T); ok {
|
|
return typedData, nil
|
|
}
|
|
|
|
// If Data is a pointer to the correct type, dereference and return
|
|
if reflect.TypeOf(result.Data).Kind() == reflect.Ptr {
|
|
if typedData, ok := result.Data.(*T); ok {
|
|
return typedData, nil
|
|
}
|
|
// Try to get the value that the pointer points to
|
|
dataValue := reflect.ValueOf(result.Data)
|
|
if dataValue.Kind() == reflect.Ptr && !dataValue.IsNil() {
|
|
elem := dataValue.Elem()
|
|
if elem.Type() == reflect.TypeOf((*T)(nil)).Elem() {
|
|
typedData := elem.Interface().(T)
|
|
return &typedData, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// Fallback: try to convert via JSON marshaling/unmarshaling
|
|
jsonData, err := json.Marshal(result.Data)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to marshal data to JSON")
|
|
}
|
|
|
|
var converted T
|
|
if err := json.Unmarshal(jsonData, &converted); err != nil {
|
|
return nil, errors.Wrap(err, "failed to unmarshal data to target type")
|
|
}
|
|
|
|
return &converted, nil
|
|
}
|