
diff --git a/uixt/ai/asserter.go b/uixt/ai/asserter.go
index 58880c51..2760f6ff 100644
--- a/uixt/ai/asserter.go
+++ b/uixt/ai/asserter.go
@@ -30,8 +30,10 @@ type AssertOptions struct {
// AssertionResult represents the response from an AI assertion
type AssertionResult struct {
- Pass bool `json:"pass"`
- Thought string `json:"thought"`
+ Pass bool `json:"pass"`
+ Thought string `json:"thought"`
+ ModelName string `json:"model_name"` // model name used for assertion
+ Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
}
// Asserter handles assertion using different AI models
@@ -85,7 +87,7 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro
}
// Assert performs the assertion check on the screenshot
-func (a *Asserter) Assert(ctx context.Context, opts *AssertOptions) (*AssertionResult, error) {
+func (a *Asserter) Assert(ctx context.Context, opts *AssertOptions) (result *AssertionResult, err error) {
// Validate input parameters
if err := validateAssertionInput(opts); err != nil {
return nil, errors.Wrap(err, "validate assertion parameters failed")
@@ -132,8 +134,15 @@ Here is the assertion. Please tell whether it is truthy according to the screens
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
}
+ defer func() {
+ // Attach token usage only when a result is actually returned: error paths return a nil result, which must not be dereferenced
+ if result != nil && message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
+ result.Usage = message.ResponseMeta.Usage
+ }
+ }()
+
// Parse result
- result, err := parseAssertionResult(message.Content)
+ result, err = parseAssertionResult(message.Content, a.modelConfig.ModelType)
if err != nil {
return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
}
@@ -159,7 +168,7 @@ func validateAssertionInput(opts *AssertOptions) error {
}
// parseAssertionResult parses the model response into AssertionResponse
-func parseAssertionResult(content string) (*AssertionResult, error) {
+func parseAssertionResult(content string, modelType option.LLMServiceType) (*AssertionResult, error) {
var result AssertionResult
// Use the generic structured response parser
@@ -170,5 +179,6 @@ func parseAssertionResult(content string) (*AssertionResult, error) {
return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
}
+ result.ModelName = string(modelType)
return &result, nil
}
diff --git a/uixt/ai/asserter_test.go b/uixt/ai/asserter_test.go
index d9bdaaba..18fd3cdd 100644
--- a/uixt/ai/asserter_test.go
+++ b/uixt/ai/asserter_test.go
@@ -136,7 +136,7 @@ func TestParseAssertionResult(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- result, err := parseAssertionResult(tt.input)
+ result, err := parseAssertionResult(tt.input, option.DOUBAO_1_5_UI_TARS_250328)
if tt.shouldSucceed {
require.NoError(t, err)
assert.NotNil(t, result)
diff --git a/uixt/ai/querier.go b/uixt/ai/querier.go
index 75d5dbc3..54194fad 100644
--- a/uixt/ai/querier.go
+++ b/uixt/ai/querier.go
@@ -32,9 +32,11 @@ type QueryOptions struct {
// QueryResult represents the response from an AI query
type QueryResult struct {
- Content string `json:"content"` // The extracted content/information
- Thought string `json:"thought"` // The reasoning process
- Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided
+ Content string `json:"content"` // The extracted content/information
+ Thought string `json:"thought"` // The reasoning process
+ Data interface{} `json:"data,omitempty"` // Structured data when OutputSchema is provided
+ ModelName string `json:"model_name"` // model name used for query
+ Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
}
// Querier handles query operations using different AI models
@@ -89,7 +91,7 @@ func NewQuerier(ctx context.Context, modelConfig *ModelConfig) (*Querier, error)
// callModelWithLogging calls the model with automatic logging and timing
// Query performs the information extraction from the screenshot
-func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) {
+func (q *Querier) Query(ctx context.Context, opts *QueryOptions) (result *QueryResult, err error) {
// Validate input parameters
if err := validateQueryInput(opts); err != nil {
return nil, errors.Wrap(err, "validate query parameters failed")
@@ -141,8 +143,15 @@ Here is the query. Please extract the requested information from the screenshot.
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
}
+ defer func() {
+ // Attach token usage only when a result is actually returned: error paths return a nil result, which must not be dereferenced
+ if result != nil && message.ResponseMeta != nil && message.ResponseMeta.Usage != nil {
+ result.Usage = message.ResponseMeta.Usage
+ }
+ }()
+
// Parse result
- result, err := parseQueryResult(message.Content)
+ result, err = parseQueryResult(message.Content, q.modelConfig.ModelType)
if err != nil {
return nil, errors.Wrap(code.LLMParseQueryResponseError, err.Error())
}
@@ -168,18 +177,20 @@ func validateQueryInput(opts *QueryOptions) error {
}
// parseQueryResult parses the model response into QueryResult
-func parseQueryResult(content string) (*QueryResult, error) {
+func parseQueryResult(content string, modelType option.LLMServiceType) (*QueryResult, error) {
var result QueryResult
// Use the generic structured response parser with enhanced error recovery
if err := parseStructuredResponse(content, &result); err != nil {
// If parseStructuredResponse fails completely, treat content as plain text
return &QueryResult{
- Content: content,
- Thought: "Failed to parse response, returning raw content",
+ Content: content,
+ Thought: "Failed to parse response, returning raw content",
+ ModelName: string(modelType),
}, nil
}
+ result.ModelName = string(modelType)
return &result, nil
}
diff --git a/uixt/ai/querier_test.go b/uixt/ai/querier_test.go
index 3793f7bf..99a0fed1 100644
--- a/uixt/ai/querier_test.go
+++ b/uixt/ai/querier_test.go
@@ -87,7 +87,6 @@ func loadTestImage(t *testing.T, path string) (string, types.Size) {
}
// Test functions
-
func TestParseQueryResult(t *testing.T) {
tests := []struct {
name string
@@ -130,7 +129,7 @@ func TestParseQueryResult(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- result, err := parseQueryResult(tt.content)
+ result, err := parseQueryResult(tt.content, option.DOUBAO_1_5_UI_TARS_250328)
assert.NoError(t, err)
assert.Equal(t, tt.expected.Content, result.Content)
assert.Equal(t, tt.expected.Thought, result.Thought)
diff --git a/uixt/driver_ext_ai.go b/uixt/driver_ext_ai.go
index f7f21009..e6aaf723 100644
--- a/uixt/driver_ext_ai.go
+++ b/uixt/driver_ext_ai.go
@@ -155,8 +155,6 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
ScreenshotElapsed: screenshotElapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
- ModelName: planningResult.ModelName,
- Usage: planningResult.Usage,
PlanningResult: &planningResult.PlanningResult,
Thought: planningResult.Thought,
Content: planningResult.Content,
@@ -331,13 +329,11 @@ type PlanningExecutionResult struct {
// AIExecutionResult represents a unified result structure for all AI operations
type AIExecutionResult struct {
- Type string `json:"type"` // operation type: "query", "action", "assert"
- ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
- ScreenshotElapsed int64 `json:"screenshot_elapsed"` // screenshot elapsed time in milliseconds
- ImagePath string `json:"image_path"` // path to screenshot used for operation
- Resolution *types.Size `json:"resolution"` // screen resolution
- ModelName string `json:"model_name"` // model name used for operation
- Usage *schema.TokenUsage `json:"usage,omitempty"` // token usage statistics
+ Type string `json:"type"` // operation type: "query", "action", "assert"
+ ModelCallElapsed int64 `json:"model_call_elapsed"` // model call elapsed time in milliseconds
+ ScreenshotElapsed int64 `json:"screenshot_elapsed"` // screenshot elapsed time in milliseconds
+ ImagePath string `json:"image_path"` // path to screenshot used for operation
+ Resolution *types.Size `json:"resolution"` // screen resolution
// Operation-specific results (only one will be populated based on Type)
QueryResult *ai.QueryResult `json:"query_result,omitempty"` // for ai_query operations