diff --git a/internal/version/VERSION b/internal/version/VERSION index 1c93a107..75cabaad 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2505221534 +v5.0.0-beta-2505221822 diff --git a/uixt/ai/asserter.go b/uixt/ai/asserter.go index e2654aec..6103a593 100644 --- a/uixt/ai/asserter.go +++ b/uixt/ai/asserter.go @@ -54,8 +54,8 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro } if modelConfig.ModelType == option.LLMServiceTypeUITARS { - asserter.systemPrompt += "\n\n" + uiTarsAssertionResponseFormat - } else if modelConfig.ModelType == option.LLMServiceTypeDoubaoVL { + asserter.systemPrompt += "\n" + uiTarsAssertionResponseFormat + } else { // define output format type OutputFormat struct { Thought string `json:"thought"` @@ -77,8 +77,6 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro Strict: false, }, } - } else { - asserter.systemPrompt += "\n\n" + defaultAssertionResponseJsonFormat } var err error @@ -134,16 +132,16 @@ Here is the assertion. Please tell whether it is truthy according to the screens // Call model service, generate response logRequest(a.history) startTime := time.Now() - resp, err := a.model.Generate(ctx, a.history) + message, err := a.model.Generate(ctx, a.history) log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()). Str("model", string(a.modelConfig.ModelType)).Msg("call model service for assertion") if err != nil { return nil, errors.Wrap(code.LLMRequestServiceError, err.Error()) } - logResponse(resp) + logResponse(message) // Parse result - result, err := parseAssertionResult(resp.Content) + result, err := parseAssertionResult(message.Content) if err != nil { return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error()) } @@ -151,7 +149,7 @@ Here is the assertion. Please tell whether it is truthy according to the screens // Append assistant message to history a.history.Append(&schema.Message{ Role: schema.Assistant, - Content: resp.Content, + Content: message.Content, }) return result, nil diff --git a/uixt/ai/asserter_prompts.go b/uixt/ai/asserter_prompts.go index 9ceb092d..1f661a87 100644 --- a/uixt/ai/asserter_prompts.go +++ b/uixt/ai/asserter_prompts.go @@ -3,15 +3,9 @@ package ai // Default assertion system prompt const defaultAssertionPrompt = `You are a senior testing engineer. User will give an assertion and a screenshot of a page. By carefully viewing the screenshot, please tell whether the assertion is truthy.` -// Default assertion response format -const defaultAssertionResponseJsonFormat = `Return in the following JSON format: -{ - pass: boolean, // whether the assertion is truthy - thought: string | null, // string, if the result is falsy, give the reason why it is falsy. Otherwise, put null. -}` - // UI-TARS assertion response format -const uiTarsAssertionResponseFormat = `## Output Json String Format +const uiTarsAssertionResponseFormat = ` +## Output Json String Format ` + "```" + ` "{ "pass": <>, diff --git a/uixt/ai/planner_parser.go b/uixt/ai/planner_parser.go index 79d475e8..62ceb241 100644 --- a/uixt/ai/planner_parser.go +++ b/uixt/ai/planner_parser.go @@ -27,14 +27,10 @@ func NewLLMContentParser(modelType option.LLMServiceType) LLMContentParser { return &UITARSContentParser{ systemPrompt: uiTarsPlanningPrompt, } - case option.LLMServiceTypeDoubaoVL: + default: return &JSONContentParser{ systemPrompt: defaultPlanningResponseJsonFormat, } - default: - return &DefaultContentParser{ - systemPrompt: defaultPlanningResponseStringFormat, - } } } @@ -433,50 +429,3 @@ func normalizeAction(action *ParsedAction) error { return nil } - -// DefaultContentParser parses the response as string format -type DefaultContentParser struct { - systemPrompt string -} - -func (p *DefaultContentParser) SystemPrompt() string { - return p.systemPrompt -} - -func (p *DefaultContentParser) Parse(content string, size types.Size) (*PlanningResult, error) { - content = strings.TrimSpace(content) - if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") { - content = strings.TrimPrefix(content, "```json") - content = strings.TrimSuffix(content, "```") - } - content = strings.TrimSpace(content) - - var response PlanningResult - if err := json.Unmarshal([]byte(content), &response); err != nil { - return nil, fmt.Errorf("failed to parse VLM response: %v", err) - } - - if response.Error != "" { - return nil, errors.New(response.Error) - } - - if len(response.NextActions) == 0 { - return nil, errors.New("no actions returned from VLM") - } - - // normalize actions - var normalizedActions []ParsedAction - for i := range response.NextActions { - // create a new variable, avoid implicit memory aliasing in for loop. - action := response.NextActions[i] - if err := normalizeAction(&action); err != nil { - return nil, errors.Wrap(err, "failed to normalize action") - } - normalizedActions = append(normalizedActions, action) - } - - return &PlanningResult{ - NextActions: normalizedActions, - ActionSummary: response.ActionSummary, - }, nil -} diff --git a/uixt/ai/planner_prompts.go b/uixt/ai/planner_prompts.go index 65ae6fa6..e9dcf91d 100644 --- a/uixt/ai/planner_prompts.go +++ b/uixt/ai/planner_prompts.go @@ -38,7 +38,3 @@ Thought: ... Action: ... ` + "```" + ` ` - -const defaultPlanningResponseStringFormat = ` -You are a helpful assistant. -`