refactor: json asserter

This commit is contained in:
lilong.129
2025-05-22 18:11:47 +08:00
parent c377664518
commit 3b77ade24f
5 changed files with 10 additions and 73 deletions

View File

@@ -1 +1 @@
v5.0.0-beta-2505221534
v5.0.0-beta-2505221822

View File

@@ -54,8 +54,8 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro
}
if modelConfig.ModelType == option.LLMServiceTypeUITARS {
asserter.systemPrompt += "\n\n" + uiTarsAssertionResponseFormat
} else if modelConfig.ModelType == option.LLMServiceTypeDoubaoVL {
asserter.systemPrompt += "\n" + uiTarsAssertionResponseFormat
} else {
// define output format
type OutputFormat struct {
Thought string `json:"thought"`
@@ -77,8 +77,6 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro
Strict: false,
},
}
} else {
asserter.systemPrompt += "\n\n" + defaultAssertionResponseJsonFormat
}
var err error
@@ -134,16 +132,16 @@ Here is the assertion. Please tell whether it is truthy according to the screens
// Call model service, generate response
logRequest(a.history)
startTime := time.Now()
resp, err := a.model.Generate(ctx, a.history)
message, err := a.model.Generate(ctx, a.history)
log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()).
Str("model", string(a.modelConfig.ModelType)).Msg("call model service for assertion")
if err != nil {
return nil, errors.Wrap(code.LLMRequestServiceError, err.Error())
}
logResponse(resp)
logResponse(message)
// Parse result
result, err := parseAssertionResult(resp.Content)
result, err := parseAssertionResult(message.Content)
if err != nil {
return nil, errors.Wrap(code.LLMParseAssertionResponseError, err.Error())
}
@@ -151,7 +149,7 @@ Here is the assertion. Please tell whether it is truthy according to the screens
// Append assistant message to history
a.history.Append(&schema.Message{
Role: schema.Assistant,
Content: resp.Content,
Content: message.Content,
})
return result, nil

View File

@@ -3,15 +3,9 @@ package ai
// defaultAssertionPrompt is the default system prompt for assertions. It
// tells the model that it will receive an assertion together with a
// screenshot and must judge, from the screenshot, whether the assertion is
// truthy.
const defaultAssertionPrompt = `You are a senior testing engineer. User will give an assertion and a screenshot of a page. By carefully viewing the screenshot, please tell whether the assertion is truthy.`
// defaultAssertionResponseJsonFormat is the response-format instruction for
// assertions: it asks the model to reply with an object holding a boolean
// `pass` and a `thought` that is either a string or null.
//
// Note that the example embedded in the prompt is illustrative rather than
// strict JSON (unquoted keys, inline `//` remarks, trailing comma). The `//`
// fragments inside the raw string are part of the prompt text, not Go comments.
const defaultAssertionResponseJsonFormat = `Return in the following JSON format:
{
pass: boolean, // whether the assertion is truthy
thought: string | null, // string, if the result is falsy, give the reason why it is falsy. Otherwise, put null.
}`
// UI-TARS assertion response format
const uiTarsAssertionResponseFormat = `## Output Json String Format
const uiTarsAssertionResponseFormat = `
## Output Json String Format
` + "```" + `
"{
"pass": <<is a boolean value from the enum [true, false], true means the assertion is truthy>>,

View File

@@ -27,14 +27,10 @@ func NewLLMContentParser(modelType option.LLMServiceType) LLMContentParser {
return &UITARSContentParser{
systemPrompt: uiTarsPlanningPrompt,
}
case option.LLMServiceTypeDoubaoVL:
default:
return &JSONContentParser{
systemPrompt: defaultPlanningResponseJsonFormat,
}
default:
return &DefaultContentParser{
systemPrompt: defaultPlanningResponseStringFormat,
}
}
}
@@ -433,50 +429,3 @@ func normalizeAction(action *ParsedAction) error {
return nil
}
// DefaultContentParser is a content parser that carries a system prompt and
// decodes model replies as JSON into a PlanningResult. Despite the "string
// format" wording used elsewhere, its Parse method expects the reply to be a
// JSON document, optionally wrapped in a ```json fence.
type DefaultContentParser struct {
// systemPrompt is the prompt text returned verbatim by SystemPrompt.
systemPrompt string
}
// SystemPrompt returns the system prompt this parser was constructed with.
func (p *DefaultContentParser) SystemPrompt() string {
return p.systemPrompt
}
// Parse decodes a model reply into a PlanningResult.
//
// The reply is trimmed of surrounding whitespace and, when it is wrapped in a
// ```json ... ``` fence, the fence is stripped before the remaining text is
// unmarshalled as JSON into a PlanningResult. Each decoded action is then
// passed through normalizeAction, and the normalized actions are returned
// together with the decoded ActionSummary.
//
// The size argument is not used by this parser.
//
// An error is returned when:
//   - the content is not valid JSON for a PlanningResult (the decode error is
//     wrapped, so it remains reachable through the error chain);
//   - the decoded result carries a non-empty Error field;
//   - the decoded result contains no actions;
//   - normalizeAction rejects one of the actions.
func (p *DefaultContentParser) Parse(content string, size types.Size) (*PlanningResult, error) {
	content = strings.TrimSpace(content)
	// The fence is only stripped when both the opening and the closing marker
	// are present; a half-fenced reply is handed to the JSON decoder as-is.
	if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
		content = strings.TrimPrefix(content, "```json")
		content = strings.TrimSuffix(content, "```")
	}
	content = strings.TrimSpace(content)

	var response PlanningResult
	if err := json.Unmarshal([]byte(content), &response); err != nil {
		// %w keeps the underlying decode error in the chain; the rendered
		// message is the same as with %v.
		return nil, fmt.Errorf("failed to parse VLM response: %w", err)
	}

	if response.Error != "" {
		return nil, errors.New(response.Error)
	}

	if len(response.NextActions) == 0 {
		return nil, errors.New("no actions returned from VLM")
	}

	// The output has exactly one entry per decoded action, so size it up front.
	normalizedActions := make([]ParsedAction, 0, len(response.NextActions))
	for i := range response.NextActions {
		// Normalize a copy so the decoded response is left untouched and no
		// loop-variable address is shared between iterations.
		action := response.NextActions[i]
		if err := normalizeAction(&action); err != nil {
			return nil, errors.Wrap(err, "failed to normalize action")
		}
		normalizedActions = append(normalizedActions, action)
	}

	return &PlanningResult{
		NextActions:   normalizedActions,
		ActionSummary: response.ActionSummary,
	}, nil
}

View File

@@ -38,7 +38,3 @@ Thought: ...
Action: ...
` + "```" + `
`
// defaultPlanningResponseStringFormat is a minimal, generic system prompt. It
// contains no output-format instructions for the model.
const defaultPlanningResponseStringFormat = `
You are a helpful assistant.
`