refactor: add LLMServiceTypeDoubaoVL

2026-05-10 17:43:00 +08:00 · 2025-05-22 15:34:11 +08:00
parent 269fe2de23
commit c377664518
7 changed files with 20 additions and 9 deletions
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2505212251
+v5.0.0-beta-2505221534
--- a/mcphost/chat.go
+++ b/mcphost/chat.go
@@ -25,7 +25,7 @@ import (
 // NewChat creates a new chat session
 func (h *MCPHost) NewChat(ctx context.Context) (*Chat, error) {
 	// Get model config from environment variables
-	modelConfig, err := ai.GetModelConfig(option.LLMServiceTypeGPT)
+	modelConfig, err := ai.GetModelConfig(option.LLMServiceTypeUITARS)
 	if err != nil {
 		return nil, err
 	}
--- a/uixt/ai/ai.go
+++ b/uixt/ai/ai.go
@@ -95,13 +95,16 @@ func GetModelConfig(modelType option.LLMServiceType) (*ModelConfig, error) {
 			"env %s missed", EnvModelName)
 	}

-	temperature := float32(0.7)
+	// https://www.volcengine.com/docs/82379/1536429
+	temperature := float32(0)
+	topP := float32(0.7)
 	modelConfig := &openai.ChatModelConfig{
 		BaseURL:     openaiBaseURL,
 		APIKey:      openaiAPIKey,
 		Model:       modelName,
 		Timeout:     defaultTimeout,
 		Temperature: &temperature,
+		TopP:        &topP,
 	}

 	// log config info
--- a/uixt/ai/asserter.go
+++ b/uixt/ai/asserter.go
@@ -55,7 +55,7 @@ func NewAsserter(ctx context.Context, modelConfig *ModelConfig) (*Asserter, erro

 	if modelConfig.ModelType == option.LLMServiceTypeUITARS {
 		asserter.systemPrompt += "\n\n" + uiTarsAssertionResponseFormat
-	} else if modelConfig.ModelType == option.LLMServiceTypeGPT {
+	} else if modelConfig.ModelType == option.LLMServiceTypeDoubaoVL {
 		// define output format
 		type OutputFormat struct {
 			Thought string `json:"thought"`
--- a/uixt/ai/planner_parser.go
+++ b/uixt/ai/planner_parser.go
@@ -27,7 +27,7 @@ func NewLLMContentParser(modelType option.LLMServiceType) LLMContentParser {
 		return &UITARSContentParser{
 			systemPrompt: uiTarsPlanningPrompt,
 		}
-	case option.LLMServiceTypeGPT:
+	case option.LLMServiceTypeDoubaoVL:
 		return &JSONContentParser{
 			systemPrompt: defaultPlanningResponseJsonFormat,
 		}
--- a/uixt/ai/planner_prompts.go
+++ b/uixt/ai/planner_prompts.go
@@ -30,7 +30,14 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
 `

 // system prompt for JSONContentParser
-const defaultPlanningResponseJsonFormat = `You are a versatile professional in software UI automation.`
+const defaultPlanningResponseJsonFormat = `You are a versatile professional in software UI automation.
+
+## Output Format
+` + "```" + `
+Thought: ...
+Action: ...
+` + "```" + `
+`

 const defaultPlanningResponseStringFormat = `
 You are a helpful assistant.
--- a/uixt/option/ai.go
+++ b/uixt/option/ai.go
@@ -31,9 +31,10 @@ func WithCVService(service CVServiceType) AIServiceOption {
 type LLMServiceType string

 const (
-	LLMServiceTypeUITARS LLMServiceType = "ui-tars"
-	LLMServiceTypeGPT    LLMServiceType = "gpt"
-	LLMServiceTypeQwenVL LLMServiceType = "qwen-vl"
+	LLMServiceTypeUITARS   LLMServiceType = "ui-tars" // does not support function calling or JSON responses
+	LLMServiceTypeDoubaoVL LLMServiceType = "doubao-vision"
+	LLMServiceTypeGPT      LLMServiceType = "gpt"
+	LLMServiceTypeQwenVL   LLMServiceType = "qwen-vl"
 )

 func WithLLMService(modelType LLMServiceType) AIServiceOption {