From fbc888655ff7a7163294a8a0f89f48a7eaa500e8 Mon Sep 17 00:00:00 2001
From: "lilong.129" <lilong.129@bytedance.com>
Date: Wed, 11 Jun 2025 12:18:31 +0800
Subject: [PATCH] feat: optimize ILLMService interface to support different
 models for each component

- Add LLMServiceConfig to support mixed model configuration
- Enable Planner, Asserter, Querier to use different optimal models
- Provide recommended configurations for various use cases
- Maintain backward compatibility with existing API
- Update documentation to reflect current state without iteration history
- Merge test files and add comprehensive configuration tests
- Resolve circular dependency by moving config to option package
---
 internal/version/VERSION |   2 +-
 uixt/ai/README.md        | 964 +++++++++++----------------------------
 uixt/ai/ai.go            |  29 +-
 uixt/ai/ai_test.go       |  79 ++++
 uixt/option/ai.go        |  63 +++
 uixt/sdk.go              |  15 +-
 6 files changed, 444 insertions(+), 708 deletions(-)

diff --git a/internal/version/VERSION b/internal/version/VERSION
index 581c7dd8..287ad371 100644
--- a/internal/version/VERSION
+++ b/internal/version/VERSION
@@ -1 +1 @@
-v5.0.0-beta-2506111115
+v5.0.0-beta-2506111218
diff --git a/uixt/ai/README.md b/uixt/ai/README.md
index 0baebd37..577ecb46 100644
--- a/uixt/ai/README.md
+++ b/uixt/ai/README.md
@@ -1,47 +1,118 @@
-# HttpRunner AI 模块文档
+# HttpRunner UIXT AI 模块
 
-## 📖 概述
+## 🚀 概述
 
-HttpRunner AI 模块是一个集成了多种人工智能服务的 UI 自动化智能引擎，提供基于大语言模型（LLM）的智能规划、断言验证、信息查询、计算机视觉识别等功能，实现真正的智能化 UI 自动化测试。
+HttpRunner UIXT AI 模块是一个集成了多种人工智能服务的 UI 自动化智能引擎，提供基于大语言模型（LLM）的智能规划、断言验证、信息查询、计算机视觉识别等功能，实现真正的智能化 UI 自动化测试。
 
-## 🎯 核心功能
+## ✨ 核心特性
 
-### 1. 智能规划 (Planning)
-- **视觉语言模型驱动**: 基于屏幕截图和自然语言指令生成操作序列
-- **多模型支持**: 支持 UI-TARS、豆包视觉等多种专业模型
-- **上下文感知**: 维护对话历史，支持多轮交互规划
-- **动作解析**: 将模型输出解析为标准化的工具调用
+### 🎯 智能组件
 
-### 2. 智能断言 (Assertion)
-- **视觉验证**: 基于屏幕截图验证断言条件
-- **自然语言断言**: 支持自然语言描述的断言条件
-- **结构化输出**: 返回标准化的断言结果和推理过程
+- **智能规划器 (Planner)**: 基于视觉语言模型进行 UI 操作规划
+- **智能断言器 (Asserter)**: 基于视觉语言模型进行断言验证
+- **智能查询器 (Querier)**: 从屏幕截图中提取结构化信息
+- **计算机视觉 (CV)**: OCR 文本识别、UI 元素检测、弹窗识别
 
-### 3. 智能查询 (Query)
-- **信息提取**: 从屏幕截图中提取指定信息
-- **自定义输出格式**: 支持用户定义的结构化数据格式
-- **自动类型转换**: 智能转换为用户指定的数据类型
-- **多场景适用**: 适用于游戏分析、UI元素提取、表单数据提取等
+### 🔧 灵活配置
 
-### 4. 计算机视觉 (Computer Vision)
-- **OCR 文本识别**: 提取屏幕中的文本内容和位置信息
-- **UI 元素检测**: 识别界面中的图标、按钮等 UI 元素
-- **弹窗检测**: 自动识别和定位弹窗及关闭按钮
-- **坐标转换**: 支持相对坐标和绝对坐标的转换
+- **统一 API**: 通过 `NewXTDriver` 统一初始化，无需额外函数
+- **混合模型**: 支持为三个组件分别选择不同的最优模型
+- **预设配置**: 提供多种推荐配置方案
 
-### 5. 会话管理 (Session Management)
-- **对话历史**: 维护完整的对话上下文
-- **消息管理**: 智能管理用户图像消息和助手回复
-- **历史清理**: 自动清理过期的对话记录
+## 📖 使用指南
 
-## 🏗️ 架构设计
+### 基本用法
+
+```go
+import (
+    "github.com/httprunner/httprunner/v5/uixt"
+    "github.com/httprunner/httprunner/v5/uixt/option"
+)
+
+// 方式1: 使用单一模型
+driver, err := uixt.NewXTDriver(mockDriver,
+    option.WithLLMService(option.OPENAI_GPT_4O))
+
+// 方式2: 使用高级配置 - 为不同组件选择不同模型
+config := option.NewLLMServiceConfig(option.DOUBAO_1_5_THINKING_VISION_PRO_250428).
+    WithPlannerModel(option.DOUBAO_1_5_UI_TARS_250328).  // UI理解用UI-TARS
+    WithAsserterModel(option.OPENAI_GPT_4O).             // 推理用GPT-4O
+    WithQuerierModel(option.DEEPSEEK_R1_250528)          // 查询用DeepSeek
+
+driver, err := uixt.NewXTDriver(mockDriver,
+    option.WithLLMConfig(config))
+
+// 方式3: 使用推荐配置
+configs := option.RecommendedConfigurations()
+driver, err := uixt.NewXTDriver(mockDriver,
+    option.WithLLMConfig(configs["mixed_optimal"]))
+```
+
+### 推荐配置方案
+
+| 配置名称 | 说明 | 适用场景 |
+|---------|------|----------|
+| `cost_effective` | 成本优化配置 | 预算有限的项目 |
+| `high_performance` | 高性能配置（全部使用GPT-4O） | 对准确性要求极高的场景 |
+| `mixed_optimal` | 混合优化配置 | 平衡性能和成本的最佳选择 |
+| `ui_focused` | UI专注配置（全部使用UI-TARS） | UI自动化专项测试 |
+| `reasoning_focused` | 推理专注配置（全部使用豆包思考模型） | 复杂逻辑推理场景 |
+
+### 支持的模型
+
+| 模型名称 | 特点 | 适用组件 |
+|---------|------|----------|
+| `DOUBAO_1_5_UI_TARS_250328` | UI理解专业模型 | Planner |
+| `DOUBAO_1_5_THINKING_VISION_PRO_250428` | 思考推理模型 | Asserter, Querier |
+| `OPENAI_GPT_4O` | 高性能通用模型 | 全部组件 |
+| `DEEPSEEK_R1_250528` | 成本效益模型 | Querier |
+
+## 🔧 环境配置
+
+### 多模型配置
+
+支持为不同模型配置独立的环境变量：
+
+```bash
+# 豆包思考视觉专业版（doubao-1.5-thinking-vision-pro-250428）
+DOUBAO_1_5_THINKING_VISION_PRO_250428_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
+DOUBAO_1_5_THINKING_VISION_PRO_250428_API_KEY=your_doubao_api_key
+
+# 豆包UI-TARS
+DOUBAO_1_5_UI_TARS_250328_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
+DOUBAO_1_5_UI_TARS_250328_API_KEY=your_doubao_ui_tars_api_key
+
+# OpenAI GPT-4O
+OPENAI_GPT_4O_BASE_URL=https://api.openai.com/v1
+OPENAI_GPT_4O_API_KEY=your_openai_api_key
+
+# DeepSeek
+DEEPSEEK_R1_250528_BASE_URL=https://api.deepseek.com/v1
+DEEPSEEK_R1_250528_API_KEY=your_deepseek_api_key
+```
+
+### 默认配置
+
+```bash
+# 默认配置，当没有找到服务特定配置时使用
+LLM_MODEL_NAME=doubao-1.5-thinking-vision-pro-250428
+OPENAI_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
+OPENAI_API_KEY=your_default_api_key
+```
+
+### 配置优先级
+
+1. **服务特定配置**（最高优先级）：`{SERVICE_NAME}_BASE_URL`、`{SERVICE_NAME}_API_KEY`
+2. **默认配置**：`OPENAI_BASE_URL`、`OPENAI_API_KEY`、`LLM_MODEL_NAME`
+
+## 🏗️ 核心架构
 
 ### 整体架构
 
 ```
 ┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
 │   UI Driver     │    │   AI Module     │    │  LLM Services   │
-│   (XTDriver)    │◄──►│   (ai package)  │◄──►│ (OpenAI/豆包)   │
+│   (XTDriver)    │◄──►│   (ai package)  │◄──►│ (多模型支持)     │
 └─────────────────┘    └─────────────────┘    └─────────────────┘
                               │
                               ▼
@@ -53,524 +124,117 @@ HttpRunner AI 模块是一个集成了多种人工智能服务的 UI 自动化
 
 ### 核心接口
 
-#### ILLMService - LLM 服务接口
 ```go
+// LLM 服务接口
 type ILLMService interface {
     Plan(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error)
     Assert(ctx context.Context, opts *AssertOptions) (*AssertionResult, error)
     Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error)
     RegisterTools(tools []*schema.ToolInfo) error
 }
-```
 
-#### IPlanner - 规划器接口
-```go
-type IPlanner interface {
-    Plan(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error)
-}
-```
-
-#### IAsserter - 断言器接口
-```go
-type IAsserter interface {
-    Assert(ctx context.Context, opts *AssertOptions) (*AssertionResult, error)
-}
-```
-
-#### IQuerier - 查询器接口
-```go
-type IQuerier interface {
-    Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error)
-}
-```
-
-#### ICVService - 计算机视觉服务接口
-```go
+// 计算机视觉服务接口
 type ICVService interface {
     ReadFromBuffer(imageBuf *bytes.Buffer, opts ...option.ActionOption) (*CVResult, error)
     ReadFromPath(imagePath string, opts ...option.ActionOption) (*CVResult, error)
 }
 ```
 
-## 🔧 主要组件
+## 💡 功能详解
 
-### 1. AI 服务管理器 (ai.go)
+### 1. 智能规划 (Planning)
 
-**功能**: 统一管理 LLM 服务，提供规划、断言和查询功能的统一入口
+基于视觉语言模型进行 UI 操作规划，将自然语言指令转换为具体的操作序列。
 
-**核心类型**:
 ```go
-type combinedLLMService struct {
-    planner  IPlanner  // 提供规划功能
-    asserter IAsserter // 提供断言功能
-    querier  IQuerier  // 提供查询功能
-}
-
-type ModelConfig struct {
-    *openai.ChatModelConfig
-    ModelType option.LLMServiceType
-}
-```
-
-**主要功能**:
-- 模型配置管理和验证
-- 环境变量读取和验证
-- API 密钥安全处理
-- 多模型类型支持
-
-**支持的模型类型**:
-- `DOUBAO_1_5_THINKING_VISION_PRO_250428`: 豆包思维视觉专业版
-- `DOUBAO_1_5_UI_TARS_250428`: 豆包UI-TARS专业UI自动化模型
-- `OPENAI_GPT_4O`: OpenAI GPT-4O 视觉模型
-
-### 2. 智能规划器 (planner.go)
-
-**功能**: 基于视觉语言模型进行 UI 操作规划
-
-**核心类型**:
-```go
-type Planner struct {
-    modelConfig *ModelConfig
-    model       model.ToolCallingChatModel
-    parser      LLMContentParser
-    history     ConversationHistory
-}
-
+// 规划选项
 type PlanningOptions struct {
-    UserInstruction string          `json:"user_instruction"`
-    Message         *schema.Message `json:"message"`
-    Size            types.Size      `json:"size"`
-    ResetHistory    bool            `json:"reset_history"`
+    UserInstruction string          `json:"user_instruction"` // 用户指令
+    Message         *schema.Message `json:"message"`          // 消息内容
+    Size            types.Size      `json:"size"`             // 屏幕尺寸
+    ResetHistory    bool            `json:"reset_history"`    // 是否重置历史
 }
 
+// 规划结果
 type PlanningResult struct {
-    ToolCalls []schema.ToolCall  `json:"tool_calls"`
-    Thought   string             `json:"thought"`
-    Content   string             `json:"content"`
+    ToolCalls []schema.ToolCall  `json:"tool_calls"` // 工具调用序列
+    Thought   string             `json:"thought"`    // 思考过程
+    Content   string             `json:"content"`    // 响应内容
     Error     string             `json:"error,omitempty"`
     ModelName string             `json:"model_name"`
     Usage     *schema.TokenUsage `json:"usage,omitempty"`
 }
 ```
 
-**工作流程**:
-1. 接收用户指令和屏幕截图
-2. 构建包含系统提示词的对话历史
-3. 调用视觉语言模型生成响应
-4. 解析模型输出为标准化工具调用
-5. 更新对话历史以支持多轮交互
-
-**特性**:
-- 支持工具注册和函数调用
-- 智能对话历史管理
-- 多种输出格式解析
-- 详细的日志记录和使用统计
-
-### 3. 智能断言器 (asserter.go)
-
-**功能**: 基于视觉语言模型进行断言验证
-
-**核心类型**:
+**使用示例**：
 ```go
-type Asserter struct {
-    modelConfig  *ModelConfig
-    model        model.ToolCallingChatModel
-    systemPrompt string
-    history      ConversationHistory
-}
-
-type AssertOptions struct {
-    Assertion  string     `json:"assertion"`
-    Screenshot string     `json:"screenshot"`
-    Size       types.Size `json:"size"`
-}
-
-type AssertionResult struct {
-    Pass    bool   `json:"pass"`
-    Thought string `json:"thought"`
-}
-```
-
-**工作流程**:
-1. 接收断言条件和屏幕截图
-2. 构建断言验证提示词
-3. 调用视觉语言模型进行判断
-4. 解析模型输出为结构化结果
-5. 返回断言通过状态和推理过程
-
-**特性**:
-- 结构化 JSON 输出格式
-- 自然语言断言支持
-- 详细的推理过程记录
-- 多模型适配
-
-### 4. 智能查询器 (querier.go)
-
-**功能**: 基于视觉语言模型从屏幕截图中提取结构化信息
-
-**核心类型**:
-```go
-type Querier struct {
-    modelConfig  *ModelConfig
-    model        model.ToolCallingChatModel
-    systemPrompt string
-    history      ConversationHistory
-}
-
-type QueryOptions struct {
-    Query        string      `json:"query"`
-    Screenshot   string      `json:"screenshot"`
-    Size         types.Size  `json:"size"`
-    OutputSchema interface{} `json:"outputSchema,omitempty"`
-}
-
-type QueryResult struct {
-    Content string      `json:"content"`
-    Thought string      `json:"thought"`
-    Data    interface{} `json:"data,omitempty"`
-}
-```
-
-**工作流程**:
-1. 接收查询指令和屏幕截图
-2. 根据是否提供 OutputSchema 选择处理方式
-3. 调用视觉语言模型进行分析
-4. 解析模型输出为结构化数据
-5. 自动进行类型转换和验证
-
-**特性**:
-- 支持自定义输出格式（OutputSchema）
-- 自动类型转换和数据验证
-- 多级回退机制确保稳定性
-- 向后兼容的API设计
-
-**应用场景**:
-- **UI元素分析**: 提取界面中的按钮、文本、图标等元素信息
-- **游戏界面分析**: 分析游戏网格、角色状态、道具信息等
-- **表单数据提取**: 从表单界面提取字段值和结构
-- **状态信息获取**: 获取应用状态、进度、设置等信息
-
-### 5. 内容解析器 (parser_*.go)
-
-**功能**: 将不同模型的输出解析为标准化的工具调用格式
-
-#### JSONContentParser (parser_default.go)
-- 适用于支持 JSON 格式输出的通用模型
-- 解析标准 JSON 格式的动作序列
-- 支持坐标归一化和参数处理
-
-#### UITARSContentParser (parser_ui_tars.go)
-- 专门适配 UI-TARS 模型的 Thought/Action 格式
-- 支持多种坐标格式解析 (`<point>`, `<bbox>`, `[x,y,x,y]`)
-- 智能参数名称映射和归一化
-- 相对坐标到绝对坐标转换
-
-**核心功能**:
-```go
-type LLMContentParser interface {
-    SystemPrompt() string
-    Parse(content string, size types.Size) (*PlanningResult, error)
-}
-
-type Action struct {
-    ActionType   string         `json:"action_type"`
-    ActionInputs map[string]any `json:"action_inputs"`
-}
-```
-
-**解析特性**:
-- 多种坐标格式支持
-- 智能参数映射
-- 坐标系统转换
-- 错误处理和验证
-
-### 6. 计算机视觉服务 (cv.go)
-
-**功能**: 提供图像识别和分析能力
-
-**核心类型**:
-```go
-type CVResult struct {
-    URL               string             `json:"url,omitempty"`
-    OCRResult         OCRResults         `json:"ocrResult,omitempty"`
-    LiveType          string             `json:"liveType,omitempty"`
-    LivePopularity    int64              `json:"livePopularity,omitempty"`
-    UIResult          UIResultMap        `json:"uiResult,omitempty"`
-    ClosePopupsResult *ClosePopupsResult `json:"closeResult,omitempty"`
-}
-
-type OCRText struct {
-    Text    string          `json:"text"`
-    RectStr string          `json:"rect"`
-    Rect    image.Rectangle `json:"-"`
-}
-
-type UIResult struct {
-    Box
-}
-
-type ClosePopupsResult struct {
-    Type      string `json:"type"`
-    PopupArea Box    `json:"popupArea"`
-    CloseArea Box    `json:"closeArea"`
-    Text      string `json:"text"`
-}
-```
-
-**主要功能**:
-- **OCR 文本识别**: 提取文本内容和精确位置
-- **UI 元素检测**: 识别按钮、图标等界面元素
-- **弹窗检测**: 自动识别弹窗和关闭按钮
-- **区域过滤**: 支持指定区域的元素筛选
-- **坐标计算**: 提供中心点和随机点计算
-
-**OCR 功能特性**:
-- 文本精确定位
-- 正则表达式匹配
-- 索引选择支持
-- 区域范围过滤
-
-### 7. 会话管理器 (session.go)
-
-**功能**: 管理 AI 对话的历史记录和上下文
-
-**核心类型**:
-```go
-type ConversationHistory []*schema.Message
-```
-
-**管理策略**:
-- **用户消息**: 最多保留 4 条用户图像消息
-- **助手消息**: 最多保留 10 条助手回复
-- **自动清理**: 超出限制时自动删除最旧的消息
-- **系统消息**: 始终保留系统提示词
-
-**功能特性**:
-- 智能消息管理
-- 内存优化
-- 日志记录和调试
-- 敏感信息脱敏
-
-## 🚀 使用指南
-
-### 1. 环境配置
-
-HttpRunner AI 模块支持多模型服务配置，您可以同时配置多个大模型服务，然后在测试用例中灵活切换。
-
-#### 多模型配置方式
-
-**服务特定配置**：
-```bash
-# 豆包思维视觉专业版配置
-DOUBAO_1_5_THINKING_VISION_PRO_250428_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
-DOUBAO_1_5_THINKING_VISION_PRO_250428_API_KEY=your_doubao_api_key
-
-# 豆包UI-TARS配置
-DOUBAO_1_5_UI_TARS_250428_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
-DOUBAO_1_5_UI_TARS_250428_API_KEY=your_doubao_ui_tars_api_key
-
-# OpenAI GPT-4O配置
-OPENAI_GPT_4O_BASE_URL=https://api.openai.com/v1
-OPENAI_GPT_4O_API_KEY=your_openai_api_key
-```
-
-**默认配置（向后兼容）**：
-```bash
-# 默认配置，当没有找到服务特定配置时使用
-LLM_MODEL_NAME=doubao-1.5-thinking-vision-pro-250428
-OPENAI_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
-OPENAI_API_KEY=your_default_api_key
-```
-
-#### 环境变量命名规则
-
-- 将服务名称转换为大写
-- 将连字符 `-` 和点号 `.` 替换为下划线 `_`
-- 添加对应的后缀：`_BASE_URL`、`_API_KEY`
-- 模型名称直接从服务类型推导，无需单独配置
-
-例如：
-- `doubao-1.5-thinking-vision-pro-250428` → `DOUBAO_1_5_THINKING_VISION_PRO_250428_*`
-- `openai/gpt-4o` → `OPENAI_GPT_4O_*`
-- `claude-3.5-sonnet` → `CLAUDE_3_5_SONNET_*`
-
-#### 配置优先级
-
-1. **服务特定配置**（最高优先级）：`{SERVICE_NAME}_BASE_URL`、`{SERVICE_NAME}_API_KEY`
-2. **默认配置**（向后兼容）：`OPENAI_BASE_URL`、`OPENAI_API_KEY`、`LLM_MODEL_NAME`
-3. **模型名称**：优先使用服务类型名称，仅在完全使用默认配置时才使用 `LLM_MODEL_NAME`
-
-#### 示例 .env 文件
-
-```bash
-# 默认配置
-LLM_MODEL_NAME=doubao-1.5-thinking-vision-pro-250428
-OPENAI_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
-OPENAI_API_KEY=your_default_api_key
-
-# doubao-1.5-thinking-vision-pro-250428
-DOUBAO_1_5_THINKING_VISION_PRO_250428_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
-DOUBAO_1_5_THINKING_VISION_PRO_250428_API_KEY=your_doubao_thinking_api_key
-
-# doubao-1.5-ui-tars-250428
-DOUBAO_1_5_UI_TARS_250428_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
-DOUBAO_1_5_UI_TARS_250428_API_KEY=your_doubao_ui_tars_api_key
-
-# openai/gpt-4o
-OPENAI_GPT_4O_BASE_URL=https://api.openai.com/v1
-OPENAI_GPT_4O_API_KEY=your_openai_api_key
-```
-
-### 2. 创建 LLM 服务
-
-#### 在测试用例中指定服务
-
-```json
-{
-    "config": {
-        "name": "AI测试用例",
-        "llm_service": "doubao-1.5-thinking-vision-pro-250428"
-    },
-    "teststeps": [
-        {
-            "name": "AI操作步骤",
-            "android": {
-                "actions": [
-                    {
-                        "method": "start_to_goal",
-                        "params": "启动应用并完成某个任务"
-                    }
-                ]
-            }
-        }
-    ]
-}
-```
-
-#### 在Go代码中使用
-
-```go
-// 创建豆包思维视觉专业版服务
-llmService, err := ai.NewLLMService(option.DOUBAO_1_5_THINKING_VISION_PRO_250428)
-if err != nil {
-    log.Fatal().Err(err).Msg("failed to create LLM service")
-}
-
-// 创建豆包UI-TARS服务
-llmService, err := ai.NewLLMService(option.DOUBAO_1_5_UI_TARS_250428)
-if err != nil {
-    log.Fatal().Err(err).Msg("failed to create LLM service")
-}
-
-// 创建OpenAI GPT-4O服务
-llmService, err := ai.NewLLMService(option.OPENAI_GPT_4O)
-if err != nil {
-    log.Fatal().Err(err).Msg("failed to create LLM service")
-}
-```
-
-#### 模型切换
-
-要切换到不同的模型服务，只需要修改测试用例中的 `llm_service` 字段：
-
-```json
-{
-    "config": {
-        "name": "连连看游戏测试",
-        "llm_service": "doubao-1.5-ui-tars-250428"
-    }
-}
-```
-
-系统会自动根据服务名称获取对应的配置，无需修改环境变量。
-
-### 3. 智能规划使用
-
-```go
-// 准备规划选项
-planningOpts := &ai.PlanningOptions{
+planResult, err := service.Plan(ctx, &ai.PlanningOptions{
     UserInstruction: "点击登录按钮",
-    Message: &schema.Message{
-        Role: schema.User,
-        MultiContent: []schema.ChatMessagePart{
-            {
-                Type: schema.ChatMessagePartTypeImageURL,
-                ImageURL: &schema.ChatMessageImageURL{
-                    URL: "data:image/jpeg;base64," + base64Screenshot,
-                },
-            },
-        },
-    },
-    Size: types.Size{Width: 1080, Height: 1920},
+    Message:         message,
+    Size:            screenSize,
+})
+```
+
+### 2. 智能断言 (Assertion)
+
+基于视觉语言模型进行断言验证，支持自然语言描述的断言条件。
+
+```go
+// 断言选项
+type AssertOptions struct {
+    Assertion  string     `json:"assertion"`  // 断言条件
+    Screenshot string     `json:"screenshot"` // 屏幕截图
+    Size       types.Size `json:"size"`       // 屏幕尺寸
 }
 
-// 执行规划
-result, err := llmService.Plan(ctx, planningOpts)
-if err != nil {
-    log.Error().Err(err).Msg("planning failed")
-    return
-}
-
-// 处理规划结果
-for _, toolCall := range result.ToolCalls {
-    log.Info().Str("action", toolCall.Function.Name).
-        Interface("args", toolCall.Function.Arguments).
-        Msg("planned action")
+// 断言结果
+type AssertionResult struct {
+    Pass    bool   `json:"pass"`    // 是否通过
+    Thought string `json:"thought"` // 推理过程
 }
 ```
 
-### 4. 智能断言使用
-
+**使用示例**：
 ```go
-// 准备断言选项
-assertOpts := &ai.AssertOptions{
+assertResult, err := service.Assert(ctx, &ai.AssertOptions{
     Assertion:  "登录按钮应该可见",
-    Screenshot: "data:image/jpeg;base64," + base64Screenshot,
-    Size:       types.Size{Width: 1080, Height: 1920},
+    Screenshot: screenshot,
+    Size:       screenSize,
+})
+```
+
+### 3. 智能查询 (Query)
+
+从屏幕截图中提取结构化信息，支持自定义输出格式。
+
+```go
+// 查询选项
+type QueryOptions struct {
+    Query        string      `json:"query"`                    // 查询指令
+    Screenshot   string      `json:"screenshot"`               // 屏幕截图
+    Size         types.Size  `json:"size"`                     // 屏幕尺寸
+    OutputSchema interface{} `json:"outputSchema,omitempty"`   // 自定义输出格式
 }
 
-// 执行断言
-result, err := llmService.Assert(ctx, assertOpts)
-if err != nil {
-    log.Error().Err(err).Msg("assertion failed")
-    return
-}
-
-// 检查断言结果
-if result.Pass {
-    log.Info().Str("thought", result.Thought).Msg("assertion passed")
-} else {
-    log.Warn().Str("thought", result.Thought).Msg("assertion failed")
+// 查询结果
+type QueryResult struct {
+    Content string      `json:"content"`           // 文本内容
+    Thought string      `json:"thought"`           // 思考过程
+    Data    interface{} `json:"data,omitempty"`    // 结构化数据
 }
 ```
 
-### 5. 智能查询使用
-
-#### 基础查询
-
+**基础查询示例**：
 ```go
-// 基础查询，返回文本描述
-queryOpts := &ai.QueryOptions{
+result, err := service.Query(ctx, &ai.QueryOptions{
     Query:      "请描述这张图片中的内容",
-    Screenshot: "data:image/jpeg;base64," + base64Screenshot,
-    Size:       types.Size{Width: 1080, Height: 1920},
-}
-
-result, err := llmService.Query(ctx, queryOpts)
-if err != nil {
-    log.Error().Err(err).Msg("query failed")
-    return
-}
-
-log.Info().Str("content", result.Content).
-    Str("thought", result.Thought).
-    Msg("query result")
+    Screenshot: screenshot,
+    Size:       screenSize,
+})
 ```
 
-#### 自定义格式查询
-
+**自定义格式查询示例**：
 ```go
-// 定义输出数据结构
 type GameInfo struct {
     Content string   `json:"content"`
     Thought string   `json:"thought"`
@@ -579,79 +243,88 @@ type GameInfo struct {
     Icons   []string `json:"icons"`
 }
 
-// 自定义格式查询
-queryOpts := &ai.QueryOptions{
-    Query:        "请分析这个连连看游戏界面，告诉我有多少行多少列，有哪些不同类型的图案",
-    Screenshot:   "data:image/jpeg;base64," + base64Screenshot,
-    Size:         types.Size{Width: 1080, Height: 1920},
+result, err := service.Query(ctx, &ai.QueryOptions{
+    Query:        "分析这个连连看游戏界面",
+    Screenshot:   screenshot,
+    Size:         screenSize,
     OutputSchema: GameInfo{},
-}
-
-result, err := llmService.Query(ctx, queryOpts)
-if err != nil {
-    log.Error().Err(err).Msg("query failed")
-    return
-}
+})
 
 // 直接类型断言获取结构化数据
 if gameInfo, ok := result.Data.(*GameInfo); ok {
-    log.Info().Int("rows", gameInfo.Rows).
-        Int("cols", gameInfo.Cols).
-        Strs("icons", gameInfo.Icons).
-        Msg("game analysis result")
-} else {
-    log.Error().Msg("failed to convert to GameInfo")
+    fmt.Printf("游戏有 %d 行 %d 列\n", gameInfo.Rows, gameInfo.Cols)
 }
 ```
 
-#### 泛型类型转换（可选）
+### 4. 计算机视觉 (CV)
+
+提供 OCR 文本识别、UI 元素检测、弹窗识别等计算机视觉功能。
 
 ```go
-// 使用泛型函数进行类型转换（当需要转换为不同类型时）
-gameInfo, err := ai.ConvertQueryResultData[GameInfo](result)
-if err != nil {
-    log.Error().Err(err).Msg("failed to convert data")
-    return
+// CV 结果
+type CVResult struct {
+    URL               string             `json:"url,omitempty"`
+    OCRResult         OCRResults         `json:"ocrResult,omitempty"`
+    LiveType          string             `json:"liveType,omitempty"`
+    LivePopularity    int64              `json:"livePopularity,omitempty"`
+    UIResult          UIResultMap        `json:"uiResult,omitempty"`
+    ClosePopupsResult *ClosePopupsResult `json:"closeResult,omitempty"`
 }
-
-log.Info().Interface("gameInfo", gameInfo).Msg("converted game info")
 ```
 
-### 6. 计算机视觉使用
-
+**使用示例**：
 ```go
-// 创建 CV 服务
 cvService, err := ai.NewCVService(option.CVServiceTypeVEDEM)
-if err != nil {
-    log.Fatal().Err(err).Msg("failed to create CV service")
-}
-
-// 从图像缓冲区读取
 cvResult, err := cvService.ReadFromBuffer(imageBuffer)
-if err != nil {
-    log.Error().Err(err).Msg("CV analysis failed")
-    return
-}
 
 // 处理 OCR 结果
 ocrTexts := cvResult.OCRResult.ToOCRTexts()
-for _, ocrText := range ocrTexts {
-    log.Info().Str("text", ocrText.Text).
-        Str("rect", ocrText.RectStr).
-        Msg("found text")
-}
-
-// 查找特定文本
 targetText, err := ocrTexts.FindText("登录", option.WithRegex(false))
-if err != nil {
-    log.Error().Err(err).Msg("text not found")
-    return
+center := targetText.Center()
+```
+
+## 🎨 高级特性
+
+### 1. 多模型适配
+
+不同模型具有不同的优势，可以根据场景选择最适合的模型：
+
+- **UI-TARS**: 专门针对 UI 自动化优化，理解界面元素能力强
+- **GPT-4O**: 通用性强，推理能力优秀
+- **豆包思考模型**: 支持深度思考，适合复杂场景分析
+- **DeepSeek**: 成本效益高，适合大量查询场景
+
+### 2. 坐标系统转换
+
+支持多种坐标格式的智能转换：
+
+- 相对坐标 (0-1000 范围) 转换为绝对像素坐标
+- 支持 `<point>`、`<bbox>`、`[x,y,x,y]` 等多种格式
+- 自动处理不同模型的坐标输出差异
+
+### 3. 智能会话管理
+
+- **对话历史**: 维护完整的对话上下文
+- **内存优化**: 自动清理过期的对话记录
+- **消息管理**: 智能管理用户图像消息和助手回复
+
+### 4. 自定义输出格式
+
+查询功能支持用户定义的复杂结构化输出格式：
+
+```go
+type UIAnalysisResult struct {
+    Content    string      `json:"content"`
+    Elements   []UIElement `json:"elements"`
+    Statistics Statistics  `json:"statistics"`
 }
 
-// 获取文本中心点
-center := targetText.Center()
-log.Info().Float64("x", center.X).Float64("y", center.Y).
-    Msg("text center coordinates")
+type UIElement struct {
+    Type        string      `json:"type"`
+    Text        string      `json:"text"`
+    BoundingBox BoundingBox `json:"boundingBox"`
+    Clickable   bool        `json:"clickable"`
+}
 ```
 
 ## 📋 配置参数
@@ -667,148 +340,40 @@ log.Info().Float64("x", center.X).Float64("y", center.Y).
 | `TopP` | float32 | Top-P 参数 | 0.7 |
 | `Timeout` | time.Duration | 请求超时 | 30s |
 
-### 规划选项
+### 操作选项
 
-| 参数 | 类型 | 说明 | 必需 |
-|------|------|------|------|
-| `UserInstruction` | string | 用户指令 | ✓ |
-| `Message` | *schema.Message | 消息内容 | ✓ |
-| `Size` | types.Size | 屏幕尺寸 | ✓ |
-| `ResetHistory` | bool | 是否重置历史 | ✗ |
-
-### 断言选项
-
-| 参数 | 类型 | 说明 | 必需 |
-|------|------|------|------|
-| `Assertion` | string | 断言条件 | ✓ |
-| `Screenshot` | string | Base64 截图 | ✓ |
-| `Size` | types.Size | 屏幕尺寸 | ✓ |
-
-### 查询选项
-
-| 参数 | 类型 | 说明 | 必需 |
-|------|------|------|------|
-| `Query` | string | 查询指令 | ✓ |
-| `Screenshot` | string | Base64 截图 | ✓ |
-| `Size` | types.Size | 屏幕尺寸 | ✓ |
-| `OutputSchema` | interface{} | 自定义输出格式 | ✗ |
-
-## 🔍 高级特性
-
-### 1. 多模型适配
-
-AI 模块支持多种不同的语言模型，每种模型都有其特定的优势：
-
-- **豆包思维视觉专业版**: 支持深度思考的视觉语言模型，适合复杂场景分析
-- **豆包UI-TARS**: 专门针对 UI 自动化优化的模型，支持 Thought/Action 格式
-- **OpenAI GPT-4O**: 强大的多模态模型，支持视觉理解和推理
-
-### 2. 坐标系统转换
-
-支持多种坐标格式的智能转换：
-
-```go
-// 相对坐标 (0-1000 范围) 转换为绝对像素坐标
-func convertRelativeToAbsolute(relativeCoord float64, isXCoord bool, size types.Size) float64 {
-    if isXCoord {
-        return math.Round((relativeCoord/DefaultFactor*float64(size.Width))*10) / 10
-    }
-    return math.Round((relativeCoord/DefaultFactor*float64(size.Height))*10) / 10
-}
-```
-
-### 3. 智能参数映射
-
-自动处理不同模型输出格式的参数名称映射：
-
-```go
-func normalizeParameterName(paramName string) string {
-    switch paramName {
-    case "start_point":
-        return "start_box"
-    case "end_point":
-        return "end_box"
-    case "point":
-        return "start_box"
-    default:
-        return paramName
-    }
-}
-```
-
-### 4. 对话历史优化
-
-智能管理对话历史，平衡上下文完整性和内存使用：
-
-- 用户图像消息限制：4 条
-- 助手回复消息限制：10 条
-- 自动清理策略：FIFO (先进先出)
-
-### 5. 自定义输出格式
-
-查询功能支持用户定义的结构化输出格式：
-
-```go
-// 定义复杂的嵌套数据结构
-type UIAnalysisResult struct {
-    Content    string      `json:"content"`
-    Thought    string      `json:"thought"`
-    Elements   []UIElement `json:"elements"`
-    Statistics Statistics  `json:"statistics"`
-}
-
-type UIElement struct {
-    Type        string      `json:"type"`
-    Text        string      `json:"text"`
-    BoundingBox BoundingBox `json:"boundingBox"`
-    Clickable   bool        `json:"clickable"`
-}
-
-// 使用自定义格式进行查询
-result, err := llmService.Query(ctx, &ai.QueryOptions{
-    Query:        "分析界面中的所有UI元素",
-    Screenshot:   screenshot,
-    Size:         size,
-    OutputSchema: UIAnalysisResult{},
-})
-
-// 自动类型转换
-uiAnalysis := result.Data.(*UIAnalysisResult)
-```
+| 组件 | 必需参数 | 可选参数 |
+|------|----------|----------|
+| **Planner** | `UserInstruction`, `Message`, `Size` | `ResetHistory` |
+| **Asserter** | `Assertion`, `Screenshot`, `Size` | - |
+| **Querier** | `Query`, `Screenshot`, `Size` | `OutputSchema` |
 
 ## ⚠️ 注意事项
 
-### 1. 环境变量配置
+### 1. 环境配置
 - 确保所有必需的环境变量都已正确设置
 - API 密钥需要有足够的权限和配额
 - 支持多模型配置，可以同时配置多个服务
-- 模型名称自动从服务类型推导，无需手动配置
 
-### 2. 图像格式要求
+### 2. 图像格式
 - 支持 Base64 编码的图像数据
 - 推荐使用 JPEG 格式以减少数据传输量
 - 图像尺寸信息必须准确提供
 
 ### 3. 坐标系统
-- 豆包UI-TARS 使用 1000x1000 相对坐标系统
+- 不同模型使用不同的坐标系统
 - 需要正确的屏幕尺寸信息进行坐标转换
-- 注意不同模型的坐标格式差异
+- 系统会自动处理坐标格式差异
 
-### 4. 错误处理
-- 网络请求可能失败，需要适当的重试机制
-- 模型输出格式可能不稳定，需要健壮的解析逻辑
-- 资源使用需要监控，避免内存泄漏
-
-### 5. 性能考虑
+### 4. 性能考虑
 - LLM 调用有延迟，适合异步处理
 - 图像数据较大，注意网络传输优化
-- 对话历史会占用内存，需要定期清理
+- 对话历史会占用内存，系统会自动清理
 
-### 6. 查询功能使用
-- 指定 OutputSchema 时，Data 字段会自动转换为对应类型
-- 支持复杂的嵌套数据结构定义
-- 建议使用类型断言直接获取结构化数据
-- ConvertQueryResultData 函数主要用于类型转换的特殊场景
+### 5. 错误处理
+- 网络请求可能失败，需要适当的重试机制
+- 模型输出格式可能不稳定，系统提供健壮的解析逻辑
+- 建议在生产环境中添加监控和告警
 
 ## 🧪 测试数据
 
@@ -822,34 +387,31 @@ uiAnalysis := result.Data.(*UIAnalysisResult)
 
 这些测试数据覆盖了各种典型的 UI 场景，用于验证 AI 模块的功能正确性。
 
-## 📈 扩展开发
+## 🚀 快速开始
 
-### 添加新的模型支持
+1. **配置环境变量**
+   ```bash
+   # 配置默认的 LLM 服务地址与 API 密钥
+   export OPENAI_BASE_URL=https://your-endpoint.com
+   export OPENAI_API_KEY=your-api-key
+   ```
 
-1. 在 `option` 包中定义新的模型类型
-2. 实现对应的 `LLMContentParser`
-3. 在 `GetModelConfig` 中添加模型验证逻辑
-4. 更新系统提示词和输出格式
+2. **创建驱动**
+   ```go
+   driver, err := uixt.NewXTDriver(mockDriver,
+       option.WithLLMService(option.DOUBAO_1_5_THINKING_VISION_PRO_250428))
+   ```
 
-### 添加新的 CV 服务
+3. **执行智能操作**
+   ```go
+   // 智能规划
+   planResult, err := driver.LLMService.Plan(ctx, planningOpts)
 
-1. 实现 `ICVService` 接口
-2. 在 `NewCVService` 中添加服务创建逻辑
-3. 定义服务特定的配置和选项
-4. 添加相应的测试用例
+   // 智能断言
+   assertResult, err := driver.LLMService.Assert(ctx, assertOpts)
 
-### 扩展查询功能
+   // 智能查询
+   queryResult, err := driver.LLMService.Query(ctx, queryOpts)
+   ```
 
-1. 定义新的数据结构模板
-2. 优化 JSON Schema 生成逻辑
-3. 增强类型转换和验证机制
-4. 添加更多应用场景的示例
-
-### 优化解析逻辑
-
-1. 扩展坐标格式支持
-2. 改进参数映射规则
-3. 增强错误处理机制
-4. 优化性能和内存使用
-
-通过这些扩展点，AI 模块可以持续演进，支持更多的模型和服务，提供更强大的智能化 UI 自动化能力。
\ No newline at end of file
+通过 HttpRunner UIXT AI 模块，您可以轻松实现智能化的 UI 自动化测试，大幅提升测试效率和准确性。
\ No newline at end of file
diff --git a/uixt/ai/ai.go b/uixt/ai/ai.go
index 469fb11d..75bd8845 100644
--- a/uixt/ai/ai.go
+++ b/uixt/ai/ai.go
@@ -16,21 +16,42 @@ type ILLMService interface {
 	RegisterTools(tools []*schema.ToolInfo) error
 }
 
+// NewLLMService creates an LLM service that uses modelType for the planner, asserter and querier (kept for backward compatibility).
 func NewLLMService(modelType option.LLMServiceType) (ILLMService, error) {
-	modelConfig, err := GetModelConfig(modelType)
+	config := option.NewLLMServiceConfig(modelType)
+	return NewLLMServiceWithOptionConfig(config)
+}
+
+// NewLLMServiceWithOptionConfig creates a new LLM service with different models for each component
+func NewLLMServiceWithOptionConfig(config *option.LLMServiceConfig) (ILLMService, error) {
+	// Get model configs for each component
+	plannerModelConfig, err := GetModelConfig(config.PlannerModel)
 	if err != nil {
 		return nil, err
 	}
 
-	planner, err := NewPlanner(context.Background(), modelConfig)
+	asserterModelConfig, err := GetModelConfig(config.AsserterModel)
 	if err != nil {
 		return nil, err
 	}
-	asserter, err := NewAsserter(context.Background(), modelConfig)
+
+	querierModelConfig, err := GetModelConfig(config.QuerierModel)
 	if err != nil {
 		return nil, err
 	}
-	querier, err := NewQuerier(context.Background(), modelConfig)
+
+	// Create components with their respective model configs
+	planner, err := NewPlanner(context.Background(), plannerModelConfig)
+	if err != nil {
+		return nil, err
+	}
+
+	asserter, err := NewAsserter(context.Background(), asserterModelConfig)
+	if err != nil {
+		return nil, err
+	}
+
+	querier, err := NewQuerier(context.Background(), querierModelConfig)
 	if err != nil {
 		return nil, err
 	}
diff --git a/uixt/ai/ai_test.go b/uixt/ai/ai_test.go
index 2d49f2c9..2035c047 100644
--- a/uixt/ai/ai_test.go
+++ b/uixt/ai/ai_test.go
@@ -140,3 +140,82 @@ func TestILLMServiceIntegration(t *testing.T) {
 		// which is more complex, so we skip it in this integration test
 	})
 }
+
+// TestLLMServiceConfig verifies LLMServiceConfig construction, per-component overrides and the recommended presets.
+func TestLLMServiceConfig(t *testing.T) {
+	t.Run("BasicConfiguration", func(t *testing.T) {
+		// A config built from a single model type must use it for every component.
+		modelType := option.DOUBAO_1_5_THINKING_VISION_PRO_250428
+		config := option.NewLLMServiceConfig(modelType)
+
+		assert.Equal(t, modelType, config.PlannerModel)
+		assert.Equal(t, modelType, config.AsserterModel)
+		assert.Equal(t, modelType, config.QuerierModel)
+	})
+
+	t.Run("MixedConfiguration", func(t *testing.T) {
+		// Chained With*Model calls must leave a distinct model on each component.
+		config := option.NewLLMServiceConfig(option.DOUBAO_1_5_THINKING_VISION_PRO_250428).
+			WithPlannerModel(option.DOUBAO_1_5_UI_TARS_250328).
+			WithAsserterModel(option.OPENAI_GPT_4O).
+			WithQuerierModel(option.DEEPSEEK_R1_250528)
+
+		assert.Equal(t, option.DOUBAO_1_5_UI_TARS_250328, config.PlannerModel)
+		assert.Equal(t, option.OPENAI_GPT_4O, config.AsserterModel)
+		assert.Equal(t, option.DEEPSEEK_R1_250528, config.QuerierModel)
+	})
+
+	t.Run("RecommendedConfigurations", func(t *testing.T) {
+		configs := option.RecommendedConfigurations()
+
+		// "mixed_optimal" must pair UI-TARS (planner), GPT-4O (asserter) and DeepSeek R1 (querier).
+		mixedOptimal := configs["mixed_optimal"]
+		assert.NotNil(t, mixedOptimal)
+		assert.Equal(t, option.DOUBAO_1_5_UI_TARS_250328, mixedOptimal.PlannerModel)
+		assert.Equal(t, option.OPENAI_GPT_4O, mixedOptimal.AsserterModel)
+		assert.Equal(t, option.DEEPSEEK_R1_250528, mixedOptimal.QuerierModel)
+
+		// "high_performance" must use GPT-4O for all three components.
+		highPerf := configs["high_performance"]
+		assert.NotNil(t, highPerf)
+		assert.Equal(t, option.OPENAI_GPT_4O, highPerf.PlannerModel)
+		assert.Equal(t, option.OPENAI_GPT_4O, highPerf.AsserterModel)
+		assert.Equal(t, option.OPENAI_GPT_4O, highPerf.QuerierModel)
+	})
+}
+
+// TestLLMServiceCreation exercises both service constructors and accepts either success or failure from them.
+func TestLLMServiceCreation(t *testing.T) {
+	t.Run("BackwardCompatibility", func(t *testing.T) {
+		// The single-model constructor NewLLMService must remain callable.
+		modelType := option.DOUBAO_1_5_THINKING_VISION_PRO_250428
+		service, err := NewLLMService(modelType)
+
+		// Either outcome is accepted (creation presumably fails when model env vars are absent);
+		// the invariant checked is that a service is returned exactly when err is nil.
+		if err != nil {
+			assert.NotNil(t, err)
+			assert.Nil(t, service)
+		} else {
+			assert.NotNil(t, service)
+		}
+	})
+
+	t.Run("WithAdvancedConfig", func(t *testing.T) {
+		// Planner and asserter are overridden here; the querier keeps the base model.
+		config := option.NewLLMServiceConfig(option.DOUBAO_1_5_THINKING_VISION_PRO_250428).
+			WithPlannerModel(option.DOUBAO_1_5_UI_TARS_250328).
+			WithAsserterModel(option.OPENAI_GPT_4O)
+
+		service, err := NewLLMServiceWithOptionConfig(config)
+
+		// Either outcome is accepted (creation presumably fails when model env vars are absent);
+		// the invariant checked is that a service is returned exactly when err is nil.
+		if err != nil {
+			assert.NotNil(t, err)
+			assert.Nil(t, service)
+		} else {
+			assert.NotNil(t, service)
+		}
+	})
+}
diff --git a/uixt/option/ai.go b/uixt/option/ai.go
index ebbbcf8b..ce8c8265 100644
--- a/uixt/option/ai.go
+++ b/uixt/option/ai.go
@@ -11,6 +11,7 @@ func NewAIServiceOptions(opts ...AIServiceOption) *AIServiceOptions {
 type AIServiceOptions struct {
 	CVService  CVServiceType
 	LLMService LLMServiceType
+	LLMConfig  *LLMServiceConfig // optional per-component model selection; nil means not configured
 }
 
 type AIServiceOption func(*AIServiceOptions)
@@ -48,3 +49,65 @@ func WithLLMService(modelType LLMServiceType) AIServiceOption {
 		opts.LLMService = modelType
 	}
 }
+
+// LLMServiceConfig selects the model type used by each LLM service component (planner, asserter, querier).
+type LLMServiceConfig struct {
+	PlannerModel  LLMServiceType `json:"planner_model"`  // model type used by the planner component
+	AsserterModel LLMServiceType `json:"asserter_model"` // model type used by the asserter component
+	QuerierModel  LLMServiceType `json:"querier_model"`  // model type used by the querier component
+}
+
+// NewLLMServiceConfig returns a new config in which planner, asserter and querier all use modelType.
+func NewLLMServiceConfig(modelType LLMServiceType) *LLMServiceConfig {
+	return &LLMServiceConfig{
+		PlannerModel:  modelType,
+		AsserterModel: modelType,
+		QuerierModel:  modelType,
+	}
+}
+
+// WithPlannerModel overrides the planner model on c in place and returns c so that calls can be chained.
+func (c *LLMServiceConfig) WithPlannerModel(modelType LLMServiceType) *LLMServiceConfig {
+	c.PlannerModel = modelType
+	return c
+}
+
+// WithAsserterModel overrides the asserter model on c in place and returns c so that calls can be chained.
+func (c *LLMServiceConfig) WithAsserterModel(modelType LLMServiceType) *LLMServiceConfig {
+	c.AsserterModel = modelType
+	return c
+}
+
+// WithQuerierModel overrides the querier model on c in place and returns c so that calls can be chained.
+func (c *LLMServiceConfig) WithQuerierModel(modelType LLMServiceType) *LLMServiceConfig {
+	c.QuerierModel = modelType
+	return c
+}
+
+// WithLLMConfig returns an AIServiceOption that stores config (not a copy) in AIServiceOptions.LLMConfig.
+func WithLLMConfig(config *LLMServiceConfig) AIServiceOption {
+	return func(opts *AIServiceOptions) {
+		opts.LLMConfig = config
+	}
+}
+
+// RecommendedConfigurations returns a newly built map of named preset configs; every call allocates fresh values.
+func RecommendedConfigurations() map[string]*LLMServiceConfig {
+	return map[string]*LLMServiceConfig{
+		"cost_effective": NewLLMServiceConfig(DOUBAO_1_5_THINKING_VISION_PRO_250428).
+			WithPlannerModel(DOUBAO_1_5_UI_TARS_250328).
+			WithAsserterModel(DOUBAO_1_5_THINKING_VISION_PRO_250428).
+			WithQuerierModel(DOUBAO_1_5_THINKING_VISION_PRO_250428),
+
+		"high_performance": NewLLMServiceConfig(OPENAI_GPT_4O),
+
+		"mixed_optimal": NewLLMServiceConfig(DOUBAO_1_5_THINKING_VISION_PRO_250428).
+			WithPlannerModel(DOUBAO_1_5_UI_TARS_250328). // planner: picked by the author for UI understanding
+			WithAsserterModel(OPENAI_GPT_4O).            // asserter: picked by the author for reasoning
+			WithQuerierModel(DEEPSEEK_R1_250528),        // querier: picked by the author as cost-effective
+
+		"ui_focused": NewLLMServiceConfig(DOUBAO_1_5_UI_TARS_250328),
+
+		"reasoning_focused": NewLLMServiceConfig(DOUBAO_1_5_THINKING_VISION_PRO_250428),
+	}
+}
diff --git a/uixt/sdk.go b/uixt/sdk.go
index 50e1148d..0e763ce7 100644
--- a/uixt/sdk.go
+++ b/uixt/sdk.go
@@ -33,13 +33,24 @@ func NewXTDriver(driver IDriver, opts ...option.AIServiceOption) (*XTDriver, err
 			return nil, err
 		}
 	}
-	if services.LLMService != "" {
+
+	// Handle LLM service initialization
+	if services.LLMConfig != nil {
+		// Use advanced LLM configuration if provided
+		driverExt.LLMService, err = ai.NewLLMServiceWithOptionConfig(services.LLMConfig)
+		if err != nil {
+			return nil, errors.Wrap(err, "init llm service with config failed")
+		}
+	} else if services.LLMService != "" {
+		// Fallback to simple LLM service if no config provided
 		driverExt.LLMService, err = ai.NewLLMService(services.LLMService)
 		if err != nil {
 			return nil, errors.Wrap(err, "init llm service failed")
 		}
+	}
 
-		// Register uixt MCP tools to LLM service
+	// Register uixt MCP tools to LLM service if it exists
+	if driverExt.LLMService != nil {
 		mcpTools := driverExt.client.Server.ListTools()
 		einoTools := ai.ConvertMCPToolsToEinoToolInfos(mcpTools, "uixt")
 		if err := driverExt.LLMService.RegisterTools(einoTools); err != nil {