feat: add model name display in AI actions and optimize HTML report

- Add ModelName field to PlanningResult and SubActionResult
- Update HTML report with improved layout and model name display
- Fix elapsed time setting bug and enhance mobile responsiveness
This commit is contained in:
lilong.129
2025-06-08 21:46:25 +08:00
parent 660e8ca124
commit 14cef72f5a
7 changed files with 609 additions and 118 deletions

View File

@@ -21,11 +21,13 @@ func NewLLMContentParser(modelType option.LLMServiceType) LLMContentParser {
switch modelType {
case option.DOUBAO_1_5_UI_TARS_250428:
return &UITARSContentParser{
modelType: modelType,
systemPrompt: doubao_1_5_ui_tars_planning_prompt,
actionMapping: doubao_1_5_ui_tars_action_mapping,
}
default:
return &JSONContentParser{
modelType: modelType,
systemPrompt: doubao_1_5_thinking_vision_pro_planning_prompt,
actionMapping: doubao_1_5_thinking_vision_pro_action_mapping,
}
@@ -34,6 +36,7 @@ func NewLLMContentParser(modelType option.LLMServiceType) LLMContentParser {
// JSONContentParser parses the response as JSON string format
// It is the parser NewLLMContentParser returns for model types that have no
// dedicated case in its switch (the default branch).
type JSONContentParser struct {
// modelType identifies the LLM service this parser was created for; Parse
// copies it into PlanningResult.ModelName.
modelType option.LLMServiceType
// systemPrompt is the planning prompt selected for this model by the constructor.
systemPrompt string
// actionMapping is keyed by string and yields an option.ActionName.
// NOTE(review): presumably keys are the action names emitted by the model — confirm.
actionMapping map[string]option.ActionName
}
@@ -98,5 +101,6 @@ func (p *JSONContentParser) Parse(content string, size types.Size) (*PlanningRes
ToolCalls: toolCalls,
Thought: jsonResponse.Thought,
Content: content,
ModelName: string(p.modelType),
}, nil
}

View File

@@ -21,6 +21,7 @@ const (
// UITARSContentParser parses the Thought/Action format response
// NewLLMContentParser returns it for option.DOUBAO_1_5_UI_TARS_250428.
type UITARSContentParser struct {
// modelType identifies the LLM service this parser was created for; Parse
// copies it into PlanningResult.ModelName.
modelType option.LLMServiceType
// systemPrompt is the planning prompt selected for this model by the constructor.
systemPrompt string
// actionMapping is keyed by string and yields an option.ActionName.
// NOTE(review): presumably keys are the action names emitted by the model — confirm.
actionMapping map[string]option.ActionName
}
@@ -55,6 +56,7 @@ func (p *UITARSContentParser) Parse(content string, size types.Size) (*PlanningR
ToolCalls: toolCalls,
Thought: thought,
Content: content,
ModelName: string(p.modelType),
}, nil
}

View File

@@ -32,6 +32,7 @@ type PlanningResult struct {
Thought string `json:"thought"`
Content string `json:"content"` // original content from model
Error string `json:"error,omitempty"`
ModelName string `json:"model_name"` // model name used for planning
}
func NewPlanner(ctx context.Context, modelConfig *ModelConfig) (*Planner, error) {
@@ -132,6 +133,7 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes
result := &PlanningResult{
ToolCalls: message.ToolCalls,
Thought: message.Content,
ModelName: string(p.modelConfig.ModelType),
}
return result, nil
}
@@ -140,8 +142,9 @@ func (p *Planner) Call(ctx context.Context, opts *PlanningOptions) (*PlanningRes
result, err := p.parser.Parse(message.Content, opts.Size)
if err != nil {
result = &PlanningResult{
Thought: message.Content,
Error: err.Error(),
Thought: message.Content,
Error: err.Error(),
ModelName: string(p.modelConfig.ModelType),
}
log.Debug().Str("reason", err.Error()).Msg("parse content to actions failed")
}

View File

@@ -35,6 +35,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}
// Plan next action with history reset on first attempt
planningStartTime := time.Now()
planningOpts := opts
if attempt == 1 {
// Add ResetHistory option for the first attempt
@@ -49,9 +50,12 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
continue
}
allSubActions = append(allSubActions, &SubActionResult{
ActionName: "plan_next_action",
Arguments: prompt,
Error: err,
ActionName: "plan_next_action",
Arguments: prompt,
Error: err,
StartTime: planningStartTime.Unix(),
Elapsed: time.Since(planningStartTime).Milliseconds(),
SessionData: dExt.GetSession().GetData(true),
})
return allSubActions, err
}
@@ -59,6 +63,17 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
// Check if task is finished BEFORE executing actions
if dExt.isTaskFinished(result) {
log.Info().Msg("task finished, stopping StartToGoal")
// Create a sub-action result to record the planning result even when task is finished
subActionResult := &SubActionResult{
ActionName: "plan_next_action",
Arguments: prompt,
StartTime: planningStartTime.Unix(),
Elapsed: time.Since(planningStartTime).Milliseconds(),
Thought: result.Thought,
ModelName: result.ModelName,
SessionData: dExt.GetSession().GetData(true),
}
allSubActions = append(allSubActions, subActionResult)
return allSubActions, nil
}
@@ -79,6 +94,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
Arguments: toolCall.Function.Arguments,
StartTime: subActionStartTime.Unix(),
Thought: result.Thought,
ModelName: result.ModelName,
}
if err := dExt.invokeToolCall(ctx, toolCall); err != nil {
@@ -86,6 +102,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
allSubActions = append(allSubActions, subActionResult)
return allSubActions, err
}
subActionResult.Elapsed = time.Since(subActionStartTime).Milliseconds()
// Collect sub-action specific attachments and reset session data
subActionResult.SessionData = dExt.GetSession().GetData(true) // reset after getting data
@@ -221,12 +238,13 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
// SubActionResult represents a sub-action within a start_to_goal action
//
// As populated by StartToGoal: StartTime comes from time.Time.Unix() (seconds),
// Elapsed from time.Duration.Milliseconds(), ModelName is copied from the
// PlanningResult that produced the action, and the embedded SessionData is
// filled from dExt.GetSession().GetData(true).
//
// NOTE(review): Error is an interface value; encoding/json usually renders a
// non-nil error as `{}` unless its concrete type exports fields or implements
// json.Marshaler — confirm that report consumers do not rely on the message.
type SubActionResult struct {
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
StartTime int64 `json:"start_time"` // sub-action start time
Elapsed int64 `json:"elapsed_ms"` // sub-action elapsed time(ms)
Error error `json:"error,omitempty"` // sub-action execution result
Thought string `json:"thought,omitempty"` // sub-action thought
ActionName string `json:"action_name"` // name of the sub-action (e.g., "tap", "input")
Arguments interface{} `json:"arguments,omitempty"` // arguments passed to the sub-action
StartTime int64 `json:"start_time"` // sub-action start time
Elapsed int64 `json:"elapsed_ms"` // sub-action elapsed time(ms)
Error error `json:"error,omitempty"` // sub-action execution result
Thought string `json:"thought,omitempty"` // sub-action thought
ModelName string `json:"model_name,omitempty"` // model name used for AI actions
SessionData
}