diff --git a/internal/version/VERSION b/internal/version/VERSION index c1ba7e00..69e16753 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2506090029 +v5.0.0-beta-2506091704 diff --git a/report.go b/report.go index 66cde1a1..0b70fd2e 100644 --- a/report.go +++ b/report.go @@ -305,9 +305,18 @@ func (g *HTMLReportGenerator) calculateTotalSubActions() int { for _, step := range testCase.Records { if step.Actions != nil { for _, action := range step.Actions { + // Count sub-actions from regular actions if action.SubActions != nil { total += len(action.SubActions) } + // Count sub-actions from planning results + if action.Plannings != nil { + for _, planning := range action.Plannings { + if planning.SubActions != nil { + total += len(planning.SubActions) + } + } + } } } } @@ -315,8 +324,8 @@ func (g *HTMLReportGenerator) calculateTotalSubActions() int { return total } -// calculateTotalRequests calculates the total number of requests across all test cases -func (g *HTMLReportGenerator) calculateTotalRequests() int { +// calculateTotalPlannings calculates the total number of planning results across all test cases +func (g *HTMLReportGenerator) calculateTotalPlannings() int { total := 0 if g.SummaryData == nil || g.SummaryData.Details == nil { return total @@ -329,12 +338,8 @@ func (g *HTMLReportGenerator) calculateTotalRequests() int { for _, step := range testCase.Records { if step.Actions != nil { for _, action := range step.Actions { - if action.SubActions != nil { - for _, subAction := range action.SubActions { - if subAction.Requests != nil { - total += len(subAction.Requests) - } - } + if action.Plannings != nil { + total += len(action.Plannings) } } } @@ -343,11 +348,16 @@ func (g *HTMLReportGenerator) calculateTotalRequests() int { return total } -// calculateTotalScreenshots calculates the total number of screenshots across all test cases -func (g *HTMLReportGenerator) calculateTotalScreenshots() int { - total := 0 +// calculateTotalUsage calculates the total token usage across all test cases +func (g *HTMLReportGenerator) calculateTotalUsage() map[string]interface{} { + totalUsage := map[string]interface{}{ + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + } + if g.SummaryData == nil || g.SummaryData.Details == nil { - return total + return totalUsage } for _, testCase := range g.SummaryData.Details { @@ -355,31 +365,25 @@ func (g *HTMLReportGenerator) calculateTotalScreenshots() int { continue } for _, step := range testCase.Records { - // Count screenshots in actions - if step.Actions != nil { - for _, action := range step.Actions { - if action.SubActions != nil { - for _, subAction := range action.SubActions { - if subAction.ScreenResults != nil { - total += len(subAction.ScreenResults) - } - } - } - } + if step.Actions == nil { + continue } - // Count screenshots in attachments - if step.Attachments != nil { - if attachments, ok := step.Attachments.(map[string]any); ok { - if screenResults, exists := attachments["screen_results"]; exists { - if screenResultsSlice, ok := screenResults.([]any); ok { - total += len(screenResultsSlice) - } + for _, action := range step.Actions { + if action.Plannings == nil { + continue + } + for _, planning := range action.Plannings { + if planning.Usage == nil { + continue } + totalUsage["prompt_tokens"] = totalUsage["prompt_tokens"].(int) + planning.Usage.PromptTokens + totalUsage["completion_tokens"] = totalUsage["completion_tokens"].(int) + planning.Usage.CompletionTokens + totalUsage["total_tokens"] = totalUsage["total_tokens"].(int) + planning.Usage.TotalTokens } } } } - return total + return totalUsage } // GenerateReport generates the complete HTML test report @@ -390,13 +394,13 @@ func (g *HTMLReportGenerator) GenerateReport(outputFile string) error { // Create template functions funcMap := template.FuncMap{ - "formatDuration": g.formatDuration, - "encodeImageBase64": g.encodeImageToBase64, - "getStepLogs": g.getStepLogsForTemplate, - "calculateTotalActions": g.calculateTotalActions, - "calculateTotalSubActions": g.calculateTotalSubActions, - "calculateTotalRequests": g.calculateTotalRequests, - "calculateTotalScreenshots": g.calculateTotalScreenshots, + "formatDuration": g.formatDuration, + "encodeImageBase64": g.encodeImageToBase64, + "getStepLogs": g.getStepLogsForTemplate, + "calculateTotalActions": g.calculateTotalActions, + "calculateTotalSubActions": g.calculateTotalSubActions, + "calculateTotalPlannings": g.calculateTotalPlannings, + "calculateTotalUsage": g.calculateTotalUsage, "getSummaryContentBase64": func() string { return base64.StdEncoding.EncodeToString([]byte(g.SummaryContent)) }, @@ -502,19 +506,26 @@ const htmlTemplate = ` gap: 15px; } - .start-time { + .download-section { background: rgba(255, 255, 255, 0.2); - padding: 12px 20px; + padding: 15px 20px; border-radius: 8px; backdrop-filter: blur(10px); - min-width: 200px; + min-width: 240px; + text-align: center; + } + + .download-title { + font-size: 0.9em; + font-weight: 600; + margin-bottom: 10px; + opacity: 0.9; } .download-buttons { display: flex; gap: 10px; width: 100%; - max-width: 240px; } .download-btn { @@ -548,19 +559,6 @@ const htmlTemplate = ` transform: translateY(0); } - .time-label { - display: block; - font-size: 0.9em; - opacity: 0.8; - margin-bottom: 4px; - } - - .time-value { - display: block; - font-size: 1.1em; - font-weight: bold; - } - .summary { background: white; padding: 25px; @@ -690,70 +688,138 @@ const htmlTemplate = ` box-shadow: 0 2px 4px rgba(0,0,0,0.1); } + .test-cases { + margin-top: 20px; + } + + .test-case { + background: white; + margin-bottom: 40px; + border-radius: 15px; + box-shadow: 0 6px 12px rgba(0,0,0,0.1); + overflow: hidden; + border: 2px solid #e9ecef; + padding-bottom: 8px; + } + + .test-case h2 { + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + color: #495057; + margin: 0; + padding: 20px 30px; + font-size: 1.5em; + font-weight: 600; + border-bottom: 2px solid #dee2e6; + display: flex; + justify-content: space-between; + align-items: center; + } + + .case-info { + display: flex; + align-items: center; + gap: 15px; + } + .step-container { background: white; - margin-bottom: 20px; - border-radius: 10px; - box-shadow: 0 2px 4px rgba(0,0,0,0.1); + margin-bottom: 8px; + border-radius: 12px; + box-shadow: 0 4px 8px rgba(0,0,0,0.1); overflow: hidden; + border: 1px solid #e9ecef; + } + + .step-container:first-of-type { + margin-top: 8px; } .step-header { - background: #f8f9fa; - padding: 20px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + padding: 25px 30px; cursor: pointer; - border-bottom: 1px solid #dee2e6; - transition: background-color 0.3s; + border-bottom: 2px solid #dee2e6; + transition: all 0.3s ease; } .step-header:hover { - background: #e9ecef; + background: linear-gradient(135deg, #e9ecef 0%, #dee2e6 100%); + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0,0,0,0.1); } .step-header h3 { display: flex; align-items: center; - gap: 15px; + gap: 20px; margin: 0; font-size: 1.3em; + font-weight: 500; + } + + .step-info-group { + margin-left: auto; + display: flex; + align-items: center; + gap: 12px; + min-width: 300px; + justify-content: flex-end; + } + + .step-status { + min-width: 70px; + text-align: center; + } + + .step-duration { + min-width: 80px; + text-align: center; + } + + .step-type-fixed { + min-width: 120px; + text-align: center; } .step-number { - background: #007bff; + background: linear-gradient(135deg, #007bff 0%, #0056b3 100%); color: white; - width: 30px; - height: 30px; + width: 36px; + height: 36px; border-radius: 50%; display: flex; align-items: center; justify-content: center; - font-size: 0.9em; + font-size: 1.0em; font-weight: bold; + box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3); } .status-badge { - padding: 5px 12px; + padding: 6px 14px; border-radius: 20px; - font-size: 0.8em; + font-size: 0.85em; font-weight: bold; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); } .status-badge.success { - background: #28a745; + background: linear-gradient(135deg, #28a745 0%, #20c997 100%); color: white; } .status-badge.failure { - background: #dc3545; + background: linear-gradient(135deg, #dc3545 0%, #c82333 100%); color: white; } .duration { - background: #6c757d; + background: linear-gradient(135deg, #6c757d 0%, #5a6268 100%); color: white; - padding: 3px 8px; + padding: 4px 10px; border-radius: 12px; font-size: 0.8em; + box-shadow: 0 1px 3px rgba(0,0,0,0.2); } .toggle-icon { @@ -766,22 +832,20 @@ const htmlTemplate = ` transform: rotate(-90deg); } - .step-meta { - margin-top: 10px; - color: #6c757d; - } - .step-type { - background: #17a2b8; + background: linear-gradient(135deg, #17a2b8 0%, #138496 100%); color: white; - padding: 2px 8px; - border-radius: 10px; + padding: 3px 10px; + border-radius: 12px; font-size: 0.8em; + box-shadow: 0 1px 3px rgba(0,0,0,0.2); } .step-content { - padding: 25px; + padding: 30px; display: none; + background: #fafbfc; + border-top: 1px solid #e9ecef; } .step-content.show { @@ -789,41 +853,73 @@ const htmlTemplate = ` } .actions-section, .validators-section, .screenshots-section, .logs-section { - margin-bottom: 25px; + margin-bottom: 30px; } .actions-section h4, .validators-section h4, .screenshots-section h4, .logs-section h4 { color: #495057; - margin-bottom: 15px; - padding-bottom: 8px; - border-bottom: 1px solid #dee2e6; + margin-bottom: 20px; + padding-bottom: 12px; + border-bottom: 2px solid #dee2e6; + font-size: 1.2em; + font-weight: 600; } .action-item { - background: #f8f9fa; - border: 1px solid #dee2e6; - border-radius: 8px; - padding: 15px; - margin-bottom: 15px; + background: white; + border: 2px solid #e9ecef; + border-radius: 12px; + padding: 20px; + margin-bottom: 20px; + box-shadow: 0 2px 6px rgba(0,0,0,0.08); + transition: all 0.3s ease; + } + + .action-item:hover { + border-color: #007bff; + box-shadow: 0 4px 12px rgba(0, 123, 255, 0.15); + transform: translateY(-1px); } .action-header { display: flex; align-items: center; - gap: 15px; - margin-bottom: 10px; + gap: 18px; + margin-bottom: 15px; cursor: pointer; - transition: background-color 0.3s; - padding: 8px; - border-radius: 6px; + transition: all 0.3s ease; + padding: 12px 15px; + border-radius: 8px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; } .action-header:hover { - background-color: rgba(0, 123, 255, 0.1); + background: linear-gradient(135deg, rgba(0, 123, 255, 0.1) 0%, rgba(0, 123, 255, 0.05) 100%); + border-color: #007bff; + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0, 123, 255, 0.2); } .action-header strong { color: #007bff; + font-size: 1.1em; + font-weight: 600; + } + + .action-description { + color: #6c757d; + font-style: italic; + margin: 10px 0; + padding: 10px 15px; + white-space: pre-wrap; + word-wrap: break-word; + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + background: #f8f9fa; + border: 1px solid #dee2e6; + border-radius: 4px; + font-size: 0.9em; + line-height: 1.4; } .action-toggle { @@ -849,26 +945,196 @@ const htmlTemplate = ` display: block; } - .action-params { - color: #6c757d; - font-style: italic; - margin-bottom: 10px; - white-space: pre-wrap; - word-wrap: break-word; - font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; - background: #f8f9fa; - border: 1px solid #dee2e6; - border-radius: 4px; - padding: 10px; - font-size: 0.9em; - line-height: 1.4; - } - .error { color: #dc3545; font-weight: bold; } + .planning-results { + margin-top: 15px; + padding: 15px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; + border-radius: 12px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + } + + .planning-item { + background: white; + border: 1px solid #dee2e6; + border-radius: 8px; + padding: 15px; + margin-bottom: 15px; + } + + .planning-item:last-child { + margin-bottom: 0; + } + + .planning-header { + display: flex; + align-items: center; + gap: 15px; + margin-bottom: 15px; + padding-bottom: 10px; + border-bottom: 1px solid #dee2e6; + } + + .planning-label { + background: #007bff; + color: white; + padding: 4px 12px; + border-radius: 15px; + font-size: 0.9em; + font-weight: bold; + } + + .plan-next-action { + margin: 15px 0; + padding: 15px; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 2px solid #dee2e6; + border-radius: 12px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); + } + + .plan-next-action h5 { + color: #495057; + margin-bottom: 10px; + font-size: 1.0em; + font-weight: 600; + } + + .planning-two-columns { + display: flex; + gap: 20px; + margin: 15px 0; + } + + .planning-column-left, .planning-column-right { + flex: 1; + min-width: 0; + } + + .planning-step-compact { + background: white; + border: 1px solid #dee2e6; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); + } + + .step-header-compact { + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + padding: 10px 12px; + border-bottom: 1px solid #dee2e6; + border-radius: 8px 8px 0 0; + display: flex; + align-items: center; + justify-content: space-between; + } + + .step-name { + font-weight: 600; + color: #495057; + font-size: 0.9em; + } + + .screenshot-display { + padding: 12px; + } + + .screenshot-item-compact { + text-align: center; + } + + .screenshot-item-compact .screenshot-image { + min-height: 200px; + padding: 10px 0; + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border-radius: 6px; + display: flex; + justify-content: center; + align-items: center; + } + + .screenshot-item-compact .screenshot-image img { + max-width: 100%; + max-height: 180px; + border-radius: 4px; + cursor: pointer; + transition: transform 0.2s; + object-fit: contain; + box-shadow: 0 2px 6px rgba(0,0,0,0.1); + } + + .screenshot-item-compact .screenshot-image img:hover { + transform: scale(1.02); + } + + .model-output-compact { + padding: 12px; + } + + .model-info, .tool-calls-info, .actions-info, .usage-info { + background: #f8f9fa; + border: 1px solid #e9ecef; + border-radius: 4px; + padding: 8px 10px; + margin: 6px 0; + font-size: 0.85em; + color: #495057; + } + + @media screen and (max-width: 768px) { + .planning-two-columns { + flex-direction: column; + gap: 15px; + } + } + + .raw-content { + margin-top: 10px; + } + + .raw-content pre { + background: #f1f3f4; + border: 1px solid #dadce0; + border-radius: 4px; + padding: 8px; + font-size: 0.8em; + max-height: 150px; + overflow-y: auto; + white-space: pre-wrap; + word-wrap: break-word; + } + + .step-screenshots { + margin-top: 10px; + } + + .action-details { + display: flex; + align-items: center; + gap: 10px; + } + + .action-details .action-name { + background: #6f42c1; + color: white; + padding: 2px 8px; + border-radius: 12px; + font-size: 0.8em; + font-weight: bold; + } + + .action-details .action-desc { + color: #6c757d; + font-style: italic; + font-size: 0.9em; + } + + + .sub-actions { margin-top: 15px; padding-left: 20px; @@ -1449,7 +1715,7 @@ const htmlTemplate = ` gap: 15px; } - .start-time { + .download-section { width: 100%; text-align: center; min-width: auto; @@ -1458,7 +1724,6 @@ const htmlTemplate = ` .download-buttons { justify-content: center; width: 100%; - max-width: 300px; } .download-btn { @@ -1489,14 +1754,56 @@ const htmlTemplate = ` } .step-header h3 { - font-size: 1.1em; - gap: 10px; + font-size: 1.2em; + gap: 15px; + flex-direction: column; + align-items: flex-start; + } + + .step-info-group { + min-width: auto; + width: 100%; + justify-content: space-between; + margin-left: 0; + margin-top: 8px; + } + + .step-status { + min-width: 60px; + } + + .step-duration { + min-width: 70px; + } + + .step-type-fixed { + min-width: 100px; } .step-number { - width: 25px; - height: 25px; - font-size: 0.8em; + width: 32px; + height: 32px; + font-size: 0.9em; + } + + .test-case h2 { + font-size: 1.3em; + padding: 15px 20px; + flex-direction: column; + align-items: flex-start; + gap: 10px; + } + + .case-info { + align-self: flex-end; + } + + .step-header { + padding: 20px 25px; + } + + .step-content { + padding: 25px 20px; } .action-header { @@ -1585,6 +1892,34 @@ const htmlTemplate = ` margin: 0; } } + + .action-content { + margin-top: 10px; + } + + .action-content strong { + color: #6f42c1; + display: block; + margin-bottom: 8px; + font-size: 0.95em; + } + + .action-output { + background: #f8f9fa; + border: 2px solid #6f42c1; + border-radius: 6px; + padding: 10px; + font-size: 0.85em; + max-height: 120px; + overflow-y: auto; + white-space: pre-wrap; + word-wrap: break-word; + color: #495057; + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + line-height: 1.4; + } + +
@@ -1593,22 +1928,21 @@ const htmlTemplate = `