Merge branch 'dev' into 'master'

优化 html report 截图展示,支持展示标注截图

See merge request iesqa/httprunner!110
This commit is contained in:
李隆
2025-06-29 16:23:19 +00:00
10 changed files with 280 additions and 146 deletions

2
go.mod
View File

@@ -7,6 +7,7 @@ toolchain go1.23.7
require ( require (
github.com/Masterminds/semver v1.5.0 github.com/Masterminds/semver v1.5.0
github.com/andybalholm/brotli v1.0.4 github.com/andybalholm/brotli v1.0.4
github.com/antchfx/xmlquery v1.4.4
github.com/bytedance/sonic v1.13.2 github.com/bytedance/sonic v1.13.2
github.com/charmbracelet/glamour v0.8.0 github.com/charmbracelet/glamour v0.8.0
github.com/charmbracelet/huh v0.3.0 github.com/charmbracelet/huh v0.3.0
@@ -47,7 +48,6 @@ require (
require ( require (
github.com/alecthomas/chroma/v2 v2.14.0 // indirect github.com/alecthomas/chroma/v2 v2.14.0 // indirect
github.com/antchfx/xmlquery v1.4.4 // indirect
github.com/antchfx/xpath v1.3.3 // indirect github.com/antchfx/xpath v1.3.3 // indirect
github.com/atotto/clipboard v0.1.4 // indirect github.com/atotto/clipboard v0.1.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect

View File

@@ -1 +1 @@
v5.0.0-250628 v5.0.0-250630

168
report.go
View File

@@ -1212,7 +1212,7 @@ const htmlTemplate = `<!DOCTYPE html>
.screenshot-item-compact .screenshot-image img { .screenshot-item-compact .screenshot-image img {
width: 100%; width: 100%;
height: auto; height: auto;
max-height: 400px; max-height: 500px;
border-radius: 4px; border-radius: 4px;
cursor: pointer; cursor: pointer;
transition: transform 0.2s; transition: transform 0.2s;
@@ -1232,6 +1232,113 @@ const htmlTemplate = `<!DOCTYPE html>
transform: scale(1.02); transform: scale(1.02);
} }
/* Horizontal scrolling screenshot styles */
.screenshot-horizontal-scroll {
    display: flex;
    gap: 0 !important;
    overflow-x: auto;
    overflow-y: hidden;
    padding: 8px;
    scroll-behavior: smooth;
    -webkit-overflow-scrolling: touch;
    background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
    border: 1px solid #dee2e6;
    border-radius: 6px;
    align-items: center;
    /* Centre the strip while it fits. Plain "center" on a scrollable flex row
       pushes overflowing items off the left edge where they cannot be scrolled
       to; "safe center" falls back to start alignment on overflow. Browsers
       without "safe" support ignore the second declaration and keep flex-start. */
    justify-content: flex-start;
    justify-content: safe center;
    line-height: 0;
    font-size: 0;
}
.screenshot-horizontal-scroll::-webkit-scrollbar {
height: 8px;
}
.screenshot-horizontal-scroll::-webkit-scrollbar-track {
background: #f1f1f1;
border-radius: 4px;
}
.screenshot-horizontal-scroll::-webkit-scrollbar-thumb {
background: #888;
border-radius: 4px;
}
.screenshot-horizontal-scroll::-webkit-scrollbar-thumb:hover {
background: #555;
}
.screenshot-item-horizontal {
flex: 0 0 auto;
min-width: 180px;
max-width: 280px;
text-align: center;
margin: 0 !important;
padding: 0 !important;
border: none !important;
outline: none;
line-height: 0;
}
.screenshot-item-horizontal .screenshot-image {
padding: 0;
margin: 0;
background: transparent;
border-radius: 0;
display: flex;
justify-content: center;
align-items: center;
position: relative;
overflow: hidden;
height: 350px;
border: none;
}
.screenshot-item-horizontal .screenshot-image img {
max-width: 100%;
max-height: 100%;
border-radius: 0;
cursor: pointer;
transition: transform 0.2s;
object-fit: contain;
box-shadow: none;
display: block;
margin: 0 !important;
padding: 0 !important;
border: none !important;
vertical-align: top;
float: left;
outline: none;
}
.screenshot-item-horizontal .screenshot-image img:hover {
transform: scale(1.05);
}
/* Direct inline screenshot styles */
.screenshot-inline {
max-height: 350px;
object-fit: contain;
cursor: pointer;
transition: transform 0.2s;
display: inline-block;
margin: 0 4px 0 0 !important;
padding: 0 !important;
border: none !important;
border-radius: 0 !important;
box-shadow: none !important;
vertical-align: top;
outline: none;
}
.screenshot-inline:last-child {
margin-right: 0 !important;
}
.screenshot-inline:hover {
transform: scale(1.05);
}
.actions-details { .actions-details {
padding: 12px; padding: 12px;
max-height: 300px; max-height: 300px;
@@ -1538,12 +1645,12 @@ const htmlTemplate = `<!DOCTYPE html>
} }
.screenshots-horizontal .screenshot-image { .screenshots-horizontal .screenshot-image {
min-height: 200px; min-height: 300px;
padding: 10px 0; padding: 10px 0;
} }
.screenshots-horizontal .screenshot-image img { .screenshots-horizontal .screenshot-image img {
max-height: 250px; max-height: 400px;
width: auto; width: auto;
} }
@@ -1592,7 +1699,7 @@ const htmlTemplate = `<!DOCTYPE html>
display: flex; display: flex;
justify-content: center; justify-content: center;
align-items: center; align-items: center;
min-height: 300px; min-height: 400px;
padding: 20px 0; padding: 20px 0;
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
border-radius: 8px; border-radius: 8px;
@@ -1601,7 +1708,7 @@ const htmlTemplate = `<!DOCTYPE html>
.screenshot-image img { .screenshot-image img {
max-width: 100%; max-width: 100%;
max-height: 400px; max-height: 600px;
border-radius: 6px; border-radius: 6px;
cursor: pointer; cursor: pointer;
transition: transform 0.2s; transition: transform 0.2s;
@@ -1614,12 +1721,12 @@ const htmlTemplate = `<!DOCTYPE html>
} }
.screenshot-item.small .screenshot-image { .screenshot-item.small .screenshot-image {
min-height: 250px; min-height: 300px;
padding: 15px 0; padding: 15px 0;
} }
.screenshot-item.small .screenshot-image img { .screenshot-item.small .screenshot-image img {
max-height: 200px; max-height: 350px;
} }
.validator-item { .validator-item {
@@ -2286,21 +2393,21 @@ const htmlTemplate = `<!DOCTYPE html>
} }
.screenshot-image { .screenshot-image {
min-height: 250px; min-height: 300px;
padding: 15px 0; padding: 15px 0;
} }
.screenshot-image img { .screenshot-image img {
max-height: 250px; max-height: 400px;
} }
.screenshot-item.small .screenshot-image { .screenshot-item.small .screenshot-image {
min-height: 200px; min-height: 250px;
padding: 10px 0; padding: 10px 0;
} }
.screenshot-item.small .screenshot-image img { .screenshot-item.small .screenshot-image img {
max-height: 150px; max-height: 300px;
} }
.log-header { .log-header {
@@ -2539,22 +2646,33 @@ const htmlTemplate = `<!DOCTYPE html>
<div class="planning-column-screenshot"> <div class="planning-column-screenshot">
<div class="planning-step-compact"> <div class="planning-step-compact">
<div class="step-header-compact"> <div class="step-header-compact">
<span class="step-name">📸 Take Screenshot</span> <span class="step-name">📸 ScreenShots</span>
<span class="duration">{{formatDuration $planning.ScreenshotElapsed}}</span> <span class="duration">{{formatDuration $planning.ScreenshotElapsed}}</span>
</div> </div>
{{if $planning.ScreenResult}} <div class="screenshot-display screenshot-horizontal-scroll">
<div class="screenshot-display"> {{if $planning.ScreenResult}}
{{$screenshot := $planning.ScreenResult}} {{if $planning.ScreenResult.ImagePath}}
{{$base64Image := encodeImageBase64 $screenshot.ImagePath}} {{$base64Image := encodeImageBase64 $planning.ScreenResult.ImagePath}}
{{if $base64Image}} {{if $base64Image}}
<div class="screenshot-item-compact"> <img src="data:image/jpeg;base64,{{$base64Image}}" alt="Planning Screenshot" onclick="openImageModal(this.src)" class="screenshot-inline" />
<div class="screenshot-image"> {{end}}
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Planning Screenshot" onclick="openImageModal(this.src)" /> {{end}}
</div> {{end}}
</div> {{if $planning.SubActions}}
{{range $subAction := $planning.SubActions}}
{{if $subAction.ScreenResults}}
{{range $subScreenshot := $subAction.ScreenResults}}
{{if $subScreenshot.ImagePath}}
{{$base64Image := encodeImageBase64 $subScreenshot.ImagePath}}
{{if $base64Image}}
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Sub-action Screenshot" onclick="openImageModal(this.src)" class="screenshot-inline" />
{{end}}
{{end}}
{{end}}
{{end}}
{{end}}
{{end}} {{end}}
</div> </div>
{{end}}
</div> </div>
</div> </div>
@@ -2837,13 +2955,13 @@ const htmlTemplate = `<!DOCTYPE html>
</div> </div>
{{end}} {{end}}
<!-- Screenshots --> <!-- ScreenShots -->
{{if $step.Attachments}} {{if $step.Attachments}}
{{$attachments := $step.Attachments}} {{$attachments := $step.Attachments}}
{{if eq (printf "%T" $attachments) "map[string]interface {}"}} {{if eq (printf "%T" $attachments) "map[string]interface {}"}}
{{if index $attachments "screen_results"}} {{if index $attachments "screen_results"}}
<div class="screenshots-section"> <div class="screenshots-section">
<h4>Screenshots</h4> <h4>Attachment ScreenShots</h4>
<div class="screenshots-horizontal"> <div class="screenshots-horizontal">
{{range $screenshot := index $attachments "screen_results"}} {{range $screenshot := index $attachments "screen_results"}}
{{$imagePath := ""}} {{$imagePath := ""}}

View File

@@ -622,9 +622,13 @@ func (ad *ADBDriver) tapByTextUsingHierarchy(hierarchy *Hierarchy, text string,
func (ud *ADBDriver) TapByXpath(xpath string, opts ...option.ActionOption) (err error) { func (ud *ADBDriver) TapByXpath(xpath string, opts ...option.ActionOption) (err error) {
source, err := ud.Source() source, err := ud.Source()
if err != nil {
log.Error().Err(err).Msg("failed to get source")
return err
}
doc, err := xmlquery.Parse(strings.NewReader(source)) doc, err := xmlquery.Parse(strings.NewReader(source))
if err != nil { if err != nil {
log.Error().Err(err).Str("serial", ud.Device.Serial()) log.Error().Err(err).Msg("failed to parse source")
return err return err
} }
targetNodes := xmlquery.Find(doc, xpath) targetNodes := xmlquery.Find(doc, xpath)
@@ -644,10 +648,12 @@ func (ud *ADBDriver) TapByXpath(xpath string, opts ...option.ActionOption) (err
centerX := float64(x1+x2) / 2 centerX := float64(x1+x2) / 2
centerY := float64(y1+y2) / 2 centerY := float64(y1+y2) / 2
log.Info().Str("serial", ud.Device.Serial()).Str("xpath", xpath).Str("bounds", bounds).Msg("find node by xpath success") log.Info().Str("xpath", xpath).Str("bounds", bounds).Msg("find node by xpath success")
return ud.TapAbsXY(centerX, centerY, opts...) return ud.TapAbsXY(centerX, centerY, opts...)
} }
return
log.Error().Str("xpath", xpath).Msg("failed to find node by xpath")
return errors.New("failed to find node by xpath")
} }
func (ad *ADBDriver) searchNodes(nodes []Layout, text string, opts ...option.ActionOption) []Bounds { func (ad *ADBDriver) searchNodes(nodes []Layout, text string, opts ...option.ActionOption) []Bounds {
@@ -756,27 +762,44 @@ func (ad *ADBDriver) GetSession() *DriverSession {
} }
func (ad *ADBDriver) ForegroundInfo() (app types.AppInfo, err error) { func (ad *ADBDriver) ForegroundInfo() (app types.AppInfo, err error) {
packageInfo, err := ad.runShellCommand("CLASSPATH=/data/local/tmp/evalite", "app_process", "/", "com.bytedance.iesqa.eval_process.PackageService", "2>/dev/null") // Get foreground app package info using evalite service
packageInfo, err := ad.getForegroundPackageInfo()
if err != nil { if err != nil {
packageInfo, err = ad.runShellCommand("CLASSPATH=/data/local/tmp/evalite", "app_process", "/", "com.bytedance.iesqa.eval_process.PackageService", "2>/dev/null") log.Error().Err(err).Msg("failed to get foreground app info")
if err != nil { return app, err
log.Error().Err(err).Str("serial", ad.Device.Serial()).Msg("failed to get foreground app")
return app, err
}
} }
log.Info().Str("serial", ad.Device.Serial()).Msg("foreground app output: " + packageInfo)
if strings.TrimSpace(packageInfo) == "" { // Parse package info JSON
log.Error().Str("serial", ad.Device.Serial()).Msg("foreground app output is empty") packageInfo = strings.TrimSpace(packageInfo)
return app, errors.New("foreground app output is empty") if packageInfo == "" {
err = errors.New("foreground app output is empty")
log.Error().Err(err).Msg("get foreground app info failed")
return app, err
} }
err = json.Unmarshal([]byte(strings.TrimSpace(packageInfo)), &app) if err = json.Unmarshal([]byte(packageInfo), &app); err != nil {
if err != nil { log.Error().Err(err).Str("packageInfo", packageInfo).Msg("failed to parse package info")
log.Error().Err(err).Str("serial", ad.Device.Serial()).Str("packageInfo", packageInfo).Msg("failed to parse package info")
return app, err return app, err
} }
return app, nil return app, nil
} }
// getForegroundPackageInfo runs the evalite PackageService on the device via
// app_process and returns the command's raw output.
//
// The shell command is attempted up to maxAttempts times. The first successful
// output is returned as-is (callers are expected to trim and parse it). If every
// attempt fails, the error from the last attempt is returned with an empty string.
func (ad *ADBDriver) getForegroundPackageInfo() (string, error) {
	const maxAttempts = 2

	var lastErr error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		packageInfo, err := ad.runShellCommand("CLASSPATH=/data/local/tmp/evalite",
			"app_process", "/", "com.bytedance.iesqa.eval_process.PackageService", "2>/dev/null")
		if err == nil {
			return packageInfo, nil
		}
		lastErr = err

		// Only announce a retry when one will actually happen; the final
		// failure is reported to (and logged by) the caller via lastErr.
		if attempt < maxAttempts {
			log.Warn().Err(err).Int("attempt", attempt).Msg("failed to get foreground package info, retrying")
		}
	}
	return "", lastErr
}
func (ad *ADBDriver) SetIme(imeRegx string) error { func (ad *ADBDriver) SetIme(imeRegx string) error {
log.Info().Str("imeRegx", imeRegx).Msg("ADBDriver.SetIme") log.Info().Str("imeRegx", imeRegx).Msg("ADBDriver.SetIme")
imeList := ad.ListIme() imeList := ad.ListIme()

View File

@@ -6,8 +6,11 @@ import (
"strings" "strings"
"sync" "sync"
"github.com/httprunner/httprunner/v5/uixt/option" "github.com/pkg/errors"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/uixt/option"
) )
// CacheManager provides a generic cache management interface // CacheManager provides a generic cache management interface
@@ -153,7 +156,7 @@ func (cm *CacheManager[T]) GetOrCreate(key string, factory func() (T, map[string
item, metadata, err := factory() item, metadata, err := factory()
if err != nil { if err != nil {
var zero T var zero T
return zero, fmt.Errorf("failed to create item: %w", err) return zero, err
} }
// Store in cache // Store in cache
@@ -271,7 +274,7 @@ func createXTDriverWithConfig(config DriverCacheConfig) (*XTDriver, error) {
browserOpts := config.DeviceOpts.ToBrowserOptions().Options() browserOpts := config.DeviceOpts.ToBrowserOptions().Options()
device, err = NewBrowserDevice(browserOpts...) device, err = NewBrowserDevice(browserOpts...)
default: default:
return nil, fmt.Errorf("unsupported platform: %s", platform) return nil, errors.Wrapf(code.InvalidParamError, "unsupported platform: %s", platform)
} }
} else { } else {
// Use default options, let NewXXDevice handle serial (empty or specified) // Use default options, let NewXXDevice handle serial (empty or specified)
@@ -301,17 +304,17 @@ func createXTDriverWithConfig(config DriverCacheConfig) (*XTDriver, error) {
device, err = NewBrowserDevice() device, err = NewBrowserDevice()
} }
default: default:
return nil, fmt.Errorf("unsupported platform: %s", platform) return nil, errors.Wrapf(code.InvalidParamError, "unsupported platform: %s", platform)
} }
} }
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create device: %w", err) return nil, err
} }
// Create driver // Create driver
driver, err := device.NewDriver() driver, err := device.NewDriver()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create driver: %w", err) return nil, errors.Wrap(err, "failed to create driver")
} }
// Create XTDriver with AI options // Create XTDriver with AI options
@@ -326,7 +329,7 @@ func createXTDriverWithConfig(config DriverCacheConfig) (*XTDriver, error) {
driverExt, err := NewXTDriver(driver, aiOpts...) driverExt, err := NewXTDriver(driver, aiOpts...)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create XTDriver: %w", err) return nil, errors.Wrap(err, "failed to create XTDriver")
} }
return driverExt, nil return driverExt, nil
} }

View File

@@ -10,7 +10,6 @@ import (
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/code" "github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/internal/builtin"
"github.com/httprunner/httprunner/v5/internal/json" "github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/ai" "github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/option"
@@ -115,7 +114,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}() }()
// Execute the tool call // Execute the tool call
if err := dExt.invokeToolCall(ctx, toolCall); err != nil { if err := dExt.invokeToolCall(ctx, toolCall, opts...); err != nil {
subActionResult.Error = err subActionResult.Error = err
return err return err
} }
@@ -136,6 +135,9 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
if options.MaxRetryTimes > 0 && attempt > options.MaxRetryTimes { if options.MaxRetryTimes > 0 && attempt > options.MaxRetryTimes {
return allPlannings, errors.New("reached max retry times") return allPlannings, errors.New("reached max retry times")
} }
// wait 3 seconds for tool calls to complete
time.Sleep(3 * time.Second)
} }
} }
@@ -143,12 +145,11 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) { func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("prompt", prompt).Msg("performing AI action") log.Info().Str("prompt", prompt).Msg("performing AI action")
// Step 1: Take screenshot and measure time // Step 1: Take screenshot and convert to base64
screenshotStartTime := time.Now() screenResult, err := dExt.GetScreenResult(
screenResult, err := dExt.createScreenshotWithSession( option.WithScreenShotFileName("ai_action"),
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")), option.WithScreenShotBase64(true),
) )
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -160,7 +161,7 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
aiExecutionResult := &AIExecutionResult{ aiExecutionResult := &AIExecutionResult{
Type: "action", Type: "action",
ModelCallElapsed: modelCallElapsed, ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenshotElapsed, ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath, ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution, Resolution: &screenResult.Resolution,
PlanningResult: &planningResult.PlanningResult, PlanningResult: &planningResult.PlanningResult,
@@ -173,7 +174,7 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
// Step 3: Execute tool calls // Step 3: Execute tool calls
for _, toolCall := range planningResult.ToolCalls { for _, toolCall := range planningResult.ToolCalls {
err = dExt.invokeToolCall(ctx, toolCall) err = dExt.invokeToolCall(ctx, toolCall, opts...)
if err != nil { if err != nil {
aiExecutionResult.Error = err.Error() aiExecutionResult.Error = err.Error()
return aiExecutionResult, errors.Wrap(err, "invoke tool call failed") return aiExecutionResult, errors.Wrap(err, "invoke tool call failed")
@@ -193,13 +194,11 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
options := option.NewActionOptions(opts...) options := option.NewActionOptions(opts...)
resetHistory := options.ResetHistory resetHistory := options.ResetHistory
// Step 1: Take screenshot // Step 1: Take screenshot and convert to base64
screenshotStartTime := time.Now() screenResult, err := dExt.GetScreenResult(
// Use GetScreenResult to handle screenshot capture, save, and session tracking option.WithScreenShotFileName("ai_planning"),
screenResult, err := dExt.createScreenshotWithSession( option.WithScreenShotBase64(true),
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
) )
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -208,12 +207,6 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
// The planning screenshot is already stored in planningResult.ScreenResult // The planning screenshot is already stored in planningResult.ScreenResult
dExt.GetSession().GetData(true) // reset session data to exclude planning screenshot from sub-actions dExt.GetSession().GetData(true) // reset session data to exclude planning screenshot from sub-actions
// get screen shot buffer base64 and size
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
if err != nil {
return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
}
// Step 2: Call model // Step 2: Call model
modelCallStartTime := time.Now() modelCallStartTime := time.Now()
planningOpts := &ai.PlanningOptions{ planningOpts := &ai.PlanningOptions{
@@ -224,12 +217,12 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
{ {
Type: schema.ChatMessagePartTypeImageURL, Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{ ImageURL: &schema.ChatMessageImageURL{
URL: screenShotBase64, URL: screenResult.Base64,
}, },
}, },
}, },
}, },
Size: size, Size: screenResult.Resolution,
ResetHistory: resetHistory, ResetHistory: resetHistory,
} }
@@ -250,7 +243,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
planningResult := &PlanningExecutionResult{ planningResult := &PlanningExecutionResult{
PlanningResult: *result, // Inherit all fields from ai.PlanningResult PlanningResult: *result, // Inherit all fields from ai.PlanningResult
// Planning process timing and metadata // Planning process timing and metadata
ScreenshotElapsed: screenshotElapsed, ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath, ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution, Resolution: &screenResult.Resolution,
ScreenResult: screenResult, ScreenResult: screenResult,
@@ -286,7 +279,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo
} }
// invokeToolCall invokes the tool call // invokeToolCall invokes the tool call
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall) error { func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
// Parse arguments // Parse arguments
arguments := make(map[string]interface{}) arguments := make(map[string]interface{})
err := json.Unmarshal([]byte(toolCall.Function.Arguments), &arguments) err := json.Unmarshal([]byte(toolCall.Function.Arguments), &arguments)
@@ -294,6 +287,10 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
return err return err
} }
// Merge StartToGoal options into tool call arguments
// This ensures options like PreMarkOperation are passed to specific tool implementations
extractActionOptionsToArguments(opts, arguments)
// Execute the action // Execute the action
req := mcp.CallToolRequest{ req := mcp.CallToolRequest{
Params: struct { Params: struct {
@@ -370,17 +367,11 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
return nil, errors.New("LLM service is not initialized") return nil, errors.New("LLM service is not initialized")
} }
// Step 1: Take screenshot and measure time // Step 1: Take screenshot and convert to base64
screenshotStartTime := time.Now() screenResult, err := dExt.GetScreenResult(
screenResult, err := dExt.createScreenshotWithSession( option.WithScreenShotFileName("ai_query"),
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")), option.WithScreenShotBase64(true),
) )
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil {
return nil, err
}
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -394,8 +385,8 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
// execute query // execute query
queryOpts := &ai.QueryOptions{ queryOpts := &ai.QueryOptions{
Query: text, Query: text,
Screenshot: screenShotBase64, Screenshot: screenResult.Base64,
Size: size, Size: screenResult.Resolution,
OutputSchema: actionOptions.OutputSchema, OutputSchema: actionOptions.OutputSchema,
} }
result, err := dExt.LLMService.Query(context.Background(), queryOpts) result, err := dExt.LLMService.Query(context.Background(), queryOpts)
@@ -408,7 +399,7 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
aiResult := &AIExecutionResult{ aiResult := &AIExecutionResult{
Type: "query", Type: "query",
ModelCallElapsed: modelCallElapsed, // model call timing ModelCallElapsed: modelCallElapsed, // model call timing
ScreenshotElapsed: screenshotElapsed, // screenshot timing ScreenshotElapsed: screenResult.Elapsed, // screenshot timing
ImagePath: screenResult.ImagePath, // screenshot path ImagePath: screenResult.ImagePath, // screenshot path
Resolution: &screenResult.Resolution, // screen resolution Resolution: &screenResult.Resolution, // screen resolution
QueryResult: result, // query-specific result QueryResult: result, // query-specific result
@@ -422,35 +413,28 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*
return nil, errors.New("LLM service is not initialized") return nil, errors.New("LLM service is not initialized")
} }
// Step 1: Take screenshot and measure time // Step 1: Take screenshot and convert to base64
screenshotStartTime := time.Now() screenResult, err := dExt.GetScreenResult(
screenResult, err := dExt.createScreenshotWithSession( option.WithScreenShotFileName("ai_assert"),
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")), option.WithScreenShotBase64(true),
) )
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil { if err != nil {
return nil, err return nil, err
} }
assertResult := &AIExecutionResult{ assertResult := &AIExecutionResult{
Type: "assert", Type: "assert",
ScreenshotElapsed: screenshotElapsed, ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath, ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution, Resolution: &screenResult.Resolution,
} }
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
if err != nil {
assertResult.Error = err.Error()
return assertResult, err
}
// Step 2: Call model and measure time // Step 2: Call model and measure time
modelCallStartTime := time.Now() modelCallStartTime := time.Now()
assertOpts := &ai.AssertOptions{ assertOpts := &ai.AssertOptions{
Assertion: assertion, Assertion: assertion,
Screenshot: screenShotBase64, Screenshot: screenResult.Base64,
Size: size, Size: screenResult.Resolution,
} }
result, err := dExt.LLMService.Assert(context.Background(), assertOpts) result, err := dExt.LLMService.Assert(context.Background(), assertOpts)
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds() assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()

View File

@@ -27,6 +27,7 @@ import (
"github.com/httprunner/httprunner/v5/uixt/types" "github.com/httprunner/httprunner/v5/uixt/types"
) )
// ScreenResult represents the result of taking a screenshot, including image path, recognition results, and metadata
type ScreenResult struct { type ScreenResult struct {
bufSource *bytes.Buffer // raw image buffer bytes bufSource *bytes.Buffer // raw image buffer bytes
ImagePath string `json:"image_path"` // image file path ImagePath string `json:"image_path"` // image file path
@@ -36,6 +37,8 @@ type ScreenResult struct {
Icons ai.UIResultMap `json:"icons"` // CV 识别的图标 Icons ai.UIResultMap `json:"icons"` // CV 识别的图标
Tags []string `json:"tags"` // tags for image, e.g. ["feed", "ad", "live"] Tags []string `json:"tags"` // tags for image, e.g. ["feed", "ad", "live"]
Popup *PopupInfo `json:"popup,omitempty"` Popup *PopupInfo `json:"popup,omitempty"`
Elapsed int64 `json:"elapsed_ms,omitempty"` // screenshot elapsed time in milliseconds
Base64 string `json:"-"` // base64 encoded screenshot
} }
func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts { func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
@@ -49,26 +52,11 @@ func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
}) })
} }
// GetScreenshotBase64WithSize takes a screenshot, returns the compressed image buffer in base64 format and screen size // GetScreenResult takes a screenshot and returns the ScreenResult with metadata
// Also saves the screenshot to session for report display func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
func (dExt *XTDriver) GetScreenshotBase64WithSize() (compressedBufBase64 string, size types.Size, err error) { // Take screenshot and measure time
// Create screenshot with session saving, minimal CV processing for AI operations screenshotStartTime := time.Now()
screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName("screenshot_base64"),
)
if err != nil {
return "", types.Size{}, err
}
// convert buffer to base64 string
screenShotBase64 := "data:image/jpeg;base64," +
base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
return screenShotBase64, screenResult.Resolution, nil
}
// createScreenshotWithSession creates a screenshot with optional OCR processing and saves to session
func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
// get compressed screenshot buffer // get compressed screenshot buffer
compressBufSource, err := getScreenShotBuffer(dExt.IDriver) compressBufSource, err := getScreenShotBuffer(dExt.IDriver)
if err != nil { if err != nil {
@@ -146,6 +134,13 @@ func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (
session := dExt.GetSession() session := dExt.GetSession()
session.screenResults = append(session.screenResults, screenResult) session.screenResults = append(session.screenResults, screenResult)
// Convert screenshot buffer to base64 string
if screenshotOptions.ScreenShotWithBase64 {
screenResult.Base64 = "data:image/jpeg;base64," +
base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
}
screenResult.Elapsed = time.Since(screenshotStartTime).Milliseconds()
logger.Msg("log screenshot") logger.Msg("log screenshot")
return screenResult, nil return screenResult, nil
} }
@@ -161,13 +156,7 @@ func needsCVProcessing(options *option.ActionOptions) bool {
options.ScreenShotWithOCRCluster != "" options.ScreenShotWithOCRCluster != ""
} }
// GetScreenResult takes a screenshot, returns the image recognition result // GetScreenTexts takes a screenshot, returns the OCR recognition result
func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
// Enable OCR processing for GetScreenResult
opts = append(opts, option.WithScreenShotOCR(true))
return dExt.createScreenshotWithSession(opts...)
}
func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) { func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) {
options := option.NewActionOptions(opts...) options := option.NewActionOptions(opts...)
if options.ScreenShotFileName == "" { if options.ScreenShotFileName == "" {
@@ -467,17 +456,18 @@ func MarkUIOperation(driver IDriver, actionType option.ActionName, actionCoordin
timestamp := builtin.GenNameWithTimestamp("%d") timestamp := builtin.GenNameWithTimestamp("%d")
imagePath := filepath.Join( imagePath := filepath.Join(
config.GetConfig().ScreenShotsPath(), config.GetConfig().ScreenShotsPath(),
fmt.Sprintf("action_%s_pre_%s.png", timestamp, actionType), fmt.Sprintf("%s_pre_mark_%s.png", timestamp, actionType),
) )
if actionType == option.ACTION_TapAbsXY || actionType == option.ACTION_DoubleTapXY { switch actionType {
case option.ACTION_TapAbsXY, option.ACTION_DoubleTapXY:
if len(actionCoordinates) != 2 { if len(actionCoordinates) != 2 {
return fmt.Errorf("invalid tap action coordinates: %v", actionCoordinates) return fmt.Errorf("invalid tap action coordinates: %v", actionCoordinates)
} }
x, y := actionCoordinates[0], actionCoordinates[1] x, y := actionCoordinates[0], actionCoordinates[1]
point := image.Point{X: int(x), Y: int(y)} point := image.Point{X: int(x), Y: int(y)}
err = SaveImageWithCircleMarker(compressedBufSource, point, imagePath) err = SaveImageWithCircleMarker(compressedBufSource, point, imagePath)
} else if actionType == option.ACTION_SwipeDirection || actionType == option.ACTION_SwipeCoordinate || actionType == option.ACTION_Drag { case option.ACTION_SwipeDirection, option.ACTION_SwipeCoordinate, option.ACTION_Drag:
if len(actionCoordinates) != 4 { if len(actionCoordinates) != 4 {
return fmt.Errorf("invalid swipe action coordinates: %v", actionCoordinates) return fmt.Errorf("invalid swipe action coordinates: %v", actionCoordinates)
} }

View File

@@ -6,16 +6,17 @@ import (
"path/filepath" "path/filepath"
"time" "time"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/internal/builtin" "github.com/httprunner/httprunner/v5/internal/builtin"
"github.com/httprunner/httprunner/v5/internal/config" "github.com/httprunner/httprunner/v5/internal/config"
"github.com/httprunner/httprunner/v5/uixt/ai" "github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option" "github.com/httprunner/httprunner/v5/uixt/option"
"github.com/rs/zerolog/log"
) )
func preHandler_TapAbsXY(driver IDriver, options *option.ActionOptions, rawX, rawY float64) ( func preHandler_TapAbsXY(driver IDriver, options *option.ActionOptions, rawX, rawY float64) (
x, y float64, err error) { x, y float64, err error,
) {
// Call MCP action tool if anti-risk is enabled // Call MCP action tool if anti-risk is enabled
if options.AntiRisk { if options.AntiRisk {
arguments := getAntiRisk_SetTouchInfoList_Arguments(driver, []ai.PointF{ arguments := getAntiRisk_SetTouchInfoList_Arguments(driver, []ai.PointF{
@@ -40,8 +41,8 @@ func preHandler_TapAbsXY(driver IDriver, options *option.ActionOptions, rawX, ra
} }
func preHandler_DoubleTap(driver IDriver, options *option.ActionOptions, rawX, rawY float64) ( func preHandler_DoubleTap(driver IDriver, options *option.ActionOptions, rawX, rawY float64) (
x, y float64, err error) { x, y float64, err error,
) {
x, y, err = convertToAbsolutePoint(driver, rawX, rawY) x, y, err = convertToAbsolutePoint(driver, rawX, rawY)
if err != nil { if err != nil {
return 0, 0, err return 0, 0, err
@@ -60,8 +61,8 @@ func preHandler_DoubleTap(driver IDriver, options *option.ActionOptions, rawX, r
} }
func preHandler_Drag(driver IDriver, options *option.ActionOptions, rawFomX, rawFromY, rawToX, rawToY float64) ( func preHandler_Drag(driver IDriver, options *option.ActionOptions, rawFomX, rawFromY, rawToX, rawToY float64) (
fromX, fromY, toX, toY float64, err error) { fromX, fromY, toX, toY float64, err error,
) {
fromX, fromY, toX, toY, err = convertToAbsoluteCoordinates(driver, rawFomX, rawFromY, rawToX, rawToY) fromX, fromY, toX, toY, err = convertToAbsoluteCoordinates(driver, rawFomX, rawFromY, rawToX, rawToY)
if err != nil { if err != nil {
return 0, 0, 0, 0, err return 0, 0, 0, 0, err
@@ -92,8 +93,8 @@ func preHandler_Drag(driver IDriver, options *option.ActionOptions, rawFomX, raw
func preHandler_Swipe(driver IDriver, actionType option.ActionName, func preHandler_Swipe(driver IDriver, actionType option.ActionName,
options *option.ActionOptions, rawFomX, rawFromY, rawToX, rawToY float64) ( options *option.ActionOptions, rawFomX, rawFromY, rawToX, rawToY float64) (
fromX, fromY, toX, toY float64, err error) { fromX, fromY, toX, toY float64, err error,
) {
fromX, fromY, toX, toY, err = convertToAbsoluteCoordinates(driver, rawFomX, rawFromY, rawToX, rawToY) fromX, fromY, toX, toY, err = convertToAbsoluteCoordinates(driver, rawFomX, rawFromY, rawToX, rawToY)
if err != nil { if err != nil {
return 0, 0, 0, 0, err return 0, 0, 0, 0, err
@@ -142,7 +143,7 @@ func postHandler(driver IDriver, actionType option.ActionName, options *option.A
timestamp := builtin.GenNameWithTimestamp("%d") timestamp := builtin.GenNameWithTimestamp("%d")
imagePath := filepath.Join( imagePath := filepath.Join(
config.GetConfig().ScreenShotsPath(), config.GetConfig().ScreenShotsPath(),
fmt.Sprintf("action_%s_post_%s.png", timestamp, actionType), fmt.Sprintf("%s_post_mark_%s.png", timestamp, actionType),
) )
go func() { go func() {
@@ -157,7 +158,8 @@ func postHandler(driver IDriver, actionType option.ActionName, options *option.A
// callMCPActionTool calls MCP tool for the given action // callMCPActionTool calls MCP tool for the given action
func callMCPActionTool(driver IDriver, func callMCPActionTool(driver IDriver,
serverName, actionType string, arguments map[string]any) { serverName, actionType string, arguments map[string]any,
) {
// Get XTDriver from cache // Get XTDriver from cache
dExt := getXTDriverFromCache(driver) dExt := getXTDriverFromCache(driver)
if dExt == nil { if dExt == nil {

View File

@@ -4,10 +4,11 @@ import (
"context" "context"
"fmt" "fmt"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server" "github.com/mark3labs/mcp-go/server"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/uixt/option"
) )
// ToolScreenShot implements the screenshot tool call. // ToolScreenShot implements the screenshot tool call.
@@ -34,14 +35,17 @@ func (t *ToolScreenShot) Implement() server.ToolHandlerFunc {
if err != nil { if err != nil {
return nil, err return nil, err
} }
bufferBase64, _, err := driverExt.GetScreenshotBase64WithSize() screenResult, err := driverExt.GetScreenResult(
option.WithScreenShotFileName("tool_screenshot"),
option.WithScreenShotBase64(true),
)
if err != nil { if err != nil {
log.Error().Err(err).Msg("ScreenShot failed") log.Error().Err(err).Msg("ScreenShot failed")
return mcp.NewToolResultError(fmt.Sprintf("Failed to take screenshot: %v", err)), nil return mcp.NewToolResultError(fmt.Sprintf("Failed to take screenshot: %v", err)), nil
} }
log.Debug().Int("imageBytes", len(bufferBase64)).Msg("take screenshot success") log.Debug().Int("imageBytes", len(screenResult.Base64)).Msg("take screenshot success")
return mcp.NewToolResultImage("screenshot", bufferBase64, "image/jpeg"), nil return mcp.NewToolResultImage("screenshot", screenResult.Base64, "image/jpeg"), nil
} }
} }

View File

@@ -16,6 +16,7 @@ type ScreenShotOptions struct {
ScreenShotWithUpload bool `json:"screenshot_with_upload,omitempty" yaml:"screenshot_with_upload,omitempty"` ScreenShotWithUpload bool `json:"screenshot_with_upload,omitempty" yaml:"screenshot_with_upload,omitempty"`
ScreenShotWithLiveType bool `json:"screenshot_with_live_type,omitempty" yaml:"screenshot_with_live_type,omitempty"` ScreenShotWithLiveType bool `json:"screenshot_with_live_type,omitempty" yaml:"screenshot_with_live_type,omitempty"`
ScreenShotWithLivePopularity bool `json:"screenshot_with_live_popularity,omitempty" yaml:"screenshot_with_live_popularity,omitempty"` ScreenShotWithLivePopularity bool `json:"screenshot_with_live_popularity,omitempty" yaml:"screenshot_with_live_popularity,omitempty"`
ScreenShotWithBase64 bool `json:"screenshot_with_base64,omitempty" yaml:"screenshot_with_base64,omitempty"`
ScreenShotWithUITypes []string `json:"screenshot_with_ui_types,omitempty" yaml:"screenshot_with_ui_types,omitempty"` ScreenShotWithUITypes []string `json:"screenshot_with_ui_types,omitempty" yaml:"screenshot_with_ui_types,omitempty"`
ScreenShotWithClosePopups bool `json:"screenshot_with_close_popups,omitempty" yaml:"screenshot_with_close_popups,omitempty"` ScreenShotWithClosePopups bool `json:"screenshot_with_close_popups,omitempty" yaml:"screenshot_with_close_popups,omitempty"`
ScreenShotWithOCRCluster string `json:"screenshot_with_ocr_cluster,omitempty" yaml:"screenshot_with_ocr_cluster,omitempty"` ScreenShotWithOCRCluster string `json:"screenshot_with_ocr_cluster,omitempty" yaml:"screenshot_with_ocr_cluster,omitempty"`
@@ -53,6 +54,9 @@ func (o *ScreenShotOptions) GetScreenShotOptions() []ActionOption {
if o.ScreenShotFileName != "" { if o.ScreenShotFileName != "" {
options = append(options, WithScreenShotFileName(o.ScreenShotFileName)) options = append(options, WithScreenShotFileName(o.ScreenShotFileName))
} }
if o.ScreenShotWithBase64 {
options = append(options, WithScreenShotBase64(true))
}
return options return options
} }
@@ -129,6 +133,12 @@ func WithScreenShotFileName(fileName string) ActionOption {
} }
} }
// WithScreenShotBase64 returns an ActionOption that, when applied, stores the
// given flag in the ScreenShotWithBase64 field of the target ActionOptions.
func WithScreenShotBase64(base64 bool) ActionOption {
	setBase64 := func(target *ActionOptions) {
		target.ScreenShotWithBase64 = base64
	}
	return setBase64
}
type ScreenRecordOptions struct { type ScreenRecordOptions struct {
ScreenRecordDuration float64 `json:"screenrecord_duration,omitempty" yaml:"screenrecord_duration,omitempty"` ScreenRecordDuration float64 `json:"screenrecord_duration,omitempty" yaml:"screenrecord_duration,omitempty"`
ScreenRecordWithAudio bool `json:"screenrecord_with_audio,omitempty" yaml:"screenrecord_with_audio,omitempty"` ScreenRecordWithAudio bool `json:"screenrecord_with_audio,omitempty" yaml:"screenrecord_with_audio,omitempty"`