Merge branch 'dev' into 'master'

优化 html report 截图展示,支持展示标注截图

See merge request iesqa/httprunner!110
This commit is contained in:
李隆
2025-06-29 16:23:19 +00:00
10 changed files with 280 additions and 146 deletions

2
go.mod
View File

@@ -7,6 +7,7 @@ toolchain go1.23.7
require (
github.com/Masterminds/semver v1.5.0
github.com/andybalholm/brotli v1.0.4
github.com/antchfx/xmlquery v1.4.4
github.com/bytedance/sonic v1.13.2
github.com/charmbracelet/glamour v0.8.0
github.com/charmbracelet/huh v0.3.0
@@ -47,7 +48,6 @@ require (
require (
github.com/alecthomas/chroma/v2 v2.14.0 // indirect
github.com/antchfx/xmlquery v1.4.4 // indirect
github.com/antchfx/xpath v1.3.3 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect

View File

@@ -1 +1 @@
v5.0.0-250628
v5.0.0-250630

168
report.go
View File

@@ -1212,7 +1212,7 @@ const htmlTemplate = `<!DOCTYPE html>
.screenshot-item-compact .screenshot-image img {
width: 100%;
height: auto;
max-height: 400px;
max-height: 500px;
border-radius: 4px;
cursor: pointer;
transition: transform 0.2s;
@@ -1232,6 +1232,113 @@ const htmlTemplate = `<!DOCTYPE html>
transform: scale(1.02);
}
/* Horizontal scrolling screenshot styles */
.screenshot-horizontal-scroll {
display: flex;
gap: 0 !important;
overflow-x: auto;
overflow-y: hidden;
padding: 8px;
scroll-behavior: smooth;
-webkit-overflow-scrolling: touch;
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
border: 1px solid #dee2e6;
border-radius: 6px;
align-items: center;
justify-content: center;
line-height: 0;
font-size: 0;
}
.screenshot-horizontal-scroll::-webkit-scrollbar {
height: 8px;
}
.screenshot-horizontal-scroll::-webkit-scrollbar-track {
background: #f1f1f1;
border-radius: 4px;
}
.screenshot-horizontal-scroll::-webkit-scrollbar-thumb {
background: #888;
border-radius: 4px;
}
.screenshot-horizontal-scroll::-webkit-scrollbar-thumb:hover {
background: #555;
}
.screenshot-item-horizontal {
flex: 0 0 auto;
min-width: 180px;
max-width: 280px;
text-align: center;
margin: 0 !important;
padding: 0 !important;
border: none !important;
outline: none;
line-height: 0;
}
.screenshot-item-horizontal .screenshot-image {
padding: 0;
margin: 0;
background: transparent;
border-radius: 0;
display: flex;
justify-content: center;
align-items: center;
position: relative;
overflow: hidden;
height: 350px;
border: none;
}
.screenshot-item-horizontal .screenshot-image img {
max-width: 100%;
max-height: 100%;
border-radius: 0;
cursor: pointer;
transition: transform 0.2s;
object-fit: contain;
box-shadow: none;
display: block;
margin: 0 !important;
padding: 0 !important;
border: none !important;
vertical-align: top;
float: left;
outline: none;
}
.screenshot-item-horizontal .screenshot-image img:hover {
transform: scale(1.05);
}
/* Direct inline screenshot styles */
.screenshot-inline {
max-height: 350px;
object-fit: contain;
cursor: pointer;
transition: transform 0.2s;
display: inline-block;
margin: 0 4px 0 0 !important;
padding: 0 !important;
border: none !important;
border-radius: 0 !important;
box-shadow: none !important;
vertical-align: top;
outline: none;
}
.screenshot-inline:last-child {
margin-right: 0 !important;
}
.screenshot-inline:hover {
transform: scale(1.05);
}
.actions-details {
padding: 12px;
max-height: 300px;
@@ -1538,12 +1645,12 @@ const htmlTemplate = `<!DOCTYPE html>
}
.screenshots-horizontal .screenshot-image {
min-height: 200px;
min-height: 300px;
padding: 10px 0;
}
.screenshots-horizontal .screenshot-image img {
max-height: 250px;
max-height: 400px;
width: auto;
}
@@ -1592,7 +1699,7 @@ const htmlTemplate = `<!DOCTYPE html>
display: flex;
justify-content: center;
align-items: center;
min-height: 300px;
min-height: 400px;
padding: 20px 0;
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
border-radius: 8px;
@@ -1601,7 +1708,7 @@ const htmlTemplate = `<!DOCTYPE html>
.screenshot-image img {
max-width: 100%;
max-height: 400px;
max-height: 600px;
border-radius: 6px;
cursor: pointer;
transition: transform 0.2s;
@@ -1614,12 +1721,12 @@ const htmlTemplate = `<!DOCTYPE html>
}
.screenshot-item.small .screenshot-image {
min-height: 250px;
min-height: 300px;
padding: 15px 0;
}
.screenshot-item.small .screenshot-image img {
max-height: 200px;
max-height: 350px;
}
.validator-item {
@@ -2286,21 +2393,21 @@ const htmlTemplate = `<!DOCTYPE html>
}
.screenshot-image {
min-height: 250px;
min-height: 300px;
padding: 15px 0;
}
.screenshot-image img {
max-height: 250px;
max-height: 400px;
}
.screenshot-item.small .screenshot-image {
min-height: 200px;
min-height: 250px;
padding: 10px 0;
}
.screenshot-item.small .screenshot-image img {
max-height: 150px;
max-height: 300px;
}
.log-header {
@@ -2539,22 +2646,33 @@ const htmlTemplate = `<!DOCTYPE html>
<div class="planning-column-screenshot">
<div class="planning-step-compact">
<div class="step-header-compact">
<span class="step-name">📸 Take Screenshot</span>
<span class="step-name">📸 ScreenShots</span>
<span class="duration">{{formatDuration $planning.ScreenshotElapsed}}</span>
</div>
{{if $planning.ScreenResult}}
<div class="screenshot-display">
{{$screenshot := $planning.ScreenResult}}
{{$base64Image := encodeImageBase64 $screenshot.ImagePath}}
{{if $base64Image}}
<div class="screenshot-item-compact">
<div class="screenshot-image">
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Planning Screenshot" onclick="openImageModal(this.src)" />
</div>
</div>
<div class="screenshot-display screenshot-horizontal-scroll">
{{if $planning.ScreenResult}}
{{if $planning.ScreenResult.ImagePath}}
{{$base64Image := encodeImageBase64 $planning.ScreenResult.ImagePath}}
{{if $base64Image}}
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Planning Screenshot" onclick="openImageModal(this.src)" class="screenshot-inline" />
{{end}}
{{end}}
{{end}}
{{if $planning.SubActions}}
{{range $subAction := $planning.SubActions}}
{{if $subAction.ScreenResults}}
{{range $subScreenshot := $subAction.ScreenResults}}
{{if $subScreenshot.ImagePath}}
{{$base64Image := encodeImageBase64 $subScreenshot.ImagePath}}
{{if $base64Image}}
<img src="data:image/jpeg;base64,{{$base64Image}}" alt="Sub-action Screenshot" onclick="openImageModal(this.src)" class="screenshot-inline" />
{{end}}
{{end}}
{{end}}
{{end}}
{{end}}
{{end}}
</div>
{{end}}
</div>
</div>
@@ -2837,13 +2955,13 @@ const htmlTemplate = `<!DOCTYPE html>
</div>
{{end}}
<!-- Screenshots -->
<!-- ScreenShots -->
{{if $step.Attachments}}
{{$attachments := $step.Attachments}}
{{if eq (printf "%T" $attachments) "map[string]interface {}"}}
{{if index $attachments "screen_results"}}
<div class="screenshots-section">
<h4>Screenshots</h4>
<h4>Attachment ScreenShots</h4>
<div class="screenshots-horizontal">
{{range $screenshot := index $attachments "screen_results"}}
{{$imagePath := ""}}

View File

@@ -622,9 +622,13 @@ func (ad *ADBDriver) tapByTextUsingHierarchy(hierarchy *Hierarchy, text string,
func (ud *ADBDriver) TapByXpath(xpath string, opts ...option.ActionOption) (err error) {
source, err := ud.Source()
if err != nil {
log.Error().Err(err).Msg("failed to get source")
return err
}
doc, err := xmlquery.Parse(strings.NewReader(source))
if err != nil {
log.Error().Err(err).Str("serial", ud.Device.Serial())
log.Error().Err(err).Msg("failed to parse source")
return err
}
targetNodes := xmlquery.Find(doc, xpath)
@@ -644,10 +648,12 @@ func (ud *ADBDriver) TapByXpath(xpath string, opts ...option.ActionOption) (err
centerX := float64(x1+x2) / 2
centerY := float64(y1+y2) / 2
log.Info().Str("serial", ud.Device.Serial()).Str("xpath", xpath).Str("bounds", bounds).Msg("find node by xpath success")
log.Info().Str("xpath", xpath).Str("bounds", bounds).Msg("find node by xpath success")
return ud.TapAbsXY(centerX, centerY, opts...)
}
return
log.Error().Str("xpath", xpath).Msg("failed to find node by xpath")
return errors.New("failed to find node by xpath")
}
func (ad *ADBDriver) searchNodes(nodes []Layout, text string, opts ...option.ActionOption) []Bounds {
@@ -756,27 +762,44 @@ func (ad *ADBDriver) GetSession() *DriverSession {
}
func (ad *ADBDriver) ForegroundInfo() (app types.AppInfo, err error) {
packageInfo, err := ad.runShellCommand("CLASSPATH=/data/local/tmp/evalite", "app_process", "/", "com.bytedance.iesqa.eval_process.PackageService", "2>/dev/null")
// Get foreground app package info using evalite service
packageInfo, err := ad.getForegroundPackageInfo()
if err != nil {
packageInfo, err = ad.runShellCommand("CLASSPATH=/data/local/tmp/evalite", "app_process", "/", "com.bytedance.iesqa.eval_process.PackageService", "2>/dev/null")
if err != nil {
log.Error().Err(err).Str("serial", ad.Device.Serial()).Msg("failed to get foreground app")
return app, err
}
log.Error().Err(err).Msg("failed to get foreground app info")
return app, err
}
log.Info().Str("serial", ad.Device.Serial()).Msg("foreground app output: " + packageInfo)
if strings.TrimSpace(packageInfo) == "" {
log.Error().Str("serial", ad.Device.Serial()).Msg("foreground app output is empty")
return app, errors.New("foreground app output is empty")
// Parse package info JSON
packageInfo = strings.TrimSpace(packageInfo)
if packageInfo == "" {
err = errors.New("foreground app output is empty")
log.Error().Err(err).Msg("get foreground app info failed")
return app, err
}
err = json.Unmarshal([]byte(strings.TrimSpace(packageInfo)), &app)
if err != nil {
log.Error().Err(err).Str("serial", ad.Device.Serial()).Str("packageInfo", packageInfo).Msg("failed to parse package info")
if err = json.Unmarshal([]byte(packageInfo), &app); err != nil {
log.Error().Err(err).Str("packageInfo", packageInfo).Msg("failed to parse package info")
return app, err
}
return app, nil
}
// getForegroundPackageInfo runs the evalite PackageService command through
// ad.runShellCommand and returns the command's raw output.
//
// The command is attempted up to maxRetries times. The output of the first
// successful attempt is returned unmodified; trimming and parsing are left to
// the caller. If every attempt fails, an empty string and the error from the
// last attempt are returned.
func (ad *ADBDriver) getForegroundPackageInfo() (string, error) {
	const maxRetries = 2
	var lastErr error
	for attempt := 1; attempt <= maxRetries; attempt++ {
		packageInfo, err := ad.runShellCommand("CLASSPATH=/data/local/tmp/evalite",
			"app_process", "/", "com.bytedance.iesqa.eval_process.PackageService", "2>/dev/null")
		if err == nil {
			return packageInfo, nil
		}
		lastErr = err
		// Only announce a retry when another attempt will actually follow;
		// otherwise the log would claim a retry that never happens.
		if attempt < maxRetries {
			log.Warn().Err(err).Int("attempt", attempt).Msg("failed to get foreground package info, retrying")
		} else {
			log.Warn().Err(err).Int("attempt", attempt).Msg("failed to get foreground package info, no attempts left")
		}
	}
	return "", lastErr
}
func (ad *ADBDriver) SetIme(imeRegx string) error {
log.Info().Str("imeRegx", imeRegx).Msg("ADBDriver.SetIme")
imeList := ad.ListIme()

View File

@@ -6,8 +6,11 @@ import (
"strings"
"sync"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/uixt/option"
)
// CacheManager provides a generic cache management interface
@@ -153,7 +156,7 @@ func (cm *CacheManager[T]) GetOrCreate(key string, factory func() (T, map[string
item, metadata, err := factory()
if err != nil {
var zero T
return zero, fmt.Errorf("failed to create item: %w", err)
return zero, err
}
// Store in cache
@@ -271,7 +274,7 @@ func createXTDriverWithConfig(config DriverCacheConfig) (*XTDriver, error) {
browserOpts := config.DeviceOpts.ToBrowserOptions().Options()
device, err = NewBrowserDevice(browserOpts...)
default:
return nil, fmt.Errorf("unsupported platform: %s", platform)
return nil, errors.Wrapf(code.InvalidParamError, "unsupported platform: %s", platform)
}
} else {
// Use default options, let NewXXDevice handle serial (empty or specified)
@@ -301,17 +304,17 @@ func createXTDriverWithConfig(config DriverCacheConfig) (*XTDriver, error) {
device, err = NewBrowserDevice()
}
default:
return nil, fmt.Errorf("unsupported platform: %s", platform)
return nil, errors.Wrapf(code.InvalidParamError, "unsupported platform: %s", platform)
}
}
if err != nil {
return nil, fmt.Errorf("failed to create device: %w", err)
return nil, err
}
// Create driver
driver, err := device.NewDriver()
if err != nil {
return nil, fmt.Errorf("failed to create driver: %w", err)
return nil, errors.Wrap(err, "failed to create driver")
}
// Create XTDriver with AI options
@@ -326,7 +329,7 @@ func createXTDriverWithConfig(config DriverCacheConfig) (*XTDriver, error) {
driverExt, err := NewXTDriver(driver, aiOpts...)
if err != nil {
return nil, fmt.Errorf("failed to create XTDriver: %w", err)
return nil, errors.Wrap(err, "failed to create XTDriver")
}
return driverExt, nil
}

View File

@@ -10,7 +10,6 @@ import (
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/code"
"github.com/httprunner/httprunner/v5/internal/builtin"
"github.com/httprunner/httprunner/v5/internal/json"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
@@ -115,7 +114,7 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
}()
// Execute the tool call
if err := dExt.invokeToolCall(ctx, toolCall); err != nil {
if err := dExt.invokeToolCall(ctx, toolCall, opts...); err != nil {
subActionResult.Error = err
return err
}
@@ -136,6 +135,9 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
if options.MaxRetryTimes > 0 && attempt > options.MaxRetryTimes {
return allPlannings, errors.New("reached max retry times")
}
// wait 3 seconds for tool calls to complete
time.Sleep(3 * time.Second)
}
}
@@ -143,12 +145,11 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
log.Info().Str("prompt", prompt).Msg("performing AI action")
// Step 1: Take screenshot and measure time
screenshotStartTime := time.Now()
screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
option.WithScreenShotFileName("ai_action"),
option.WithScreenShotBase64(true),
)
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil {
return nil, err
}
@@ -160,7 +161,7 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
aiExecutionResult := &AIExecutionResult{
Type: "action",
ModelCallElapsed: modelCallElapsed,
ScreenshotElapsed: screenshotElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
PlanningResult: &planningResult.PlanningResult,
@@ -173,7 +174,7 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
// Step 3: Execute tool calls
for _, toolCall := range planningResult.ToolCalls {
err = dExt.invokeToolCall(ctx, toolCall)
err = dExt.invokeToolCall(ctx, toolCall, opts...)
if err != nil {
aiExecutionResult.Error = err.Error()
return aiExecutionResult, errors.Wrap(err, "invoke tool call failed")
@@ -193,13 +194,11 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
options := option.NewActionOptions(opts...)
resetHistory := options.ResetHistory
// Step 1: Take screenshot
screenshotStartTime := time.Now()
// Use GetScreenResult to handle screenshot capture, save, and session tracking
screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
option.WithScreenShotFileName("ai_planning"),
option.WithScreenShotBase64(true),
)
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil {
return nil, err
}
@@ -208,12 +207,6 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
// The planning screenshot is already stored in planningResult.ScreenResult
dExt.GetSession().GetData(true) // reset session data to exclude planning screenshot from sub-actions
// get screen shot buffer base64 and size
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
if err != nil {
return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
}
// Step 2: Call model
modelCallStartTime := time.Now()
planningOpts := &ai.PlanningOptions{
@@ -224,12 +217,12 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
{
Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{
URL: screenShotBase64,
URL: screenResult.Base64,
},
},
},
},
Size: size,
Size: screenResult.Resolution,
ResetHistory: resetHistory,
}
@@ -250,7 +243,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
planningResult := &PlanningExecutionResult{
PlanningResult: *result, // Inherit all fields from ai.PlanningResult
// Planning process timing and metadata
ScreenshotElapsed: screenshotElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
ScreenResult: screenResult,
@@ -286,7 +279,7 @@ func (dExt *XTDriver) isTaskFinished(planningResult *PlanningExecutionResult) bo
}
// invokeToolCall invokes the tool call
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall) error {
func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCall, opts ...option.ActionOption) error {
// Parse arguments
arguments := make(map[string]interface{})
err := json.Unmarshal([]byte(toolCall.Function.Arguments), &arguments)
@@ -294,6 +287,10 @@ func (dExt *XTDriver) invokeToolCall(ctx context.Context, toolCall schema.ToolCa
return err
}
// Merge StartToGoal options into tool call arguments
// This ensures options like PreMarkOperation are passed to specific tool implementations
extractActionOptionsToArguments(opts, arguments)
// Execute the action
req := mcp.CallToolRequest{
Params: struct {
@@ -370,17 +367,11 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
return nil, errors.New("LLM service is not initialized")
}
// Step 1: Take screenshot and measure time
screenshotStartTime := time.Now()
screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
option.WithScreenShotFileName("ai_query"),
option.WithScreenShotBase64(true),
)
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil {
return nil, err
}
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
if err != nil {
return nil, err
}
@@ -394,8 +385,8 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
// execute query
queryOpts := &ai.QueryOptions{
Query: text,
Screenshot: screenShotBase64,
Size: size,
Screenshot: screenResult.Base64,
Size: screenResult.Resolution,
OutputSchema: actionOptions.OutputSchema,
}
result, err := dExt.LLMService.Query(context.Background(), queryOpts)
@@ -408,7 +399,7 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
aiResult := &AIExecutionResult{
Type: "query",
ModelCallElapsed: modelCallElapsed, // model call timing
ScreenshotElapsed: screenshotElapsed, // screenshot timing
ScreenshotElapsed: screenResult.Elapsed, // screenshot timing
ImagePath: screenResult.ImagePath, // screenshot path
Resolution: &screenResult.Resolution, // screen resolution
QueryResult: result, // query-specific result
@@ -422,35 +413,28 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*
return nil, errors.New("LLM service is not initialized")
}
// Step 1: Take screenshot and measure time
screenshotStartTime := time.Now()
screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
// Step 1: Take screenshot and convert to base64
screenResult, err := dExt.GetScreenResult(
option.WithScreenShotFileName("ai_assert"),
option.WithScreenShotBase64(true),
)
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
if err != nil {
return nil, err
}
assertResult := &AIExecutionResult{
Type: "assert",
ScreenshotElapsed: screenshotElapsed,
ScreenshotElapsed: screenResult.Elapsed,
ImagePath: screenResult.ImagePath,
Resolution: &screenResult.Resolution,
}
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
if err != nil {
assertResult.Error = err.Error()
return assertResult, err
}
// Step 2: Call model and measure time
modelCallStartTime := time.Now()
assertOpts := &ai.AssertOptions{
Assertion: assertion,
Screenshot: screenShotBase64,
Size: size,
Screenshot: screenResult.Base64,
Size: screenResult.Resolution,
}
result, err := dExt.LLMService.Assert(context.Background(), assertOpts)
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()

View File

@@ -27,6 +27,7 @@ import (
"github.com/httprunner/httprunner/v5/uixt/types"
)
// ScreenResult represents the result of taking a screenshot, including image path, recognition results, and metadata
type ScreenResult struct {
bufSource *bytes.Buffer // raw image buffer bytes
ImagePath string `json:"image_path"` // image file path
@@ -36,6 +37,8 @@ type ScreenResult struct {
Icons ai.UIResultMap `json:"icons"` // CV 识别的图标
Tags []string `json:"tags"` // tags for image, e.g. ["feed", "ad", "live"]
Popup *PopupInfo `json:"popup,omitempty"`
Elapsed int64 `json:"elapsed_ms,omitempty"` // screenshot elapsed time in milliseconds
Base64 string `json:"-"` // base64 encoded screenshot
}
func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
@@ -49,26 +52,11 @@ func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
})
}
// GetScreenshotBase64WithSize takes a screenshot, returns the compressed image buffer in base64 format and screen size
// Also saves the screenshot to session for report display
func (dExt *XTDriver) GetScreenshotBase64WithSize() (compressedBufBase64 string, size types.Size, err error) {
// Create screenshot with session saving, minimal CV processing for AI operations
screenResult, err := dExt.createScreenshotWithSession(
option.WithScreenShotFileName("screenshot_base64"),
)
if err != nil {
return "", types.Size{}, err
}
// GetScreenResult takes a screenshot and returns the ScreenResult with metadata
func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
// Take screenshot and measure time
screenshotStartTime := time.Now()
// convert buffer to base64 string
screenShotBase64 := "data:image/jpeg;base64," +
base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
return screenShotBase64, screenResult.Resolution, nil
}
// createScreenshotWithSession creates a screenshot with optional OCR processing and saves to session
func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
// get compressed screenshot buffer
compressBufSource, err := getScreenShotBuffer(dExt.IDriver)
if err != nil {
@@ -146,6 +134,13 @@ func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (
session := dExt.GetSession()
session.screenResults = append(session.screenResults, screenResult)
// Convert screenshot buffer to base64 string
if screenshotOptions.ScreenShotWithBase64 {
screenResult.Base64 = "data:image/jpeg;base64," +
base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
}
screenResult.Elapsed = time.Since(screenshotStartTime).Milliseconds()
logger.Msg("log screenshot")
return screenResult, nil
}
@@ -161,13 +156,7 @@ func needsCVProcessing(options *option.ActionOptions) bool {
options.ScreenShotWithOCRCluster != ""
}
// GetScreenResult takes a screenshot, returns the image recognition result
func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
// Enable OCR processing for GetScreenResult
opts = append(opts, option.WithScreenShotOCR(true))
return dExt.createScreenshotWithSession(opts...)
}
// GetScreenTexts takes a screenshot, returns the OCR recognition result
func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) {
options := option.NewActionOptions(opts...)
if options.ScreenShotFileName == "" {
@@ -467,17 +456,18 @@ func MarkUIOperation(driver IDriver, actionType option.ActionName, actionCoordin
timestamp := builtin.GenNameWithTimestamp("%d")
imagePath := filepath.Join(
config.GetConfig().ScreenShotsPath(),
fmt.Sprintf("action_%s_pre_%s.png", timestamp, actionType),
fmt.Sprintf("%s_pre_mark_%s.png", timestamp, actionType),
)
if actionType == option.ACTION_TapAbsXY || actionType == option.ACTION_DoubleTapXY {
switch actionType {
case option.ACTION_TapAbsXY, option.ACTION_DoubleTapXY:
if len(actionCoordinates) != 2 {
return fmt.Errorf("invalid tap action coordinates: %v", actionCoordinates)
}
x, y := actionCoordinates[0], actionCoordinates[1]
point := image.Point{X: int(x), Y: int(y)}
err = SaveImageWithCircleMarker(compressedBufSource, point, imagePath)
} else if actionType == option.ACTION_SwipeDirection || actionType == option.ACTION_SwipeCoordinate || actionType == option.ACTION_Drag {
case option.ACTION_SwipeDirection, option.ACTION_SwipeCoordinate, option.ACTION_Drag:
if len(actionCoordinates) != 4 {
return fmt.Errorf("invalid swipe action coordinates: %v", actionCoordinates)
}

View File

@@ -6,16 +6,17 @@ import (
"path/filepath"
"time"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/internal/builtin"
"github.com/httprunner/httprunner/v5/internal/config"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/rs/zerolog/log"
)
func preHandler_TapAbsXY(driver IDriver, options *option.ActionOptions, rawX, rawY float64) (
x, y float64, err error) {
x, y float64, err error,
) {
// Call MCP action tool if anti-risk is enabled
if options.AntiRisk {
arguments := getAntiRisk_SetTouchInfoList_Arguments(driver, []ai.PointF{
@@ -40,8 +41,8 @@ func preHandler_TapAbsXY(driver IDriver, options *option.ActionOptions, rawX, ra
}
func preHandler_DoubleTap(driver IDriver, options *option.ActionOptions, rawX, rawY float64) (
x, y float64, err error) {
x, y float64, err error,
) {
x, y, err = convertToAbsolutePoint(driver, rawX, rawY)
if err != nil {
return 0, 0, err
@@ -60,8 +61,8 @@ func preHandler_DoubleTap(driver IDriver, options *option.ActionOptions, rawX, r
}
func preHandler_Drag(driver IDriver, options *option.ActionOptions, rawFomX, rawFromY, rawToX, rawToY float64) (
fromX, fromY, toX, toY float64, err error) {
fromX, fromY, toX, toY float64, err error,
) {
fromX, fromY, toX, toY, err = convertToAbsoluteCoordinates(driver, rawFomX, rawFromY, rawToX, rawToY)
if err != nil {
return 0, 0, 0, 0, err
@@ -92,8 +93,8 @@ func preHandler_Drag(driver IDriver, options *option.ActionOptions, rawFomX, raw
func preHandler_Swipe(driver IDriver, actionType option.ActionName,
options *option.ActionOptions, rawFomX, rawFromY, rawToX, rawToY float64) (
fromX, fromY, toX, toY float64, err error) {
fromX, fromY, toX, toY float64, err error,
) {
fromX, fromY, toX, toY, err = convertToAbsoluteCoordinates(driver, rawFomX, rawFromY, rawToX, rawToY)
if err != nil {
return 0, 0, 0, 0, err
@@ -142,7 +143,7 @@ func postHandler(driver IDriver, actionType option.ActionName, options *option.A
timestamp := builtin.GenNameWithTimestamp("%d")
imagePath := filepath.Join(
config.GetConfig().ScreenShotsPath(),
fmt.Sprintf("action_%s_post_%s.png", timestamp, actionType),
fmt.Sprintf("%s_post_mark_%s.png", timestamp, actionType),
)
go func() {
@@ -157,7 +158,8 @@ func postHandler(driver IDriver, actionType option.ActionName, options *option.A
// callMCPActionTool calls MCP tool for the given action
func callMCPActionTool(driver IDriver,
serverName, actionType string, arguments map[string]any) {
serverName, actionType string, arguments map[string]any,
) {
// Get XTDriver from cache
dExt := getXTDriverFromCache(driver)
if dExt == nil {

View File

@@ -4,10 +4,11 @@ import (
"context"
"fmt"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/uixt/option"
)
// ToolScreenShot implements the screenshot tool call.
@@ -34,14 +35,17 @@ func (t *ToolScreenShot) Implement() server.ToolHandlerFunc {
if err != nil {
return nil, err
}
bufferBase64, _, err := driverExt.GetScreenshotBase64WithSize()
screenResult, err := driverExt.GetScreenResult(
option.WithScreenShotFileName("tool_screenshot"),
option.WithScreenShotBase64(true),
)
if err != nil {
log.Error().Err(err).Msg("ScreenShot failed")
return mcp.NewToolResultError(fmt.Sprintf("Failed to take screenshot: %v", err)), nil
}
log.Debug().Int("imageBytes", len(bufferBase64)).Msg("take screenshot success")
log.Debug().Int("imageBytes", len(screenResult.Base64)).Msg("take screenshot success")
return mcp.NewToolResultImage("screenshot", bufferBase64, "image/jpeg"), nil
return mcp.NewToolResultImage("screenshot", screenResult.Base64, "image/jpeg"), nil
}
}

View File

@@ -16,6 +16,7 @@ type ScreenShotOptions struct {
ScreenShotWithUpload bool `json:"screenshot_with_upload,omitempty" yaml:"screenshot_with_upload,omitempty"`
ScreenShotWithLiveType bool `json:"screenshot_with_live_type,omitempty" yaml:"screenshot_with_live_type,omitempty"`
ScreenShotWithLivePopularity bool `json:"screenshot_with_live_popularity,omitempty" yaml:"screenshot_with_live_popularity,omitempty"`
ScreenShotWithBase64 bool `json:"screenshot_with_base64,omitempty" yaml:"screenshot_with_base64,omitempty"`
ScreenShotWithUITypes []string `json:"screenshot_with_ui_types,omitempty" yaml:"screenshot_with_ui_types,omitempty"`
ScreenShotWithClosePopups bool `json:"screenshot_with_close_popups,omitempty" yaml:"screenshot_with_close_popups,omitempty"`
ScreenShotWithOCRCluster string `json:"screenshot_with_ocr_cluster,omitempty" yaml:"screenshot_with_ocr_cluster,omitempty"`
@@ -53,6 +54,9 @@ func (o *ScreenShotOptions) GetScreenShotOptions() []ActionOption {
if o.ScreenShotFileName != "" {
options = append(options, WithScreenShotFileName(o.ScreenShotFileName))
}
if o.ScreenShotWithBase64 {
options = append(options, WithScreenShotBase64(true))
}
return options
}
@@ -129,6 +133,12 @@ func WithScreenShotFileName(fileName string) ActionOption {
}
}
// WithScreenShotBase64 returns an ActionOption that sets the
// ScreenShotWithBase64 field of the target ActionOptions to the given value.
func WithScreenShotBase64(base64 bool) ActionOption {
	setFlag := func(actionOpts *ActionOptions) {
		actionOpts.ScreenShotWithBase64 = base64
	}
	return setFlag
}
type ScreenRecordOptions struct {
ScreenRecordDuration float64 `json:"screenrecord_duration,omitempty" yaml:"screenrecord_duration,omitempty"`
ScreenRecordWithAudio bool `json:"screenrecord_with_audio,omitempty" yaml:"screenrecord_with_audio,omitempty"`