mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-06 20:32:44 +08:00
refactor: enhance screenshot handling by introducing base64 encoding and updating related methods
This commit is contained in:
@@ -10,7 +10,6 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/code"
|
||||
"github.com/httprunner/httprunner/v5/internal/builtin"
|
||||
"github.com/httprunner/httprunner/v5/internal/json"
|
||||
"github.com/httprunner/httprunner/v5/uixt/ai"
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
@@ -143,12 +142,11 @@ func (dExt *XTDriver) StartToGoal(ctx context.Context, prompt string, opts ...op
|
||||
func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...option.ActionOption) (*AIExecutionResult, error) {
|
||||
log.Info().Str("prompt", prompt).Msg("performing AI action")
|
||||
|
||||
// Step 1: Take screenshot and measure time
|
||||
screenshotStartTime := time.Now()
|
||||
screenResult, err := dExt.createScreenshotWithSession(
|
||||
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
|
||||
// Step 1: Take screenshot and convert to base64
|
||||
screenResult, err := dExt.GetScreenResult(
|
||||
option.WithScreenShotFileName("ai_action"),
|
||||
option.WithScreenShotBase64(true),
|
||||
)
|
||||
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -160,7 +158,7 @@ func (dExt *XTDriver) AIAction(ctx context.Context, prompt string, opts ...optio
|
||||
aiExecutionResult := &AIExecutionResult{
|
||||
Type: "action",
|
||||
ModelCallElapsed: modelCallElapsed,
|
||||
ScreenshotElapsed: screenshotElapsed,
|
||||
ScreenshotElapsed: screenResult.Elapsed,
|
||||
ImagePath: screenResult.ImagePath,
|
||||
Resolution: &screenResult.Resolution,
|
||||
PlanningResult: &planningResult.PlanningResult,
|
||||
@@ -193,13 +191,11 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
|
||||
options := option.NewActionOptions(opts...)
|
||||
resetHistory := options.ResetHistory
|
||||
|
||||
// Step 1: Take screenshot
|
||||
screenshotStartTime := time.Now()
|
||||
// Use GetScreenResult to handle screenshot capture, save, and session tracking
|
||||
screenResult, err := dExt.createScreenshotWithSession(
|
||||
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
|
||||
// Step 1: Take screenshot and convert to base64
|
||||
screenResult, err := dExt.GetScreenResult(
|
||||
option.WithScreenShotFileName("ai_planning"),
|
||||
option.WithScreenShotBase64(true),
|
||||
)
|
||||
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -208,12 +204,6 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
|
||||
// The planning screenshot is already stored in planningResult.ScreenResult
|
||||
dExt.GetSession().GetData(true) // reset session data to exclude planning screenshot from sub-actions
|
||||
|
||||
// get screen shot buffer base64 and size
|
||||
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(code.DeviceGetInfoError, err.Error())
|
||||
}
|
||||
|
||||
// Step 2: Call model
|
||||
modelCallStartTime := time.Now()
|
||||
planningOpts := &ai.PlanningOptions{
|
||||
@@ -224,12 +214,12 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
|
||||
{
|
||||
Type: schema.ChatMessagePartTypeImageURL,
|
||||
ImageURL: &schema.ChatMessageImageURL{
|
||||
URL: screenShotBase64,
|
||||
URL: screenResult.Base64,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Size: size,
|
||||
Size: screenResult.Resolution,
|
||||
ResetHistory: resetHistory,
|
||||
}
|
||||
|
||||
@@ -250,7 +240,7 @@ func (dExt *XTDriver) PlanNextAction(ctx context.Context, prompt string, opts ..
|
||||
planningResult := &PlanningExecutionResult{
|
||||
PlanningResult: *result, // Inherit all fields from ai.PlanningResult
|
||||
// Planning process timing and metadata
|
||||
ScreenshotElapsed: screenshotElapsed,
|
||||
ScreenshotElapsed: screenResult.Elapsed,
|
||||
ImagePath: screenResult.ImagePath,
|
||||
Resolution: &screenResult.Resolution,
|
||||
ScreenResult: screenResult,
|
||||
@@ -374,17 +364,11 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
|
||||
return nil, errors.New("LLM service is not initialized")
|
||||
}
|
||||
|
||||
// Step 1: Take screenshot and measure time
|
||||
screenshotStartTime := time.Now()
|
||||
screenResult, err := dExt.createScreenshotWithSession(
|
||||
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
|
||||
// Step 1: Take screenshot and convert to base64
|
||||
screenResult, err := dExt.GetScreenResult(
|
||||
option.WithScreenShotFileName("ai_query"),
|
||||
option.WithScreenShotBase64(true),
|
||||
)
|
||||
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -398,8 +382,8 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
|
||||
// execute query
|
||||
queryOpts := &ai.QueryOptions{
|
||||
Query: text,
|
||||
Screenshot: screenShotBase64,
|
||||
Size: size,
|
||||
Screenshot: screenResult.Base64,
|
||||
Size: screenResult.Resolution,
|
||||
OutputSchema: actionOptions.OutputSchema,
|
||||
}
|
||||
result, err := dExt.LLMService.Query(context.Background(), queryOpts)
|
||||
@@ -412,7 +396,7 @@ func (dExt *XTDriver) AIQuery(text string, opts ...option.ActionOption) (*AIExec
|
||||
aiResult := &AIExecutionResult{
|
||||
Type: "query",
|
||||
ModelCallElapsed: modelCallElapsed, // model call timing
|
||||
ScreenshotElapsed: screenshotElapsed, // screenshot timing
|
||||
ScreenshotElapsed: screenResult.Elapsed, // screenshot timing
|
||||
ImagePath: screenResult.ImagePath, // screenshot path
|
||||
Resolution: &screenResult.Resolution, // screen resolution
|
||||
QueryResult: result, // query-specific result
|
||||
@@ -426,35 +410,28 @@ func (dExt *XTDriver) AIAssert(assertion string, opts ...option.ActionOption) (*
|
||||
return nil, errors.New("LLM service is not initialized")
|
||||
}
|
||||
|
||||
// Step 1: Take screenshot and measure time
|
||||
screenshotStartTime := time.Now()
|
||||
screenResult, err := dExt.createScreenshotWithSession(
|
||||
option.WithScreenShotFileName(builtin.GenNameWithTimestamp("%d_screenshot")),
|
||||
// Step 1: Take screenshot and convert to base64
|
||||
screenResult, err := dExt.GetScreenResult(
|
||||
option.WithScreenShotFileName("ai_assert"),
|
||||
option.WithScreenShotBase64(true),
|
||||
)
|
||||
screenshotElapsed := time.Since(screenshotStartTime).Milliseconds()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
assertResult := &AIExecutionResult{
|
||||
Type: "assert",
|
||||
ScreenshotElapsed: screenshotElapsed,
|
||||
ScreenshotElapsed: screenResult.Elapsed,
|
||||
ImagePath: screenResult.ImagePath,
|
||||
Resolution: &screenResult.Resolution,
|
||||
}
|
||||
|
||||
screenShotBase64, size, err := dExt.GetScreenshotBase64WithSize()
|
||||
if err != nil {
|
||||
assertResult.Error = err.Error()
|
||||
return assertResult, err
|
||||
}
|
||||
|
||||
// Step 2: Call model and measure time
|
||||
modelCallStartTime := time.Now()
|
||||
assertOpts := &ai.AssertOptions{
|
||||
Assertion: assertion,
|
||||
Screenshot: screenShotBase64,
|
||||
Size: size,
|
||||
Screenshot: screenResult.Base64,
|
||||
Size: screenResult.Resolution,
|
||||
}
|
||||
result, err := dExt.LLMService.Assert(context.Background(), assertOpts)
|
||||
assertResult.ModelCallElapsed = time.Since(modelCallStartTime).Milliseconds()
|
||||
|
||||
@@ -37,6 +37,8 @@ type ScreenResult struct {
|
||||
Icons ai.UIResultMap `json:"icons"` // CV 识别的图标
|
||||
Tags []string `json:"tags"` // tags for image, e.g. ["feed", "ad", "live"]
|
||||
Popup *PopupInfo `json:"popup,omitempty"`
|
||||
Elapsed int64 `json:"elapsed_ms,omitempty"` // screenshot elapsed time in milliseconds
|
||||
Base64 string `json:"-"` // base64 encoded screenshot
|
||||
}
|
||||
|
||||
func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
|
||||
@@ -50,26 +52,11 @@ func (s *ScreenResult) FilterTextsByScope(x1, y1, x2, y2 float64) ai.OCRTexts {
|
||||
})
|
||||
}
|
||||
|
||||
// GetScreenshotBase64WithSize takes a screenshot, returns the compressed image buffer in base64 format and screen size
|
||||
// Also saves the screenshot to session for report display
|
||||
func (dExt *XTDriver) GetScreenshotBase64WithSize() (compressedBufBase64 string, size types.Size, err error) {
|
||||
// Create screenshot with session saving, minimal CV processing for AI operations
|
||||
screenResult, err := dExt.createScreenshotWithSession(
|
||||
option.WithScreenShotFileName("screenshot_base64"),
|
||||
)
|
||||
if err != nil {
|
||||
return "", types.Size{}, err
|
||||
}
|
||||
// GetScreenResult takes a screenshot and returns the ScreenResult with metadata
|
||||
func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
|
||||
// Take screenshot and measure time
|
||||
screenshotStartTime := time.Now()
|
||||
|
||||
// convert buffer to base64 string
|
||||
screenShotBase64 := "data:image/jpeg;base64," +
|
||||
base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
|
||||
|
||||
return screenShotBase64, screenResult.Resolution, nil
|
||||
}
|
||||
|
||||
// createScreenshotWithSession creates a screenshot with optional OCR processing and saves to session
|
||||
func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
|
||||
// get compressed screenshot buffer
|
||||
compressBufSource, err := getScreenShotBuffer(dExt.IDriver)
|
||||
if err != nil {
|
||||
@@ -147,6 +134,13 @@ func (dExt *XTDriver) createScreenshotWithSession(opts ...option.ActionOption) (
|
||||
session := dExt.GetSession()
|
||||
session.screenResults = append(session.screenResults, screenResult)
|
||||
|
||||
// Convert screenshot buffer to base64 string
|
||||
if screenshotOptions.ScreenShotWithBase64 {
|
||||
screenResult.Base64 = "data:image/jpeg;base64," +
|
||||
base64.StdEncoding.EncodeToString(screenResult.bufSource.Bytes())
|
||||
}
|
||||
|
||||
screenResult.Elapsed = time.Since(screenshotStartTime).Milliseconds()
|
||||
logger.Msg("log screenshot")
|
||||
return screenResult, nil
|
||||
}
|
||||
@@ -162,13 +156,7 @@ func needsCVProcessing(options *option.ActionOptions) bool {
|
||||
options.ScreenShotWithOCRCluster != ""
|
||||
}
|
||||
|
||||
// GetScreenResult takes a screenshot and returns the resulting ScreenResult.
// It appends option.WithScreenShotOCR(true) to the caller-supplied options and
// delegates all of the work to createScreenshotWithSession, returning that
// call's result and error unchanged.
|
||||
func (dExt *XTDriver) GetScreenResult(opts ...option.ActionOption) (screenResult *ScreenResult, err error) {
|
||||
// Force OCR on: the option is appended after the caller's options.
// NOTE(review): presumably a later option overrides an earlier one, so a
// caller-supplied WithScreenShotOCR(false) would be ignored; confirm against
// option.NewActionOptions.
|
||||
opts = append(opts, option.WithScreenShotOCR(true))
|
||||
return dExt.createScreenshotWithSession(opts...)
|
||||
}
|
||||
|
||||
// GetScreenTexts takes a screenshot, returns the OCR recognition result
|
||||
func (dExt *XTDriver) GetScreenTexts(opts ...option.ActionOption) (ocrTexts ai.OCRTexts, err error) {
|
||||
options := option.NewActionOptions(opts...)
|
||||
if options.ScreenShotFileName == "" {
|
||||
|
||||
@@ -4,10 +4,11 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
"github.com/mark3labs/mcp-go/mcp"
|
||||
"github.com/mark3labs/mcp-go/server"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/httprunner/httprunner/v5/uixt/option"
|
||||
)
|
||||
|
||||
// ToolScreenShot implements the screenshot tool call.
|
||||
@@ -34,14 +35,17 @@ func (t *ToolScreenShot) Implement() server.ToolHandlerFunc {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bufferBase64, _, err := driverExt.GetScreenshotBase64WithSize()
|
||||
screenResult, err := driverExt.GetScreenResult(
|
||||
option.WithScreenShotFileName("tool_screenshot"),
|
||||
option.WithScreenShotBase64(true),
|
||||
)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("ScreenShot failed")
|
||||
return mcp.NewToolResultError(fmt.Sprintf("Failed to take screenshot: %v", err)), nil
|
||||
}
|
||||
log.Debug().Int("imageBytes", len(bufferBase64)).Msg("take screenshot success")
|
||||
log.Debug().Int("imageBytes", len(screenResult.Base64)).Msg("take screenshot success")
|
||||
|
||||
return mcp.NewToolResultImage("screenshot", bufferBase64, "image/jpeg"), nil
|
||||
return mcp.NewToolResultImage("screenshot", screenResult.Base64, "image/jpeg"), nil
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ type ScreenShotOptions struct {
|
||||
ScreenShotWithUpload bool `json:"screenshot_with_upload,omitempty" yaml:"screenshot_with_upload,omitempty"`
|
||||
ScreenShotWithLiveType bool `json:"screenshot_with_live_type,omitempty" yaml:"screenshot_with_live_type,omitempty"`
|
||||
ScreenShotWithLivePopularity bool `json:"screenshot_with_live_popularity,omitempty" yaml:"screenshot_with_live_popularity,omitempty"`
|
||||
ScreenShotWithBase64 bool `json:"screenshot_with_base64,omitempty" yaml:"screenshot_with_base64,omitempty"`
|
||||
ScreenShotWithUITypes []string `json:"screenshot_with_ui_types,omitempty" yaml:"screenshot_with_ui_types,omitempty"`
|
||||
ScreenShotWithClosePopups bool `json:"screenshot_with_close_popups,omitempty" yaml:"screenshot_with_close_popups,omitempty"`
|
||||
ScreenShotWithOCRCluster string `json:"screenshot_with_ocr_cluster,omitempty" yaml:"screenshot_with_ocr_cluster,omitempty"`
|
||||
@@ -53,6 +54,9 @@ func (o *ScreenShotOptions) GetScreenShotOptions() []ActionOption {
|
||||
if o.ScreenShotFileName != "" {
|
||||
options = append(options, WithScreenShotFileName(o.ScreenShotFileName))
|
||||
}
|
||||
if o.ScreenShotWithBase64 {
|
||||
options = append(options, WithScreenShotBase64(true))
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
@@ -129,6 +133,12 @@ func WithScreenShotFileName(fileName string) ActionOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithScreenShotBase64 returns an ActionOption that sets the
// ScreenShotWithBase64 flag on the ActionOptions it is applied to.
// Passing true requests a base64-encoded copy of the screenshot; passing
// false clears the flag.
// NOTE(review): the parameter name base64 would shadow the encoding/base64
// package inside this function if this file imports it; confirm.
func WithScreenShotBase64(base64 bool) ActionOption {
|
||||
return func(o *ActionOptions) {
|
||||
o.ScreenShotWithBase64 = base64
|
||||
}
|
||||
}
|
||||
|
||||
type ScreenRecordOptions struct {
|
||||
ScreenRecordDuration float64 `json:"screenrecord_duration,omitempty" yaml:"screenrecord_duration,omitempty"`
|
||||
ScreenRecordWithAudio bool `json:"screenrecord_with_audio,omitempty" yaml:"screenrecord_with_audio,omitempty"`
|
||||
|
||||
Reference in New Issue
Block a user