mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-06 20:32:44 +08:00
feat: add ui-tars planner
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,6 +7,7 @@
|
||||
|
||||
# Test binary, built with `go test -c`
|
||||
*.test
|
||||
.env
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
@@ -1 +1 @@
|
||||
v5.0.0-beta-2503172040
|
||||
v5.0.0-beta-2503190009
|
||||
|
||||
149
planner/env.go
Normal file
149
planner/env.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package planner
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/openai"
|
||||
"github.com/joho/godotenv"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// defaultTimeout is the timeout placed on the chat model config built by
// GetModelConfig.
const defaultTimeout = 60 * time.Second
|
||||
|
||||
// OpenAIInitConfig is the shape GetModelConfig decodes the JSON held in the
// MIDSCENE_OPENAI_INIT_CONFIG_JSON environment variable into.
type OpenAIInitConfig struct {
	// ReportURL is read from the "REPORT_SERVER_URL" JSON key.
	ReportURL string `json:"REPORT_SERVER_URL"`
	// Headers is read from the "defaultHeaders" JSON key.
	Headers map[string]string `json:"defaultHeaders"`
}
|
||||
|
||||
// Names of the environment variables read by this package.
const (
	// EnvOpenAIBaseURL names the variable holding the model service base URL.
	EnvOpenAIBaseURL = "OPENAI_BASE_URL"
	// EnvOpenAIAPIKey names the variable holding the API key.
	EnvOpenAIAPIKey = "OPENAI_API_KEY"
	// EnvModelName names the variable holding the model name.
	EnvModelName = "MIDSCENE_MODEL_NAME"
	// EnvOpenAIInitConfigJSON names the variable holding a JSON document that
	// is decoded into OpenAIInitConfig.
	EnvOpenAIInitConfigJSON = "MIDSCENE_OPENAI_INIT_CONFIG_JSON"
	// EnvUseVLMUITars names the boolean variable checked by IsUseVLMUITars.
	EnvUseVLMUITars = "MIDSCENE_USE_VLM_UI_TARS"
)
|
||||
|
||||
// loadEnv loads environment variables from a file
|
||||
func loadEnv(envPath string) error {
|
||||
err := godotenv.Load(envPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Info().Str("path", envPath).Msg("load env success")
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetEnvConfig returns the value of the environment variable named key, or
// the empty string when the variable is not set.
func GetEnvConfig(key string) string {
	// LookupEnv yields "" for an absent variable, exactly like os.Getenv.
	value, _ := os.LookupEnv(key)
	return value
}
|
||||
|
||||
func GetEnvConfigInJSON(key string) (map[string]interface{}, error) {
|
||||
value := GetEnvConfig(key)
|
||||
if value == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var result map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(value), &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func GetEnvConfigInBool(key string) bool {
|
||||
value := GetEnvConfig(key)
|
||||
if value == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
boolValue, _ := strconv.ParseBool(value)
|
||||
return boolValue
|
||||
}
|
||||
|
||||
// GetEnvConfigOrDefault get env config or default value
|
||||
func GetEnvConfigOrDefault(key, defaultValue string) string {
|
||||
value := GetEnvConfig(key)
|
||||
if value == "" {
|
||||
return defaultValue
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func GetEnvConfigInInt(key string, defaultValue int) int {
|
||||
value := GetEnvConfig(key)
|
||||
if value == "" {
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
intValue, err := strconv.Atoi(value)
|
||||
if err != nil {
|
||||
return defaultValue
|
||||
}
|
||||
return intValue
|
||||
}
|
||||
|
||||
// GetModelConfig get OpenAI config
|
||||
func GetModelConfig() (*openai.ChatModelConfig, error) {
|
||||
envConfig := &OpenAIInitConfig{
|
||||
Headers: make(map[string]string),
|
||||
}
|
||||
|
||||
// read from JSON config first
|
||||
jsonStr := GetEnvConfig(EnvOpenAIInitConfigJSON)
|
||||
if jsonStr != "" {
|
||||
if err := json.Unmarshal([]byte(jsonStr), envConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
config := &openai.ChatModelConfig{
|
||||
Timeout: defaultTimeout,
|
||||
}
|
||||
|
||||
if baseURL := GetEnvConfig(EnvOpenAIBaseURL); baseURL != "" {
|
||||
config.BaseURL = baseURL
|
||||
} else {
|
||||
return nil, fmt.Errorf("miss env %s", EnvOpenAIBaseURL)
|
||||
}
|
||||
|
||||
if apiKey := GetEnvConfig(EnvOpenAIAPIKey); apiKey != "" {
|
||||
config.APIKey = apiKey
|
||||
} else {
|
||||
return nil, fmt.Errorf("miss env %s", EnvOpenAIAPIKey)
|
||||
}
|
||||
|
||||
if modelName := GetEnvConfig(EnvModelName); modelName != "" {
|
||||
config.Model = modelName
|
||||
} else {
|
||||
return nil, fmt.Errorf("miss env %s", EnvModelName)
|
||||
}
|
||||
|
||||
// log config info
|
||||
log.Info().Str("model", config.Model).
|
||||
Str("baseURL", config.BaseURL).
|
||||
Str("apiKey", maskAPIKey(config.APIKey)).
|
||||
Str("timeout", defaultTimeout.String()).
|
||||
Msg("get model config")
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
// maskAPIKey returns a log-safe form of key: the first and last four bytes
// with "******" between them. Keys of eight bytes or fewer are replaced by
// the mask alone so that no part of a short key is revealed.
func maskAPIKey(key string) string {
	const (
		mask    = "******"
		visible = 4
	)

	if n := len(key); n > 2*visible {
		return key[:visible] + mask + key[n-visible:]
	}
	return mask
}
|
||||
|
||||
func IsUseVLMUITars() bool {
|
||||
return GetEnvConfigInBool(EnvUseVLMUITars)
|
||||
}
|
||||
50
planner/go.mod
Normal file
50
planner/go.mod
Normal file
@@ -0,0 +1,50 @@
|
||||
module github.com/httprunner/httprunner/v5/planner
|
||||
|
||||
go 1.24.1
|
||||
|
||||
require (
|
||||
github.com/cloudwego/eino v0.3.16
|
||||
github.com/cloudwego/eino-ext/components/model/openai v0.0.0-20250314110024-9e89ba18146c
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/pkg/errors v0.9.1
|
||||
github.com/rs/zerolog v1.33.0
|
||||
github.com/stretchr/testify v1.10.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/bytedance/sonic v1.12.2 // indirect
|
||||
github.com/bytedance/sonic/loader v0.2.0 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
github.com/cloudwego/eino-ext/libs/acl/openai v0.0.0-20250305023926-469de0301955 // indirect
|
||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/getkin/kin-openapi v0.118.0 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.5 // indirect
|
||||
github.com/go-openapi/swag v0.19.5 // indirect
|
||||
github.com/goph/emperror v0.17.2 // indirect
|
||||
github.com/invopop/yaml v0.1.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.19 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
|
||||
github.com/nikolalohinski/gonja v1.5.3 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.0.9 // indirect
|
||||
github.com/perimeterx/marshmallow v1.1.4 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/sashabaranov/go-openai v1.32.5 // indirect
|
||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||
github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/yargevad/filepathx v1.0.0 // indirect
|
||||
golang.org/x/arch v0.11.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
|
||||
golang.org/x/sys v0.28.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
175
planner/go.sum
Normal file
175
planner/go.sum
Normal file
@@ -0,0 +1,175 @@
|
||||
github.com/airbrake/gobrake v3.6.1+incompatible/go.mod h1:wM4gu3Cn0W0K7GUuVWnlXZU11AGBXMILnrdOU8Kn00o=
|
||||
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
|
||||
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
|
||||
github.com/bugsnag/bugsnag-go v1.4.0/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8=
|
||||
github.com/bugsnag/panicwrap v1.2.0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE=
|
||||
github.com/bytedance/mockey v1.2.13 h1:jokWZAm/pUEbD939Rhznz615MKUCZNuvCFQlJ2+ntoo=
|
||||
github.com/bytedance/mockey v1.2.13/go.mod h1:1BPHF9sol5R1ud/+0VEHGQq/+i2lN+GTsr3O2Q9IENY=
|
||||
github.com/bytedance/sonic v1.12.2 h1:oaMFuRTpMHYLpCntGca65YWt5ny+wAceDERTkT2L9lg=
|
||||
github.com/bytedance/sonic v1.12.2/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk=
|
||||
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/bytedance/sonic/loader v0.2.0 h1:zNprn+lsIP06C/IqCHs3gPQIvnvpKbbxyXQP1iU4kWM=
|
||||
github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
|
||||
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
|
||||
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
||||
github.com/cloudwego/eino v0.3.16 h1:ASN8zISyoEdjEsPnIw5GazSHtbNY97NDthQ2B69yiZw=
|
||||
github.com/cloudwego/eino v0.3.16/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
|
||||
github.com/cloudwego/eino-ext/components/model/openai v0.0.0-20250314110024-9e89ba18146c h1:04WQpGikdQv6fh5wzMYSQhO0SJraV8+xcb9VQ00+HX4=
|
||||
github.com/cloudwego/eino-ext/components/model/openai v0.0.0-20250314110024-9e89ba18146c/go.mod h1:YGP4q3uspj5qhkv3CnvlEPSo0YGeWpvkkTUHHpLExas=
|
||||
github.com/cloudwego/eino-ext/libs/acl/openai v0.0.0-20250305023926-469de0301955 h1:fgvkmTqAalDfjdy3b6Ur2mh/KEwB9L2uvqS4MFgTOqc=
|
||||
github.com/cloudwego/eino-ext/libs/acl/openai v0.0.0-20250305023926-469de0301955/go.mod h1:6CThw1XQx/ASXNt31yuvp0X4Yp4GprknQuIvP9VKDpw=
|
||||
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
|
||||
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/getkin/kin-openapi v0.118.0 h1:z43njxPmJ7TaPpMSCQb7PN0dEYno4tyBPQcrFdHoLuM=
|
||||
github.com/getkin/kin-openapi v0.118.0/go.mod h1:l5e9PaFUo9fyLJCPGQeXI2ML8c3P8BHOEV2VaAVf/pc=
|
||||
github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ=
|
||||
github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJYCFOLkIBwI7xFExG03bbkOkCvUPI=
|
||||
github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98=
|
||||
github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
|
||||
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
|
||||
github.com/go-openapi/swag v0.19.5 h1:lTz6Ys4CmqqCQmZPBlbQENR1/GucA2bzYTE12Pw4tFY=
|
||||
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
|
||||
github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM=
|
||||
github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/goph/emperror v0.17.2 h1:yLapQcmEsO0ipe9p5TaN22djm3OFV/TfM/fcYP0/J18=
|
||||
github.com/goph/emperror v0.17.2/go.mod h1:+ZbQ+fUNO/6FNiUo0ujtMjhgad9Xa6fQL9KhH4LNHic=
|
||||
github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
|
||||
github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
|
||||
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
|
||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||
github.com/invopop/yaml v0.1.0 h1:YW3WGUoJEXYfzWBjn00zIlrw7brGVD0fUKRYDPAPhrc=
|
||||
github.com/invopop/yaml v0.1.0/go.mod h1:2XuRLgs/ouIrW3XNzuNj7J3Nvu/Dig5MXvbCEdiBN3Q=
|
||||
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
|
||||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
|
||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
|
||||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1fWh90gTKwiN4QCGoY9TWyyO4=
|
||||
github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8=
|
||||
github.com/nikolalohinski/gonja v1.5.3 h1:GsA+EEaZDZPGJ8JtpeGN78jidhOlxeJROpqMT9fTj9c=
|
||||
github.com/nikolalohinski/gonja v1.5.3/go.mod h1:RmjwxNiXAEqcq1HeK5SSMmqFJvKOfTfXhkJv6YBtPa4=
|
||||
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||
github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/pelletier/go-toml/v2 v2.0.9 h1:uH2qQXheeefCCkuBBSLi7jCiSmj3VRh2+Goq2N7Xxu0=
|
||||
github.com/pelletier/go-toml/v2 v2.0.9/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc=
|
||||
github.com/perimeterx/marshmallow v1.1.4 h1:pZLDH9RjlLGGorbXhcaQLhfuV0pFMNfPO55FuFkxqLw=
|
||||
github.com/perimeterx/marshmallow v1.1.4/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw=
|
||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rollbar/rollbar-go v1.0.2/go.mod h1:AcFs5f0I+c71bpHlXNNDbOWJiKwjFDtISeXco0L5PKQ=
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
||||
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
github.com/sashabaranov/go-openai v1.32.5 h1:/eNVa8KzlE7mJdKPZDj6886MUzZQjoVHyn0sLvIt5qA=
|
||||
github.com/sashabaranov/go-openai v1.32.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f h1:Z2cODYsUxQPofhpYRMQVwWz4yUVpHF+vPi+eUdruUYI=
|
||||
github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f/go.mod h1:JqzWyvTuI2X4+9wOHmKSQCYxybB/8j6Ko43qVmXDuZg=
|
||||
github.com/smarty/assertions v1.15.0 h1:cR//PqUBUiQRakZWqBiFFQ9wb8emQGDb0HeGdqGByCY=
|
||||
github.com/smarty/assertions v1.15.0/go.mod h1:yABtdzeQs6l1brC900WlRNwj6ZR55d7B+E8C6HtKdec=
|
||||
github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY=
|
||||
github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/ugorji/go v1.2.7 h1:qYhyWUUd6WbiM+C6JZAUkIJt/1WrjzNHY9+KCIjVqTo=
|
||||
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
|
||||
github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0=
|
||||
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
|
||||
github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg=
|
||||
github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE=
|
||||
github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5FYc=
|
||||
github.com/yargevad/filepathx v1.0.0/go.mod h1:BprfX/gpYNJHJfc35GjRRpVcwWXS89gGulUIU5tK3tA=
|
||||
golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4=
|
||||
golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 h1:MGwJjxBy0HJshjDNfLsYO8xppfqWlA5ZT9OhtUUhTNw=
|
||||
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
|
||||
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
|
||||
250
planner/parser.go
Normal file
250
planner/parser.go
Normal file
@@ -0,0 +1,250 @@
|
||||
package planner
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// NewActionParser creates a new ActionParser instance
|
||||
func NewActionParser(prediction string, factor float64) *ActionParser {
|
||||
return &ActionParser{
|
||||
Prediction: prediction,
|
||||
Factor: factor,
|
||||
}
|
||||
}
|
||||
|
||||
// ActionParser turns the text of a VLM response into structured actions.
type ActionParser struct {
	// Prediction is the response text supplied to NewActionParser.
	// NOTE(review): Parse takes the text as an argument and none of the
	// parser methods in this file read this field.
	Prediction string
	// Factor is the value supplied to NewActionParser.
	// NOTE(review): presumably a coordinate scale factor, but
	// normalizeCoordinates does not use it; confirm the intended use.
	Factor float64
}
|
||||
|
||||
// Parse parses the prediction text and extracts actions
|
||||
func (p *ActionParser) Parse(predictionText string) ([]ParsedAction, error) {
|
||||
// try parsing JSON format
|
||||
var jsonActions []ParsedAction
|
||||
jsonActions, jsonErr := p.parseJSON(predictionText)
|
||||
if jsonErr == nil && len(jsonActions) > 0 {
|
||||
return jsonActions, nil
|
||||
}
|
||||
|
||||
// if JSON parsing fails, try parsing Thought/Action format
|
||||
thoughtActions, thoughtErr := p.parseThoughtAction(predictionText)
|
||||
if thoughtErr == nil && len(thoughtActions) > 0 {
|
||||
return thoughtActions, nil
|
||||
}
|
||||
|
||||
// both parsing methods failed
|
||||
if jsonErr != nil && thoughtErr != nil {
|
||||
return nil, fmt.Errorf("failed to parse VLM response: %v; %v", jsonErr, thoughtErr)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("no actions returned from VLM")
|
||||
}
|
||||
|
||||
// parseJSON tries to parse the response as JSON format
|
||||
func (p *ActionParser) parseJSON(predictionText string) ([]ParsedAction, error) {
|
||||
predictionText = strings.TrimSpace(predictionText)
|
||||
if strings.HasPrefix(predictionText, "```json") && strings.HasSuffix(predictionText, "```") {
|
||||
predictionText = strings.TrimPrefix(predictionText, "```json")
|
||||
predictionText = strings.TrimSuffix(predictionText, "```")
|
||||
}
|
||||
predictionText = strings.TrimSpace(predictionText)
|
||||
|
||||
var response VLMResponse
|
||||
if err := json.Unmarshal([]byte(predictionText), &response); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse VLM response: %v", err)
|
||||
}
|
||||
|
||||
if response.Error != "" {
|
||||
return nil, errors.New(response.Error)
|
||||
}
|
||||
|
||||
if len(response.Actions) == 0 {
|
||||
return nil, errors.New("no actions returned from VLM")
|
||||
}
|
||||
|
||||
// normalize actions
|
||||
var normalizedActions []ParsedAction
|
||||
for _, action := range response.Actions {
|
||||
if err := p.normalizeAction(&action); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to normalize action")
|
||||
}
|
||||
normalizedActions = append(normalizedActions, action)
|
||||
}
|
||||
|
||||
return normalizedActions, nil
|
||||
}
|
||||
|
||||
// parseThoughtAction parses the Thought/Action format response
|
||||
func (p *ActionParser) parseThoughtAction(predictionText string) ([]ParsedAction, error) {
|
||||
thoughtRegex := regexp.MustCompile(`(?is)Thought:(.+?)Action:`)
|
||||
actionRegex := regexp.MustCompile(`(?is)Action:(.+)`)
|
||||
|
||||
// extract Thought part
|
||||
thoughtMatch := thoughtRegex.FindStringSubmatch(predictionText)
|
||||
var thought string
|
||||
if len(thoughtMatch) > 1 {
|
||||
thought = strings.TrimSpace(thoughtMatch[1])
|
||||
}
|
||||
|
||||
// extract Action part
|
||||
actionMatch := actionRegex.FindStringSubmatch(predictionText)
|
||||
if len(actionMatch) < 2 {
|
||||
return nil, fmt.Errorf("no action found in the response")
|
||||
}
|
||||
|
||||
actionText := strings.TrimSpace(actionMatch[1])
|
||||
|
||||
// parse action type and parameters
|
||||
return p.parseActionText(actionText, thought)
|
||||
}
|
||||
|
||||
// parseActionText parses the action text to extract the action type and parameters
|
||||
func (p *ActionParser) parseActionText(actionText, thought string) ([]ParsedAction, error) {
|
||||
// remove trailing comments
|
||||
if idx := strings.Index(actionText, "#"); idx > 0 {
|
||||
actionText = strings.TrimSpace(actionText[:idx])
|
||||
}
|
||||
|
||||
// supported action types and regexes
|
||||
actionRegexes := map[string]*regexp.Regexp{
|
||||
"click": regexp.MustCompile(`click\(start_box='([^']+)'\)`),
|
||||
"left_double": regexp.MustCompile(`left_double\(start_box='([^']+)'\)`),
|
||||
"right_single": regexp.MustCompile(`right_single\(start_box='([^']+)'\)`),
|
||||
"drag": regexp.MustCompile(`drag\(start_box='([^']+)', end_box='([^']+)'\)`),
|
||||
"hotkey": regexp.MustCompile(`hotkey\(key='([^']+)'\)`),
|
||||
"type": regexp.MustCompile(`type\(content='([^']+)'\)`),
|
||||
"scroll": regexp.MustCompile(`scroll\(start_box='([^']+)', direction='([^']+)'\)`),
|
||||
"wait": regexp.MustCompile(`wait\(\)`),
|
||||
"finished": regexp.MustCompile(`finished\(\)`),
|
||||
"call_user": regexp.MustCompile(`call_user\(\)`),
|
||||
}
|
||||
|
||||
for actionType, regex := range actionRegexes {
|
||||
matches := regex.FindStringSubmatch(actionText)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var action ParsedAction
|
||||
action.ActionType = actionType
|
||||
action.ActionInputs = make(map[string]interface{})
|
||||
action.Thought = thought
|
||||
|
||||
// parse parameters based on action type
|
||||
switch actionType {
|
||||
case "click", "left_double", "right_single":
|
||||
if len(matches) > 1 {
|
||||
coord, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize point failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = coord
|
||||
}
|
||||
case "drag":
|
||||
if len(matches) > 2 {
|
||||
// handle start point
|
||||
startBox, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
|
||||
// handle end point
|
||||
endBox, err := p.normalizeCoordinates(matches[2])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize endBox failed: %s", matches[2])
|
||||
}
|
||||
action.ActionInputs["endBox"] = endBox
|
||||
}
|
||||
case "hotkey":
|
||||
if len(matches) > 1 {
|
||||
action.ActionInputs["key"] = matches[1]
|
||||
}
|
||||
case "type":
|
||||
if len(matches) > 1 {
|
||||
action.ActionInputs["content"] = matches[1]
|
||||
}
|
||||
case "scroll":
|
||||
if len(matches) > 2 {
|
||||
startBox, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
action.ActionInputs["direction"] = matches[2]
|
||||
}
|
||||
case "wait", "finished", "call_user":
|
||||
// 这些动作没有额外参数
|
||||
}
|
||||
|
||||
return []ParsedAction{action}, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unknown action format: %s", actionText)
|
||||
}
|
||||
|
||||
// normalizeAction normalizes the coordinates in the action
|
||||
func (p *ActionParser) normalizeAction(action *ParsedAction) error {
|
||||
switch action.ActionType {
|
||||
case "click", "drag":
|
||||
// handle click and drag action coordinates
|
||||
if startBox, ok := action.ActionInputs["startBox"].(string); ok {
|
||||
normalized, err := p.normalizeCoordinates(startBox)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to normalize startBox: %w", err)
|
||||
}
|
||||
action.ActionInputs["startBox"] = normalized
|
||||
}
|
||||
|
||||
if endBox, ok := action.ActionInputs["endBox"].(string); ok {
|
||||
normalized, err := p.normalizeCoordinates(endBox)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to normalize endBox: %w", err)
|
||||
}
|
||||
action.ActionInputs["endBox"] = normalized
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// normalizeCoordinates normalizes the coordinates based on the factor
|
||||
func (p *ActionParser) normalizeCoordinates(coordStr string) (string, error) {
|
||||
var coords []float64
|
||||
|
||||
// check empty string
|
||||
if coordStr == "" {
|
||||
return "", fmt.Errorf("empty coordinate string")
|
||||
}
|
||||
|
||||
if !strings.Contains(coordStr, ",") {
|
||||
return "", fmt.Errorf("invalid coordinate string: %s", coordStr)
|
||||
}
|
||||
|
||||
// remove possible brackets and split coordinates
|
||||
coordStr = strings.Trim(coordStr, "[]() \t")
|
||||
|
||||
// try parsing JSON array
|
||||
jsonStr := coordStr
|
||||
if !strings.HasPrefix(jsonStr, "[") {
|
||||
jsonStr = "[" + coordStr + "]"
|
||||
}
|
||||
|
||||
err := json.Unmarshal([]byte(jsonStr), &coords)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to parse coordinate string: %w", err)
|
||||
}
|
||||
|
||||
normalized, err := json.Marshal(coords)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to marshal normalized coordinates: %w", err)
|
||||
}
|
||||
|
||||
return string(normalized), nil
|
||||
}
|
||||
396
planner/planner.go
Normal file
396
planner/planner.go
Normal file
@@ -0,0 +1,396 @@
|
||||
package planner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/draw"
|
||||
"image/png"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/openai"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// Sentinel errors describing invalid planner input.
var (
	// ErrInvalidInput is returned by validateInput when the screen size is not
	// positive or when no user message carries an image.
	ErrInvalidInput = fmt.Errorf("invalid input parameters")

	// ErrEmptyInstruction is returned by validateInput when the user
	// instruction is the empty string.
	ErrEmptyInstruction = fmt.Errorf("user instruction is empty")

	// ErrNoConversationHistory is returned by validateInput when the
	// conversation history has no messages.
	ErrNoConversationHistory = fmt.Errorf("conversation history is empty")

	// ErrInvalidImageData signals unusable image data.
	// NOTE(review): no use of this value is visible in this part of the file.
	ErrInvalidImageData = fmt.Errorf("invalid image data")
)
|
||||
|
||||
// uiTarsPlanningPrompt is the fixed leading part of the system prompt sent to
// the model. It describes the expected "Thought / Action" output format and
// the available action space, and ends with the "## User Instruction" heading
// so that callVLMService can append the user's instruction directly after it.
const uiTarsPlanningPrompt = `
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.

## Output Format
Thought: ...
Action: ...

## Action Space
click(start_box='[x1, y1, x2, y2]')
left_double(start_box='[x1, y1, x2, y2]')
right_single(start_box='[x1, y1, x2, y2]')
drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
hotkey(key='')
type(content='') #If you want to submit your input, use "\n" at the end of content.
scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.

## Note
- Use Chinese in Thought part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in Thought part.

## User Instruction
`
|
||||
|
||||
func NewPlanner(ctx context.Context) (*Planner, error) {
|
||||
config, err := GetModelConfig()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create OpenAI config: %w", err)
|
||||
}
|
||||
model, err := openai.NewChatModel(ctx, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize OpenAI model: %w", err)
|
||||
}
|
||||
return &Planner{
|
||||
ctx: ctx,
|
||||
model: model,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type Planner struct {
|
||||
ctx context.Context
|
||||
model *openai.ChatModel
|
||||
}
|
||||
|
||||
// Start performs UI planning using Vision Language Model
|
||||
func (p *Planner) Start(opts PlanningOptions) (*PlanningResult, error) {
|
||||
log.Info().Str("user_instruction", opts.UserInstruction).Msg("start VLM planning")
|
||||
|
||||
// 1. validate input parameters
|
||||
if err := validateInput(opts); err != nil {
|
||||
return nil, errors.Wrap(err, "validate input parameters failed")
|
||||
}
|
||||
|
||||
// 2. call VLM service
|
||||
resp, err := p.callVLMService(opts)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "call VLM service failed")
|
||||
}
|
||||
|
||||
// 3. process response
|
||||
result, err := processVLMResponse(resp)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "process VLM response failed")
|
||||
}
|
||||
|
||||
log.Info().
|
||||
Interface("summary", result.ActionSummary).
|
||||
Interface("actions", result.Actions).
|
||||
Msg("VLM planning completed")
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func validateInput(opts PlanningOptions) error {
|
||||
if opts.UserInstruction == "" {
|
||||
return ErrEmptyInstruction
|
||||
}
|
||||
|
||||
if len(opts.ConversationHistory) == 0 {
|
||||
return ErrNoConversationHistory
|
||||
}
|
||||
|
||||
if opts.Size.Width <= 0 || opts.Size.Height <= 0 {
|
||||
return ErrInvalidInput
|
||||
}
|
||||
|
||||
// ensure at least one image URL
|
||||
hasImageURL := false
|
||||
for _, msg := range opts.ConversationHistory {
|
||||
if msg.Role == "user" {
|
||||
// check MultiContent
|
||||
if len(msg.MultiContent) > 0 {
|
||||
for _, content := range msg.MultiContent {
|
||||
if content.Type == "image_url" && content.ImageURL != nil {
|
||||
hasImageURL = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if hasImageURL {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !hasImageURL {
|
||||
return ErrInvalidInput
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// callVLMService makes the actual call to the VLM service
|
||||
func (p *Planner) callVLMService(opts PlanningOptions) (*VLMResponse, error) {
|
||||
log.Info().Msg("calling VLM service...")
|
||||
|
||||
// prepare prompt
|
||||
systemPrompt := uiTarsPlanningPrompt + opts.UserInstruction
|
||||
messages := []*schema.Message{
|
||||
{
|
||||
Role: schema.System,
|
||||
Content: systemPrompt,
|
||||
},
|
||||
}
|
||||
messages = append(messages, opts.ConversationHistory...)
|
||||
|
||||
// generate response
|
||||
resp, err := p.model.Generate(p.ctx, messages)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("OpenAI API request failed: %w", err)
|
||||
}
|
||||
|
||||
// parse response
|
||||
content := resp.Content
|
||||
parser := NewActionParser(content, 1000) // 使用与 TypeScript 版本相同的 factor
|
||||
actions, err := parser.Parse(content)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse actions: %w", err)
|
||||
}
|
||||
|
||||
return &VLMResponse{
|
||||
Actions: actions,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// processVLMResponse processes the VLM response and converts it to PlanningResult
|
||||
func processVLMResponse(resp *VLMResponse) (*PlanningResult, error) {
|
||||
log.Info().Msg("processing VLM response...")
|
||||
if resp.Error != "" {
|
||||
return nil, fmt.Errorf("VLM error: %s", resp.Error)
|
||||
}
|
||||
|
||||
if len(resp.Actions) == 0 {
|
||||
return nil, fmt.Errorf("no actions returned from VLM")
|
||||
}
|
||||
|
||||
// 验证和后处理每个动作
|
||||
for i := range resp.Actions {
|
||||
// 验证动作类型
|
||||
switch resp.Actions[i].ActionType {
|
||||
case "click", "left_double", "right_single":
|
||||
validateCoordinateAction(&resp.Actions[i], "startBox")
|
||||
case "drag":
|
||||
validateCoordinateAction(&resp.Actions[i], "startBox")
|
||||
validateCoordinateAction(&resp.Actions[i], "endBox")
|
||||
case "scroll":
|
||||
validateCoordinateAction(&resp.Actions[i], "startBox")
|
||||
validateScrollDirection(&resp.Actions[i])
|
||||
case "type":
|
||||
validateTypeContent(&resp.Actions[i])
|
||||
case "hotkey":
|
||||
validateHotkeyAction(&resp.Actions[i])
|
||||
case "wait", "finished", "call_user":
|
||||
// 这些动作不需要额外参数
|
||||
default:
|
||||
log.Printf("警告: 未知的动作类型: %s, 将尝试继续处理", resp.Actions[i].ActionType)
|
||||
}
|
||||
}
|
||||
|
||||
// 提取动作摘要
|
||||
actionSummary := extractActionSummary(resp.Actions)
|
||||
|
||||
// 将ParsedAction转换为接口类型
|
||||
var actions []interface{}
|
||||
for _, action := range resp.Actions {
|
||||
actionMap := map[string]interface{}{
|
||||
"actionType": action.ActionType,
|
||||
"actionInputs": action.ActionInputs,
|
||||
"thought": action.Thought,
|
||||
}
|
||||
actions = append(actions, actionMap)
|
||||
}
|
||||
|
||||
return &PlanningResult{
|
||||
Actions: actions,
|
||||
RealActions: resp.Actions,
|
||||
ActionSummary: actionSummary,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// extractActionSummary 从动作中提取摘要
|
||||
func extractActionSummary(actions []ParsedAction) string {
|
||||
if len(actions) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// 优先使用第一个动作的Thought作为摘要
|
||||
if actions[0].Thought != "" {
|
||||
return actions[0].Thought
|
||||
}
|
||||
|
||||
// 如果没有Thought,则根据动作类型生成摘要
|
||||
action := actions[0]
|
||||
switch action.ActionType {
|
||||
case "click":
|
||||
return "点击操作"
|
||||
case "drag":
|
||||
return "拖拽操作"
|
||||
case "left_double":
|
||||
return "双击操作"
|
||||
case "right_single":
|
||||
return "右键点击操作"
|
||||
case "scroll":
|
||||
direction, _ := action.ActionInputs["direction"].(string)
|
||||
return fmt.Sprintf("滚动操作 (%s)", direction)
|
||||
case "type":
|
||||
content, _ := action.ActionInputs["content"].(string)
|
||||
if len(content) > 20 {
|
||||
content = content[:20] + "..."
|
||||
}
|
||||
return fmt.Sprintf("输入文本: %s", content)
|
||||
case "hotkey":
|
||||
key, _ := action.ActionInputs["key"].(string)
|
||||
return fmt.Sprintf("快捷键: %s", key)
|
||||
case "wait":
|
||||
return "等待操作"
|
||||
case "finished":
|
||||
return "完成操作"
|
||||
case "call_user":
|
||||
return "请求用户协助"
|
||||
default:
|
||||
return fmt.Sprintf("执行 %s 操作", action.ActionType)
|
||||
}
|
||||
}
|
||||
|
||||
// validateCoordinateAction 验证坐标类动作
|
||||
func validateCoordinateAction(action *ParsedAction, boxField string) {
|
||||
if box, ok := action.ActionInputs[boxField]; !ok || box == "" {
|
||||
// 为空或缺失的坐标设置默认值
|
||||
action.ActionInputs[boxField] = "[0.5, 0.5]"
|
||||
log.Printf("警告: %s动作缺少%s参数, 已设置默认值", action.ActionType, boxField)
|
||||
}
|
||||
}
|
||||
|
||||
// validateScrollDirection 验证滚动方向
|
||||
func validateScrollDirection(action *ParsedAction) {
|
||||
if direction, ok := action.ActionInputs["direction"].(string); !ok || direction == "" {
|
||||
// 为空或缺失的方向设置默认值
|
||||
action.ActionInputs["direction"] = "down"
|
||||
log.Printf("警告: scroll动作缺少direction参数, 已设置默认值")
|
||||
} else {
|
||||
// 标准化方向
|
||||
switch strings.ToLower(direction) {
|
||||
case "up", "down", "left", "right":
|
||||
// 保持原样
|
||||
default:
|
||||
// 非标准方向设为默认值
|
||||
action.ActionInputs["direction"] = "down"
|
||||
log.Printf("警告: 非标准滚动方向: %s, 已设置为down", direction)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// validateTypeContent 验证输入文本内容
|
||||
func validateTypeContent(action *ParsedAction) {
|
||||
if content, ok := action.ActionInputs["content"]; !ok || content == "" {
|
||||
// 为空或缺失的内容设置默认值
|
||||
action.ActionInputs["content"] = ""
|
||||
log.Printf("警告: type动作缺少content参数, 已设置为空字符串")
|
||||
}
|
||||
}
|
||||
|
||||
// validateHotkeyAction 验证快捷键动作
|
||||
func validateHotkeyAction(action *ParsedAction) {
|
||||
if key, ok := action.ActionInputs["key"]; !ok || key == "" {
|
||||
// 为空或缺失的键设置默认值
|
||||
action.ActionInputs["key"] = "Enter"
|
||||
log.Printf("警告: hotkey动作缺少key参数, 已设置默认值")
|
||||
}
|
||||
}
|
||||
|
||||
// SavePositionImg decodes a base64 image, draws a filled red circle of radius
// 10 pixels centred on (Rect.X, Rect.Y) and writes the result as a PNG file.
//
// Fields of params:
//   - InputImgBase64: the image as standard base64, optionally prefixed with a
//     "data:image/...;base64," data-URL header, which is stripped.
//   - Rect.X, Rect.Y: the marker centre in image pixel coordinates; fractional
//     parts are discarded. Marker pixels falling outside the image are skipped.
//   - OutputPath: the file to create or overwrite.
//
// Decoding relies on the image decoders registered in the program; this file
// imports image/png.
//
// An error is returned when the base64 or image data cannot be decoded, or
// when the output file cannot be created, written or closed.
func SavePositionImg(params struct {
	InputImgBase64 string
	Rect           struct {
		X float64
		Y float64
	}
	OutputPath string
}) (err error) {
	// Drop a data-URL header if present; the payload follows the first comma.
	imgData := params.InputImgBase64
	if strings.HasPrefix(imgData, "data:image/") {
		if comma := strings.Index(imgData, ","); comma >= 0 {
			imgData = imgData[comma+1:]
		}
	}

	unbased, err := base64.StdEncoding.DecodeString(imgData)
	if err != nil {
		return fmt.Errorf("无法解码Base64图像: %w", err)
	}

	img, _, err := image.Decode(bytes.NewReader(unbased))
	if err != nil {
		return fmt.Errorf("无法解码图像数据: %w", err)
	}

	// Copy into an RGBA canvas so individual pixels can be overwritten.
	bounds := img.Bounds()
	rgba := image.NewRGBA(bounds)
	draw.Draw(rgba, bounds, img, bounds.Min, draw.Src)

	// Paint every pixel within markRadius of the centre that lies inside the
	// image; testing against bounds also covers images whose origin is not (0,0).
	const markRadius = 10
	red := color.RGBA{R: 255, A: 255}
	cx, cy := int(params.Rect.X), int(params.Rect.Y)
	for dx := -markRadius; dx <= markRadius; dx++ {
		for dy := -markRadius; dy <= markRadius; dy++ {
			if dx*dx+dy*dy > markRadius*markRadius {
				continue
			}
			if pt := image.Pt(cx+dx, cy+dy); pt.In(bounds) {
				rgba.Set(pt.X, pt.Y, red)
			}
		}
	}

	outFile, err := os.Create(params.OutputPath)
	if err != nil {
		return fmt.Errorf("无法创建输出文件: %w", err)
	}
	// Close can surface a deferred write failure, so report it unless an
	// earlier error is already being returned.
	defer func() {
		if closeErr := outFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("无法关闭输出文件: %w", closeErr)
		}
	}()

	if encErr := png.Encode(outFile, rgba); encErr != nil {
		return fmt.Errorf("无法编码和保存图像: %w", encErr)
	}

	return nil
}
|
||||
|
||||
// loadImage reads the file at imagePath and returns its bytes as a base64
// data URL. The "data:image/png;base64," prefix is applied unconditionally;
// the file's actual format is not inspected. A read failure is returned as
// is, together with an empty string.
func loadImage(imagePath string) (base64Str string, err error) {
	raw, readErr := os.ReadFile(imagePath)
	if readErr != nil {
		return "", readErr
	}
	return "data:image/png;base64," + base64.StdEncoding.EncodeToString(raw), nil
}
|
||||
263
planner/planner_test.go
Normal file
263
planner/planner_test.go
Normal file
@@ -0,0 +1,263 @@
|
||||
package planner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestVLMPlanning(t *testing.T) {
|
||||
err := loadEnv("testdata/.env")
|
||||
require.NoError(t, err)
|
||||
|
||||
// imageBase64, err := loadImage("testdata/popup_risk_warning.png")
|
||||
imageBase64, err := loadImage("testdata/llk_1.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
userInstruction := `连连看是一款经典的益智消除类小游戏,通常以图案或图标为主要元素。以下是连连看的基本规则说明:
|
||||
1. 游戏目标: 玩家需要在规定时间内,通过连接相同的图案或图标,将它们从游戏界面中消除。
|
||||
2. 连接规则:
|
||||
- 两个相同的图案可以通过不超过三条直线连接。
|
||||
- 连接线可以水平或垂直,但不能穿过其他图案。
|
||||
- 连接线的转折次数不能超过两次。
|
||||
3. 游戏界面: 游戏界面通常是一个矩形区域,内含多个图案或图标,排列成行和列。
|
||||
4. 时间限制: 游戏通常设有时间限制,玩家需要在时间耗尽前完成所有图案的消除。
|
||||
5. 得分机制: 每成功连接并消除一对图案,玩家会获得相应的分数。完成游戏后,根据剩余时间和消除效率计算总分。
|
||||
6. 关卡设计: 游戏可能包含多个关卡,随着关卡的推进,图案的复杂度和数量会增加。`
|
||||
|
||||
userInstruction += "\n\n请基于以上游戏规则,给出下一步可点击的两个图标坐标"
|
||||
|
||||
planner, err := NewPlanner(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
opts := PlanningOptions{
|
||||
UserInstruction: userInstruction,
|
||||
ConversationHistory: []*schema.Message{
|
||||
{
|
||||
Role: schema.User,
|
||||
MultiContent: []schema.ChatMessagePart{
|
||||
{
|
||||
Type: "image_url",
|
||||
ImageURL: &schema.ChatMessageImageURL{
|
||||
URL: imageBase64,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Size: Size{
|
||||
Width: 1920,
|
||||
Height: 1080,
|
||||
},
|
||||
}
|
||||
|
||||
// 执行规划
|
||||
result, err := planner.Start(opts)
|
||||
|
||||
// 验证结果
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, result)
|
||||
require.NotEmpty(t, result.RealActions)
|
||||
|
||||
// 验证动作
|
||||
action := result.RealActions[0]
|
||||
assert.NotEmpty(t, action.ActionType)
|
||||
assert.NotEmpty(t, action.Thought)
|
||||
|
||||
// 根据动作类型验证参数
|
||||
switch action.ActionType {
|
||||
case "click", "drag", "left_double", "right_single", "scroll":
|
||||
// 这些动作需要验证坐标
|
||||
assert.NotEmpty(t, action.ActionInputs["startBox"])
|
||||
|
||||
// 验证坐标格式
|
||||
var coords []float64
|
||||
err = json.Unmarshal([]byte(action.ActionInputs["startBox"].(string)), &coords)
|
||||
require.NoError(t, err)
|
||||
require.True(t, len(coords) >= 2) // 至少有 x, y 坐标
|
||||
|
||||
// 验证坐标范围
|
||||
for _, coord := range coords {
|
||||
assert.GreaterOrEqual(t, coord, float64(0))
|
||||
assert.LessOrEqual(t, coord, float64(1920)) // 最大屏幕宽度
|
||||
}
|
||||
|
||||
case "type":
|
||||
// 验证文本内容
|
||||
assert.NotEmpty(t, action.ActionInputs["content"])
|
||||
|
||||
case "hotkey":
|
||||
// 验证按键
|
||||
assert.NotEmpty(t, action.ActionInputs["key"])
|
||||
|
||||
case "wait", "finished", "call_user":
|
||||
// 这些动作不需要额外参数
|
||||
|
||||
default:
|
||||
t.Fatalf("未知的动作类型: %s", action.ActionType)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateInput(t *testing.T) {
|
||||
imageBase64, err := loadImage("testdata/popup_risk_warning.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
opts PlanningOptions
|
||||
wantErr error
|
||||
}{
|
||||
{
|
||||
name: "valid input",
|
||||
opts: PlanningOptions{
|
||||
UserInstruction: "点击继续使用按钮",
|
||||
ConversationHistory: []*schema.Message{
|
||||
{
|
||||
Role: schema.User,
|
||||
MultiContent: []schema.ChatMessagePart{
|
||||
{
|
||||
Type: "image_url",
|
||||
ImageURL: &schema.ChatMessageImageURL{
|
||||
URL: imageBase64,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Size: Size{Width: 100, Height: 100},
|
||||
},
|
||||
wantErr: nil,
|
||||
},
|
||||
{
|
||||
name: "empty instruction",
|
||||
opts: PlanningOptions{
|
||||
UserInstruction: "",
|
||||
ConversationHistory: []*schema.Message{
|
||||
{
|
||||
Role: schema.User,
|
||||
Content: "",
|
||||
},
|
||||
},
|
||||
Size: Size{Width: 100, Height: 100},
|
||||
},
|
||||
wantErr: ErrEmptyInstruction,
|
||||
},
|
||||
{
|
||||
name: "empty conversation history",
|
||||
opts: PlanningOptions{
|
||||
UserInstruction: "点击立即卸载按钮",
|
||||
ConversationHistory: []*schema.Message{},
|
||||
Size: Size{Width: 100, Height: 100},
|
||||
},
|
||||
wantErr: ErrNoConversationHistory,
|
||||
},
|
||||
{
|
||||
name: "invalid size",
|
||||
opts: PlanningOptions{
|
||||
UserInstruction: "勾选不再提示选项",
|
||||
ConversationHistory: []*schema.Message{
|
||||
{
|
||||
Role: schema.User,
|
||||
Content: "",
|
||||
},
|
||||
},
|
||||
Size: Size{Width: 0, Height: 0},
|
||||
},
|
||||
wantErr: ErrInvalidInput,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := validateInput(tt.opts)
|
||||
if tt.wantErr != nil {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessVLMResponse(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
resp *VLMResponse
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid response",
|
||||
resp: &VLMResponse{
|
||||
Actions: []ParsedAction{
|
||||
{
|
||||
ActionType: "click",
|
||||
ActionInputs: map[string]interface{}{
|
||||
"startBox": "[0.5, 0.5]",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "error response",
|
||||
resp: &VLMResponse{
|
||||
Error: "test error",
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "empty actions",
|
||||
resp: &VLMResponse{},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := processVLMResponse(tt.resp)
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, result)
|
||||
return
|
||||
}
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.Equal(t, tt.resp.Actions, result.RealActions)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSavePositionImg(t *testing.T) {
|
||||
imageBase64, err := loadImage("testdata/popup_risk_warning.png")
|
||||
require.NoError(t, err)
|
||||
|
||||
tempFile := t.TempDir() + "/test.png"
|
||||
params := struct {
|
||||
InputImgBase64 string
|
||||
Rect struct {
|
||||
X float64
|
||||
Y float64
|
||||
}
|
||||
OutputPath string
|
||||
}{
|
||||
InputImgBase64: imageBase64,
|
||||
Rect: struct {
|
||||
X float64
|
||||
Y float64
|
||||
}{
|
||||
X: 100,
|
||||
Y: 100,
|
||||
},
|
||||
OutputPath: tempFile,
|
||||
}
|
||||
|
||||
err = SavePositionImg(params)
|
||||
assert.NoError(t, err)
|
||||
// TODO: Add more assertions when SavePositionImg is implemented
|
||||
}
|
||||
BIN
planner/testdata/llk_1.png
vendored
Normal file
BIN
planner/testdata/llk_1.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 437 KiB |
BIN
planner/testdata/llk_2.png
vendored
Normal file
BIN
planner/testdata/llk_2.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 407 KiB |
BIN
planner/testdata/popup_risk_warning.png
vendored
Normal file
BIN
planner/testdata/popup_risk_warning.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.3 MiB |
38
planner/types.go
Normal file
38
planner/types.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package planner
|
||||
|
||||
import (
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
// PlanningOptions represents the input options for planning
|
||||
type PlanningOptions struct {
|
||||
UserInstruction string `json:"user_instruction"`
|
||||
ConversationHistory []*schema.Message `json:"conversation_history"`
|
||||
Size Size `json:"size"`
|
||||
}
|
||||
|
||||
// Size holds the width and height of a screen.
type Size struct {
	// Width is the horizontal dimension.
	Width int `json:"width"`
	// Height is the vertical dimension.
	Height int `json:"height"`
}
|
||||
|
||||
// PlanningResult represents the result of planning
|
||||
type PlanningResult struct {
|
||||
Actions []interface{} `json:"actions"`
|
||||
RealActions []ParsedAction `json:"real_actions"`
|
||||
ActionSummary string `json:"action_summary"`
|
||||
}
|
||||
|
||||
// VLMResponse represents the response from the Vision Language Model
|
||||
type VLMResponse struct {
|
||||
Actions []ParsedAction `json:"actions"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// ParsedAction is a single action parsed from a model reply.
type ParsedAction struct {
	// ActionType names the action, e.g. "click", "drag", "type" or "scroll".
	ActionType string `json:"actionType"`
	// ActionInputs holds the action's parameters keyed by name, such as
	// "startBox", "endBox", "direction", "content" or "key".
	ActionInputs map[string]interface{} `json:"actionInputs"`
	// Thought is the reasoning text that accompanied the action.
	Thought string `json:"thought"`
}
|
||||
Reference in New Issue
Block a user