mirror of
https://github.com/httprunner/httprunner.git
synced 2026-05-06 20:32:44 +08:00
stash
This commit is contained in:
5
go.mod
5
go.mod
@@ -8,7 +8,8 @@ require (
|
||||
github.com/Masterminds/semver v1.5.0
|
||||
github.com/andybalholm/brotli v1.0.4
|
||||
github.com/bytedance/sonic v1.13.2
|
||||
github.com/cloudwego/eino v0.3.16
|
||||
github.com/cloudwego/eino v0.3.23
|
||||
github.com/cloudwego/eino-ext/components/model/ark v0.1.6
|
||||
github.com/cloudwego/eino-ext/components/model/openai v0.0.0-20250314110024-9e89ba18146c
|
||||
github.com/cloudwego/eino-ext/components/tool/mcp v0.0.0-20250328102648-b47e7f1587fa
|
||||
github.com/danielpaulus/go-ios v1.0.161
|
||||
@@ -100,6 +101,8 @@ require (
|
||||
github.com/tadglines/go-pkgs v0.0.0-20210623144937-b983b20f54f9 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
github.com/volcengine/volc-sdk-golang v1.0.23 // indirect
|
||||
github.com/volcengine/volcengine-go-sdk v1.0.185 // indirect
|
||||
github.com/yargevad/filepathx v1.0.0 // indirect
|
||||
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
|
||||
go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 // indirect
|
||||
|
||||
75
go.sum
75
go.sum
@@ -1,14 +1,17 @@
|
||||
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
|
||||
github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y=
|
||||
github.com/airbrake/gobrake v3.6.1+incompatible/go.mod h1:wM4gu3Cn0W0K7GUuVWnlXZU11AGBXMILnrdOU8Kn00o=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
|
||||
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
|
||||
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
|
||||
github.com/bugsnag/bugsnag-go v1.4.0/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8=
|
||||
github.com/bugsnag/panicwrap v1.2.0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE=
|
||||
github.com/bytedance/mockey v1.2.13 h1:jokWZAm/pUEbD939Rhznz615MKUCZNuvCFQlJ2+ntoo=
|
||||
github.com/bytedance/mockey v1.2.13/go.mod h1:1BPHF9sol5R1ud/+0VEHGQq/+i2lN+GTsr3O2Q9IENY=
|
||||
github.com/bytedance/mockey v1.2.14 h1:KZaFgPdiUwW+jOWFieo3Lr7INM1P+6adO3hxZhDswY8=
|
||||
github.com/bytedance/mockey v1.2.14/go.mod h1:1BPHF9sol5R1ud/+0VEHGQq/+i2lN+GTsr3O2Q9IENY=
|
||||
github.com/bytedance/sonic v1.13.2 h1:8/H1FempDZqC4VqjptGo14QQlJx8VdZJegxs6wwfqpQ=
|
||||
github.com/bytedance/sonic v1.13.2/go.mod h1:o68xyaF9u2gvVBuGHPlUVCy+ZfmNNO5ETf1+KgkJhz4=
|
||||
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
@@ -16,14 +19,18 @@ github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCN
|
||||
github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
||||
github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4=
|
||||
github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
||||
github.com/cloudwego/eino v0.3.16 h1:ASN8zISyoEdjEsPnIw5GazSHtbNY97NDthQ2B69yiZw=
|
||||
github.com/cloudwego/eino v0.3.16/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
|
||||
github.com/cloudwego/eino v0.3.23 h1:fPjskHM85I4PPa+GZPoh76bMCbdRKWVKd52gdvcGHt8=
|
||||
github.com/cloudwego/eino v0.3.23/go.mod h1:wUjz990apdsaOraOXdh6CdhVXq8DJsOvLsVlxNTcNfY=
|
||||
github.com/cloudwego/eino-ext/components/model/ark v0.1.6 h1:k17Z9VIRBL0/t7Ty1drGgY9tVOraM5xuO6gy7Qx7xus=
|
||||
github.com/cloudwego/eino-ext/components/model/ark v0.1.6/go.mod h1:13kQjYGLMgla6xTbejlpqhuk3i5BPlNv5S+1pmknlOo=
|
||||
github.com/cloudwego/eino-ext/components/model/openai v0.0.0-20250314110024-9e89ba18146c h1:04WQpGikdQv6fh5wzMYSQhO0SJraV8+xcb9VQ00+HX4=
|
||||
github.com/cloudwego/eino-ext/components/model/openai v0.0.0-20250314110024-9e89ba18146c/go.mod h1:YGP4q3uspj5qhkv3CnvlEPSo0YGeWpvkkTUHHpLExas=
|
||||
github.com/cloudwego/eino-ext/components/tool/mcp v0.0.0-20250328102648-b47e7f1587fa h1:Jrmw8Q9g1WcE+x5t3o0TsEBM8RoMRURJI6P52I/ld74=
|
||||
@@ -46,6 +53,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/elazarl/goproxy v0.0.0-20240726154733-8b0c20506380 h1:1NyRx2f4W4WBRyg0Kys0ZbaNmDDzZ2R/C7DTi+bbsJ0=
|
||||
github.com/elazarl/goproxy v0.0.0-20240726154733-8b0c20506380/go.mod h1:thX175TtLTzLj3p7N/Q9IiKZ7NF+p72cvL91emV0hzo=
|
||||
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
||||
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
|
||||
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
|
||||
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
|
||||
@@ -96,12 +105,27 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
|
||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
||||
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
||||
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
||||
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
||||
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
||||
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
||||
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
|
||||
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
|
||||
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
||||
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
@@ -110,6 +134,7 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/goph/emperror v0.17.2 h1:yLapQcmEsO0ipe9p5TaN22djm3OFV/TfM/fcYP0/J18=
|
||||
@@ -160,6 +185,7 @@ github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
@@ -229,6 +255,7 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/quic-go/qtls-go1-20 v0.4.1 h1:D33340mCNDAIKBqXuAvexTNMUByrYmFYVfKfDN5nfFs=
|
||||
github.com/quic-go/qtls-go1-20 v0.4.1/go.mod h1:X9Nh97ZL80Z+bX/gUXMbipO6OxdiDi58b/fMC9mAL+k=
|
||||
github.com/quic-go/quic-go v0.40.1-0.20231203135336-87ef8ec48d55 h1:I4N3ZRnkZPbDN935Tg8QDf8fRpHp3bZ0U0/L42jBgNE=
|
||||
@@ -269,6 +296,7 @@ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
@@ -287,6 +315,10 @@ github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6
|
||||
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
|
||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/volcengine/volc-sdk-golang v1.0.23 h1:anOslb2Qp6ywnsbyq9jqR0ljuO63kg9PY+4OehIk5R8=
|
||||
github.com/volcengine/volc-sdk-golang v1.0.23/go.mod h1:AfG/PZRUkHJ9inETvbjNifTDgut25Wbkm2QoYBTbvyU=
|
||||
github.com/volcengine/volcengine-go-sdk v1.0.185 h1:MIH+YgdWZhO1fNg/vxLohl8ad7hlklaf46wpaTS1TN0=
|
||||
github.com/volcengine/volcengine-go-sdk v1.0.185/go.mod h1:gfEDc1s7SYaGoY+WH2dRrS3qiuDJMkwqyfXWCa7+7oA=
|
||||
github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg=
|
||||
github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE=
|
||||
github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5FYc=
|
||||
@@ -305,12 +337,20 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
|
||||
golang.org/x/crypto v0.0.0-20220331220935-ae2d96664a29/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 h1:Di6/M8l0O2lCLc6VVRWhgCiApHV8MnQurBnFSHsQtNY=
|
||||
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
|
||||
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
||||
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
@@ -318,10 +358,13 @@ golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLL
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
|
||||
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
@@ -353,6 +396,10 @@ golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
|
||||
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
||||
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
||||
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
||||
golang.org/x/tools v0.0.0-20191216052735-49a3e744a425/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
@@ -360,16 +407,34 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
|
||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
|
||||
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
||||
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
||||
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240116215550-a9fa1716bcac h1:nUQEQmH/csSvFECKYRv6HWEyypysidKl2I6Qpsglq/0=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240116215550-a9fa1716bcac/go.mod h1:daQN87bsDqDoe316QbbvX60nMoJQa4r6Ds0ZuoAe5yA=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
||||
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw=
|
||||
google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
||||
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
||||
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
||||
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
@@ -388,6 +453,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gvisor.dev/gvisor v0.0.0-20240405191320-0878b34101b5 h1:DOUDfNS+CFMM46k18FRF5k/0yz5NhZYMiUQxf4xglIU=
|
||||
gvisor.dev/gvisor v0.0.0-20240405191320-0878b34101b5/go.mod h1:NQHVAzMwvZ+Qe3ElSiHmq9RUm1MdNHpUZ52fiEqvn+0=
|
||||
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
|
||||
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
|
||||
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
|
||||
|
||||
@@ -1 +1 @@
|
||||
v5.0.0-beta-2504101120
|
||||
v5.0.0-beta-2504211439
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
@@ -24,7 +25,6 @@ type ActionParser struct {
|
||||
// Parse parses the prediction text and extracts actions
|
||||
func (p *ActionParser) Parse(predictionText string) ([]ParsedAction, error) {
|
||||
// try parsing JSON format, from VLM like openai/gpt-4o
|
||||
var jsonActions []ParsedAction
|
||||
jsonActions, jsonErr := p.parseJSON(predictionText)
|
||||
if jsonErr == nil {
|
||||
return jsonActions, nil
|
||||
@@ -93,17 +93,17 @@ func (p *ActionParser) parseThoughtAction(predictionText string) ([]ParsedAction
|
||||
return nil, errors.New("no action found in the response")
|
||||
}
|
||||
|
||||
actionText := strings.TrimSpace(actionMatch[1])
|
||||
actionsText := strings.TrimSpace(actionMatch[1])
|
||||
|
||||
// parse action type and parameters
|
||||
return p.parseActionText(actionText, thought)
|
||||
return p.parseActionText(actionsText, thought)
|
||||
}
|
||||
|
||||
// parseActionText parses the action text to extract the action type and parameters
|
||||
func (p *ActionParser) parseActionText(actionText, thought string) ([]ParsedAction, error) {
|
||||
func (p *ActionParser) parseActionText(actionsText, thought string) ([]ParsedAction, error) {
|
||||
// remove trailing comments
|
||||
if idx := strings.Index(actionText, "#"); idx > 0 {
|
||||
actionText = strings.TrimSpace(actionText[:idx])
|
||||
if idx := strings.Index(actionsText, "#"); idx > 0 {
|
||||
actionsText = strings.TrimSpace(actionsText[:idx])
|
||||
}
|
||||
|
||||
// supported action types and regexes
|
||||
@@ -119,62 +119,68 @@ func (p *ActionParser) parseActionText(actionText, thought string) ([]ParsedActi
|
||||
"call_user": regexp.MustCompile(`call_user\(\)`),
|
||||
}
|
||||
|
||||
// one or multiple actions, separated by newline
|
||||
// "click(start_box='<bbox>229 379 229 379</bbox>')
|
||||
// "click(start_box='<bbox>229 379 229 379</bbox>')\n\nclick(start_box='<bbox>769 519 769 519</bbox>')"
|
||||
parsedActions := make([]ParsedAction, 0)
|
||||
for actionType, regex := range actionRegexes {
|
||||
matches := regex.FindStringSubmatch(actionText)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
for _, actionText := range strings.Split(actionsText, "\n") {
|
||||
actionText = strings.TrimSpace(actionText)
|
||||
for actionType, regex := range actionRegexes {
|
||||
matches := regex.FindStringSubmatch(actionText)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var action ParsedAction
|
||||
action.ActionType = actionType
|
||||
action.ActionInputs = make(map[string]interface{})
|
||||
action.Thought = thought
|
||||
|
||||
// parse parameters based on action type
|
||||
switch actionType {
|
||||
case ActionTypeClick:
|
||||
if len(matches) > 1 {
|
||||
coord, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize point failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = coord
|
||||
}
|
||||
case ActionTypeDrag:
|
||||
if len(matches) > 2 {
|
||||
// handle start point
|
||||
startBox, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
|
||||
// handle end point
|
||||
endBox, err := p.normalizeCoordinates(matches[2])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize endBox failed: %s", matches[2])
|
||||
}
|
||||
action.ActionInputs["endBox"] = endBox
|
||||
}
|
||||
case ActionTypeType:
|
||||
if len(matches) > 1 {
|
||||
action.ActionInputs["content"] = matches[1]
|
||||
}
|
||||
case ActionTypeScroll:
|
||||
if len(matches) > 2 {
|
||||
startBox, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
action.ActionInputs["direction"] = matches[2]
|
||||
}
|
||||
case ActionTypeWait, ActionTypeFinished, ActionTypeCallUser:
|
||||
// 这些动作没有额外参数
|
||||
}
|
||||
|
||||
parsedActions = append(parsedActions, action)
|
||||
}
|
||||
|
||||
var action ParsedAction
|
||||
action.ActionType = actionType
|
||||
action.ActionInputs = make(map[string]interface{})
|
||||
action.Thought = thought
|
||||
|
||||
// parse parameters based on action type
|
||||
switch actionType {
|
||||
case ActionTypeClick:
|
||||
if len(matches) > 1 {
|
||||
coord, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize point failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = coord
|
||||
}
|
||||
case ActionTypeDrag:
|
||||
if len(matches) > 2 {
|
||||
// handle start point
|
||||
startBox, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
|
||||
// handle end point
|
||||
endBox, err := p.normalizeCoordinates(matches[2])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize endBox failed: %s", matches[2])
|
||||
}
|
||||
action.ActionInputs["endBox"] = endBox
|
||||
}
|
||||
case ActionTypeType:
|
||||
if len(matches) > 1 {
|
||||
action.ActionInputs["content"] = matches[1]
|
||||
}
|
||||
case ActionTypeScroll:
|
||||
if len(matches) > 2 {
|
||||
startBox, err := p.normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
action.ActionInputs["direction"] = matches[2]
|
||||
}
|
||||
case ActionTypeWait, ActionTypeFinished, ActionTypeCallUser:
|
||||
// 这些动作没有额外参数
|
||||
}
|
||||
|
||||
parsedActions = append(parsedActions, action)
|
||||
}
|
||||
|
||||
if len(parsedActions) == 0 {
|
||||
@@ -215,22 +221,47 @@ func (p *ActionParser) normalizeCoordinates(coordStr string) (coords []float64,
|
||||
return nil, fmt.Errorf("empty coordinate string")
|
||||
}
|
||||
|
||||
if !strings.Contains(coordStr, ",") {
|
||||
return nil, fmt.Errorf("invalid coordinate string: %s", coordStr)
|
||||
// handle BBox format: <bbox>x1 y1 x2 y2</bbox>
|
||||
bboxRegex := regexp.MustCompile(`<bbox>(\d+\s+\d+\s+\d+\s+\d+)</bbox>`)
|
||||
bboxMatches := bboxRegex.FindStringSubmatch(coordStr)
|
||||
if len(bboxMatches) > 1 {
|
||||
// Extract space-separated values from inside the bbox tags
|
||||
bboxContent := bboxMatches[1]
|
||||
// Split by whitespace
|
||||
parts := strings.Fields(bboxContent)
|
||||
if len(parts) == 4 {
|
||||
coords = make([]float64, 4)
|
||||
for i, part := range parts {
|
||||
val, e := strconv.ParseFloat(part, 64)
|
||||
if e != nil {
|
||||
return nil, fmt.Errorf("failed to parse coordinate value '%s': %w", part, e)
|
||||
}
|
||||
coords[i] = val
|
||||
}
|
||||
// 将 val 转换为 [x,y] 坐标
|
||||
x := (coords[0] + coords[2]) / 2
|
||||
y := (coords[1] + coords[3]) / 2
|
||||
return []float64{x, y}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// remove possible brackets and split coordinates
|
||||
coordStr = strings.Trim(coordStr, "[]() \t")
|
||||
// handle coordinate string, e.g. "[100, 200]", "(100, 200)"
|
||||
if strings.Contains(coordStr, ",") {
|
||||
// remove possible brackets and split coordinates
|
||||
coordStr = strings.Trim(coordStr, "[]() \t")
|
||||
|
||||
// try parsing JSON array
|
||||
jsonStr := coordStr
|
||||
if !strings.HasPrefix(jsonStr, "[") {
|
||||
jsonStr = "[" + coordStr + "]"
|
||||
// try parsing JSON array
|
||||
jsonStr := coordStr
|
||||
if !strings.HasPrefix(jsonStr, "[") {
|
||||
jsonStr = "[" + coordStr + "]"
|
||||
}
|
||||
|
||||
err = json.Unmarshal([]byte(jsonStr), &coords)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse coordinate string: %w", err)
|
||||
}
|
||||
return coords, nil
|
||||
}
|
||||
|
||||
err = json.Unmarshal([]byte(jsonStr), &coords)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse coordinate string: %w", err)
|
||||
}
|
||||
return coords, nil
|
||||
return nil, fmt.Errorf("invalid coordinate string format: %s", coordStr)
|
||||
}
|
||||
|
||||
484
uixt/ai/planner-ui-tars.go
Normal file
484
uixt/ai/planner-ui-tars.go
Normal file
@@ -0,0 +1,484 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/ark"
|
||||
"github.com/cloudwego/eino/components/model"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/httprunner/httprunner/v5/internal/json"
|
||||
"github.com/httprunner/httprunner/v5/uixt/types"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func GetArkModelConfig() (*ark.ChatModelConfig, error) {
|
||||
return &ark.ChatModelConfig{
|
||||
APIKey: os.Getenv("ARK_API_KEY"),
|
||||
Model: os.Getenv("ARK_MODEL_ID"),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func NewUITarsPlanner(ctx context.Context) (*UITarsPlanner, error) {
|
||||
config, err := GetArkModelConfig()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
chatModel, err := ark.NewChatModel(ctx, config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &UITarsPlanner{
|
||||
ctx: ctx,
|
||||
config: config,
|
||||
model: chatModel,
|
||||
systemPrompt: uiTarsPlanningPrompt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// https://www.volcengine.com/docs/82379/1536429
|
||||
const uiTarsPlanningPrompt = `
|
||||
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
|
||||
|
||||
## Output Format
|
||||
` + "```" + `
|
||||
Thought: ...
|
||||
Action: ...
|
||||
` + "```" + `
|
||||
|
||||
## Action Space
|
||||
click(start_box='[x1, y1, x2, y2]')
|
||||
left_double(start_box='[x1, y1, x2, y2]')
|
||||
right_single(start_box='[x1, y1, x2, y2]')
|
||||
drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
|
||||
hotkey(key='')
|
||||
type(content='') #If you want to submit your input, use "\n" at the end of ` + "`content`" + `.
|
||||
scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
|
||||
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
||||
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
|
||||
|
||||
## Note
|
||||
- Use Chinese in ` + "`Thought`" + ` part.
|
||||
- Write a small plan and finally summarize your next action (with its target element) in one sentence in ` + "`Thought`" + ` part.
|
||||
|
||||
## User Instruction
|
||||
`
|
||||
|
||||
type UITarsPlanner struct {
|
||||
ctx context.Context
|
||||
model model.ToolCallingChatModel
|
||||
config *ark.ChatModelConfig
|
||||
systemPrompt string
|
||||
history []*schema.Message // conversation history
|
||||
}
|
||||
|
||||
// Call performs UI planning using Vision Language Model
|
||||
func (p *UITarsPlanner) Call(opts *PlanningOptions) (*PlanningResult, error) {
|
||||
// validate input parameters
|
||||
if err := validateInput(opts); err != nil {
|
||||
return nil, errors.Wrap(err, "validate input parameters failed")
|
||||
}
|
||||
|
||||
// prepare prompt
|
||||
if len(p.history) == 0 {
|
||||
// add system message
|
||||
systemPrompt := uiTarsPlanningPrompt + opts.UserInstruction
|
||||
p.history = []*schema.Message{
|
||||
{
|
||||
Role: schema.System,
|
||||
Content: systemPrompt,
|
||||
},
|
||||
}
|
||||
}
|
||||
// append user image message
|
||||
appendConversationHistory(p.history, opts.Message)
|
||||
|
||||
// call model service, generate response
|
||||
logRequest(p.history)
|
||||
startTime := time.Now()
|
||||
resp, err := p.model.Generate(p.ctx, p.history)
|
||||
log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()).
|
||||
Str("model", p.config.Model).Msg("call model service")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("request model service failed: %w", err)
|
||||
}
|
||||
logResponse(resp)
|
||||
|
||||
// parse result
|
||||
result, err := parseResult(resp, opts.Size)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse result failed")
|
||||
}
|
||||
|
||||
// append assistant message
|
||||
appendConversationHistory(p.history, &schema.Message{
|
||||
Role: schema.Assistant,
|
||||
Content: result.ActionSummary,
|
||||
})
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// appendConversationHistory adds a message to the conversation history
|
||||
func appendConversationHistory(history []*schema.Message, msg *schema.Message) {
|
||||
// for user image message:
|
||||
// - keep at most 4 user image messages
|
||||
// - delete the oldest user image message when the limit is reached
|
||||
if msg.Role == schema.User {
|
||||
// get all existing user messages
|
||||
userImgCount := 0
|
||||
firstUserImgIndex := -1
|
||||
|
||||
// calculate the number of user messages and find the index of the first user message
|
||||
for i, item := range history {
|
||||
if item.Role == schema.User {
|
||||
userImgCount++
|
||||
if firstUserImgIndex == -1 {
|
||||
firstUserImgIndex = i
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if there are already 4 user messages, delete the first one before adding the new message
|
||||
if userImgCount >= 4 && firstUserImgIndex >= 0 {
|
||||
// delete the first user message
|
||||
history = append(
|
||||
history[:firstUserImgIndex],
|
||||
history[firstUserImgIndex+1:]...,
|
||||
)
|
||||
}
|
||||
// add the new user message to the history
|
||||
history = append(history, msg)
|
||||
}
|
||||
|
||||
// for assistant message:
|
||||
// - keep at most the last 10 assistant messages
|
||||
if msg.Role == schema.Assistant {
|
||||
// add the new assistant message to the history
|
||||
history = append(history, msg)
|
||||
|
||||
// if there are more than 10 assistant messages, remove the oldest ones
|
||||
assistantMsgCount := 0
|
||||
for i := len(history) - 1; i >= 0; i-- {
|
||||
if history[i].Role == schema.Assistant {
|
||||
assistantMsgCount++
|
||||
if assistantMsgCount > 10 {
|
||||
history = append(history[:i], history[i+1:]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseResult(msg *schema.Message, size types.Size) (*PlanningResult, error) {
|
||||
// parse Thought/Action format from UI-TARS
|
||||
parseActions, thoughtErr := parseThoughtAction(msg.Content)
|
||||
if thoughtErr != nil {
|
||||
return nil, thoughtErr
|
||||
}
|
||||
|
||||
// process response
|
||||
result, err := processVLMResponse(parseActions, size)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "process VLM response failed")
|
||||
}
|
||||
|
||||
log.Info().
|
||||
Interface("summary", result.ActionSummary).
|
||||
Interface("actions", result.NextActions).
|
||||
Msg("get VLM planning result")
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parseThoughtAction parses the Thought/Action format response
|
||||
func parseThoughtAction(predictionText string) ([]ParsedAction, error) {
|
||||
thoughtRegex := regexp.MustCompile(`(?is)Thought:(.+?)Action:`)
|
||||
actionRegex := regexp.MustCompile(`(?is)Action:(.+)`)
|
||||
|
||||
// extract Thought part
|
||||
thoughtMatch := thoughtRegex.FindStringSubmatch(predictionText)
|
||||
var thought string
|
||||
if len(thoughtMatch) > 1 {
|
||||
thought = strings.TrimSpace(thoughtMatch[1])
|
||||
}
|
||||
|
||||
// extract Action part, e.g. "click(start_box='(552,454)')"
|
||||
actionMatch := actionRegex.FindStringSubmatch(predictionText)
|
||||
if len(actionMatch) < 2 {
|
||||
return nil, errors.New("no action found in the response")
|
||||
}
|
||||
|
||||
actionsText := strings.TrimSpace(actionMatch[1])
|
||||
|
||||
// parse action type and parameters
|
||||
return parseActionText(actionsText, thought)
|
||||
}
|
||||
|
||||
// parseActionText parses the action text to extract the action type and parameters
|
||||
func parseActionText(actionsText, thought string) ([]ParsedAction, error) {
|
||||
// remove trailing comments
|
||||
if idx := strings.Index(actionsText, "#"); idx > 0 {
|
||||
actionsText = strings.TrimSpace(actionsText[:idx])
|
||||
}
|
||||
|
||||
// supported action types and regexes
|
||||
actionRegexes := map[ActionType]*regexp.Regexp{
|
||||
"click": regexp.MustCompile(`click\(start_box='([^']+)'\)`),
|
||||
"left_double": regexp.MustCompile(`left_double\(start_box='([^']+)'\)`),
|
||||
"right_single": regexp.MustCompile(`right_single\(start_box='([^']+)'\)`),
|
||||
"drag": regexp.MustCompile(`drag\(start_box='([^']+)', end_box='([^']+)'\)`),
|
||||
"type": regexp.MustCompile(`type\(content='([^']+)'\)`),
|
||||
"scroll": regexp.MustCompile(`scroll\(start_box='([^']+)', direction='([^']+)'\)`),
|
||||
"wait": regexp.MustCompile(`wait\(\)`),
|
||||
"finished": regexp.MustCompile(`finished\(content='([^']+)'\)`),
|
||||
"call_user": regexp.MustCompile(`call_user\(\)`),
|
||||
}
|
||||
|
||||
// one or multiple actions, separated by newline
|
||||
// "click(start_box='<bbox>229 379 229 379</bbox>')
|
||||
// "click(start_box='<bbox>229 379 229 379</bbox>')\n\nclick(start_box='<bbox>769 519 769 519</bbox>')"
|
||||
parsedActions := make([]ParsedAction, 0)
|
||||
for _, actionText := range strings.Split(actionsText, "\n") {
|
||||
actionText = strings.TrimSpace(actionText)
|
||||
for actionType, regex := range actionRegexes {
|
||||
matches := regex.FindStringSubmatch(actionText)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var action ParsedAction
|
||||
action.ActionType = actionType
|
||||
action.ActionInputs = make(map[string]interface{})
|
||||
action.Thought = thought
|
||||
|
||||
// parse parameters based on action type
|
||||
switch actionType {
|
||||
case ActionTypeClick:
|
||||
if len(matches) > 1 {
|
||||
coord, err := normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize point failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = coord
|
||||
}
|
||||
case ActionTypeDrag:
|
||||
if len(matches) > 2 {
|
||||
// handle start point
|
||||
startBox, err := normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
|
||||
// handle end point
|
||||
endBox, err := normalizeCoordinates(matches[2])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize endBox failed: %s", matches[2])
|
||||
}
|
||||
action.ActionInputs["endBox"] = endBox
|
||||
}
|
||||
case ActionTypeType:
|
||||
if len(matches) > 1 {
|
||||
action.ActionInputs["content"] = matches[1]
|
||||
}
|
||||
case ActionTypeScroll:
|
||||
if len(matches) > 2 {
|
||||
startBox, err := normalizeCoordinates(matches[1])
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "normalize startBox failed: %s", matches[1])
|
||||
}
|
||||
action.ActionInputs["startBox"] = startBox
|
||||
action.ActionInputs["direction"] = matches[2]
|
||||
}
|
||||
case ActionTypeWait, ActionTypeFinished, ActionTypeCallUser:
|
||||
// 这些动作没有额外参数
|
||||
}
|
||||
|
||||
parsedActions = append(parsedActions, action)
|
||||
}
|
||||
}
|
||||
|
||||
if len(parsedActions) == 0 {
|
||||
return nil, fmt.Errorf("no valid actions returned from VLM")
|
||||
}
|
||||
return parsedActions, nil
|
||||
}
|
||||
|
||||
// normalizeCoordinates normalizes the coordinates based on the factor
|
||||
func normalizeCoordinates(coordStr string) (coords []float64, err error) {
|
||||
// check empty string
|
||||
if coordStr == "" {
|
||||
return nil, fmt.Errorf("empty coordinate string")
|
||||
}
|
||||
|
||||
// handle BBox format: <bbox>x1 y1 x2 y2</bbox>
|
||||
bboxRegex := regexp.MustCompile(`<bbox>(\d+\s+\d+\s+\d+\s+\d+)</bbox>`)
|
||||
bboxMatches := bboxRegex.FindStringSubmatch(coordStr)
|
||||
if len(bboxMatches) > 1 {
|
||||
// Extract space-separated values from inside the bbox tags
|
||||
bboxContent := bboxMatches[1]
|
||||
// Split by whitespace
|
||||
parts := strings.Fields(bboxContent)
|
||||
if len(parts) == 4 {
|
||||
coords = make([]float64, 4)
|
||||
for i, part := range parts {
|
||||
val, e := strconv.ParseFloat(part, 64)
|
||||
if e != nil {
|
||||
return nil, fmt.Errorf("failed to parse coordinate value '%s': %w", part, e)
|
||||
}
|
||||
coords[i] = val
|
||||
}
|
||||
// 将 val 转换为 [x,y] 坐标
|
||||
x := (coords[0] + coords[2]) / 2
|
||||
y := (coords[1] + coords[3]) / 2
|
||||
return []float64{x, y}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// handle coordinate string, e.g. "[100, 200]", "(100, 200)"
|
||||
if strings.Contains(coordStr, ",") {
|
||||
// remove possible brackets and split coordinates
|
||||
coordStr = strings.Trim(coordStr, "[]() \t")
|
||||
|
||||
// try parsing JSON array
|
||||
jsonStr := coordStr
|
||||
if !strings.HasPrefix(jsonStr, "[") {
|
||||
jsonStr = "[" + coordStr + "]"
|
||||
}
|
||||
|
||||
err = json.Unmarshal([]byte(jsonStr), &coords)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse coordinate string: %w", err)
|
||||
}
|
||||
return coords, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid coordinate string format: %s", coordStr)
|
||||
}
|
||||
|
||||
// processVLMResponse processes the VLM response and converts it to PlanningResult
|
||||
func processVLMResponse(actions []ParsedAction, size types.Size) (*PlanningResult, error) {
|
||||
log.Info().Msg("processing VLM response...")
|
||||
|
||||
if len(actions) == 0 {
|
||||
return nil, fmt.Errorf("no actions returned from VLM")
|
||||
}
|
||||
|
||||
// validate and post-process each action
|
||||
for i := range actions {
|
||||
// validate action type
|
||||
switch actions[i].ActionType {
|
||||
case "click":
|
||||
if err := convertCoordinateAction(&actions[i], "startBox", size); err != nil {
|
||||
return nil, errors.Wrap(err, "convert coordinate action failed")
|
||||
}
|
||||
case "drag":
|
||||
if err := convertCoordinateAction(&actions[i], "startBox", size); err != nil {
|
||||
return nil, errors.Wrap(err, "convert coordinate action failed")
|
||||
}
|
||||
if err := convertCoordinateAction(&actions[i], "endBox", size); err != nil {
|
||||
return nil, errors.Wrap(err, "convert coordinate action failed")
|
||||
}
|
||||
case "type":
|
||||
validateTypeContent(&actions[i])
|
||||
case "wait", "finished", "call_user":
|
||||
// these actions do not need extra parameters
|
||||
default:
|
||||
log.Printf("warning: unknown action type: %s, will try to continue processing", actions[i].ActionType)
|
||||
}
|
||||
}
|
||||
|
||||
// extract action summary
|
||||
actionSummary := extractActionSummary(actions)
|
||||
|
||||
return &PlanningResult{
|
||||
NextActions: actions,
|
||||
ActionSummary: actionSummary,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// extractActionSummary extracts the summary from the actions
|
||||
func extractActionSummary(actions []ParsedAction) string {
|
||||
if len(actions) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// use the Thought of the first action as summary
|
||||
if actions[0].Thought != "" {
|
||||
return actions[0].Thought
|
||||
}
|
||||
|
||||
// if no Thought, generate summary from action type
|
||||
action := actions[0]
|
||||
switch action.ActionType {
|
||||
case "click":
|
||||
return "点击操作"
|
||||
case "drag":
|
||||
return "拖拽操作"
|
||||
case "type":
|
||||
content, _ := action.ActionInputs["content"].(string)
|
||||
if len(content) > 20 {
|
||||
content = content[:20] + "..."
|
||||
}
|
||||
return fmt.Sprintf("输入文本: %s", content)
|
||||
case "wait":
|
||||
return "等待操作"
|
||||
case "finished":
|
||||
return "完成操作"
|
||||
case "call_user":
|
||||
return "请求用户协助"
|
||||
default:
|
||||
return fmt.Sprintf("执行 %s 操作", action.ActionType)
|
||||
}
|
||||
}
|
||||
|
||||
func convertCoordinateAction(action *ParsedAction, boxField string, size types.Size) error {
|
||||
// The model generates a 2D coordinate output that represents relative positions.
|
||||
// To convert these values to image-relative coordinates, divide each component by 1000 to obtain values in the range [0,1].
|
||||
// The absolute coordinates required by the Action can be calculated by:
|
||||
// - X absolute = X relative × image width / 1000
|
||||
// - Y absolute = Y relative × image height / 1000
|
||||
|
||||
// get image width and height
|
||||
imageWidth := size.Width
|
||||
imageHeight := size.Height
|
||||
|
||||
box := action.ActionInputs[boxField]
|
||||
coords, ok := box.([]float64)
|
||||
if !ok {
|
||||
log.Error().Interface("inputs", action.ActionInputs).Msg("invalid action inputs")
|
||||
return fmt.Errorf("invalid action inputs")
|
||||
}
|
||||
|
||||
if len(coords) == 2 {
|
||||
coords[0] = math.Round((coords[0]/1000*float64(imageWidth))*10) / 10
|
||||
coords[1] = math.Round((coords[1]/1000*float64(imageHeight))*10) / 10
|
||||
} else if len(coords) == 4 {
|
||||
coords[0] = math.Round((coords[0]/1000*float64(imageWidth))*10) / 10
|
||||
coords[1] = math.Round((coords[1]/1000*float64(imageHeight))*10) / 10
|
||||
coords[2] = math.Round((coords[2]/1000*float64(imageWidth))*10) / 10
|
||||
coords[3] = math.Round((coords[3]/1000*float64(imageHeight))*10) / 10
|
||||
} else {
|
||||
log.Error().Interface("inputs", action.ActionInputs).Msg("invalid action inputs")
|
||||
return fmt.Errorf("invalid action inputs")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateTypeContent 验证输入文本内容
|
||||
func validateTypeContent(action *ParsedAction) {
|
||||
if content, ok := action.ActionInputs["content"]; !ok || content == "" {
|
||||
// default to empty string
|
||||
action.ActionInputs["content"] = ""
|
||||
log.Warn().Msg("type action missing content parameter, set to default")
|
||||
}
|
||||
}
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"image/draw"
|
||||
_ "image/jpeg"
|
||||
"image/png"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -41,19 +40,21 @@ func NewPlanner(ctx context.Context) (*Planner, error) {
|
||||
}
|
||||
parser := NewActionParser(1000)
|
||||
return &Planner{
|
||||
ctx: ctx,
|
||||
config: config,
|
||||
model: model,
|
||||
parser: parser,
|
||||
ctx: ctx,
|
||||
config: config,
|
||||
model: model,
|
||||
systemPrompt: uiTarsPlanningPrompt,
|
||||
parser: parser,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type Planner struct {
|
||||
ctx context.Context
|
||||
model model.ChatModel
|
||||
config *openai.ChatModelConfig
|
||||
parser *ActionParser
|
||||
history []*schema.Message // conversation history
|
||||
ctx context.Context
|
||||
model model.ChatModel
|
||||
config *openai.ChatModelConfig
|
||||
systemPrompt string
|
||||
parser *ActionParser
|
||||
history []*schema.Message // conversation history
|
||||
}
|
||||
|
||||
// Call performs UI planning using Vision Language Model
|
||||
@@ -238,125 +239,6 @@ func (p *Planner) parseResult(msg *schema.Message, size types.Size) (*PlanningRe
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// processVLMResponse processes the VLM response and converts it to PlanningResult
|
||||
func processVLMResponse(actions []ParsedAction, size types.Size) (*PlanningResult, error) {
|
||||
log.Info().Msg("processing VLM response...")
|
||||
|
||||
if len(actions) == 0 {
|
||||
return nil, fmt.Errorf("no actions returned from VLM")
|
||||
}
|
||||
|
||||
// validate and post-process each action
|
||||
for i := range actions {
|
||||
// validate action type
|
||||
switch actions[i].ActionType {
|
||||
case "click":
|
||||
if err := convertCoordinateAction(&actions[i], "startBox", size); err != nil {
|
||||
return nil, errors.Wrap(err, "convert coordinate action failed")
|
||||
}
|
||||
case "drag":
|
||||
if err := convertCoordinateAction(&actions[i], "startBox", size); err != nil {
|
||||
return nil, errors.Wrap(err, "convert coordinate action failed")
|
||||
}
|
||||
if err := convertCoordinateAction(&actions[i], "endBox", size); err != nil {
|
||||
return nil, errors.Wrap(err, "convert coordinate action failed")
|
||||
}
|
||||
case "type":
|
||||
validateTypeContent(&actions[i])
|
||||
case "wait", "finished", "call_user":
|
||||
// these actions do not need extra parameters
|
||||
default:
|
||||
log.Printf("warning: unknown action type: %s, will try to continue processing", actions[i].ActionType)
|
||||
}
|
||||
}
|
||||
|
||||
// extract action summary
|
||||
actionSummary := extractActionSummary(actions)
|
||||
|
||||
return &PlanningResult{
|
||||
NextActions: actions,
|
||||
ActionSummary: actionSummary,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// extractActionSummary extracts the summary from the actions
|
||||
func extractActionSummary(actions []ParsedAction) string {
|
||||
if len(actions) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// use the Thought of the first action as summary
|
||||
if actions[0].Thought != "" {
|
||||
return actions[0].Thought
|
||||
}
|
||||
|
||||
// if no Thought, generate summary from action type
|
||||
action := actions[0]
|
||||
switch action.ActionType {
|
||||
case "click":
|
||||
return "点击操作"
|
||||
case "drag":
|
||||
return "拖拽操作"
|
||||
case "type":
|
||||
content, _ := action.ActionInputs["content"].(string)
|
||||
if len(content) > 20 {
|
||||
content = content[:20] + "..."
|
||||
}
|
||||
return fmt.Sprintf("输入文本: %s", content)
|
||||
case "wait":
|
||||
return "等待操作"
|
||||
case "finished":
|
||||
return "完成操作"
|
||||
case "call_user":
|
||||
return "请求用户协助"
|
||||
default:
|
||||
return fmt.Sprintf("执行 %s 操作", action.ActionType)
|
||||
}
|
||||
}
|
||||
|
||||
func convertCoordinateAction(action *ParsedAction, boxField string, size types.Size) error {
|
||||
// The model generates a 2D coordinate output that represents relative positions.
|
||||
// To convert these values to image-relative coordinates, divide each component by 1000 to obtain values in the range [0,1].
|
||||
// The absolute coordinates required by the Action can be calculated by:
|
||||
// - X absolute = X relative × image width / 1000
|
||||
// - Y absolute = Y relative × image height / 1000
|
||||
|
||||
// get image width and height
|
||||
imageWidth := size.Width
|
||||
imageHeight := size.Height
|
||||
|
||||
box := action.ActionInputs[boxField]
|
||||
coords, ok := box.([]float64)
|
||||
if !ok {
|
||||
log.Error().Interface("inputs", action.ActionInputs).Msg("invalid action inputs")
|
||||
return fmt.Errorf("invalid action inputs")
|
||||
}
|
||||
|
||||
if len(coords) == 2 {
|
||||
coords[0] = math.Round((coords[0]/1000*float64(imageWidth))*10) / 10
|
||||
coords[1] = math.Round((coords[1]/1000*float64(imageHeight))*10) / 10
|
||||
} else if len(coords) == 4 {
|
||||
coords[0] = math.Round((coords[0]/1000*float64(imageWidth))*10) / 10
|
||||
coords[1] = math.Round((coords[1]/1000*float64(imageHeight))*10) / 10
|
||||
coords[2] = math.Round((coords[2]/1000*float64(imageWidth))*10) / 10
|
||||
coords[3] = math.Round((coords[3]/1000*float64(imageHeight))*10) / 10
|
||||
} else {
|
||||
log.Error().Interface("inputs", action.ActionInputs).Msg("invalid action inputs")
|
||||
return fmt.Errorf("invalid action inputs")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateTypeContent 验证输入文本内容
|
||||
func validateTypeContent(action *ParsedAction) {
|
||||
if content, ok := action.ActionInputs["content"]; !ok || content == "" {
|
||||
// default to empty string
|
||||
action.ActionInputs["content"] = ""
|
||||
log.Warn().Msg("type action missing content parameter, set to default")
|
||||
}
|
||||
}
|
||||
|
||||
// SavePositionImg saves an image with position markers
|
||||
func SavePositionImg(params struct {
|
||||
InputImgBase64 string
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
package ai
|
||||
|
||||
const uiTarsPlanningPrompt = `
|
||||
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
|
||||
|
||||
## Output Format
|
||||
Thought: ...
|
||||
Action: ...
|
||||
|
||||
## Action Space
|
||||
click(start_box='[x1,y1]')
|
||||
long_press(start_box='[x1,y1]', time='')
|
||||
type(content='')
|
||||
drag(start_box='[x1,y1]', end_box='[x2,y2]')
|
||||
press_home()
|
||||
press_back()
|
||||
finished(content='') # Submit the task regardless of whether it succeeds or fails.
|
||||
|
||||
## Note
|
||||
- Use Chinese in Thought part.
|
||||
- Write a small plan and finally summarize your next action (with its target element) in one sentence in Thought part.
|
||||
|
||||
## User Instruction
|
||||
`
|
||||
Reference in New Issue
Block a user