Files
httprunner/uixt/ai/wings_service.go
2025-07-17 14:47:33 +08:00

577 lines
18 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package ai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strings"
"time"
"github.com/cloudwego/eino/schema"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"github.com/httprunner/httprunner/v5/internal/builtin"
)
// WingsService implements ILLMService interface using external Wings API
//
// Planning and assertion requests are serialized to JSON and POSTed to apiURL
// by callWingsAPI.
type WingsService struct {
// apiURL is the endpoint that callWingsAPI sends requests to.
apiURL string
// bizId is sent as the biz_id field of every Wings request.
bizId string
// isExternal selects the authenticated path: when true, callWingsAPI adds
// the Agw-Auth signature headers to the request.
isExternal bool
// accessKey and secretKey are the credentials handed to builtin.Sign when
// isExternal is true; NewWingsService reads them from the environment.
accessKey string
secretKey string
}
// NewWingsService builds a Wings-backed ILLMService.
//
// The internal endpoint is used by default. When both VEDEM_WINGS_AK and
// VEDEM_WINGS_SK are set to non-empty values, the service switches to the
// external endpoint, marks itself as external and keeps the key pair so that
// requests can be signed.
func NewWingsService() ILLMService {
	svc := &WingsService{
		apiURL: "https://vedem-algorithm.bytedance.net/algorithm/StepActionDecision",
		bizId:  "489fdae44de048e0922a32834ea668af",
	}

	ak := os.Getenv("VEDEM_WINGS_AK")
	sk := os.Getenv("VEDEM_WINGS_SK")
	if ak == "" || sk == "" {
		// Credentials incomplete: stay on the unauthenticated internal API.
		return svc
	}

	svc.apiURL = "https://vedem-algorithm.zijieapi.com/algorithm/StepActionDecision"
	svc.isExternal = true
	svc.accessKey = ak
	svc.secretKey = sk
	return svc
}
// Plan implements ILLMService.Plan by asking the Wings API which action to
// perform next.
//
// The screenshot is taken from opts.Message, combined with the device info
// derived from ctx, and sent as a request of step type "automation". The
// action_params string in the response is converted into tool calls.
//
// Validation and screenshot-extraction failures return a nil result. API,
// status and conversion failures return a PlanningResult that carries the
// error text, together with the error itself.
func (w *WingsService) Plan(ctx context.Context, opts *PlanningOptions) (*PlanningResult, error) {
	const modelName = "wings-api"

	if err := validatePlanningInput(opts); err != nil {
		return nil, errors.Wrap(err, "validate planning parameters failed")
	}

	screenshot, err := w.extractScreenshotFromMessage(opts.Message)
	if err != nil {
		return nil, errors.Wrap(err, "extract screenshot failed")
	}

	device := w.getDeviceInfoFromContext(ctx, screenshot)

	req := WingsActionRequest{
		Historys:    []interface{}{}, // always sent empty
		DeviceInfos: []WingsDeviceInfo{device},
		StepText:    opts.UserInstruction,
		BizId:       w.bizId,
		// NOTE(review): this is a fixed sample test case containing literal
		// backslash-n sequences; presumably a placeholder — confirm.
		TextCase: "整体描述:\\n前置条件\\n获取 1 台设备 A。\\n获取 1 个[万粉创作者]账号a。\\n获取 2 个[普通]账号 b、c。\\n账号 a 和账号 b 互相关注。\\n账号 a 和账号 c 互相关注。\\n账号 a 给账号 b 设置备注为 “11131b”。\\n账号 a 给账号 c 设置备注为 “11131c”。\\n账号 a 创建一个粉丝群 m。\\n 账号 a 修改粉丝群 m 名称为“11131群”。\\n 账号 a 邀请账号 b 加入粉丝群 m。\\n账号 a 邀请账号 c 加入粉丝群 m。\\n账号 a 给群聊 m 发送一条文字消息。\\n设备 A 打开抖音 app。\\n设备 A 登录账号 a。\\n设备 A 退出抖音 app。\\n操作步骤\\n账号a打开抖音app。\\n点击“消息”。\\n点击“11131群”cell。\\n点击“聊天信息页入口”按钮。\\n点击“分享公开群”按钮。\\n点击文字“群口令”。\\n断言屏幕中存在文字“口令复制成功”。\\n停止操作。\\n注意事项\\n",
		StepType: "automation",
		DeviceID: device.DeviceID,
		Base:     WingsBase{LogID: generateWingsUUID()},
	}

	// failed builds the partial result returned next to an error.
	failed := func(thought string, cause error) *PlanningResult {
		return &PlanningResult{
			Thought:   thought,
			Error:     cause.Error(),
			ModelName: modelName,
		}
	}

	began := time.Now()
	resp, err := w.callWingsAPI(ctx, req)
	elapsedMs := time.Since(began).Milliseconds()
	if err != nil {
		return failed("Wings API call failed", err), errors.Wrap(err, "Wings API call failed")
	}

	thought := resp.ThoughtChain.Thought

	// A non-zero status code in BaseResp means the API rejected the request.
	if resp.BaseResp.StatusCode != 0 {
		statusErr := fmt.Errorf("API returned error: %s", resp.BaseResp.StatusMessage)
		return failed(thought, statusErr), statusErr
	}

	calls, err := w.convertWingsResponseToToolCalls(resp.ActionParams)
	if err != nil {
		return failed(thought, err), errors.Wrap(err, "convert Wings response to tool calls failed")
	}

	log.Info().
		Str("thought", thought).
		Int("tool_calls_count", len(calls)).
		Int64("elapsed_ms", elapsedMs).
		Msg("Wings API planning completed")

	return &PlanningResult{
		ToolCalls: calls,
		Thought:   thought,
		Content:   resp.ThoughtChain.Summary,
		ModelName: modelName,
	}, nil
}
// Assert implements ILLMService.Assert by asking the Wings API to judge an
// assertion against the current screenshot.
//
// opts.Screenshot is stripped of any data-URL prefix, combined with the device
// info derived from ctx, and sent as a request of step type "assert". The
// verdict is read from the action_params string of the response.
//
// Validation failures return a nil result. Every other failure, including an
// assertion that did not pass, returns an AssertionResult with Pass=false
// together with a non-nil error.
func (w *WingsService) Assert(ctx context.Context, opts *AssertOptions) (*AssertionResult, error) {
	// Validate input parameters
	if err := validateAssertionInput(opts); err != nil {
		return nil, errors.Wrap(err, "validate assertion parameters failed")
	}

	// Clean screenshot data URL prefix
	cleanScreenshot := w.cleanScreenshotDataURL(opts.Screenshot)

	// Get device info from context (if available)
	deviceInfo := w.getDeviceInfoFromScreenshot(ctx, cleanScreenshot)

	// Prepare Wings API request for assertion
	apiRequest := WingsActionRequest{
		Historys: []interface{}{}, // empty as specified
		DeviceInfos: []WingsDeviceInfo{
			deviceInfo,
		},
		StepText: opts.Assertion,
		BizId:    w.bizId,
		// NOTE(review): this is a fixed sample test case containing literal
		// backslash-n sequences; presumably a placeholder — confirm.
		TextCase: "整体描述:\\n前置条件\\n获取 1 台设备 A。\\n获取 1 个[万粉创作者]账号a。\\n获取 2 个[普通]账号 b、c。\\n账号 a 和账号 b 互相关注。\\n账号 a 和账号 c 互相关注。\\n账号 a 给账号 b 设置备注为 “11131b”。\\n账号 a 给账号 c 设置备注为 “11131c”。\\n账号 a 创建一个粉丝群 m。\\n 账号 a 修改粉丝群 m 名称为“11131群”。\\n 账号 a 邀请账号 b 加入粉丝群 m。\\n账号 a 邀请账号 c 加入粉丝群 m。\\n账号 a 给群聊 m 发送一条文字消息。\\n设备 A 打开抖音 app。\\n设备 A 登录账号 a。\\n设备 A 退出抖音 app。\\n操作步骤\\n账号a打开抖音app。\\n点击“消息”。\\n点击“11131群”cell。\\n点击“聊天信息页入口”按钮。\\n点击“分享公开群”按钮。\\n点击文字“群口令”。\\n断言屏幕中存在文字“口令复制成功”。\\n停止操作。\\n注意事项\\n",
		StepType: "assert", // Different from automation
		DeviceID: deviceInfo.DeviceID,
		Base: WingsBase{
			LogID: generateWingsUUID(),
		},
	}

	// Log request metadata only. The device infos embed the full base64
	// screenshot twice (now_image and pre_image); dumping the whole request
	// would flood the log and write screen contents into it.
	log.Info().
		Str("step_text", apiRequest.StepText).
		Str("step_type", apiRequest.StepType).
		Str("device_id", apiRequest.DeviceID).
		Str("log_id", apiRequest.Base.LogID).
		Msg("Wings API request")

	// Call Wings API
	startTime := time.Now()
	response, err := w.callWingsAPI(ctx, apiRequest)
	elapsed := time.Since(startTime).Milliseconds()
	if err != nil {
		return &AssertionResult{
			Pass:      false,
			Thought:   "Wings API call failed",
			ModelName: "wings-api",
		}, errors.Wrap(err, "Wings API call failed")
	}

	// Check API response status
	if response.BaseResp.StatusCode != 0 {
		err = fmt.Errorf("API returned error: %s", response.BaseResp.StatusMessage)
		return &AssertionResult{
			Pass:      false,
			Thought:   response.ThoughtChain.Thought,
			ModelName: "wings-api",
		}, err
	}

	// Parse assertion result from action_params
	passed, assertionThought, err := w.parseAssertionResult(response.ActionParams, response.ThoughtChain)
	if err != nil {
		return &AssertionResult{
			Pass:      false,
			Thought:   response.ThoughtChain.Thought,
			ModelName: "wings-api",
		}, errors.Wrap(err, "parse assertion result failed")
	}

	log.Info().
		Bool("passed", passed).
		Str("thought", assertionThought).
		Int64("elapsed_ms", elapsed).
		Msg("Wings API assertion completed")

	result := &AssertionResult{
		Pass:      passed,
		Thought:   assertionThought,
		ModelName: "wings-api",
	}

	// Return error if assertion failed (consistent with original behavior)
	if !passed {
		reason := assertionThought
		if reason == "" {
			// The API may return no explanation at all; never surface an
			// error whose message is empty.
			reason = "Wings assertion failed without explanation"
		}
		return result, errors.New(reason)
	}
	return result, nil
}
// Query implements ILLMService.Query. The Wings service offers no query
// capability, so every call yields a nil result and an error.
func (w *WingsService) Query(ctx context.Context, opts *QueryOptions) (*QueryResult, error) {
	var none *QueryResult
	unsupported := errors.New("Query operation is not supported by Wings service")
	return none, unsupported
}
// RegisterTools implements ILLMService.RegisterTools as a no-op: the tools are
// only counted for a debug log entry and nil is always returned.
func (w *WingsService) RegisterTools(tools []*schema.ToolInfo) error {
	// Nothing is stored; the tool name comes from the API response instead.
	count := len(tools)
	log.Debug().
		Int("tools_count", count).
		Msg("Wings service ignoring tool registration")
	return nil
}
// Wings API data structures
//
// WingsActionRequest is the JSON body that callWingsAPI POSTs to the Wings
// endpoint. Plan and Assert build one request per call.
type WingsActionRequest struct {
// Historys is always sent as an empty list by Plan and Assert.
Historys []interface{} `json:"historys"`
// DeviceInfos carries the screenshot(s); Plan and Assert send exactly one entry.
DeviceInfos []WingsDeviceInfo `json:"device_infos"`
// StepText is the user instruction (Plan) or the assertion text (Assert).
StepText string `json:"step_text"`
// BizId is copied from WingsService.bizId.
BizId string `json:"biz_id"`
// TextCase is the test-case description; Plan and Assert send a fixed literal.
TextCase string `json:"text_case"`
// StepType is "automation" for Plan and "assert" for Assert.
StepType string `json:"step_type"`
// DeviceID repeats the DeviceID of the single DeviceInfos entry.
DeviceID string `json:"device_id"`
// Base holds the per-request LogID.
Base WingsBase `json:"Base"`
}
// WingsDeviceInfo describes one device and its screenshots inside a
// WingsActionRequest. It is produced by getDeviceInfoFromContext.
type WingsDeviceInfo struct {
// DeviceID is the context value "device_id", or "default-device" when absent.
DeviceID string `json:"device_id"`
// NowImage and PreImage are both set to the same screenshot string by
// getDeviceInfoFromContext.
NowImage string `json:"now_image"`
PreImage string `json:"pre_image"`
// NowImageUrl and PreImageUrl are not populated by the code visible in this
// file; they are serialized as empty strings.
NowImageUrl string `json:"now_image_url"`
PreImageUrl string `json:"pre_image_url"`
// NowLayoutJSON is always set to the empty string by getDeviceInfoFromContext.
NowLayoutJSON string `json:"now_layout_json"`
// OperationSystem is the context value "platform_type", defaulting to "android".
OperationSystem string `json:"operation_system"`
}
// WingsBase is the "Base" envelope of a WingsActionRequest.
type WingsBase struct {
// LogID is a fresh UUID string generated per request by generateWingsUUID.
LogID string `json:"LogID"`
}
// WingsActionResponse is the JSON body decoded from a Wings API reply by
// callWingsAPI.
type WingsActionResponse struct {
// StepType is decoded but not read by the code visible in this file.
StepType string `json:"step_type"`
// ActionParams is itself a JSON document encoded as a string; it is parsed
// by convertWingsResponseToToolCalls (Plan) and parseAssertionResult (Assert).
ActionParams string `json:"action_params"`
// ThoughtChain carries the model's explanation texts.
ThoughtChain WingsThoughtChain `json:"thought_chain"`
// BaseResp carries the API-level status; a non-zero StatusCode is treated
// as an error by Plan and Assert.
BaseResp WingsBaseResp `json:"BaseResp"`
}
// WingsThoughtChain holds the explanation texts returned with a response.
// parseAssertionResult prefers Summary, then Thought, then Observation; Plan
// reports Thought as the planning thought and Summary as the content.
type WingsThoughtChain struct {
Observation string `json:"observation"`
Thought string `json:"thought"`
Summary string `json:"summary"`
}
// WingsBaseResp is the "BaseResp" status envelope of a WingsActionResponse.
type WingsBaseResp struct {
// StatusCode is 0 on success; Plan and Assert treat any other value as an error.
StatusCode int `json:"StatusCode"`
// StatusMessage is included in the error text when StatusCode is non-zero.
StatusMessage string `json:"StatusMessage"`
// Extra is decoded but not read by the code visible in this file.
Extra WingsExtra `json:"Extra"`
}
// WingsExtra is the "Extra" section of WingsBaseResp. Its fields are decoded
// but not read by the code visible in this file.
type WingsExtra struct {
// CostTime is decoded from "cost_time"; presumably the server-side
// processing time — unit and format not shown here, confirm with the API.
CostTime string `json:"cost_time"`
// LogID is decoded from "_log_id"; presumably the server-side log
// identifier — confirm with the API.
LogID string `json:"_log_id"`
}
// Action parameter structures
//
// WingsActionParams is the shape that convertWingsResponseToToolCalls decodes
// the action_params string into.
type WingsActionParams struct {
// Type is used directly as the tool-call function name.
Type string `json:"Type"`
// Params is converted into the tool-call arguments by createToolCall; it
// may decode as an object, a string, or any other JSON value.
Params interface{} `json:"Params"`
// Bounds, UiDict and UiIndex are decoded but not read by the code visible
// in this file.
Bounds [][]float64 `json:"Bounds"`
UiDict interface{} `json:"UiDict"`
UiIndex string `json:"UiIndex"`
}
// WingsTapParams holds an x/y coordinate pair. It is not referenced by the
// code visible in this file; presumably it documents the Params payload of a
// tap action — confirm against the Wings API.
type WingsTapParams struct {
X float64 `json:"x"`
Y float64 `json:"y"`
}
// WingsDoubleTapParams holds an x/y coordinate pair. It is not referenced by
// the code visible in this file; presumably it documents the Params payload of
// a double-tap action — confirm against the Wings API.
type WingsDoubleTapParams struct {
X float64 `json:"x"`
Y float64 `json:"y"`
}
// WingsLongPressParams holds an x/y coordinate pair plus a duration. It is not
// referenced by the code visible in this file; presumably it documents the
// Params payload of a long-press action — confirm against the Wings API.
type WingsLongPressParams struct {
X float64 `json:"x"`
Y float64 `json:"y"`
// Duration's unit is not established by this file — TODO confirm.
Duration float64 `json:"duration"`
}
// WingsSwipeParams holds a start point, an end point and a duration. It is not
// referenced by the code visible in this file; presumably it documents the
// Params payload of a swipe action — confirm against the Wings API.
type WingsSwipeParams struct {
FromX float64 `json:"from_x"`
FromY float64 `json:"from_y"`
ToX float64 `json:"to_x"`
ToY float64 `json:"to_y"`
// Duration's unit is not established by this file — TODO confirm.
Duration float64 `json:"duration"`
}
// WingsTextParams holds a single text value. It is not referenced by the code
// visible in this file; presumably it documents the Params payload of a text
// input action — confirm against the Wings API.
type WingsTextParams struct {
Text string `json:"text"`
}
// Helper methods

// generateWingsUUID returns a freshly generated UUID in its canonical string
// form; Plan and Assert use it as the request LogID.
func generateWingsUUID() string {
	id := uuid.New()
	return id.String()
}
// extractScreenshotFromMessage returns the screenshot carried by the first
// image part of message.
//
// If the image URL is a "data:image/..." URL, the prefix is removed through
// cleanScreenshotDataURL, the same helper Assert uses, so both entry points
// treat screenshots identically. Any other URL is returned unchanged.
//
// An error is returned when message is nil, has no multi-content parts, or
// contains no image part with a non-nil ImageURL.
func (w *WingsService) extractScreenshotFromMessage(message *schema.Message) (string, error) {
	if message == nil || len(message.MultiContent) == 0 {
		return "", errors.New("no message content found")
	}

	for _, part := range message.MultiContent {
		if part.Type != schema.ChatMessagePartTypeImageURL || part.ImageURL == nil {
			continue
		}
		// Only the first image part is considered.
		return w.cleanScreenshotDataURL(part.ImageURL.URL), nil
	}

	return "", errors.New("no image found in message")
}
// getDeviceInfoFromContext builds the WingsDeviceInfo for a request.
//
// The screenshot is used for both NowImage and PreImage. DeviceID comes from
// the string context value "device_id" and falls back to "default-device".
// OperationSystem defaults to "android"; the string context value
// "platform_type" overrides it, but only when "device_id" is present as well.
func (w *WingsService) getDeviceInfoFromContext(ctx context.Context, screenshot string) WingsDeviceInfo {
	// Start from the fallback and refine it with whatever the context offers.
	info := WingsDeviceInfo{
		DeviceID:        "default-device",
		NowImage:        screenshot,
		PreImage:        screenshot,
		NowLayoutJSON:   "",
		OperationSystem: "android",
	}

	deviceID, hasDevice := ctx.Value("device_id").(string)
	if !hasDevice {
		return info
	}
	info.DeviceID = deviceID

	if platform, hasPlatform := ctx.Value("platform_type").(string); hasPlatform {
		info.OperationSystem = platform
	}
	return info
}
// getDeviceInfoFromScreenshot gets device info from screenshot (for Assert)
//
// It forwards ctx and screenshot unchanged to getDeviceInfoFromContext and
// returns that result; it adds no logic of its own.
func (w *WingsService) getDeviceInfoFromScreenshot(ctx context.Context, screenshot string) WingsDeviceInfo {
return w.getDeviceInfoFromContext(ctx, screenshot)
}
// cleanScreenshotDataURL strips a data-URL header such as
// "data:image/jpeg;base64," from screenshot and returns the payload.
//
// The payload is everything after the first comma, so a payload that itself
// contains commas is still extracted correctly. Input that does not start with
// "data:image/" or that contains no comma is returned unchanged.
func (w *WingsService) cleanScreenshotDataURL(screenshot string) string {
	if !strings.HasPrefix(screenshot, "data:image/") {
		return screenshot
	}
	// Cut stops at the first comma instead of scanning and splitting the
	// whole (potentially multi-megabyte) string.
	if _, payload, found := strings.Cut(screenshot, ","); found {
		return payload
	}
	return screenshot
}
// callWingsAPI sends request to the Wings endpoint and decodes the reply.
//
// The request is marshalled to JSON and POSTed to w.apiURL with ctx attached
// and a 60-second client timeout. When w.isExternal is true the Agw-Auth
// signature headers are added. Any HTTP status other than 200 is returned as
// an error that includes the response body.
//
// Errors are returned for marshalling, request construction, transport,
// body-read, non-200 status and response-decoding failures.
func (w *WingsService) callWingsAPI(ctx context.Context, request WingsActionRequest) (*WingsActionResponse, error) {
	// Marshal request to JSON
	requestBody, err := json.Marshal(request)
	if err != nil {
		return nil, errors.Wrap(err, "marshal request failed")
	}

	// Create HTTP request
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, w.apiURL, bytes.NewReader(requestBody))
	if err != nil {
		return nil, errors.Wrap(err, "create HTTP request failed")
	}

	// Set headers
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Accept", "application/json")

	// Add authentication headers if using external API. Content-Type is
	// already set above; adding it again would send the header twice.
	if w.isExternal {
		signToken := "UNSIGNED-PAYLOAD"
		token := builtin.Sign("auth-v2", w.accessKey, w.secretKey, []byte(signToken))
		httpReq.Header.Set("Agw-Auth", token)
		httpReq.Header.Set("Agw-Auth-Content", signToken)
	}

	// Execute HTTP request
	client := &http.Client{
		Timeout: 60 * time.Second,
	}
	resp, err := client.Do(httpReq)
	if err != nil {
		return nil, errors.Wrap(err, "HTTP request failed")
	}
	defer resp.Body.Close()

	// Read response body (needed both for decoding and for error reporting)
	responseBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, errors.Wrap(err, "read response body failed")
	}

	// Check HTTP status
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP request failed with status %d: %s", resp.StatusCode, string(responseBody))
	}

	// Parse response
	var apiResponse WingsActionResponse
	if err := json.Unmarshal(responseBody, &apiResponse); err != nil {
		return nil, errors.Wrap(err, "unmarshal response failed")
	}
	return &apiResponse, nil
}
// convertWingsResponseToToolCalls turns the action_params string of a Wings
// response into tool calls.
//
// An empty string yields an empty, non-nil slice. Otherwise the string is
// decoded as WingsActionParams and exactly one tool call is produced, named
// after the Type field with arguments derived from Params. Decoding and
// tool-call construction failures are returned as wrapped errors.
func (w *WingsService) convertWingsResponseToToolCalls(actionParamsStr string) ([]schema.ToolCall, error) {
	if len(actionParamsStr) == 0 {
		return []schema.ToolCall{}, nil
	}

	var parsed WingsActionParams
	err := json.Unmarshal([]byte(actionParamsStr), &parsed)
	if err != nil {
		return nil, fmt.Errorf("parse action params failed: %w", err)
	}

	// The Wings action type doubles as the tool name.
	call, err := w.createToolCall(parsed.Type, parsed.Params)
	if err != nil {
		return nil, fmt.Errorf("create tool call for %s failed: %w", parsed.Type, err)
	}
	return []schema.ToolCall{call}, nil
}
// createToolCall builds a schema.ToolCall named toolName whose arguments are
// the JSON encoding of params.
//
// params is normalised to a map as follows:
//   - nil: no arguments;
//   - map[string]interface{}: used as is;
//   - string: decoded as JSON into the map, or stored under "text" when it
//     does not decode;
//   - anything else: round-tripped through JSON into the map, or stored under
//     "params" when it does not decode.
//
// The call ID is "call_" followed by the first eight characters of a new UUID.
// An error is returned only when JSON marshalling fails.
func (w *WingsService) createToolCall(toolName string, params interface{}) (schema.ToolCall, error) {
	args := make(map[string]interface{})

	switch typed := params.(type) {
	case nil:
		// No parameters supplied: keep the empty map.
	case map[string]interface{}:
		args = typed
	case string:
		if decodeErr := json.Unmarshal([]byte(typed), &args); decodeErr != nil {
			// Not a JSON object: pass the raw string through as text.
			args["text"] = typed
		}
	default:
		raw, err := json.Marshal(params)
		if err != nil {
			return schema.ToolCall{}, fmt.Errorf("marshal params failed: %w", err)
		}
		if decodeErr := json.Unmarshal(raw, &args); decodeErr != nil {
			// Not representable as an object: wrap the value generically.
			args["params"] = params
		}
	}

	encoded, err := json.Marshal(args)
	if err != nil {
		return schema.ToolCall{}, fmt.Errorf("marshal arguments failed: %w", err)
	}

	shortID := uuid.New().String()[:8]
	call := schema.ToolCall{
		ID: fmt.Sprintf("call_%s", shortID),
		Function: schema.FunctionCall{
			Name:      toolName,
			Arguments: string(encoded),
		},
	}
	return call, nil
}
// parseAssertionResult derives the assertion verdict from the action_params
// string of a Wings response.
//
// The verdict is read from the top-level "action_type" field or, when that
// field is absent, from total_res[0].action_params.action_type. The assertion
// passes only when that value is the string "passed" (compared
// case-insensitively); a missing or non-string value counts as a failure.
//
// The returned thought is thoughtChain.Summary, falling back to Thought and
// then Observation when empty. The same fallback applies on every return path,
// so a failed verdict carries an explanation whenever the API supplied one.
//
// An error is returned only when actionParamsStr is not valid JSON for an
// object.
func (w *WingsService) parseAssertionResult(actionParamsStr string, thoughtChain WingsThoughtChain) (bool, string, error) {
	// Parse action parameters JSON
	var actionParams map[string]interface{}
	if err := json.Unmarshal([]byte(actionParamsStr), &actionParams); err != nil {
		return false, "", errors.Wrap(err, "parse action params failed")
	}

	// Pick the most specific non-empty explanation once, up front.
	assertionThought := thoughtChain.Summary
	if assertionThought == "" {
		assertionThought = thoughtChain.Thought
	}
	if assertionThought == "" {
		assertionThought = thoughtChain.Observation
	}

	actionType, exists := actionParams["action_type"]
	if !exists {
		actionType = wingsNestedActionType(actionParams)
	}

	// A missing or non-string action_type defaults to a failed assertion.
	actionTypeStr, ok := actionType.(string)
	if !ok {
		return false, assertionThought, nil
	}

	passed := strings.EqualFold(actionTypeStr, "passed")

	log.Info().
		Str("action_type", actionTypeStr).
		Bool("passed", passed).
		Str("thought", assertionThought).
		Msg("parsed Wings assertion result")
	return passed, assertionThought, nil
}

// wingsNestedActionType returns total_res[0].action_params.action_type from
// params, or nil when any step of that path is missing or has another type.
func wingsNestedActionType(params map[string]interface{}) interface{} {
	totalRes, ok := params["total_res"].([]interface{})
	if !ok || len(totalRes) == 0 {
		return nil
	}
	firstRes, ok := totalRes[0].(map[string]interface{})
	if !ok {
		return nil
	}
	nested, ok := firstRes["action_params"].(map[string]interface{})
	if !ok {
		return nil
	}
	return nested["action_type"]
}