From 2ad5c4f6dbab565c974f76292d19b96f81f8962b Mon Sep 17 00:00:00 2001 From: "lilong.129" Date: Sun, 23 Mar 2025 10:06:50 +0800 Subject: [PATCH] fix: load env --- internal/version/VERSION | 2 +- uixt/ai/env.go | 23 +++++++++++++++++++++++ uixt/ai/parser.go | 2 +- uixt/ai/planner.go | 16 +++++++++++++--- uixt/ai/prompt-ui-tars.go | 6 +++--- 5 files changed, 41 insertions(+), 8 deletions(-) diff --git a/internal/version/VERSION b/internal/version/VERSION index 4b033839..4f1a9e68 100644 --- a/internal/version/VERSION +++ b/internal/version/VERSION @@ -1 +1 @@ -v5.0.0-beta-2503221523 +v5.0.0-beta-2503231006 diff --git a/uixt/ai/env.go b/uixt/ai/env.go index 2240fd6e..56d25abf 100644 --- a/uixt/ai/env.go +++ b/uixt/ai/env.go @@ -58,6 +58,7 @@ func loadEnv() { Str("path", envFile).Msg("overload env file failed") } log.Info().Str("path", envFile).Msg("overload env success") + return } // reached root directory @@ -154,6 +155,12 @@ func (c *CustomTransport) RoundTrip(req *http.Request) (*http.Response, error) { return c.Transport.RoundTrip(req) } +type OutputFormat struct { + Thought string `json:"thought"` + Action string `json:"action"` + Error string `json:"error,omitempty"` +} + // GetModelConfig get OpenAI config func GetModelConfig() (*openai.ChatModelConfig, error) { loadEnv() @@ -169,6 +176,11 @@ func GetModelConfig() (*openai.ChatModelConfig, error) { } } + // outputFormatSchema, err := openapi3gen.NewSchemaRefForValue(&OutputFormat{}, nil) + // if err != nil { + // log.Fatal().Err(err).Msg("NewSchemaRefForValue failed") + // } + config := &openai.ChatModelConfig{ HTTPClient: &http.Client{ Timeout: defaultTimeout, @@ -177,6 +189,17 @@ func GetModelConfig() (*openai.ChatModelConfig, error) { Headers: envConfig.Headers, }, }, + // TODO: set structured response format + // https://github.com/cloudwego/eino-ext/blob/main/components/model/openai/examples/structured/structured.go + // ResponseFormat: &openai2.ChatCompletionResponseFormat{ + // Type: openai2.ChatCompletionResponseFormatTypeJSONSchema, + // JSONSchema: &openai2.ChatCompletionResponseFormatJSONSchema{ + // Name: "thought_and_action", + // Description: "data that describes planning thought and action", + // Schema: outputFormatSchema.Value, + // Strict: false, + // }, + // }, } if baseURL := GetEnvConfig(EnvOpenAIBaseURL); baseURL != "" { diff --git a/uixt/ai/parser.go b/uixt/ai/parser.go index e9ef3b1f..e7bff116 100644 --- a/uixt/ai/parser.go +++ b/uixt/ai/parser.go @@ -23,7 +23,7 @@ type ActionParser struct { // Parse parses the prediction text and extracts actions func (p *ActionParser) Parse(predictionText string) ([]ParsedAction, error) { - // try parsing JSON format, from VLM like GPT-4o + // try parsing JSON format, from VLM like openai/gpt-4o var jsonActions []ParsedAction jsonActions, jsonErr := p.parseJSON(predictionText) if jsonErr == nil { diff --git a/uixt/ai/planner.go b/uixt/ai/planner.go index 29a2ab10..d12dacb4 100644 --- a/uixt/ai/planner.go +++ b/uixt/ai/planner.go @@ -42,6 +42,7 @@ func NewPlanner(ctx context.Context) (*Planner, error) { parser := NewActionParser(1000) return &Planner{ ctx: ctx, + config: config, model: model, parser: parser, }, nil @@ -50,6 +51,7 @@ func NewPlanner(ctx context.Context) (*Planner, error) { type Planner struct { ctx context.Context model model.ChatModel + config *openai.ChatModelConfig parser *ActionParser history []*schema.Message // conversation history } @@ -79,7 +81,8 @@ func (p *Planner) Call(opts *PlanningOptions) (*PlanningResult, error) { logRequest(p.history) startTime := time.Now() resp, err := p.model.Generate(p.ctx, p.history) - log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()).Msg("call model service") + log.Info().Float64("elapsed(s)", time.Since(startTime).Seconds()). + Str("model", p.config.Model).Msg("call model service") if err != nil { return nil, fmt.Errorf("request model service failed: %w", err) } @@ -153,8 +156,15 @@ func logRequest(messages []*schema.Message) { } func logResponse(resp *schema.Message) { - log.Info().Str("role", string(resp.Role)). - Str("content", resp.Content).Msg("log response message") + logger := log.Info().Str("role", string(resp.Role)). + Str("content", resp.Content) + if resp.ResponseMeta != nil { + logger = logger.Interface("response_meta", resp.ResponseMeta) + } + if resp.Extra != nil { + logger = logger.Interface("extra", resp.Extra) + } + logger.Msg("log response message") } // appendConversationHistory adds a message to the conversation history diff --git a/uixt/ai/prompt-ui-tars.go b/uixt/ai/prompt-ui-tars.go index 76af2def..4542a46b 100644 --- a/uixt/ai/prompt-ui-tars.go +++ b/uixt/ai/prompt-ui-tars.go @@ -8,10 +8,10 @@ Thought: ... Action: ... ## Action Space -click(start_box='<|box_start|>(x1,y1)<|box_end|>') -long_press(start_box='<|box_start|>(x1,y1)<|box_end|>', time='') +click(start_box='[x1,y1]') +long_press(start_box='[x1,y1]', time='') type(content='') -drag(start_box='<|box_start|>(x1,y1)<|box_end|>', end_box='<|box_start|>(x3,y3)<|box_end|>') +drag(start_box='[x1,y1]', end_box='[x2,y2]') press_home() press_back() finished(content='') # Submit the task regardless of whether it succeeds or fails.