fix: pass screenshot (image) tool results to the chat history

This commit is contained in:
lilong.129
2025-05-21 22:35:16 +08:00
parent d58bbaeb5f
commit bb592548b4
5 changed files with 51 additions and 16 deletions

View File

@@ -1 +1 @@
v5.0.0-beta-2505211805
v5.0.0-beta-2505212235

View File

@@ -16,6 +16,7 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/httprunner/httprunner/v5/uixt/ai"
"github.com/httprunner/httprunner/v5/uixt/option"
"github.com/mark3labs/mcp-go/mcp"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"golang.org/x/term"
@@ -174,24 +175,46 @@ func (c *Chat) handleToolCalls(ctx context.Context, toolCalls []schema.ToolCall)
continue
}
// Format tool result
resultStr := ""
// Format tool result, append message to history
renderStr := ""
if result != nil && len(result.Content) > 0 {
for _, item := range result.Content {
resultStr += fmt.Sprintf("%v\n", item)
if contentMap, ok := item.(mcp.TextContent); ok {
renderStr += contentMap.Text + "\n"
toolMsg := &schema.Message{
Role: schema.Tool,
ToolCallID: toolCall.ID,
Content: contentMap.Text,
}
c.planner.History().Append(toolMsg)
} else if contentMap, ok := item.(mcp.ImageContent); ok {
renderStr += "<data:image/base64...>\n" // base64-encoded image data
toolMsg := &schema.Message{
Role: schema.Tool,
ToolCallID: toolCall.ID,
MultiContent: []schema.ChatMessagePart{
{
Type: schema.ChatMessagePartTypeImageURL,
ImageURL: &schema.ChatMessageImageURL{
URL: contentMap.Data,
MIMEType: contentMap.MIMEType,
},
},
},
}
c.planner.History().Append(toolMsg)
}
}
} else {
resultStr = fmt.Sprintf("%+v", result)
renderStr = fmt.Sprintf("%+v", result)
toolMsg := &schema.Message{
Role: schema.Tool,
ToolCallID: toolCall.ID,
Content: renderStr,
}
c.planner.History().Append(toolMsg)
}
c.renderContent("Tool Result", resultStr)
// Add tool result to history
toolMsg := &schema.Message{
Role: schema.Tool,
Content: resultStr,
ToolCallID: toolCall.ID,
}
c.planner.History().Append(toolMsg)
c.renderContent("Tool Result", renderStr)
}
return nil
}

View File

@@ -95,14 +95,12 @@ func GetModelConfig(modelType option.LLMServiceType) (*ModelConfig, error) {
"env %s missed", EnvModelName)
}
maxTokens := 4096
temperature := float32(0.7)
modelConfig := &openai.ChatModelConfig{
BaseURL: openaiBaseURL,
APIKey: openaiAPIKey,
Model: modelName,
Timeout: defaultTimeout,
MaxTokens: &maxTokens,
Temperature: &temperature,
}

View File

@@ -100,6 +100,15 @@ func logRequest(messages ConversationHistory) {
func logResponse(message *schema.Message) {
logger := log.Info().Str("role", string(message.Role)).
Str("content", message.Content)
var toolCalls []string
if len(message.ToolCalls) > 0 {
for _, toolCall := range message.ToolCalls {
toolCalls = append(toolCalls, toolCall.Function.Name)
}
logger = logger.Strs("tool_calls", toolCalls)
}
if message.ResponseMeta != nil {
logger = logger.Str("finish_reason", message.ResponseMeta.FinishReason)
// Log usage statistics

View File

@@ -292,6 +292,7 @@ func saveScreenShot(raw *bytes.Buffer, screenshotPath string) error {
}
func compressImageBuffer(raw *bytes.Buffer) (compressed *bytes.Buffer, err error) {
rawSize := raw.Len()
// decode image from buffer
img, format, err := image.Decode(raw)
if err != nil {
@@ -312,6 +313,10 @@ func compressImageBuffer(raw *bytes.Buffer) (compressed *bytes.Buffer, err error
return nil, fmt.Errorf("unsupported image format: %s", format)
}
compressedSize := buf.Len()
log.Debug().Int("rawSize", rawSize).Int("compressedSize", compressedSize).
Msg("compress image buffer")
// return compressed image buffer
return &buf, nil
}