refactor: add AIAction

This commit is contained in:
lilong.129
2025-03-22 12:08:42 +08:00
parent f46fcfb456
commit 5ebfca7f62
4 changed files with 28 additions and 36 deletions

View File

@@ -21,24 +21,8 @@ func (dExt *XTDriver) StartToGoal(text string, opts ...option.ActionOption) erro
for {
attempt++
log.Info().Int("attempt", attempt).Msg("planning attempt")
// plan next action
result, err := dExt.PlanNextAction(text, opts...)
if err != nil {
return errors.Wrap(err, "failed to get next action from planner")
}
// do actions
for _, action := range result.NextActions {
switch action.ActionType {
case ai.ActionTypeClick:
point := action.ActionInputs["startBox"].([]float64)
if err := dExt.TapAbsXY(point[0], point[1], opts...); err != nil {
return err
}
case ai.ActionTypeFinished:
return nil
}
if err := dExt.AIAction(text, opts...); err != nil {
return err
}
if options.MaxRetryTimes > 1 && attempt >= options.MaxRetryTimes {
@@ -47,6 +31,30 @@ func (dExt *XTDriver) StartToGoal(text string, opts ...option.ActionOption) erro
}
}
// AIAction asks the planner for the next action(s) toward the goal described
// by text and executes them in order.
//
// A click action taps at the first two values of its "startBox" input, used as
// absolute X/Y coordinates. A finished action stops processing immediately.
// Any other action type is ignored.
//
// It returns the planner's error unchanged, an error when the planner yields no
// result or a click action lacks usable "startBox" coordinates, or the first
// error returned by a tap.
//
// NOTE(review): a finished action and a fully executed plan both return nil,
// so a caller looping on AIAction cannot tell from the return value alone that
// the goal has been reached.
func (dExt *XTDriver) AIAction(text string, opts ...option.ActionOption) error {
	// plan next action
	result, err := dExt.PlanNextAction(text, opts...)
	if err != nil {
		return err
	}
	if result == nil {
		return errors.New("planner returned no result")
	}

	// do actions
	for _, action := range result.NextActions {
		switch action.ActionType {
		case ai.ActionTypeClick:
			// ActionInputs values are dynamically typed: validate the entry
			// rather than letting a failed assertion or a short slice panic.
			point, ok := action.ActionInputs["startBox"].([]float64)
			if !ok || len(point) < 2 {
				return errors.New("click action has no valid startBox coordinates")
			}
			if err := dExt.TapAbsXY(point[0], point[1], opts...); err != nil {
				return err
			}
		case ai.ActionTypeFinished:
			log.Info().Msg("ai action done")
			return nil
		}
	}
	return nil
}
func (dExt *XTDriver) PlanNextAction(text string, opts ...option.ActionOption) (*ai.PlanningResult, error) {
if dExt.LLMService == nil {
return nil, errors.New("LLM service is not initialized")

View File

@@ -8,22 +8,6 @@ import (
"github.com/rs/zerolog/log"
)
// TapByLLM prefixes text with "[click] ", asks the planner for the next
// action, and taps at the coordinates carried by the first action returned.
//
// It returns an error when planning fails, when the planner returns no result
// or no action, when the first action is not a click, when its "startBox"
// input does not hold at least two float64 values, or when the tap itself
// fails.
func (dExt *XTDriver) TapByLLM(text string, opts ...option.ActionOption) error {
	text = "[click] " + text
	result, err := dExt.PlanNextAction(text, opts...)
	if err != nil {
		return err
	}
	// Guard the index below: an empty plan would otherwise panic.
	if result == nil || len(result.NextActions) == 0 {
		return fmt.Errorf("expected click action, got no actions")
	}

	action := result.NextActions[0]
	if action.ActionType != ai.ActionTypeClick {
		return fmt.Errorf("expected click action, got: %s", action.ActionType)
	}

	// ActionInputs values are dynamically typed: check both the type and the
	// length before reading the X/Y pair.
	point, ok := action.ActionInputs["startBox"].([]float64)
	if !ok || len(point) < 2 {
		return fmt.Errorf("invalid startBox for click action: %v", action.ActionInputs["startBox"])
	}
	return dExt.TapAbsXY(point[0], point[1], opts...)
}
func (dExt *XTDriver) TapByOCR(text string, opts ...option.ActionOption) error {
actionOptions := option.NewActionOptions(opts...)
if actionOptions.ScreenShotFileName == "" {

View File

@@ -125,7 +125,7 @@ func TestDriverExt_TapByOCR(t *testing.T) {
// TestDriverExt_TapByLLM builds a driver with setupDriverExt, passes it a
// natural-language instruction (in English: "click the avatar of the first
// post's author"), and asserts that no error is returned.
func TestDriverExt_TapByLLM(t *testing.T) {
driver := setupDriverExt(t)
// NOTE(review): the two consecutive `err :=` statements below look like the
// old and new side of a single changed line in this diff view (TapByLLM being
// replaced by AIAction); presumably only one exists in the real file — confirm.
err := driver.TapByLLM("点击第一个帖子的作者头像")
err := driver.AIAction("点击第一个帖子的作者头像")
assert.Nil(t, err)
}