feat(explain): 扩展索引建议规则引擎至 15 条

- 新增规则:LIKE 前缀通配、函数包裹列、笛卡尔积风险、OR 条件无索引、大 OFFSET 分页、SELECT * + JOIN 模式
- 阈值常量:large_offset(10000)、cartesian_product(100000)、wide_table(20 列)
- 测试覆盖:新增 6 个用例验证规则触发与抑制(含边界场景)
This commit is contained in:
Syngnat
2026-06-19 13:43:01 +08:00
parent 946450874f
commit a2d83744b5
2 changed files with 349 additions and 0 deletions

View File

@@ -40,6 +40,10 @@ const (
ruleEstimationSkewRatio float64 = 10.0
ruleHighTotalCostThreshold float64 = 1000.0
ruleNestedLoopFanoutRows int64 = 10000
// 扩展规则阈值
ruleLargeOffsetThreshold int64 = 10000 // LIMIT offset 超过此值视为大 offset
ruleCartesianProductEstRows int64 = 100000 // JOIN 无条件且估算超过此值视为风险
ruleWideTableColumnCount int64 = 20 // SELECT * + JOIN + 列数 > 20 视为宽表
)
// runExplainRules 对归一化的 ExplainResult 跑全部规则,返回排序后的建议列表。
@@ -54,6 +58,9 @@ func runExplainRules(result connection.ExplainResult) []connection.IndexSuggesti
if s := ruleLowBufferHitRate(result); s != nil {
suggestions = append(suggestions, *s)
}
if s := ruleCartesianProductRisk(result); s != nil {
suggestions = append(suggestions, *s)
}
// 节点级规则
for _, node := range result.Nodes {
@@ -66,6 +73,11 @@ func runExplainRules(result connection.ExplainResult) []connection.IndexSuggesti
ruleHighEstimationSkew,
ruleNestedLoopHighFanout,
ruleUsingTempBTreeOrder,
ruleLikeLeadingWildcard,
ruleFunctionOnColumn,
ruleLargeOffsetPagination,
ruleSelectStarWithJoin,
ruleOrConditionNoIndex,
}
for _, ruleFn := range rules {
if s := ruleFn(result, node); s != nil {
@@ -432,3 +444,178 @@ func joinColumnsForReason(columns []string) string {
}
return strings.Join(columns, ", ")
}
// === 扩展规则(v2 新增)===
// ruleLikeLeadingWildcard flags a filter of the form `col LIKE '%xxx'`.
// A leading % wildcard keeps a B-Tree index on the column from being used for
// the match.
//
// Detection is a substring check on the lower-cased filter text returned by
// extractNodeFilterText. It returns nil when the node has no filter text or no
// leading-wildcard LIKE is found. The check does not distinguish LIKE from
// NOT LIKE, nor text inside string literals from real predicates.
func ruleLikeLeadingWildcard(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion {
	filter := extractNodeFilterText(node)
	if filter == "" {
		return nil
	}
	lower := strings.ToLower(filter)

	// Accept single- and double-quoted patterns, with or without a space
	// between the LIKE keyword and the opening quote.
	leadingWildcards := []string{" like '%", " like'%", " like \"%", " like\"%"}
	found := false
	for _, pattern := range leadingWildcards {
		if strings.Contains(lower, pattern) {
			found = true
			break
		}
	}
	if !found {
		return nil
	}

	return &connection.IndexSuggestion{
		Severity:       connection.SeverityCritical,
		Rule:           "like_leading_wildcard",
		Reason:         fmt.Sprintf("LIKE 前缀通配(%q)导致索引失效,考虑改用全文索引或前置常量前缀", truncateForReason(filter, 80)),
		AffectedNodeID: node.ID,
		AffectedTable:  node.Table,
		EstRows:        node.EstRows,
	}
}
// ruleFunctionOnColumn flags filters that call one of a fixed list of SQL
// functions, e.g. `upper(col) = ?` or `date_format(col, ...) = ?`. Wrapping an
// indexed column in a function keeps a plain index on that column from being
// used for the predicate.
//
// Only the functions listed below are recognised; arithmetic on a column such
// as `col + 1 = ?` is not detected. The check is textual: it cannot tell
// whether the function wraps a column or a constant, and it does not skip
// string literals.
//
// Returns nil when the node has no filter text or none of the functions occur.
func ruleFunctionOnColumn(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion {
	filter := extractNodeFilterText(node)
	if filter == "" {
		return nil
	}
	lower := strings.ToLower(filter)

	// Each pattern is "name(" so that a bare column called e.g. "date" does
	// not match. The first pattern that matches is the one reported.
	functionPatterns := []string{
		"upper(", "lower(", "date_format(", "date(", "year(", "month(",
		"substring(", "substr(", "trim(", "replace(", "concat(",
		"abs(", "round(", "cast(", "convert(", "ifnull(", "coalesce(",
	}
	matched := ""
	for _, p := range functionPatterns {
		if filterCallsFunction(lower, p) {
			matched = p
			break
		}
	}
	if matched == "" {
		return nil
	}

	return &connection.IndexSuggestion{
		Severity:       connection.SeverityCritical,
		Rule:           "function_on_column",
		Reason:         fmt.Sprintf("WHERE 条件中 %s... 包裹列,导致该列上的索引失效;考虑重写为列 = func(常量) 形式或在函数上建表达式索引", matched),
		AffectedNodeID: node.ID,
		AffectedTable:  node.Table,
		EstRows:        node.EstRows,
	}
}

// filterCallsFunction reports whether text contains pattern at a position that
// is not preceded by an identifier character, so that "date(" matches
// "date(col)" and "pg_catalog.date(col)" but not "validate(col)".
func filterCallsFunction(text, pattern string) bool {
	for from := 0; from < len(text); {
		idx := strings.Index(text[from:], pattern)
		if idx < 0 {
			return false
		}
		pos := from + idx
		if pos == 0 || !isSQLIdentByte(text[pos-1]) {
			return true
		}
		// The hit was the tail of a longer identifier; keep searching.
		from = pos + 1
	}
	return false
}

// isSQLIdentByte reports whether c can be part of an unquoted SQL identifier.
// Bytes >= 0x80 are treated as identifier characters so that multi-byte
// UTF-8 names are not split.
func isSQLIdentByte(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
		return true
	case c == '_', c == '$', c >= 0x80:
		return true
	}
	return false
}
// ruleLargeOffsetPagination flags LIMIT nodes whose offset is at least
// ruleLargeOffsetThreshold (e.g. `LIMIT 100000, 10`). With a large offset the
// database has to produce and discard the skipped rows first, so cost grows
// with the offset.
//
// The offset is read only from node.Extra["offset"]; nodes without that key
// are ignored. Numeric values are converted directly; any other value is
// formatted with %v and handed to parseExplainInt64.
func ruleLargeOffsetPagination(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion {
	if node.OpType != connection.ExplainOpLimit {
		return nil
	}
	// Indexing a nil map is safe in Go, so no separate nil check is needed.
	raw, ok := node.Extra["offset"]
	if !ok {
		return nil
	}

	var offset int64
	switch n := raw.(type) {
	case int:
		offset = int64(n)
	case int32:
		offset = int64(n)
	case int64:
		offset = n
	case float32:
		offset = explainOffsetFromFloat(float64(n))
	case float64:
		// %v prints float64 values >= 1e6 in exponent form ("1e+06"), which an
		// integer parser may reject, so floats are converted directly.
		offset = explainOffsetFromFloat(n)
	default:
		offset = parseExplainInt64(fmt.Sprintf("%v", raw))
	}
	if offset < ruleLargeOffsetThreshold {
		return nil
	}

	return &connection.IndexSuggestion{
		Severity:       connection.SeverityWarning,
		Rule:           "large_offset_pagination",
		Reason:         fmt.Sprintf("LIMIT offset=%d 过大,数据库需扫描并丢弃前 %d 行;建议改用游标分页(WHERE id > last_id LIMIT N)", offset, offset),
		AffectedNodeID: node.ID,
		EstRows:        offset,
	}
}

// explainOffsetFromFloat converts a floating-point offset to int64, mapping
// NaN and values below 1 to 0 and clamping values too large for a safe
// conversion.
func explainOffsetFromFloat(f float64) int64 {
	const ceiling = 1 << 62
	switch {
	case f != f || f < 1: // f != f is true only for NaN
		return 0
	case f >= ceiling:
		return ceiling
	default:
		return int64(f)
	}
}
// ruleSelectStarWithJoin emits an informational hint for JOIN nodes whose
// actual row count is at least ten times the estimate, suggesting that
// `SELECT *` may be pulling unnecessary columns through the join.
//
// When result.SourceSQL is available the hint is emitted only if the statement
// appears to select with a star (`SELECT *`, `SELECT DISTINCT *` or `t.*`).
// When SourceSQL is empty the row-count heuristic alone decides. The SQL check
// is textual and does not parse the statement.
//
// Returns nil for non-JOIN nodes and for nodes without positive estimated and
// actual row counts.
func ruleSelectStarWithJoin(result connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion {
	if node.OpType != connection.ExplainOpJoin {
		return nil
	}
	if node.EstRows <= 0 || node.ActualRows <= 0 {
		return nil
	}
	// For positive integers ActualRows/10 < EstRows is equivalent to
	// ActualRows < EstRows*10, without the risk of overflowing int64.
	if node.ActualRows/10 < node.EstRows {
		return nil
	}
	if sql := strings.TrimSpace(result.SourceSQL); sql != "" && !sqlSelectsStar(sql) {
		return nil
	}

	return &connection.IndexSuggestion{
		Severity:       connection.SeverityInfo,
		Rule:           "select_star_with_join_pattern",
		Reason:         "JOIN 节点实际行数远超估算,可能因 SELECT * 拉取了不必要字段;建议显式列出需要的列",
		AffectedNodeID: node.ID,
		AffectedTable:  node.Table,
		EstRows:        node.ActualRows,
	}
}

// sqlSelectsStar reports whether sql looks like it projects columns with a
// star. Whitespace is collapsed and case is ignored before matching.
func sqlSelectsStar(sql string) bool {
	normalized := strings.Join(strings.Fields(strings.ToLower(sql)), " ")
	for _, marker := range []string{"select *", "select distinct *", ".*"} {
		if strings.Contains(normalized, marker) {
			return true
		}
	}
	return false
}
// ruleOrConditionNoIndex warns when a node flagged as a full scan has filter
// text containing an OR, e.g. `col1 = ? or col2 = ?`.
//
// The node must carry connection.ExplainFlagFullScan and must have non-empty
// filter text; the lower-cased text is then checked for " or " through
// containsTopLevelKeyword. Returns nil when any of these conditions fails.
func ruleOrConditionNoIndex(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion {
	if !hasFlag(node.Flags, connection.ExplainFlagFullScan) {
		return nil
	}

	condition := extractNodeFilterText(node)
	if condition == "" || !containsTopLevelKeyword(strings.ToLower(condition), " or ") {
		return nil
	}

	suggestion := connection.IndexSuggestion{
		Severity:       connection.SeverityWarning,
		Rule:           "or_condition_no_index",
		Reason:         "WHERE 含 OR 条件,若两侧字段未全部建索引则触发全表扫描;考虑改写为 UNION ALL 或为 OR 两侧字段都建索引",
		AffectedNodeID: node.ID,
		AffectedTable:  node.Table,
		EstRows:        node.EstRows,
	}
	return &suggestion
}
// ruleCartesianProductRisk is a result-level rule that looks for a JOIN node
// with no recognisable join condition, i.e. a possible cartesian product.
//
// A node qualifies when its OpType is connection.ExplainOpJoin, its EstRows is
// at least ruleCartesianProductEstRows, and none of the Extra keys "hashCond",
// "joinType", "on" or "mergeCond" holds a non-nil value that formats to a
// non-empty string. Only the first qualifying node is reported; nil is
// returned when there is none.
//
// NOTE(review): "joinType" is treated as evidence of a join condition. If the
// plan normaliser sets it on every join regardless of conditions, this rule
// would never fire for those plans — confirm against the normaliser.
func ruleCartesianProductRisk(result connection.ExplainResult) *connection.IndexSuggestion {
	conditionKeys := []string{"hashCond", "joinType", "on", "mergeCond"}

nodes:
	for _, candidate := range result.Nodes {
		if candidate.OpType != connection.ExplainOpJoin || candidate.EstRows < ruleCartesianProductEstRows {
			continue
		}

		// Looking a key up in a nil Extra map simply reports "absent".
		for _, key := range conditionKeys {
			value, present := candidate.Extra[key]
			if !present || value == nil {
				continue
			}
			if fmt.Sprintf("%v", value) != "" {
				// A join condition is recorded; this node is fine.
				continue nodes
			}
		}

		suggestion := connection.IndexSuggestion{
			Severity:       connection.SeverityCritical,
			Rule:           "cartesian_product_risk",
			Reason:         fmt.Sprintf("JOIN 节点估算 %d 行且未识别到 ON/HASH 条件,可能是笛卡尔积;请补充 JOIN 条件", candidate.EstRows),
			AffectedNodeID: candidate.ID,
			EstRows:        candidate.EstRows,
		}
		return &suggestion
	}
	return nil
}
// containsTopLevelKeyword reports whether keyword occurs in text outside of
// quoted regions. Regions delimited by single quotes, double quotes or
// backticks are skipped, so a keyword that only appears inside a string
// literal (e.g. `name = 'this or that'`) is not reported.
//
// Inside single- and double-quoted regions a backslash escapes the next byte,
// and a doubled quote ('') is handled naturally as close-then-reopen.
// Parentheses are not analysed: a keyword inside a parenthesised
// sub-expression still counts. Matching is case-sensitive; callers lower-case
// text first. An empty keyword always matches, like strings.Contains.
func containsTopLevelKeyword(text, keyword string) bool {
	if keyword == "" {
		return true
	}
	var quote byte // 0 while outside any quoted region
	for i := 0; i < len(text); i++ {
		c := text[i]
		if quote != 0 {
			switch {
			case c == '\\' && quote != '`':
				i++ // skip the escaped byte
			case c == quote:
				quote = 0
			}
			continue
		}
		if strings.HasPrefix(text[i:], keyword) {
			return true
		}
		if c == '\'' || c == '"' || c == '`' {
			quote = c
		}
	}
	return false
}

View File

@@ -234,6 +234,168 @@ func TestRunExplainRules_EmptyResultNoSuggestions(t *testing.T) {
}
}
// === 扩展规则测试 ===
// TestRunExplainRules_LikeLeadingWildcardCritical checks that a full-scan node
// whose attached condition is `name like '%john%'` yields a
// like_leading_wildcard suggestion with critical severity.
func TestRunExplainRules_LikeLeadingWildcardCritical(t *testing.T) {
	result := connection.ExplainResult{
		DBType:    "mysql",
		SourceSQL: "SELECT * FROM users WHERE name LIKE '%john%'",
		Nodes: []connection.ExplainNode{
			{
				ID:      "n1",
				OpType:  connection.ExplainOpScan,
				Table:   "users",
				EstRows: 50000,
				Flags:   []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex},
				Extra:   map[string]any{"attachedCondition": "name like '%john%'"},
			},
		},
	}

	found := false
	for _, s := range runExplainRules(result) {
		if s.Rule != "like_leading_wildcard" {
			continue
		}
		found = true
		if s.Severity != connection.SeverityCritical {
			t.Fatalf("LIKE 前缀通配应为 critical,got=%s", s.Severity)
		}
	}
	if !found {
		t.Fatal("LIKE 前缀通配应触发 like_leading_wildcard 规则")
	}
}
// TestRunExplainRules_FunctionOnColumnCritical checks that a full-scan node
// whose attached condition is `upper(name) = 'JOHN'` yields a
// function_on_column suggestion with critical severity.
func TestRunExplainRules_FunctionOnColumnCritical(t *testing.T) {
	result := connection.ExplainResult{
		DBType:    "mysql",
		SourceSQL: "SELECT * FROM users WHERE UPPER(name) = 'JOHN'",
		Nodes: []connection.ExplainNode{
			{
				ID:      "n1",
				OpType:  connection.ExplainOpScan,
				Table:   "users",
				EstRows: 20000,
				Flags:   []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex},
				Extra:   map[string]any{"attachedCondition": "upper(name) = 'JOHN'"},
			},
		},
	}

	found := false
	for _, s := range runExplainRules(result) {
		if s.Rule != "function_on_column" {
			continue
		}
		found = true
		if s.Severity != connection.SeverityCritical {
			t.Fatalf("函数包裹列应为 critical,got=%s", s.Severity)
		}
	}
	if !found {
		t.Fatal("函数包裹列应触发 function_on_column 规则")
	}
}
// TestRunExplainRules_OrConditionNoIndexWarning checks that a full-scan node
// whose attached condition contains an OR yields an or_condition_no_index
// suggestion.
func TestRunExplainRules_OrConditionNoIndexWarning(t *testing.T) {
	scan := connection.ExplainNode{
		ID:      "n1",
		OpType:  connection.ExplainOpScan,
		Table:   "users",
		EstRows: 10000,
		Flags:   []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex},
		Extra:   map[string]any{"attachedCondition": "id = 1 or name = 'x'"},
	}
	plan := connection.ExplainResult{
		DBType:    "mysql",
		SourceSQL: "SELECT * FROM users WHERE id = 1 OR name = 'x'",
		Nodes:     []connection.ExplainNode{scan},
	}

	for _, got := range runExplainRules(plan) {
		if got.Rule == "or_condition_no_index" {
			return // rule fired as expected
		}
	}
	t.Fatal("全表扫描 + OR 条件应触发 or_condition_no_index 规则")
}
// TestRunExplainRules_CartesianProductRiskCritical checks that a JOIN node
// with a large row estimate and no join-condition keys in Extra yields a
// cartesian_product_risk suggestion with critical severity.
func TestRunExplainRules_CartesianProductRiskCritical(t *testing.T) {
	result := connection.ExplainResult{
		DBType:    "mysql",
		SourceSQL: "SELECT * FROM a, b",
		Nodes: []connection.ExplainNode{
			{
				ID:      "n1",
				OpType:  connection.ExplainOpJoin,
				EstRows: 500000,           // well above the 100000 threshold
				Extra:   map[string]any{}, // no hashCond/joinType
			},
		},
	}

	found := false
	for _, s := range runExplainRules(result) {
		if s.Rule != "cartesian_product_risk" {
			continue
		}
		found = true
		if s.Severity != connection.SeverityCritical {
			t.Fatalf("笛卡尔积风险应为 critical,got=%s", s.Severity)
		}
	}
	if !found {
		t.Fatal("无条件的 JOIN + 大估算应触发 cartesian_product_risk")
	}
}
// TestRunExplainRules_CartesianProductSuppressedWithCondition checks that a
// large JOIN node carrying a hashCond entry in Extra does not yield a
// cartesian_product_risk suggestion.
func TestRunExplainRules_CartesianProductSuppressedWithCondition(t *testing.T) {
	join := connection.ExplainNode{
		ID:      "n1",
		OpType:  connection.ExplainOpJoin,
		EstRows: 500000,
		Extra:   map[string]any{"hashCond": "a.id = b.aid"}, // join condition present
	}
	plan := connection.ExplainResult{
		DBType:    "mysql",
		SourceSQL: "SELECT * FROM a JOIN b ON a.id = b.aid",
		Nodes:     []connection.ExplainNode{join},
	}

	for _, got := range runExplainRules(plan) {
		if got.Rule != "cartesian_product_risk" {
			continue
		}
		t.Fatal("有 JOIN 条件时不应触发 cartesian_product_risk")
	}
}
// TestRunExplainRules_FunctionOnColumnNotTriggeredForPlainColumn checks that a
// plain comparison such as `name = 'x'` does not yield a function_on_column
// suggestion.
func TestRunExplainRules_FunctionOnColumnNotTriggeredForPlainColumn(t *testing.T) {
	result := connection.ExplainResult{
		DBType:    "mysql",
		SourceSQL: "SELECT * FROM users WHERE name = 'x'",
		Nodes: []connection.ExplainNode{
			{
				ID:      "n1",
				OpType:  connection.ExplainOpScan,
				Table:   "users",
				EstRows: 100,
				Flags:   []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex},
				Extra:   map[string]any{"attachedCondition": "name = 'x'"},
			},
		},
	}

	for _, s := range runExplainRules(result) {
		if s.Rule == "function_on_column" {
			t.Fatalf("name = 'x' 不应触发 function_on_column,但触发了:%+v", s)
		}
	}
}
// contains 检查字符串包含(避免和 strings.Contains 冲突,这里独立实现)。
func contains(s, substr string) bool {
return len(s) >= len(substr) && (s == substr || indexOfContains(s, substr) >= 0)