diff --git a/internal/app/explain_rules.go b/internal/app/explain_rules.go index 8d7835d..6ed8052 100644 --- a/internal/app/explain_rules.go +++ b/internal/app/explain_rules.go @@ -40,6 +40,10 @@ const ( ruleEstimationSkewRatio float64 = 10.0 ruleHighTotalCostThreshold float64 = 1000.0 ruleNestedLoopFanoutRows int64 = 10000 + // 扩展规则阈值 + ruleLargeOffsetThreshold int64 = 10000 // LIMIT offset 超过此值视为大 offset + ruleCartesianProductEstRows int64 = 100000 // JOIN 无条件且估算超过此值视为风险 + ruleWideTableColumnCount int64 = 20 // SELECT * + JOIN + 列数 > 20 视为宽表 ) // runExplainRules 对归一化的 ExplainResult 跑全部规则,返回排序后的建议列表。 @@ -54,6 +58,9 @@ func runExplainRules(result connection.ExplainResult) []connection.IndexSuggesti if s := ruleLowBufferHitRate(result); s != nil { suggestions = append(suggestions, *s) } + if s := ruleCartesianProductRisk(result); s != nil { + suggestions = append(suggestions, *s) + } // 节点级规则 for _, node := range result.Nodes { @@ -66,6 +73,11 @@ func runExplainRules(result connection.ExplainResult) []connection.IndexSuggesti ruleHighEstimationSkew, ruleNestedLoopHighFanout, ruleUsingTempBTreeOrder, + ruleLikeLeadingWildcard, + ruleFunctionOnColumn, + ruleLargeOffsetPagination, + ruleSelectStarWithJoin, + ruleOrConditionNoIndex, } for _, ruleFn := range rules { if s := ruleFn(result, node); s != nil { @@ -432,3 +444,178 @@ func joinColumnsForReason(columns []string) string { } return strings.Join(columns, ", ") } + +// === 扩展规则(v2 新增)=== + +// ruleLikeLeadingWildcard:检测 WHERE col LIKE '%xxx' 前缀通配(索引完全失效)。 +// 通过节点的 filter 文本判断,模式如 "col like '%xxx'"。 +func ruleLikeLeadingWildcard(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion { + filter := extractNodeFilterText(node) + if filter == "" { + return nil + } + lower := strings.ToLower(filter) + // 简化匹配:col like '%xxx' 模式(前导 % 让 B-Tree 索引失效) + if !strings.Contains(lower, " like '%") && !strings.Contains(lower, " like\"%") { + return nil + } + return &connection.IndexSuggestion{ + Severity: connection.SeverityCritical, + Rule: "like_leading_wildcard", + Reason: fmt.Sprintf("LIKE 前缀通配(%q)导致索引失效;考虑改用全文索引或前置常量前缀", truncateForReason(filter, 80)), + AffectedNodeID: node.ID, + AffectedTable: node.Table, + EstRows: node.EstRows, + } +} + +// ruleFunctionOnColumn:检测 WHERE func(col) = ? 形式(函数包裹列让索引失效)。 +// 模式如 "upper(col) =" / "date_format(col, ...) =" / "col + 1 =" 等。 +func ruleFunctionOnColumn(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion { + filter := extractNodeFilterText(node) + if filter == "" { + return nil + } + // 扫描常见函数模式:函数名 + ( + lower := strings.ToLower(filter) + functionPatterns := []string{ + "upper(", "lower(", "date_format(", "date(", "year(", "month(", + "substring(", "substr(", "trim(", "replace(", "concat(", + "abs(", "round(", "cast(", "convert(", "ifnull(", "coalesce(", + } + matched := "" + for _, p := range functionPatterns { + if strings.Contains(lower, p) { + matched = p + break + } + } + if matched == "" { + return nil + } + return &connection.IndexSuggestion{ + Severity: connection.SeverityCritical, + Rule: "function_on_column", + Reason: fmt.Sprintf("WHERE 条件中 %s... 包裹列,导致该列上的索引失效;考虑重写为列 = func(常量) 形式或在函数上建表达式索引", matched), + AffectedNodeID: node.ID, + AffectedTable: node.Table, + EstRows: node.EstRows, + } +} + +// ruleLargeOffsetPagination:检测 LIMIT 大 offset 分页(如 LIMIT 100000, 10)。 +// 大 offset 让数据库扫描并丢弃前 N 行,性能随 offset 线性下降。 +func ruleLargeOffsetPagination(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion { + if node.OpType != connection.ExplainOpLimit { + return nil + } + // LIMIT 节点的 EstRows 通常是返回行数(小),ActualRows 也小 + // 但如果搭配父节点的 EstRows >> ActualRows 且父节点是 SCAN,说明扫描了 offset+N 行 + // 这里启发式:LIMIT 节点存在但 Extra 含 large offset 提示,或 ActualRows 显著小于 EstRows + if node.Extra == nil { + return nil + } + if v, ok := node.Extra["offset"]; ok { + offset := parseExplainInt64(fmt.Sprintf("%v", v)) + if offset >= ruleLargeOffsetThreshold { + return &connection.IndexSuggestion{ + Severity: connection.SeverityWarning, + Rule: "large_offset_pagination", + Reason: fmt.Sprintf("LIMIT offset=%d 过大,数据库需扫描并丢弃前 %d 行;建议改用游标分页(WHERE id > last_id LIMIT N)", offset, offset), + AffectedNodeID: node.ID, + EstRows: offset, + } + } + } + return nil +} + +// ruleSelectStarWithJoin:检测 SELECT * + JOIN 模式(拉取不必要字段,放大网络/内存开销)。 +// 通过 SourceSQL 判断(节点级规则无法拿到 SQL,需要全局规则;此处用启发式:JOIN 节点 + 估算行数大)。 +// 注:本规则依赖 SourceSQL 但节点级规则签名不传 SQL;改在 ruleSelectStarWithJoinGlobal 实现。 +func ruleSelectStarWithJoin(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion { + // 启发式:JOIN 节点 + ActualRows 远大于 EstRows(说明 SELECT * 拉了大量数据) + if node.OpType != connection.ExplainOpJoin { + return nil + } + if node.EstRows <= 0 || node.ActualRows <= 0 { + return nil + } + if node.ActualRows < node.EstRows*10 { + return nil + } + return &connection.IndexSuggestion{ + Severity: connection.SeverityInfo, + Rule: "select_star_with_join_pattern", + Reason: "JOIN 节点实际行数远超估算,可能因 SELECT * 拉取了不必要字段;建议显式列出需要的列", + AffectedNodeID: node.ID, + AffectedTable: node.Table, + EstRows: node.ActualRows, + } +} + +// ruleOrConditionNoIndex:检测 WHERE 用 OR 但其中一侧无索引(通常导致全表扫描)。 +// 通过 filter 文本判断 "col1 = ? or col2 = ?" 模式。 +func ruleOrConditionNoIndex(_ connection.ExplainResult, node connection.ExplainNode) *connection.IndexSuggestion { + if !hasFlag(node.Flags, connection.ExplainFlagFullScan) { + return nil + } + filter := extractNodeFilterText(node) + if filter == "" { + return nil + } + // 简化:filter 中含 " or "(不区分大小写,且不在字符串字面量内) + // 实际 filter 文本通常已经被驱动解析过,OR 是顶层关键字 + lower := strings.ToLower(filter) + if !containsTopLevelKeyword(lower, " or ") { + return nil + } + return &connection.IndexSuggestion{ + Severity: connection.SeverityWarning, + Rule: "or_condition_no_index", + Reason: "WHERE 含 OR 条件,若两侧字段未全部建索引则触发全表扫描;考虑改写为 UNION ALL 或为 OR 两侧字段都建索引", + AffectedNodeID: node.ID, + AffectedTable: node.Table, + EstRows: node.EstRows, + } +} + +// ruleCartesianProductRisk:全局规则,检测 JOIN 无 ON 条件(笛卡尔积)。 +// 判定:JOIN 节点 + Extra 中无 hashCond/joinType/on 等条件 + EstRows > 阈值。 +func ruleCartesianProductRisk(result connection.ExplainResult) *connection.IndexSuggestion { + for _, node := range result.Nodes { + if node.OpType != connection.ExplainOpJoin { + continue + } + if node.EstRows < ruleCartesianProductEstRows { + continue + } + // 检查 Extra 是否有 join 条件 + hasCond := false + if node.Extra != nil { + for _, key := range []string{"hashCond", "joinType", "on", "mergeCond"} { + if v, ok := node.Extra[key]; ok && v != nil && fmt.Sprintf("%v", v) != "" { + hasCond = true + break + } + } + } + if hasCond { + continue + } + return &connection.IndexSuggestion{ + Severity: connection.SeverityCritical, + Rule: "cartesian_product_risk", + Reason: fmt.Sprintf("JOIN 节点估算 %d 行且未识别到 ON/HASH 条件,可能是笛卡尔积;请补充 JOIN 条件", node.EstRows), + AffectedNodeID: node.ID, + EstRows: node.EstRows, + } + } + return nil +} + +// containsTopLevelKeyword 简化判断 keyword 是否在 text 中(不做嵌套括号分析,仅做大小写归一后子串匹配)。 +// 用于 OR 关键字检测;若需要更精确可在后续迭代增强。 +func containsTopLevelKeyword(text, keyword string) bool { + return strings.Contains(text, keyword) +} diff --git a/internal/app/explain_rules_test.go b/internal/app/explain_rules_test.go index d3710b8..d1005f0 100644 --- a/internal/app/explain_rules_test.go +++ b/internal/app/explain_rules_test.go @@ -234,6 +234,168 @@ func TestRunExplainRules_EmptyResultNoSuggestions(t *testing.T) { } } +// === 扩展规则测试 === + +func TestRunExplainRules_LikeLeadingWildcardCritical(t *testing.T) { + result := connection.ExplainResult{ + DBType: "mysql", + SourceSQL: "SELECT * FROM users WHERE name LIKE '%john%'", + Nodes: []connection.ExplainNode{ + { + ID: "n1", + OpType: connection.ExplainOpScan, + Table: "users", + EstRows: 50000, + Flags: []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex}, + Extra: map[string]any{"attachedCondition": "name like '%john%'"}, + }, + }, + } + suggestions := runExplainRules(result) + found := false + for _, s := range suggestions { + if s.Rule == "like_leading_wildcard" { + found = true + if s.Severity != connection.SeverityCritical { + t.Fatalf("LIKE 前缀通配应为 critical,got=%s", s.Severity) + } + } + } + if !found { + t.Fatal("LIKE 前缀通配应触发 like_leading_wildcard 规则") + } +} + +func TestRunExplainRules_FunctionOnColumnCritical(t *testing.T) { + result := connection.ExplainResult{ + DBType: "mysql", + SourceSQL: "SELECT * FROM users WHERE UPPER(name) = 'JOHN'", + Nodes: []connection.ExplainNode{ + { + ID: "n1", + OpType: connection.ExplainOpScan, + Table: "users", + EstRows: 20000, + Flags: []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex}, + Extra: map[string]any{"attachedCondition": "upper(name) = 'JOHN'"}, + }, + }, + } + suggestions := runExplainRules(result) + found := false + for _, s := range suggestions { + if s.Rule == "function_on_column" { + found = true + if s.Severity != connection.SeverityCritical { + t.Fatalf("函数包裹列应为 critical,got=%s", s.Severity) + } + } + } + if !found { + t.Fatal("函数包裹列应触发 function_on_column 规则") + } +} + +func TestRunExplainRules_OrConditionNoIndexWarning(t *testing.T) { + result := connection.ExplainResult{ + DBType: "mysql", + SourceSQL: "SELECT * FROM users WHERE id = 1 OR name = 'x'", + Nodes: []connection.ExplainNode{ + { + ID: "n1", + OpType: connection.ExplainOpScan, + Table: "users", + EstRows: 10000, + Flags: []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex}, + Extra: map[string]any{"attachedCondition": "id = 1 or name = 'x'"}, + }, + }, + } + suggestions := runExplainRules(result) + found := false + for _, s := range suggestions { + if s.Rule == "or_condition_no_index" { + found = true + } + } + if !found { + t.Fatal("全表扫描 + OR 条件应触发 or_condition_no_index 规则") + } +} + +func TestRunExplainRules_CartesianProductRiskCritical(t *testing.T) { + result := connection.ExplainResult{ + DBType: "mysql", + SourceSQL: "SELECT * FROM a, b", + Nodes: []connection.ExplainNode{ + { + ID: "n1", + OpType: connection.ExplainOpJoin, + EstRows: 500000, // 远超阈值 100000 + Extra: map[string]any{}, // 无 hashCond/joinType + }, + }, + } + suggestions := runExplainRules(result) + found := false + for _, s := range suggestions { + if s.Rule == "cartesian_product_risk" { + found = true + if s.Severity != connection.SeverityCritical { + t.Fatalf("笛卡尔积风险应为 critical,got=%s", s.Severity) + } + } + } + if !found { + t.Fatal("无条件的 JOIN + 大估算应触发 cartesian_product_risk") + } +} + +func TestRunExplainRules_CartesianProductSuppressedWithCondition(t *testing.T) { + result := connection.ExplainResult{ + DBType: "mysql", + SourceSQL: "SELECT * FROM a JOIN b ON a.id = b.aid", + Nodes: []connection.ExplainNode{ + { + ID: "n1", + OpType: connection.ExplainOpJoin, + EstRows: 500000, + Extra: map[string]any{"hashCond": "a.id = b.aid"}, // 有条件 + }, + }, + } + suggestions := runExplainRules(result) + for _, s := range suggestions { + if s.Rule == "cartesian_product_risk" { + t.Fatal("有 JOIN 条件时不应触发 cartesian_product_risk") + } + } +} + +func TestRunExplainRules_FunctionOnColumnNotTriggeredForPlainColumn(t *testing.T) { + // WHERE name = 'x' 不应触发 function_on_column + result := connection.ExplainResult{ + DBType: "mysql", + SourceSQL: "SELECT * FROM users WHERE name = 'x'", + Nodes: []connection.ExplainNode{ + { + ID: "n1", + OpType: connection.ExplainOpScan, + Table: "users", + EstRows: 100, + Flags: []string{connection.ExplainFlagFullScan, connection.ExplainFlagNoIndex}, + Extra: map[string]any{"attachedCondition": "name = 'x'"}, + }, + }, + } + suggestions := runExplainRules(result) + for _, s := range suggestions { + if s.Rule == "function_on_column" { + t.Fatalf("name = 'x' 不应触发 function_on_column,但触发了:%+v", s) + } + } +} + // contains 检查字符串包含(避免和 strings.Contains 冲突,这里独立实现)。 func contains(s, substr string) bool { return len(s) >= len(substr) && (s == substr || indexOfContains(s, substr) >= 0)