From fc0f5ed83a6adff5e1911bb270b04d6e054361c8 Mon Sep 17 00:00:00 2001 From: Sukka Date: Sun, 15 Jun 2025 17:55:07 +0800 Subject: [PATCH] Improve AdGuard rule-set parser --- cmd/sing-box/cmd_rule_set_convert.go | 4 +- .../convertor/adguard/convertor.go | 68 +++++++++++++------ .../convertor/adguard/convertor_test.go | 7 +- 3 files changed, 53 insertions(+), 26 deletions(-) rename {cmd/sing-box/internal => common}/convertor/adguard/convertor.go (83%) rename {cmd/sing-box/internal => common}/convertor/adguard/convertor_test.go (96%) diff --git a/cmd/sing-box/cmd_rule_set_convert.go b/cmd/sing-box/cmd_rule_set_convert.go index 320149cf..fe76d7cd 100644 --- a/cmd/sing-box/cmd_rule_set_convert.go +++ b/cmd/sing-box/cmd_rule_set_convert.go @@ -5,7 +5,7 @@ import ( "os" "strings" - "github.com/sagernet/sing-box/cmd/sing-box/internal/convertor/adguard" + "github.com/sagernet/sing-box/common/convertor/adguard" "github.com/sagernet/sing-box/common/srs" C "github.com/sagernet/sing-box/constant" "github.com/sagernet/sing-box/log" @@ -54,7 +54,7 @@ func convertRuleSet(sourcePath string) error { var rules []option.HeadlessRule switch flagRuleSetConvertType { case "adguard": - rules, err = adguard.Convert(reader) + rules, err = adguard.Convert(reader, log.StdLogger()) case "": return E.New("source type is required") default: diff --git a/cmd/sing-box/internal/convertor/adguard/convertor.go b/common/convertor/adguard/convertor.go similarity index 83% rename from cmd/sing-box/internal/convertor/adguard/convertor.go rename to common/convertor/adguard/convertor.go index ff60929b..4392736b 100644 --- a/cmd/sing-box/internal/convertor/adguard/convertor.go +++ b/common/convertor/adguard/convertor.go @@ -9,10 +9,10 @@ import ( "strings" C "github.com/sagernet/sing-box/constant" - "github.com/sagernet/sing-box/log" "github.com/sagernet/sing-box/option" "github.com/sagernet/sing/common" E "github.com/sagernet/sing/common/exceptions" + "github.com/sagernet/sing/common/logger" M "github.com/sagernet/sing/common/metadata" ) @@ -27,7 +27,7 @@ type agdguardRuleLine struct { isImportant bool } -func Convert(reader io.Reader) ([]option.HeadlessRule, error) { +func Convert(reader io.Reader, logger logger.Logger) ([]option.HeadlessRule, error) { scanner := bufio.NewScanner(reader) var ( ruleLines []agdguardRuleLine @@ -36,9 +36,45 @@ func Convert(reader io.Reader) ([]option.HeadlessRule, error) { parseLine: for scanner.Scan() { ruleLine := scanner.Text() - if ruleLine == "" || ruleLine[0] == '!' || ruleLine[0] == '#' { + + // Empty line + if ruleLine == "" { continue } + // Comment (both line comment and in-line comment) + if strings.Contains(ruleLine, "!") { + continue + } + // Either comment or cosmetic filter + if strings.Contains(ruleLine, "#") { + ignoredLines++ + logger.Debug("ignored unsupported cosmetic filter: ", ruleLine) + continue + } + // We don't support URL query anyway + if strings.Contains(ruleLine, "?") || strings.Contains(ruleLine, "&") { + ignoredLines++ + logger.Debug("ignored unsupported rule with query: ", ruleLine) + continue + } + // Commonly seen in CSS selectors of cosmetic filters + if strings.Contains(ruleLine, "[") || strings.Contains(ruleLine, "]") { + ignoredLines++ + logger.Debug("ignored unsupported cosmetic filter: ", ruleLine) + continue + } + if strings.Contains(ruleLine, "(") || strings.Contains(ruleLine, ")") { + ignoredLines++ + logger.Debug("ignored unsupported cosmetic filter: ", ruleLine) + continue + } + // We don't support $domain modifier + if strings.Contains(ruleLine, "~") { + ignoredLines++ + logger.Debug("ignored unsupported rule modifier: ", ruleLine) + continue + } + originRuleLine := ruleLine if M.IsDomainName(ruleLine) { ruleLines = append(ruleLines, agdguardRuleLine{ @@ -92,7 +128,7 @@ parseLine: } if !ignored { ignoredLines++ - log.Debug("ignored unsupported rule with modifier: ", paramParts[0], ": ", ruleLine) + logger.Debug("ignored unsupported rule with modifier: ", paramParts[0], ": ", ruleLine) continue parseLine } } @@ -120,7 +156,7 @@ parseLine: ruleLine = ruleLine[1 : len(ruleLine)-1] if ignoreIPCIDRRegexp(ruleLine) { ignoredLines++ - log.Debug("ignored unsupported rule with IPCIDR regexp: ", ruleLine) + logger.Debug("ignored unsupported rule with IPCIDR regexp: ", ruleLine) continue } isRegexp = true @@ -130,17 +166,7 @@ parseLine: } if strings.Contains(ruleLine, "/") { ignoredLines++ - log.Debug("ignored unsupported rule with path: ", ruleLine) - continue - } - if strings.Contains(ruleLine, "##") { - ignoredLines++ - log.Debug("ignored unsupported rule with element hiding: ", ruleLine) - continue - } - if strings.Contains(ruleLine, "#$#") { - ignoredLines++ - log.Debug("ignored unsupported rule with element hiding: ", ruleLine) + logger.Debug("ignored unsupported rule with path: ", ruleLine) continue } var domainCheck string @@ -151,7 +177,7 @@ parseLine: } if ruleLine == "" { ignoredLines++ - log.Debug("ignored unsupported rule with empty domain", originRuleLine) + logger.Debug("ignored unsupported rule with empty domain", originRuleLine) continue } else { domainCheck = strings.ReplaceAll(domainCheck, "*", "x") @@ -159,13 +185,13 @@ parseLine: _, ipErr := parseADGuardIPCIDRLine(ruleLine) if ipErr == nil { ignoredLines++ - log.Debug("ignored unsupported rule with IPCIDR: ", ruleLine) + logger.Debug("ignored unsupported rule with IPCIDR: ", ruleLine) continue } if M.ParseSocksaddr(domainCheck).Port != 0 { - log.Debug("ignored unsupported rule with port: ", ruleLine) + logger.Debug("ignored unsupported rule with port: ", ruleLine) } else { - log.Debug("ignored unsupported rule with invalid domain: ", ruleLine) + logger.Debug("ignored unsupported rule with invalid domain: ", ruleLine) } ignoredLines++ continue @@ -283,7 +309,7 @@ parseLine: }, } } - log.Info("parsed rules: ", len(ruleLines), "/", len(ruleLines)+ignoredLines) + logger.Info("parsed rules: ", len(ruleLines), "/", len(ruleLines)+ignoredLines) return []option.HeadlessRule{currentRule}, nil } diff --git a/cmd/sing-box/internal/convertor/adguard/convertor_test.go b/common/convertor/adguard/convertor_test.go similarity index 96% rename from cmd/sing-box/internal/convertor/adguard/convertor_test.go rename to common/convertor/adguard/convertor_test.go index 212c2170..be3358e5 100644 --- a/cmd/sing-box/internal/convertor/adguard/convertor_test.go +++ b/common/convertor/adguard/convertor_test.go @@ -7,6 +7,7 @@ import ( "github.com/sagernet/sing-box/adapter" "github.com/sagernet/sing-box/route/rule" + "github.com/sagernet/sing/common/logger" "github.com/stretchr/testify/require" ) @@ -24,7 +25,7 @@ example.arpa @@|sagernet.example.org| ||sagernet.org^$important @@|sing-box.sagernet.org^$important -`)) +`), logger.NOP()) require.NoError(t, err) require.Len(t, rules, 1) rule, err := rule.NewHeadlessRule(context.Background(), rules[0]) @@ -83,7 +84,7 @@ func TestHosts(t *testing.T) { 127.0.0.1 localhost ::1 localhost #[IPv6] 0.0.0.0 google.com -`)) +`), logger.NOP()) require.NoError(t, err) require.Len(t, rules, 1) rule, err := rule.NewHeadlessRule(context.Background(), rules[0]) @@ -113,7 +114,7 @@ func TestSimpleHosts(t *testing.T) { rules, err := Convert(strings.NewReader(` example.com www.example.org -`)) +`), logger.NOP()) require.NoError(t, err) require.Len(t, rules, 1) rule, err := rule.NewHeadlessRule(context.Background(), rules[0])