Improve AdGuard rule-set parser

This commit is contained in:
Sukka 2025-06-15 17:55:07 +08:00 committed by 世界
parent c0588c30d7
commit fc0f5ed83a
No known key found for this signature in database
GPG Key ID: CD109927C34A63C4
3 changed files with 53 additions and 26 deletions

View File

@ -5,7 +5,7 @@ import (
"os" "os"
"strings" "strings"
"github.com/sagernet/sing-box/cmd/sing-box/internal/convertor/adguard" "github.com/sagernet/sing-box/common/convertor/adguard"
"github.com/sagernet/sing-box/common/srs" "github.com/sagernet/sing-box/common/srs"
C "github.com/sagernet/sing-box/constant" C "github.com/sagernet/sing-box/constant"
"github.com/sagernet/sing-box/log" "github.com/sagernet/sing-box/log"
@ -54,7 +54,7 @@ func convertRuleSet(sourcePath string) error {
var rules []option.HeadlessRule var rules []option.HeadlessRule
switch flagRuleSetConvertType { switch flagRuleSetConvertType {
case "adguard": case "adguard":
rules, err = adguard.Convert(reader) rules, err = adguard.Convert(reader, log.StdLogger())
case "": case "":
return E.New("source type is required") return E.New("source type is required")
default: default:

View File

@ -9,10 +9,10 @@ import (
"strings" "strings"
C "github.com/sagernet/sing-box/constant" C "github.com/sagernet/sing-box/constant"
"github.com/sagernet/sing-box/log"
"github.com/sagernet/sing-box/option" "github.com/sagernet/sing-box/option"
"github.com/sagernet/sing/common" "github.com/sagernet/sing/common"
E "github.com/sagernet/sing/common/exceptions" E "github.com/sagernet/sing/common/exceptions"
"github.com/sagernet/sing/common/logger"
M "github.com/sagernet/sing/common/metadata" M "github.com/sagernet/sing/common/metadata"
) )
@ -27,7 +27,7 @@ type agdguardRuleLine struct {
isImportant bool isImportant bool
} }
func Convert(reader io.Reader) ([]option.HeadlessRule, error) { func Convert(reader io.Reader, logger logger.Logger) ([]option.HeadlessRule, error) {
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
var ( var (
ruleLines []agdguardRuleLine ruleLines []agdguardRuleLine
@ -36,9 +36,45 @@ func Convert(reader io.Reader) ([]option.HeadlessRule, error) {
parseLine: parseLine:
for scanner.Scan() { for scanner.Scan() {
ruleLine := scanner.Text() ruleLine := scanner.Text()
if ruleLine == "" || ruleLine[0] == '!' || ruleLine[0] == '#' {
// Empty line
if ruleLine == "" {
continue continue
} }
// Comment: skip any line containing '!' (whole-line or in-line comment); not counted as ignored
if strings.Contains(ruleLine, "!") {
continue
}
// Either comment or cosmetic filter
if strings.Contains(ruleLine, "#") {
ignoredLines++
logger.Debug("ignored unsupported cosmetic filter: ", ruleLine)
continue
}
// URL query strings are not supported: skip rules containing '?' or '&'
if strings.Contains(ruleLine, "?") || strings.Contains(ruleLine, "&") {
ignoredLines++
logger.Debug("ignored unsupported rule with query: ", ruleLine)
continue
}
// Commonly seen in CSS selectors of cosmetic filters
if strings.Contains(ruleLine, "[") || strings.Contains(ruleLine, "]") {
ignoredLines++
logger.Debug("ignored unsupported cosmetic filter: ", ruleLine)
continue
}
if strings.Contains(ruleLine, "(") || strings.Contains(ruleLine, ")") {
ignoredLines++
logger.Debug("ignored unsupported cosmetic filter: ", ruleLine)
continue
}
// We don't support $domain modifier
if strings.Contains(ruleLine, "~") {
ignoredLines++
logger.Debug("ignored unsupported rule modifier: ", ruleLine)
continue
}
originRuleLine := ruleLine originRuleLine := ruleLine
if M.IsDomainName(ruleLine) { if M.IsDomainName(ruleLine) {
ruleLines = append(ruleLines, agdguardRuleLine{ ruleLines = append(ruleLines, agdguardRuleLine{
@ -92,7 +128,7 @@ parseLine:
} }
if !ignored { if !ignored {
ignoredLines++ ignoredLines++
log.Debug("ignored unsupported rule with modifier: ", paramParts[0], ": ", ruleLine) logger.Debug("ignored unsupported rule with modifier: ", paramParts[0], ": ", ruleLine)
continue parseLine continue parseLine
} }
} }
@ -120,7 +156,7 @@ parseLine:
ruleLine = ruleLine[1 : len(ruleLine)-1] ruleLine = ruleLine[1 : len(ruleLine)-1]
if ignoreIPCIDRRegexp(ruleLine) { if ignoreIPCIDRRegexp(ruleLine) {
ignoredLines++ ignoredLines++
log.Debug("ignored unsupported rule with IPCIDR regexp: ", ruleLine) logger.Debug("ignored unsupported rule with IPCIDR regexp: ", ruleLine)
continue continue
} }
isRegexp = true isRegexp = true
@ -130,17 +166,7 @@ parseLine:
} }
if strings.Contains(ruleLine, "/") { if strings.Contains(ruleLine, "/") {
ignoredLines++ ignoredLines++
log.Debug("ignored unsupported rule with path: ", ruleLine) logger.Debug("ignored unsupported rule with path: ", ruleLine)
continue
}
if strings.Contains(ruleLine, "##") {
ignoredLines++
log.Debug("ignored unsupported rule with element hiding: ", ruleLine)
continue
}
if strings.Contains(ruleLine, "#$#") {
ignoredLines++
log.Debug("ignored unsupported rule with element hiding: ", ruleLine)
continue continue
} }
var domainCheck string var domainCheck string
@ -151,7 +177,7 @@ parseLine:
} }
if ruleLine == "" { if ruleLine == "" {
ignoredLines++ ignoredLines++
log.Debug("ignored unsupported rule with empty domain", originRuleLine) logger.Debug("ignored unsupported rule with empty domain", originRuleLine)
continue continue
} else { } else {
domainCheck = strings.ReplaceAll(domainCheck, "*", "x") domainCheck = strings.ReplaceAll(domainCheck, "*", "x")
@ -159,13 +185,13 @@ parseLine:
_, ipErr := parseADGuardIPCIDRLine(ruleLine) _, ipErr := parseADGuardIPCIDRLine(ruleLine)
if ipErr == nil { if ipErr == nil {
ignoredLines++ ignoredLines++
log.Debug("ignored unsupported rule with IPCIDR: ", ruleLine) logger.Debug("ignored unsupported rule with IPCIDR: ", ruleLine)
continue continue
} }
if M.ParseSocksaddr(domainCheck).Port != 0 { if M.ParseSocksaddr(domainCheck).Port != 0 {
log.Debug("ignored unsupported rule with port: ", ruleLine) logger.Debug("ignored unsupported rule with port: ", ruleLine)
} else { } else {
log.Debug("ignored unsupported rule with invalid domain: ", ruleLine) logger.Debug("ignored unsupported rule with invalid domain: ", ruleLine)
} }
ignoredLines++ ignoredLines++
continue continue
@ -283,7 +309,7 @@ parseLine:
}, },
} }
} }
log.Info("parsed rules: ", len(ruleLines), "/", len(ruleLines)+ignoredLines) logger.Info("parsed rules: ", len(ruleLines), "/", len(ruleLines)+ignoredLines)
return []option.HeadlessRule{currentRule}, nil return []option.HeadlessRule{currentRule}, nil
} }

View File

@ -7,6 +7,7 @@ import (
"github.com/sagernet/sing-box/adapter" "github.com/sagernet/sing-box/adapter"
"github.com/sagernet/sing-box/route/rule" "github.com/sagernet/sing-box/route/rule"
"github.com/sagernet/sing/common/logger"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -24,7 +25,7 @@ example.arpa
@@|sagernet.example.org| @@|sagernet.example.org|
||sagernet.org^$important ||sagernet.org^$important
@@|sing-box.sagernet.org^$important @@|sing-box.sagernet.org^$important
`)) `), logger.NOP())
require.NoError(t, err) require.NoError(t, err)
require.Len(t, rules, 1) require.Len(t, rules, 1)
rule, err := rule.NewHeadlessRule(context.Background(), rules[0]) rule, err := rule.NewHeadlessRule(context.Background(), rules[0])
@ -83,7 +84,7 @@ func TestHosts(t *testing.T) {
127.0.0.1 localhost 127.0.0.1 localhost
::1 localhost #[IPv6] ::1 localhost #[IPv6]
0.0.0.0 google.com 0.0.0.0 google.com
`)) `), logger.NOP())
require.NoError(t, err) require.NoError(t, err)
require.Len(t, rules, 1) require.Len(t, rules, 1)
rule, err := rule.NewHeadlessRule(context.Background(), rules[0]) rule, err := rule.NewHeadlessRule(context.Background(), rules[0])
@ -113,7 +114,7 @@ func TestSimpleHosts(t *testing.T) {
rules, err := Convert(strings.NewReader(` rules, err := Convert(strings.NewReader(`
example.com example.com
www.example.org www.example.org
`)) `), logger.NOP())
require.NoError(t, err) require.NoError(t, err)
require.Len(t, rules, 1) require.Len(t, rules, 1)
rule, err := rule.NewHeadlessRule(context.Background(), rules[0]) rule, err := rule.NewHeadlessRule(context.Background(), rules[0])