feat: switch to local full surge sync and local merge-convert pipeline

This commit is contained in:
袁震
2026-04-06 13:53:11 +08:00
parent f8ff7279c6
commit f1b218f57d
8 changed files with 169 additions and 697 deletions
+94 -24
View File
@@ -38,6 +38,8 @@ class Config:
repo: str
ref: str
token: str | None
source_mode: str
local_source_root: str
source_root: str
source_filename_pattern: str
output_dir: str
@@ -150,6 +152,8 @@ def load_config(path: Path) -> Config:
repo=gitea["repo"],
ref=gitea.get("ref", "main"),
token=token,
source_mode=source.get("mode", "gitea"),
local_source_root=source.get("local_root", "."),
source_root=source.get("root", "rule/Surge"),
source_filename_pattern=source.get("filename_pattern", "{name}.list"),
output_dir=output.get("dir", "dist"),
@@ -173,6 +177,19 @@ def parse_rules(content: str) -> list[RuleLine]:
continue
seen.add(line)
# Domain-only files (e.g. *_Domain.list) may contain plain host suffixes
# without a rule prefix. Normalize them to DOMAIN-SUFFIX.
if "," not in line:
domain = line.lstrip(".").strip()
if not domain:
continue
normalized = f"DOMAIN-SUFFIX,{domain}"
if normalized in seen:
continue
seen.add(normalized)
rules.append(RuleLine(raw=normalized, rule_type="DOMAIN-SUFFIX"))
continue
parts = [part.strip() for part in line.split(",") if part.strip()]
if not parts:
continue
@@ -301,11 +318,39 @@ def should_include_category(name: str, cfg: Config, cli_names: set[str]) -> bool
return True
def local_abs_path(cfg: Config, relative_path: str) -> Path:
    """Resolve *relative_path* against the configured local source root.

    The root is ``~``-expanded and made absolute before the join, so the
    returned path is always anchored under the resolved local root.
    """
    root = Path(cfg.local_source_root).expanduser().resolve()
    return root / relative_path
def list_dir_source(client: GiteaClient, cfg: Config, path: str) -> list[dict[str, Any]]:
    """List the entries of *path* from the configured source.

    In ``local`` mode the directory is read from disk; otherwise the Gitea
    API is queried.  Entries are dicts of the shape
    ``{"name": <basename>, "type": "dir" | "file"}``.

    Fix: ``Path.iterdir()`` yields children in arbitrary OS order, so local
    listings are sorted by name to make results deterministic and consistent
    with a stable remote listing.

    Raises:
        RuntimeError: in local mode, when *path* does not resolve to a directory.
    """
    if cfg.source_mode == "local":
        base = local_abs_path(cfg, path)
        if not base.is_dir():
            raise RuntimeError(f"Local source path is not a directory: {base}")
        return [
            {"name": child.name, "type": "dir" if child.is_dir() else "file"}
            for child in sorted(base.iterdir(), key=lambda p: p.name)
        ]
    return client.list_dir(cfg.owner, cfg.repo, path, cfg.ref)
def read_source_file(client: GiteaClient, cfg: Config, path: str) -> str:
    """Return the text content of *path* from the configured source.

    Local mode reads straight from disk (UTF-8, undecodable bytes replaced);
    any other mode delegates to the Gitea client.

    Raises:
        FileNotFoundError: in local mode, when the resolved path is not a file.
    """
    if cfg.source_mode != "local":
        return client.read_file(cfg.owner, cfg.repo, path, cfg.ref)
    file_path = local_abs_path(cfg, path)
    if not file_path.is_file():
        raise FileNotFoundError(str(file_path))
    return file_path.read_text(encoding="utf-8", errors="replace")
def find_categories(client: GiteaClient, cfg: Config, cli_names: set[str]) -> list[str]:
if cfg.include_categories:
return sorted([n for n in cfg.include_categories if should_include_category(n, cfg, cli_names)])
entries = client.list_dir(cfg.owner, cfg.repo, cfg.source_root, cfg.ref)
entries = list_dir_source(client, cfg, cfg.source_root)
categories: list[str] = []
for entry in entries:
@@ -327,37 +372,62 @@ def find_categories(client: GiteaClient, cfg: Config, cli_names: set[str]) -> li
return sorted(categories)
def read_file_optional(client: GiteaClient, cfg: Config, candidate_paths: list[str]) -> tuple[str | None, str | None]:
    """Probe *candidate_paths* in order and return the first readable one.

    Returns ``(path, content)`` for the first candidate that can be read,
    or ``(None, None)`` when none succeeds.  Read failures are swallowed on
    purpose: missing candidates are the expected case during probing.
    """
    for candidate in candidate_paths:
        try:
            content = read_source_file(client, cfg, candidate)
        except Exception:  # best-effort probe; absent files are normal here
            continue
        return candidate, content
    return None, None
def build_one_category(client: GiteaClient, cfg: Config, name: str, base_out: Path) -> tuple[int, int, int, int]:
filename_base = cfg.source_filename_pattern.format(name=name)
candidate_filenames = [
# Preferred merge model:
# 1) <Name>.list (keyword/ua/ip with no-resolve)
# 2) <Name>_Domain.list (domain rules)
# 3) <Name>_Resolve.list (keyword/ua/ip without no-resolve)
# Merge then dedupe.
merge_filenames = [
filename_base,
f"{name}_All.list",
f"{name}_Domain.list",
f"{name}_Resolve.list",
]
candidate_paths: list[str] = []
for fn in candidate_filenames:
candidate_paths.append(f"{cfg.source_root}/{name}/{fn}") # nested
candidate_paths.append(f"{cfg.source_root}/{fn}") # flat
merged_chunks: list[str] = []
merged_sources: list[str] = []
for fn in merge_filenames:
nested = f"{cfg.source_root}/{name}/{fn}"
flat = f"{cfg.source_root}/{fn}"
src_path, src_content = read_file_optional(client, cfg, [nested, flat])
if src_path and src_content is not None:
merged_sources.append(src_path)
merged_chunks.append(src_content)
source_rel_path = ""
source_content = ""
last_error: Exception | None = None
for path in candidate_paths:
try:
source_content = client.read_file(cfg.owner, cfg.repo, path, cfg.ref)
source_rel_path = path
break
except Exception as exc:
last_error = exc
if not source_rel_path:
if last_error is not None:
raise last_error
raise RuntimeError(f"unable to locate source list for category: {name}")
rules = parse_rules(source_content)
if merged_chunks:
source_rel_path = " + ".join(merged_sources)
rules = parse_rules("\n".join(merged_chunks))
else:
# Fallback for categories that only provide *_All.list or other variants.
fallback_filenames = [
f"{name}_All.list",
f"{name}_Domain.list",
f"{name}_Resolve.list",
filename_base,
]
source_rel_path = ""
source_content = ""
for fn in fallback_filenames:
nested = f"{cfg.source_root}/{name}/{fn}"
flat = f"{cfg.source_root}/{fn}"
src_path, src_content = read_file_optional(client, cfg, [nested, flat])
if src_path and src_content is not None:
source_rel_path = src_path
source_content = src_content
break
if not source_rel_path:
raise RuntimeError(f"unable to locate source list for category: {name}")
rules = parse_rules(source_content)
surge_out = base_out / "surge" / f"{name}.list"
loon_out = base_out / "loon" / f"{name}.list"