feat: switch to local full surge sync and local merge-convert pipeline
This commit is contained in:
@@ -38,6 +38,8 @@ class Config:
|
||||
repo: str
|
||||
ref: str
|
||||
token: str | None
|
||||
source_mode: str
|
||||
local_source_root: str
|
||||
source_root: str
|
||||
source_filename_pattern: str
|
||||
output_dir: str
|
||||
@@ -150,6 +152,8 @@ def load_config(path: Path) -> Config:
|
||||
repo=gitea["repo"],
|
||||
ref=gitea.get("ref", "main"),
|
||||
token=token,
|
||||
source_mode=source.get("mode", "gitea"),
|
||||
local_source_root=source.get("local_root", "."),
|
||||
source_root=source.get("root", "rule/Surge"),
|
||||
source_filename_pattern=source.get("filename_pattern", "{name}.list"),
|
||||
output_dir=output.get("dir", "dist"),
|
||||
@@ -173,6 +177,19 @@ def parse_rules(content: str) -> list[RuleLine]:
|
||||
continue
|
||||
seen.add(line)
|
||||
|
||||
# Domain-only files (e.g. *_Domain.list) may contain plain host suffixes
|
||||
# without a rule prefix. Normalize them to DOMAIN-SUFFIX.
|
||||
if "," not in line:
|
||||
domain = line.lstrip(".").strip()
|
||||
if not domain:
|
||||
continue
|
||||
normalized = f"DOMAIN-SUFFIX,{domain}"
|
||||
if normalized in seen:
|
||||
continue
|
||||
seen.add(normalized)
|
||||
rules.append(RuleLine(raw=normalized, rule_type="DOMAIN-SUFFIX"))
|
||||
continue
|
||||
|
||||
parts = [part.strip() for part in line.split(",") if part.strip()]
|
||||
if not parts:
|
||||
continue
|
||||
@@ -301,11 +318,39 @@ def should_include_category(name: str, cfg: Config, cli_names: set[str]) -> bool
|
||||
return True
|
||||
|
||||
|
||||
def local_abs_path(cfg: Config, relative_path: str) -> Path:
    """Return *relative_path* anchored at the configured local source root.

    The root (``cfg.local_source_root``) has ``~`` expanded and is resolved
    to an absolute path before *relative_path* is appended to it.
    """
    root = Path(cfg.local_source_root).expanduser().resolve()
    return root.joinpath(relative_path)
|
||||
|
||||
|
||||
def list_dir_source(client: GiteaClient, cfg: Config, path: str) -> list[dict[str, Any]]:
    """List the entries under *path* from the configured source.

    When ``cfg.source_mode`` is ``"local"`` the directory is read from disk
    and each entry is a dict with a ``"name"`` key and a ``"type"`` key
    (``"dir"`` or ``"file"``). For any other mode the call is delegated to
    ``client.list_dir`` and its result is returned unchanged.

    Raises:
        RuntimeError: in local mode, when the resolved path is not a directory.
    """
    # Remote mode: hand straight off to the Gitea client.
    if cfg.source_mode != "local":
        return client.list_dir(cfg.owner, cfg.repo, path, cfg.ref)

    directory = local_abs_path(cfg, path)
    if not directory.is_dir():
        raise RuntimeError(f"Local source path is not a directory: {directory}")

    # Only the name and a dir/file type are reported for local entries.
    return [
        {"name": child.name, "type": "dir" if child.is_dir() else "file"}
        for child in directory.iterdir()
    ]
|
||||
|
||||
|
||||
def read_source_file(client: GiteaClient, cfg: Config, path: str) -> str:
    """Return the text of *path* from the configured source.

    When ``cfg.source_mode`` is ``"local"`` the file is read from disk as
    UTF-8, with undecodable bytes replaced rather than raising. For any other
    mode the call is delegated to ``client.read_file``.

    Raises:
        FileNotFoundError: in local mode, when the resolved path is not a
            regular file.
    """
    # Remote mode: hand straight off to the Gitea client.
    if cfg.source_mode != "local":
        return client.read_file(cfg.owner, cfg.repo, path, cfg.ref)

    target = local_abs_path(cfg, path)
    if target.is_file():
        return target.read_text(encoding="utf-8", errors="replace")
    raise FileNotFoundError(str(target))
|
||||
|
||||
|
||||
def find_categories(client: GiteaClient, cfg: Config, cli_names: set[str]) -> list[str]:
|
||||
if cfg.include_categories:
|
||||
return sorted([n for n in cfg.include_categories if should_include_category(n, cfg, cli_names)])
|
||||
|
||||
entries = client.list_dir(cfg.owner, cfg.repo, cfg.source_root, cfg.ref)
|
||||
entries = list_dir_source(client, cfg, cfg.source_root)
|
||||
categories: list[str] = []
|
||||
|
||||
for entry in entries:
|
||||
@@ -327,37 +372,62 @@ def find_categories(client: GiteaClient, cfg: Config, cli_names: set[str]) -> li
|
||||
return sorted(categories)
|
||||
|
||||
|
||||
def read_file_optional(client: GiteaClient, cfg: Config, candidate_paths: list[str]) -> tuple[str | None, str | None]:
    """Return the first readable candidate as ``(path, content)``.

    Candidates are tried in order via ``read_source_file``. A candidate that
    raises is skipped, so missing files are tolerated. When none of the
    candidates can be read, ``(None, None)`` is returned.

    The lookup stays best-effort, but each skipped candidate is logged at
    debug level so that real failures (network, permissions, bugs) are not
    completely invisible.
    """
    import logging

    log = logging.getLogger(__name__)

    for path in candidate_paths:
        try:
            content = read_source_file(client, cfg, path)
        except Exception as exc:  # best-effort: the exact error type depends on the source backend
            log.debug("optional source %s not readable: %r", path, exc)
            continue
        return path, content
    return None, None
|
||||
|
||||
|
||||
def build_one_category(client: GiteaClient, cfg: Config, name: str, base_out: Path) -> tuple[int, int, int, int]:
|
||||
filename_base = cfg.source_filename_pattern.format(name=name)
|
||||
candidate_filenames = [
|
||||
# Preferred merge model:
|
||||
# 1) <Name>.list (keyword/ua/ip with no-resolve)
|
||||
# 2) <Name>_Domain.list (domain rules)
|
||||
# 3) <Name>_Resolve.list (keyword/ua/ip without no-resolve)
|
||||
# Merge then dedupe.
|
||||
merge_filenames = [
|
||||
filename_base,
|
||||
f"{name}_All.list",
|
||||
f"{name}_Domain.list",
|
||||
f"{name}_Resolve.list",
|
||||
]
|
||||
|
||||
candidate_paths: list[str] = []
|
||||
for fn in candidate_filenames:
|
||||
candidate_paths.append(f"{cfg.source_root}/{name}/{fn}") # nested
|
||||
candidate_paths.append(f"{cfg.source_root}/{fn}") # flat
|
||||
merged_chunks: list[str] = []
|
||||
merged_sources: list[str] = []
|
||||
for fn in merge_filenames:
|
||||
nested = f"{cfg.source_root}/{name}/{fn}"
|
||||
flat = f"{cfg.source_root}/{fn}"
|
||||
src_path, src_content = read_file_optional(client, cfg, [nested, flat])
|
||||
if src_path and src_content is not None:
|
||||
merged_sources.append(src_path)
|
||||
merged_chunks.append(src_content)
|
||||
|
||||
source_rel_path = ""
|
||||
source_content = ""
|
||||
last_error: Exception | None = None
|
||||
for path in candidate_paths:
|
||||
try:
|
||||
source_content = client.read_file(cfg.owner, cfg.repo, path, cfg.ref)
|
||||
source_rel_path = path
|
||||
break
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
|
||||
if not source_rel_path:
|
||||
if last_error is not None:
|
||||
raise last_error
|
||||
raise RuntimeError(f"unable to locate source list for category: {name}")
|
||||
|
||||
rules = parse_rules(source_content)
|
||||
if merged_chunks:
|
||||
source_rel_path = " + ".join(merged_sources)
|
||||
rules = parse_rules("\n".join(merged_chunks))
|
||||
else:
|
||||
# Fallback for categories that only provide *_All.list or other variants.
|
||||
fallback_filenames = [
|
||||
f"{name}_All.list",
|
||||
f"{name}_Domain.list",
|
||||
f"{name}_Resolve.list",
|
||||
filename_base,
|
||||
]
|
||||
source_rel_path = ""
|
||||
source_content = ""
|
||||
for fn in fallback_filenames:
|
||||
nested = f"{cfg.source_root}/{name}/{fn}"
|
||||
flat = f"{cfg.source_root}/{fn}"
|
||||
src_path, src_content = read_file_optional(client, cfg, [nested, flat])
|
||||
if src_path and src_content is not None:
|
||||
source_rel_path = src_path
|
||||
source_content = src_content
|
||||
break
|
||||
if not source_rel_path:
|
||||
raise RuntimeError(f"unable to locate source list for category: {name}")
|
||||
rules = parse_rules(source_content)
|
||||
|
||||
surge_out = base_out / "surge" / f"{name}.list"
|
||||
loon_out = base_out / "loon" / f"{name}.list"
|
||||
|
||||
Reference in New Issue
Block a user