From b9ac36321ac0e0de9400a7ce4a355ac9d717d64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=A2=81=E9=9C=87?= Date: Mon, 6 Apr 2026 11:52:50 +0800 Subject: [PATCH] feat: initial gitea shunt rules generator --- .gitea/workflows/generate-rules.yml | 70 ++++++ .gitea/workflows/publish-rules.yml | 129 +++++++++++ .gitignore | 4 + README.md | 138 ++++++++++++ config.example.json | 19 ++ config.example.toml | 18 ++ main.py | 318 ++++++++++++++++++++++++++++ 7 files changed, 696 insertions(+) create mode 100644 .gitea/workflows/generate-rules.yml create mode 100644 .gitea/workflows/publish-rules.yml create mode 100644 .gitignore create mode 100644 README.md create mode 100644 config.example.json create mode 100644 config.example.toml create mode 100644 main.py diff --git a/.gitea/workflows/generate-rules.yml b/.gitea/workflows/generate-rules.yml new file mode 100644 index 000000000..7d73fd3c6 --- /dev/null +++ b/.gitea/workflows/generate-rules.yml @@ -0,0 +1,70 @@ +name: Generate Rules + +on: + workflow_dispatch: + schedule: + - cron: "0 3 * * *" + push: + branches: + - main + paths: + - main.py + - config.toml + - config.json + - .gitea/workflows/generate-rules.yml + +permissions: + contents: write + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Prepare config + shell: bash + run: | + if [ -f config.toml ]; then + echo "Use existing config.toml" + elif [ -f config.json ]; then + echo "Use existing config.json" + elif [ -f config.example.toml ]; then + cp config.example.toml config.toml + echo "Generated config.toml from example" + else + echo "No config file found" >&2 + exit 1 + fi + + - name: Generate rules + env: + GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }} + run: | + if [ -f config.toml ]; then + python3 main.py --config config.toml + else + python3 main.py --config config.json + fi + + - name: 
Commit and push dist changes + shell: bash + run: | + if [ -n "$(git status --porcelain dist)" ]; then + git config user.name "gitea-actions[bot]" + git config user.email "gitea-actions@localhost" + git add dist + git commit -m "chore: update generated rules" + git push + else + echo "No changes in dist/" + fi diff --git a/.gitea/workflows/publish-rules.yml b/.gitea/workflows/publish-rules.yml new file mode 100644 index 000000000..c70b12b5e --- /dev/null +++ b/.gitea/workflows/publish-rules.yml @@ -0,0 +1,129 @@ +name: Publish Rules To External Repo + +on: + workflow_dispatch: + schedule: + - cron: "15 3 * * *" + +permissions: + contents: read + +jobs: + publish: + runs-on: ubuntu-latest + + steps: + - name: Validate required secrets + shell: bash + run: | + missing=0 + for key in GITEA_BASE_URL GITEA_TOKEN SOURCE_OWNER SOURCE_REPO TARGET_OWNER TARGET_REPO; do + if [ -z "${!key}" ]; then + echo "Missing required secret: $key" >&2 + missing=1 + fi + done + if [ "$missing" -ne 0 ]; then + exit 1 + fi + env: + GITEA_BASE_URL: ${{ secrets.GITEA_BASE_URL }} + GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }} + SOURCE_OWNER: ${{ secrets.SOURCE_OWNER }} + SOURCE_REPO: ${{ secrets.SOURCE_REPO }} + TARGET_OWNER: ${{ secrets.TARGET_OWNER }} + TARGET_REPO: ${{ secrets.TARGET_REPO }} + + - name: Checkout generator repo + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Build runtime config + shell: bash + run: | + SOURCE_REF="${SOURCE_REF:-main}" + SOURCE_ROOT="${SOURCE_ROOT:-rule/Surge}" + CLASH_NO_RESOLVE="${CLASH_NO_RESOLVE:-false}" + + cat > config.runtime.json <&2 + exit 1 + fi + remote_url="${auth_url}/${TARGET_OWNER}/${TARGET_REPO}.git" + + rm -rf /tmp/rules-publish + git clone --depth=1 --branch "$target_branch" "$remote_url" /tmp/rules-publish || { + git clone --depth=1 "$remote_url" /tmp/rules-publish + cd /tmp/rules-publish + git checkout -b "$target_branch" + cd - + } + + rsync -a --delete dist/ 
/tmp/rules-publish/ + + cd /tmp/rules-publish + if [ -n "$(git status --porcelain)" ]; then + git config user.name "gitea-actions[bot]" + git config user.email "gitea-actions@localhost" + git add . + git commit -m "chore: publish generated rules" + git push origin "$target_branch" + echo "Published to ${TARGET_OWNER}/${TARGET_REPO}@${target_branch}" + else + echo "No publish changes" + fi + env: + GITEA_BASE_URL: ${{ secrets.GITEA_BASE_URL }} + GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }} + TARGET_OWNER: ${{ secrets.TARGET_OWNER }} + TARGET_REPO: ${{ secrets.TARGET_REPO }} + TARGET_BRANCH: ${{ vars.TARGET_BRANCH }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..0210ceaaf --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.pyc +config.toml +dist/ diff --git a/README.md b/README.md new file mode 100644 index 000000000..d8df13895 --- /dev/null +++ b/README.md @@ -0,0 +1,138 @@ +# gitea-shunt-rules + +一个最小可用的规则生成器: + +- 数据源来自 **Gitea** 仓库 +- 输入按目录分类(默认读取 `rule/Surge/{name}/{name}.list`) +- 输出仅包含你要的两种格式: + - `dist/surge/{name}.list` + - `dist/clash/{name}.yaml` + +## 为什么这个方案适合你 + +`ShuntRules` 公开仓库基本只留了发布索引,生成器不在仓库中。这个项目按它的输出思路重建了可控版本,并把数据源切到 Gitea,便于你自己托管。 + +## 运行环境 + +- Python 3.11+(可直接使用 TOML 配置) +- Python 3.10 也可用,但建议改用 JSON 配置(`config.example.json`) + +## 快速开始 + +1. 复制配置: + +```bash +cd gitea-shunt-rules +cp config.example.toml config.toml +``` + +2. 填写 `config.toml`: + +- `gitea.base_url`:你的 Gitea 地址,例如 `https://git.xxx.com` +- `gitea.owner` / `gitea.repo`:规则仓库 +- `source.root`:源规则根目录,默认 `rule/Surge` + +3. 设置 token(如果仓库私有): + +```bash +export GITEA_TOKEN='your-token' +``` + +4. 生成全部分类: + +```bash +python3 main.py --config config.toml +``` + +5. 
只生成部分分类: + +```bash +python3 main.py --config config.toml --names YouTube,Netflix +``` + +如果你是 Python 3.10,可用 JSON 配置: + +```bash +cp config.example.json config.json +python3 main.py --config config.json --names YouTube,Netflix +``` + +## 目录规范(推荐) + +建议你的 Gitea 仓库中按以下结构维护源数据: + +```text +rule/Surge/ + YouTube/ + YouTube.list + Netflix/ + Netflix.list +``` + +每个 `.list` 文件示例: + +```text +# 注释会被忽略 +DOMAIN-SUFFIX,youtube.com +DOMAIN-KEYWORD,youtube +IP-CIDR,172.110.32.0/21,no-resolve +USER-AGENT,*youtube* +``` + +## 转换规则说明 + +- Surge 输出:保留源规则(去重、清理空白) +- Clash 输出: + - 自动移除注释/空行 + - `USER-AGENT`、`URL-REGEX`、`DEST-PORT` 默认跳过(并在头部记录 `SKIPPED-*`) + - `IP-CIDR`/`IP-CIDR6` 可通过 `clash_no_resolve` 控制是否追加 `no-resolve` + +## 定时更新建议 + +你可以在 Gitea Actions 或系统 `cron` 做定时任务: + +1. 拉取源仓库 +2. 执行 `python3 main.py --config config.toml` +3. 提交 `dist/` 目录到发布仓库或对象存储 + +### Gitea Actions(已内置) + +项目已包含工作流文件: + +- `.gitea/workflows/generate-rules.yml` + +你只需要做这几步: + +1. 在仓库根目录放好 `config.toml` 或 `config.json` +2. 在 Gitea 仓库 Secrets 中添加 `GITEA_TOKEN`(私有源仓库必须配置) +3. 推送到 `main` 后会自动执行;也可在 Actions 页面手动触发 + +当前定时表达式是 `0 3 * * *`(UTC),对应北京时间(UTC+8)每天 `11:00`。 + +### 发布到独立仓库/分支(已内置) + +如果你希望“生成仓库”和“发布仓库”分离,使用: + +- `.gitea/workflows/publish-rules.yml` + +这个工作流会: + +1. 从你指定的源仓库读取规则并生成 `dist/` +2. 
把 `dist/` 内容同步到目标仓库的目标分支(可单独设为 `rules` / `gh-pages`) + +需要在 Gitea 仓库中配置: + +- Secrets: + - `GITEA_BASE_URL`:例如 `https://gitea.example.com` + - `GITEA_TOKEN`:需要有读取源仓库 + 推送目标仓库权限 + - `SOURCE_OWNER` + - `SOURCE_REPO` + - `TARGET_OWNER` + - `TARGET_REPO` +- Variables(可选): + - `SOURCE_REF`(默认 `main`) + - `SOURCE_ROOT`(默认 `rule/Surge`) + - `TARGET_BRANCH`(默认 `main`) + - `CLASH_NO_RESOLVE`(默认 `false`) + +该工作流当前定时为 `15 3 * * *`(UTC),对应北京时间每天 `11:15`,也支持手动触发。 diff --git a/config.example.json b/config.example.json new file mode 100644 index 000000000..e7bd54862 --- /dev/null +++ b/config.example.json @@ -0,0 +1,19 @@ +{ + "gitea": { + "base_url": "https://gitea.example.com", + "owner": "rules", + "repo": "ios_rule_script", + "ref": "main", + "token_env": "GITEA_TOKEN" + }, + "source": { + "root": "rule/Surge", + "filename_pattern": "{name}.list", + "include_categories": [], + "exclude_categories": [] + }, + "output": { + "dir": "dist", + "clash_no_resolve": false + } +} diff --git a/config.example.toml b/config.example.toml new file mode 100644 index 000000000..6a016bf31 --- /dev/null +++ b/config.example.toml @@ -0,0 +1,18 @@ +[gitea] +base_url = "https://gitea.example.com" +owner = "rules" +repo = "ios_rule_script" +ref = "main" +token_env = "GITEA_TOKEN" + +[source] +# Usually this is where Surge source rules are stored in your Gitea repo. 
#!/usr/bin/env python3
"""Generate Surge and Clash rule sets from rule lists stored in a Gitea repo.

The generator lists the category directories under ``source.root`` through
the Gitea contents API, downloads one rule list per selected category and
writes ``{output.dir}/surge/{name}.list`` and
``{output.dir}/clash/{name}.yaml``.

Configuration (TOML or JSON) uses three tables:

* ``[gitea]``: ``base_url``, ``owner``, ``repo`` (required), ``ref``
  (default ``"main"``), ``token_env`` (default ``"GITEA_TOKEN"``).
* ``[source]``: ``root`` (default ``"rule/Surge"``), ``filename_pattern``
  (default ``"{name}.list"``), ``include_categories`` and
  ``exclude_categories`` (default ``[]``).
* ``[output]``: ``dir`` (default ``"dist"``), ``clash_no_resolve``
  (default ``false``; when true, IP-CIDR/IP-CIDR6 entries in the Clash
  payload carry ``no-resolve``).
"""
from __future__ import annotations

import argparse
import base64
import json
import os
import sys
from collections import Counter
from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import quote, urlencode
from urllib.request import Request, urlopen

try:
    import tomllib
except ModuleNotFoundError:  # Python < 3.11
    tomllib = None


# Page size requested from the Gitea API when listing a directory.
DEFAULT_LIMIT = 100
# Seconds to wait for one HTTP request before giving up.
DEFAULT_TIMEOUT = 30.0
# Line prefixes treated as comments in source rule lists.
COMMENT_PREFIXES = ("#", ";", "//")
# Rule types that are dropped from the Clash payload.
UNSUPPORTED_CLASH_TYPES = {
    "USER-AGENT",
    "URL-REGEX",
    "DEST-PORT",  # Surge alias, Clash usually uses DST-PORT
}


@dataclass
class Config:
    """Resolved runtime settings built by :func:`load_config`."""

    base_url: str
    owner: str
    repo: str
    ref: str
    token: str | None
    source_root: str
    source_filename_pattern: str
    output_dir: str
    include_categories: list[str]
    exclude_categories: list[str]
    clash_no_resolve: bool


@dataclass(frozen=True)
class RuleLine:
    """One rule: ``raw`` is the comma-joined rule, ``rule_type`` its upper-cased type."""

    raw: str
    rule_type: str


def _entry_key(item: Any) -> str:
    """Return a stable identity for a directory entry, used to spot repeats."""
    if isinstance(item, dict):
        ident = item.get("path") or item.get("name")
        if ident:
            return str(ident)
    return json.dumps(item, sort_keys=True)


def collect_pages(
    fetch_page: Callable[[int], list[dict[str, Any]]],
    page_size: int,
) -> list[dict[str, Any]]:
    """Collect entries from a 1-based paged source, without duplicates.

    Args:
        fetch_page: Callable returning the entries of the given page number.
        page_size: Number of entries a full page holds.

    Returns:
        All distinct entries in the order they were first seen.

    Iteration stops on a short page, or when a page adds nothing new. The
    second condition matters for servers that ignore the paging parameters
    and return the complete listing every time: without it a directory with
    ``page_size`` or more entries would be requested forever.
    """
    collected: list[dict[str, Any]] = []
    seen: set[str] = set()
    page = 1
    while True:
        items = fetch_page(page)
        fresh = []
        for item in items:
            key = _entry_key(item)
            if key in seen:
                continue
            seen.add(key)
            fresh.append(item)
        collected.extend(fresh)
        if len(items) < page_size or not fresh:
            break
        page += 1
    return collected


class GiteaClient:
    """Minimal read-only client for the Gitea repository contents API."""

    def __init__(self, base_url: str, token: str | None, timeout: float = DEFAULT_TIMEOUT):
        """Store connection settings.

        Args:
            base_url: Gitea root URL; a trailing slash is removed.
            token: Access token, or ``None``/empty for anonymous access.
            timeout: Per-request timeout in seconds.
        """
        self.base_url = base_url.rstrip("/")
        self.token = token
        self.timeout = timeout

    @staticmethod
    def _contents_endpoint(owner: str, repo: str, path: str) -> str:
        """Build the URL path of the contents endpoint for ``path``."""
        encoded_path = quote(path.strip("/"), safe="/")
        return f"/api/v1/repos/{quote(owner)}/{quote(repo)}/contents/{encoded_path}"

    def _request_json(self, path: str, params: dict[str, Any] | None = None) -> Any:
        """GET ``path`` (plus optional query ``params``) and decode the JSON body.

        Raises whatever ``urlopen`` raises (e.g. ``HTTPError``, ``URLError``,
        timeouts) and ``json.JSONDecodeError`` for a non-JSON body.
        """
        url = f"{self.base_url}{path}"
        if params:
            # quote_via=quote with safe="/" keeps the original encoding
            # (spaces as %20, slashes left intact).
            url = f"{url}?{urlencode(params, quote_via=quote, safe='/')}"

        headers = {"Accept": "application/json"}
        if self.token:
            headers["Authorization"] = f"token {self.token}"

        req = Request(url, headers=headers)
        # A timeout keeps a stalled server from hanging a scheduled job forever.
        with urlopen(req, timeout=self.timeout) as resp:
            return json.loads(resp.read().decode("utf-8"))

    def list_dir(self, owner: str, repo: str, path: str, ref: str) -> list[dict[str, Any]]:
        """Return the entries of directory ``path`` at ``ref``.

        Raises:
            RuntimeError: If the API answers with a single object, i.e. the
                path is a file rather than a directory.
        """
        endpoint = self._contents_endpoint(owner, repo, path)

        def fetch_page(page: int) -> list[dict[str, Any]]:
            data = self._request_json(
                endpoint, {"ref": ref, "page": page, "limit": DEFAULT_LIMIT}
            )
            if isinstance(data, dict):
                raise RuntimeError(f"Path is not a directory: {path}")
            return list(data)

        return collect_pages(fetch_page, DEFAULT_LIMIT)

    def read_file(self, owner: str, repo: str, path: str, ref: str) -> str:
        """Return the text of file ``path`` at ``ref``.

        Invalid UTF-8 bytes are replaced rather than raising. An empty file
        yields an empty string.

        Raises:
            RuntimeError: If the path is not a file, the response carries no
                content, or the content is not base64-encoded.
        """
        endpoint = self._contents_endpoint(owner, repo, path)
        data = self._request_json(endpoint, {"ref": ref})
        if not isinstance(data, dict):
            raise RuntimeError(f"Path is not a file: {path}")

        content = data.get("content")
        encoding = data.get("encoding")
        if content is None:
            raise RuntimeError(f"Missing file content for path: {path}")
        if content == "":
            # An empty file is valid input, not an error.
            return ""
        if encoding != "base64":
            raise RuntimeError(f"Unsupported encoding ({encoding}) for path: {path}")

        return base64.b64decode(content).decode("utf-8", errors="replace")


def load_config(path: Path) -> Config:
    """Load a ``.json`` file as JSON, anything else as TOML, into a Config.

    The token is read from the environment variable named by
    ``gitea.token_env``; it is ``None`` when that variable is unset.

    Raises:
        RuntimeError: For a TOML file on an interpreter without ``tomllib``.
        KeyError: If ``gitea.base_url``, ``gitea.owner`` or ``gitea.repo``
            is missing.
    """
    if path.suffix.lower() == ".json":
        raw = json.loads(path.read_text(encoding="utf-8"))
    else:
        if tomllib is None:
            raise RuntimeError(
                "TOML config requires Python 3.11+. "
                "Use Python 3.11+ or provide a JSON config file."
            )
        raw = tomllib.loads(path.read_text(encoding="utf-8"))

    gitea = raw.get("gitea", {})
    source = raw.get("source", {})
    output = raw.get("output", {})

    token_env = gitea.get("token_env", "GITEA_TOKEN")
    token = os.getenv(token_env) if token_env else None

    return Config(
        base_url=gitea["base_url"],
        owner=gitea["owner"],
        repo=gitea["repo"],
        ref=gitea.get("ref", "main"),
        token=token,
        source_root=source.get("root", "rule/Surge"),
        source_filename_pattern=source.get("filename_pattern", "{name}.list"),
        output_dir=output.get("dir", "dist"),
        include_categories=source.get("include_categories", []),
        exclude_categories=source.get("exclude_categories", []),
        clash_no_resolve=output.get("clash_no_resolve", False),
    )


def parse_rules(content: str) -> list[RuleLine]:
    """Parse a rule list into de-duplicated, whitespace-normalised rules.

    Blank lines and comment lines (``#``, ``;``, ``//``) are skipped, as is a
    leading byte-order mark. Each rule is split on commas, fields are
    stripped and empty fields dropped. Duplicates are detected on that
    normalised form, so ``A,b`` and ``A, b`` count as the same rule. Order of
    first appearance is kept.
    """
    rules: list[RuleLine] = []
    seen: set[str] = set()

    for original in content.lstrip("\ufeff").splitlines():
        line = original.strip()
        if not line or line.startswith(COMMENT_PREFIXES):
            continue

        parts = [part.strip() for part in line.split(",") if part.strip()]
        if not parts:
            continue

        raw = ",".join(parts)
        if raw in seen:
            continue
        seen.add(raw)

        rules.append(RuleLine(raw=raw, rule_type=parts[0].upper()))

    return rules


def to_clash_payload_line(rule: RuleLine, no_resolve: bool) -> str | None:
    """Convert one rule to a Clash payload entry.

    Returns:
        The entry text, or ``None`` when the rule must be skipped: its type is
        in ``UNSUPPORTED_CLASH_TYPES``, it is empty, or it is an
        IP-CIDR/IP-CIDR6 rule without a network value.

    IP-CIDR/IP-CIDR6 entries keep only type and network; ``no-resolve`` is
    appended solely when ``no_resolve`` is true. Every other rule type has
    any ``no-resolve`` field removed.
    """
    parts = [p.strip() for p in rule.raw.split(",") if p.strip()]
    if not parts:
        return None

    rule_type = parts[0].upper()
    parts[0] = rule_type

    if rule_type in UNSUPPORTED_CLASH_TYPES:
        return None

    if rule_type in {"IP-CIDR", "IP-CIDR6"}:
        if len(parts) < 2:
            # No network value: emitting a bare "IP-CIDR" would be invalid.
            return None
        payload = [rule_type, parts[1]]
        if no_resolve:
            payload.append("no-resolve")
        return ",".join(payload)

    # Strip no-resolve from non-IP rules if present in source.
    filtered = [p for p in parts if p.lower() != "no-resolve"]
    return ",".join(filtered)


def _header_lines(name: str, source_path: str) -> list[str]:
    """Return the comment header shared by both outputs, stamped with UTC now."""
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    return [
        f"# NAME: {name}",
        "# AUTHOR: gitea-shunt-rules",
        f"# SOURCE: {source_path}",
        f"# UPDATED: {now}",
    ]


def format_surge(name: str, rules: list[RuleLine], source_path: str) -> str:
    """Render the Surge list: header, per-type counts, total, then the rules."""
    count = Counter(rule.rule_type for rule in rules)

    header = _header_lines(name, source_path)
    header.extend(f"# {k}: {count[k]}" for k in sorted(count))
    header.append(f"# TOTAL: {len(rules)}")

    body = [rule.raw for rule in rules]
    return "\n".join(header + body) + "\n"


def format_clash(name: str, rules: list[RuleLine], source_path: str, no_resolve: bool) -> str:
    """Render the Clash rule-provider YAML.

    The header lists counts of converted rules per type, ``SKIPPED-*`` counts
    for rules that could not be converted, and the payload total, followed by
    the ``payload:`` sequence.
    """
    payload: list[str] = []
    counter: Counter[str] = Counter()
    skipped: Counter[str] = Counter()

    for rule in rules:
        converted = to_clash_payload_line(rule, no_resolve=no_resolve)
        if converted is None:
            skipped[rule.rule_type] += 1
            continue
        payload.append(converted)
        counter[rule.rule_type] += 1

    lines = _header_lines(name, source_path)
    lines.extend(f"# {k}: {counter[k]}" for k in sorted(counter))
    lines.extend(f"# SKIPPED-{k}: {skipped[k]}" for k in sorted(skipped))
    lines.append(f"# TOTAL: {len(payload)}")
    lines.append("payload:")
    lines.extend(f"  - {item}" for item in payload)

    return "\n".join(lines) + "\n"


def should_include_category(name: str, cfg: Config, cli_names: set[str]) -> bool:
    """Tell whether category ``name`` passes the CLI and config filters.

    A non-empty ``cli_names`` or ``cfg.include_categories`` acts as an
    allow-list; ``cfg.exclude_categories`` always rejects.
    """
    if cli_names and name not in cli_names:
        return False
    if cfg.include_categories and name not in cfg.include_categories:
        return False
    if name in cfg.exclude_categories:
        return False
    return True


def find_categories(client: GiteaClient, cfg: Config, cli_names: set[str]) -> list[str]:
    """Return the sorted names of the selected sub-directories of the source root."""
    entries = client.list_dir(cfg.owner, cfg.repo, cfg.source_root, cfg.ref)
    categories: list[str] = []

    for entry in entries:
        if entry.get("type") != "dir":
            continue
        name = entry.get("name")
        if not name:
            continue
        if should_include_category(name, cfg, cli_names):
            categories.append(name)

    return sorted(categories)


def build_one_category(client: GiteaClient, cfg: Config, name: str, base_out: Path) -> tuple[int, int]:
    """Download one category and write its Surge and Clash files.

    Returns:
        ``(source_rule_count, clash_rule_count)``.
    """
    source_rel_path = f"{cfg.source_root}/{name}/{cfg.source_filename_pattern.format(name=name)}"
    source_content = client.read_file(cfg.owner, cfg.repo, source_rel_path, cfg.ref)

    rules = parse_rules(source_content)

    surge_out = base_out / "surge" / f"{name}.list"
    clash_out = base_out / "clash" / f"{name}.yaml"
    surge_out.parent.mkdir(parents=True, exist_ok=True)
    clash_out.parent.mkdir(parents=True, exist_ok=True)

    surge_out.write_text(format_surge(name, rules, source_rel_path), encoding="utf-8")
    clash_out.write_text(
        format_clash(name, rules, source_rel_path, no_resolve=cfg.clash_no_resolve),
        encoding="utf-8",
    )

    clash_count = sum(
        1
        for rule in rules
        if to_clash_payload_line(rule, no_resolve=cfg.clash_no_resolve) is not None
    )
    return len(rules), clash_count


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options; ``argv=None`` reads ``sys.argv``."""
    p = argparse.ArgumentParser(description="Generate Surge/Clash rules from Gitea source repo.")
    p.add_argument("--config", default="config.toml", help="Path to config file (TOML or JSON)")
    p.add_argument("--names", default="", help="Comma-separated category names, e.g. YouTube,Netflix")
    return p.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """Run the generator.

    Returns:
        ``0`` on success (including partial success), ``1`` when every
        selected category failed, ``2`` when no category matched the filters.
    """
    args = parse_args(argv)
    cfg = load_config(Path(args.config))
    names = {x.strip() for x in args.names.split(",") if x.strip()}

    client = GiteaClient(cfg.base_url, cfg.token)
    categories = find_categories(client, cfg, names)

    if not categories:
        print("No categories found after filtering.", file=sys.stderr)
        return 2

    out_dir = Path(cfg.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    total_source = 0
    total_clash = 0
    failed = 0

    print(f"Found {len(categories)} categories under {cfg.source_root}")
    for idx, name in enumerate(categories, start=1):
        try:
            s_cnt, c_cnt = build_one_category(client, cfg, name, out_dir)
        except Exception as exc:
            # Best effort: one broken category must not stop the others.
            failed += 1
            print(f"[{idx}/{len(categories)}] {name}: failed: {exc}", file=sys.stderr)
            continue
        total_source += s_cnt
        total_clash += c_cnt
        print(f"[{idx}/{len(categories)}] {name}: source={s_cnt}, clash={c_cnt}")

    print(f"Done. source_rules={total_source}, clash_rules={total_clash}, output={out_dir.resolve()}")
    if failed:
        print(f"{failed} of {len(categories)} categories failed.", file=sys.stderr)
    # Nothing was generated at all: report it so CI does not look green.
    if failed == len(categories):
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())