Add configurable upstream sync proxy and schedule settings
This commit is contained in:
@@ -9,7 +9,8 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
import threading
|
||||
from datetime import datetime
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from http import HTTPStatus
|
||||
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
|
||||
from pathlib import Path
|
||||
@@ -24,12 +25,183 @@ MYSQL_SEED_PATH = PROJECT_ROOT / "dist/mobilemodels_mysql_seed.sql"
|
||||
MYSQL_LOADER = PROJECT_ROOT / "tools/load_mysql_seed.py"
|
||||
DATA_ROOT = Path(os.environ.get("MOBILEMODELS_DATA_ROOT", "/data"))
|
||||
SYNC_METADATA_PATH = DATA_ROOT / "state/sync_status.json"
|
||||
SCHEDULE_CONFIG_PATH = DATA_ROOT / "state/sync_schedule.json"
|
||||
SYNC_LOCK = threading.Lock()
|
||||
SCHEDULE_LOCK = threading.Lock()
|
||||
NORMALIZE_RE = re.compile(r"[^0-9a-z\u4e00-\u9fff]+")
|
||||
SCHEDULE_TIME_RE = re.compile(r"^(?:[01]?\d|2[0-3]):[0-5]\d$")
|
||||
SCHEDULER_POLL_SECONDS = 20
|
||||
|
||||
|
||||
def truthy_env(name: str, default: str = "0") -> bool:
    """Return True when the named environment variable holds a truthy flag.

    Accepted truthy spellings (case-insensitive, whitespace-trimmed):
    "1", "true", "yes", "on". *default* is used when the variable is unset.
    """
    raw_value = os.environ.get(name, default)
    return raw_value.strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def apply_timezone_from_env() -> None:
    """Apply the TZ environment variable to the process clock, if possible."""
    tz_value = os.environ.get("TZ")
    if not tz_value:
        return
    try:
        time.tzset()
    except AttributeError:
        # time.tzset() does not exist on some platforms (e.g. Windows);
        # silently skip rather than crash at startup.
        return
|
||||
|
||||
|
||||
def mysql_auto_load_enabled() -> bool:
    """Return True when MYSQL_AUTO_LOAD requests automatic seed loading.

    Delegates to truthy_env so flag parsing stays consistent across the
    module; the rendered source carried a duplicated, unreachable second
    return from the old inline implementation, which is removed here.
    """
    return truthy_env("MYSQL_AUTO_LOAD", "0")
|
||||
|
||||
|
||||
def local_now() -> datetime:
    """Return the current moment as a timezone-aware local datetime."""
    current = datetime.now()
    return current.astimezone()
|
||||
|
||||
|
||||
def normalize_schedule_time(value: str | None, *, fallback: str = "03:00") -> str:
    """Normalize a daily schedule time string to zero-padded ``HH:MM``.

    A blank input takes *fallback*. A malformed input retries once with the
    fallback (an empty fallback forces the error path); if even that fails,
    a RuntimeError with a user-facing message is raised.
    """
    text = str(value or "").strip() or fallback
    if not SCHEDULE_TIME_RE.match(text):
        if fallback and text != fallback:
            # One retry with the fallback; fallback="" prevents recursing again.
            return normalize_schedule_time(fallback, fallback="")
        raise RuntimeError("每日同步时间格式必须为 HH:MM,例如 03:00。")
    hour_part, minute_part = text.split(":", 1)
    return f"{int(hour_part):02d}:{int(minute_part):02d}"
|
||||
|
||||
|
||||
def normalize_github_proxy_prefix(value: str | None) -> str:
|
||||
text = str(value or "").strip()
|
||||
if not text:
|
||||
return ""
|
||||
if "://" not in text:
|
||||
raise RuntimeError("GitHub 加速前缀必须包含协议,例如 https://ghfast.top/")
|
||||
if not text.endswith("/"):
|
||||
text = f"{text}/"
|
||||
return text
|
||||
|
||||
|
||||
def get_effective_repo_url(github_proxy_prefix: str | None = None) -> str:
    """Return the upstream repo URL, optionally routed through a proxy prefix.

    When *github_proxy_prefix* is None, the GITHUB_PROXY_PREFIX environment
    variable supplies the prefix instead.
    """
    if github_proxy_prefix is None:
        raw_prefix = os.environ.get("GITHUB_PROXY_PREFIX", "")
    else:
        raw_prefix = github_proxy_prefix
    prefix = normalize_github_proxy_prefix(raw_prefix)
    if not prefix:
        return DEFAULT_REPO_URL
    return f"{prefix}{DEFAULT_REPO_URL}"
|
||||
|
||||
|
||||
def compute_next_run_at(daily_time: str, now: datetime | None = None) -> str:
|
||||
current = now or local_now()
|
||||
hour_text, minute_text = daily_time.split(":", 1)
|
||||
candidate = current.replace(
|
||||
hour=int(hour_text),
|
||||
minute=int(minute_text),
|
||||
second=0,
|
||||
microsecond=0,
|
||||
)
|
||||
if candidate <= current:
|
||||
candidate += timedelta(days=1)
|
||||
return candidate.isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def default_schedule_config() -> dict[str, object]:
    """Build the schedule configuration derived purely from the environment.

    Run-history fields start as None; next_run_at is only populated when
    the schedule is enabled.
    """
    enabled = truthy_env("SYNC_SCHEDULE_ENABLED", "0")
    daily_time = normalize_schedule_time(os.environ.get("SYNC_SCHEDULE_TIME", "03:00"))
    tz_name = os.environ.get("TZ", "UTC").strip() or "UTC"
    proxy_prefix = normalize_github_proxy_prefix(os.environ.get("GITHUB_PROXY_PREFIX", ""))
    config: dict[str, object] = {
        "enabled": enabled,
        "daily_time": daily_time,
        "timezone": tz_name,
        "github_proxy_prefix": proxy_prefix,
        # Disabled schedules carry no next-run timestamp.
        "next_run_at": compute_next_run_at(daily_time) if enabled else None,
    }
    for history_key in ("last_run_time", "last_run_status", "last_run_message", "updated_at"):
        config[history_key] = None
    return config
|
||||
|
||||
|
||||
def normalize_schedule_config(raw: dict[str, object] | None) -> dict[str, object]:
    """Merge a stored schedule payload onto the environment-derived defaults.

    Each malformed field degrades to its environment default instead of
    raising, so a corrupt state file can never block startup. next_run_at
    is kept only when the schedule is enabled and the stored value parses
    as ISO-8601; otherwise it is recomputed (enabled) or cleared (disabled).
    """
    config = default_schedule_config()
    stored_next: object = None
    if isinstance(raw, dict):
        if "enabled" in raw:
            flag = raw.get("enabled")
            if isinstance(flag, bool):
                config["enabled"] = flag
            else:
                config["enabled"] = str(flag).strip().lower() in {"1", "true", "yes", "on"}
        if "daily_time" in raw:
            try:
                config["daily_time"] = normalize_schedule_time(str(raw.get("daily_time") or ""))
            except RuntimeError:
                # Bad stored value: fall back to the env-configured time.
                config["daily_time"] = normalize_schedule_time(os.environ.get("SYNC_SCHEDULE_TIME", "03:00"))
        if raw.get("timezone"):
            config["timezone"] = str(raw.get("timezone")).strip() or config["timezone"]
        if "github_proxy_prefix" in raw:
            try:
                config["github_proxy_prefix"] = normalize_github_proxy_prefix(str(raw.get("github_proxy_prefix") or ""))
            except RuntimeError:
                # Bad stored prefix: fall back to the env-configured one.
                config["github_proxy_prefix"] = normalize_github_proxy_prefix(os.environ.get("GITHUB_PROXY_PREFIX", ""))
        for history_key in ("last_run_time", "last_run_status", "last_run_message", "updated_at"):
            stored_value = raw.get(history_key)
            if stored_value is not None:
                config[history_key] = stored_value
        stored_next = raw.get("next_run_at")

    valid_next: str | None = None
    if isinstance(stored_next, str) and stored_next.strip():
        try:
            datetime.fromisoformat(stored_next)
            valid_next = stored_next
        except ValueError:
            valid_next = None
    if config["enabled"]:
        config["next_run_at"] = valid_next or compute_next_run_at(str(config["daily_time"]))
    else:
        config["next_run_at"] = None
    return config
|
||||
|
||||
|
||||
def read_schedule_config() -> dict[str, object]:
    """Load and normalize the persisted schedule config.

    A missing, unreadable, or non-object JSON file degrades to the
    environment defaults rather than raising.
    """
    with SCHEDULE_LOCK:
        if not SCHEDULE_CONFIG_PATH.exists():
            return normalize_schedule_config(None)
        try:
            payload = json.loads(SCHEDULE_CONFIG_PATH.read_text(encoding="utf-8"))
        except Exception:
            # Corrupt or unreadable file: behave as if nothing was stored.
            return normalize_schedule_config(None)
        if not isinstance(payload, dict):
            payload = None
        return normalize_schedule_config(payload)
|
||||
|
||||
|
||||
def write_schedule_config(payload: dict[str, object]) -> dict[str, object]:
    """Normalize *payload*, persist it as pretty-printed JSON, return it."""
    normalized = normalize_schedule_config(payload)
    serialized = json.dumps(normalized, ensure_ascii=False, indent=2)
    with SCHEDULE_LOCK:
        # Create the state directory lazily on first write.
        SCHEDULE_CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
        SCHEDULE_CONFIG_PATH.write_text(serialized, encoding="utf-8")
    return normalized
|
||||
|
||||
|
||||
def update_schedule_config(payload: dict[str, object]) -> dict[str, object]:
    """Apply a partial schedule update over the stored config and persist it.

    Fields absent from *payload* keep their current values; timezone is
    always refreshed from the TZ environment variable. Raises RuntimeError
    when daily_time or github_proxy_prefix is malformed.
    """
    current = read_schedule_config()
    enabled_raw = payload.get("enabled", current.get("enabled", False))
    if isinstance(enabled_raw, bool):
        enabled = enabled_raw
    else:
        enabled = str(enabled_raw).strip().lower() in {"1", "true", "yes", "on"}
    daily_time = normalize_schedule_time(
        str(payload.get("daily_time") or current.get("daily_time") or "03:00")
    )
    if "github_proxy_prefix" in payload:
        # Bug fix: `or ""` maps an explicit null/empty value to "clear the
        # proxy" — previously str(None) became "None" and the validator
        # rejected it. This matches normalize_schedule_config's handling.
        proxy_source = payload.get("github_proxy_prefix") or ""
    else:
        proxy_source = current.get("github_proxy_prefix") or ""
    github_proxy_prefix = normalize_github_proxy_prefix(str(proxy_source))
    updated = {
        **current,
        "enabled": enabled,
        "daily_time": daily_time,
        "timezone": os.environ.get("TZ", "UTC").strip() or "UTC",
        "github_proxy_prefix": github_proxy_prefix,
        "next_run_at": compute_next_run_at(daily_time) if enabled else None,
        "updated_at": local_now().isoformat(timespec="seconds"),
    }
    return write_schedule_config(updated)
|
||||
|
||||
|
||||
def mark_schedule_run(status: str, message: str) -> dict[str, object]:
    """Record the outcome of a sync run and roll next_run_at forward.

    *status* is one of "success"/"skipped"/"failed"; *message* is the
    human-readable detail shown in the status API.
    """
    current = read_schedule_config()
    if current.get("enabled"):
        next_run = compute_next_run_at(str(current.get("daily_time") or "03:00"))
    else:
        next_run = None
    return write_schedule_config({
        **current,
        "last_run_time": local_now().isoformat(timespec="seconds"),
        "last_run_status": status,
        "last_run_message": message,
        "next_run_at": next_run,
        "updated_at": local_now().isoformat(timespec="seconds"),
    })
|
||||
|
||||
|
||||
def run_command(args: list[str]) -> subprocess.CompletedProcess[str]:
|
||||
@@ -188,6 +360,9 @@ def get_status_payload() -> dict[str, object]:
|
||||
mysql_ready = False
|
||||
mysql_status = ""
|
||||
sync_metadata = read_sync_metadata()
|
||||
schedule_config = read_schedule_config()
|
||||
github_proxy_prefix = str(schedule_config.get("github_proxy_prefix") or "")
|
||||
effective_repo_url = get_effective_repo_url(github_proxy_prefix)
|
||||
if mysql_auto_load:
|
||||
mysql_proc = run_command(["python3", str(MYSQL_LOADER), "--check-only", "--wait-timeout", "5"])
|
||||
if mysql_proc.returncode == 0:
|
||||
@@ -206,13 +381,24 @@ def get_status_payload() -> dict[str, object]:
|
||||
"data_root": str(DATA_ROOT),
|
||||
"mysql_auto_load": mysql_auto_load,
|
||||
"upstream_repo_url": DEFAULT_REPO_URL,
|
||||
"effective_upstream_repo_url": effective_repo_url,
|
||||
"upstream_branch": DEFAULT_BRANCH,
|
||||
"last_sync_time": sync_metadata.get("last_sync_time"),
|
||||
"last_upstream_commit": sync_metadata.get("last_upstream_commit"),
|
||||
"last_sync_trigger": sync_metadata.get("last_trigger_source"),
|
||||
"index_file": str(INDEX_PATH.relative_to(PROJECT_ROOT)),
|
||||
"index_mtime": index_mtime,
|
||||
"mysql_seed_file": str(MYSQL_SEED_PATH.relative_to(PROJECT_ROOT)),
|
||||
"mysql_seed_mtime": mysql_seed_mtime,
|
||||
"sync_schedule_file": str(SCHEDULE_CONFIG_PATH.relative_to(DATA_ROOT)),
|
||||
"sync_schedule_enabled": schedule_config.get("enabled"),
|
||||
"sync_schedule_time": schedule_config.get("daily_time"),
|
||||
"sync_schedule_timezone": schedule_config.get("timezone"),
|
||||
"github_proxy_prefix": github_proxy_prefix,
|
||||
"sync_schedule_next_run": schedule_config.get("next_run_at"),
|
||||
"sync_schedule_last_run_time": schedule_config.get("last_run_time"),
|
||||
"sync_schedule_last_run_status": schedule_config.get("last_run_status"),
|
||||
"sync_schedule_last_run_message": schedule_config.get("last_run_message"),
|
||||
"mysql_host": mysql_host,
|
||||
"mysql_port": mysql_port,
|
||||
"mysql_database": mysql_database,
|
||||
@@ -223,13 +409,16 @@ def get_status_payload() -> dict[str, object]:
|
||||
}
|
||||
|
||||
|
||||
def run_upstream_sync() -> dict[str, object]:
|
||||
def run_upstream_sync(trigger_source: str = "manual") -> dict[str, object]:
|
||||
if not SYNC_LOCK.acquire(blocking=False):
|
||||
raise RuntimeError("已有同步任务在执行,请稍后再试。")
|
||||
|
||||
try:
|
||||
schedule_config = read_schedule_config()
|
||||
github_proxy_prefix = str(schedule_config.get("github_proxy_prefix") or "")
|
||||
effective_repo_url = get_effective_repo_url(github_proxy_prefix)
|
||||
upstream_proc = run_command(
|
||||
["git", "ls-remote", DEFAULT_REPO_URL, f"refs/heads/{DEFAULT_BRANCH}"]
|
||||
["git", "ls-remote", effective_repo_url, f"refs/heads/{DEFAULT_BRANCH}"]
|
||||
)
|
||||
upstream_commit = ""
|
||||
if upstream_proc.returncode == 0 and upstream_proc.stdout.strip():
|
||||
@@ -238,6 +427,7 @@ def run_upstream_sync() -> dict[str, object]:
|
||||
command = [
|
||||
"python3",
|
||||
str(SYNC_SCRIPT),
|
||||
f"--repo-url={effective_repo_url}",
|
||||
"--build-index",
|
||||
"--export-mysql-seed",
|
||||
]
|
||||
@@ -257,8 +447,11 @@ def run_upstream_sync() -> dict[str, object]:
|
||||
"workspace_root": str(WORKSPACE_ROOT),
|
||||
"data_root": str(DATA_ROOT),
|
||||
"upstream_repo_url": DEFAULT_REPO_URL,
|
||||
"effective_upstream_repo_url": effective_repo_url,
|
||||
"github_proxy_prefix": github_proxy_prefix,
|
||||
"upstream_branch": DEFAULT_BRANCH,
|
||||
"upstream_commit": upstream_commit,
|
||||
"trigger_source": trigger_source,
|
||||
"last_sync_time": datetime.now().isoformat(timespec="seconds"),
|
||||
"last_upstream_commit": upstream_commit,
|
||||
"index_file": str(INDEX_PATH.relative_to(PROJECT_ROOT)),
|
||||
@@ -274,7 +467,10 @@ def run_upstream_sync() -> dict[str, object]:
|
||||
write_sync_metadata({
|
||||
"last_sync_time": payload["last_sync_time"],
|
||||
"last_upstream_commit": payload["last_upstream_commit"],
|
||||
"last_trigger_source": trigger_source,
|
||||
"upstream_repo_url": DEFAULT_REPO_URL,
|
||||
"effective_upstream_repo_url": effective_repo_url,
|
||||
"github_proxy_prefix": github_proxy_prefix,
|
||||
"upstream_branch": DEFAULT_BRANCH,
|
||||
})
|
||||
return payload
|
||||
@@ -282,6 +478,54 @@ def run_upstream_sync() -> dict[str, object]:
|
||||
SYNC_LOCK.release()
|
||||
|
||||
|
||||
def run_scheduled_sync_if_due() -> None:
    """Run the scheduled upstream sync once its due timestamp has passed.

    A missing or unparseable next_run_at is repaired (rewritten with a
    freshly computed value) instead of triggering a run. Run outcomes are
    recorded in the schedule state as success/skipped/failed.
    """
    schedule_config = read_schedule_config()
    if not schedule_config.get("enabled"):
        return

    def repair_next_run() -> None:
        # Rewrite the config with a recomputed next_run_at so a later poll
        # can fire normally.
        write_schedule_config({
            **schedule_config,
            "next_run_at": compute_next_run_at(str(schedule_config.get("daily_time") or "03:00")),
        })

    next_run_text = str(schedule_config.get("next_run_at") or "").strip()
    if not next_run_text:
        repair_next_run()
        return

    try:
        due_at = datetime.fromisoformat(next_run_text)
    except ValueError:
        repair_next_run()
        return

    if local_now() < due_at:
        return

    try:
        payload = run_upstream_sync(trigger_source="schedule")
        mark_schedule_run("success", str(payload.get("output") or "定时同步完成。"))
        print(f"[scheduler] upstream sync completed at {local_now().isoformat(timespec='seconds')}")
    except RuntimeError as err:
        # The sync lock held by a concurrent manual run counts as a skip,
        # not a failure.
        outcome = "skipped" if "已有同步任务" in str(err) else "failed"
        mark_schedule_run(outcome, str(err))
        print(f"[scheduler] upstream sync {outcome}: {err}")
    except Exception as err:
        mark_schedule_run("failed", str(err))
        print(f"[scheduler] upstream sync failed: {err}")
|
||||
|
||||
|
||||
def scheduler_loop() -> None:
    """Poll forever, firing due scheduled syncs every SCHEDULER_POLL_SECONDS.

    Intended to run on a daemon thread; one failing poll is logged and
    never kills the loop.
    """
    while True:
        try:
            run_scheduled_sync_if_due()
        except Exception as err:
            print(f"[scheduler] loop error: {err}")
        time.sleep(SCHEDULER_POLL_SECONDS)
|
||||
|
||||
|
||||
class MobileModelsHandler(SimpleHTTPRequestHandler):
|
||||
def __init__(self, *args, **kwargs):
    # Serve static files rooted at the project directory rather than the
    # process working directory.
    super().__init__(*args, directory=str(PROJECT_ROOT), **kwargs)
|
||||
@@ -338,6 +582,35 @@ class MobileModelsHandler(SimpleHTTPRequestHandler):
|
||||
except Exception as err:
|
||||
self._send_json({"error": str(err)}, status=HTTPStatus.INTERNAL_SERVER_ERROR)
|
||||
return
|
||||
if self.path == "/api/sync-schedule":
|
||||
try:
|
||||
content_length = int(self.headers.get("Content-Length", "0") or "0")
|
||||
raw_body = self.rfile.read(content_length) if content_length > 0 else b"{}"
|
||||
req = json.loads(raw_body.decode("utf-8") or "{}")
|
||||
if not isinstance(req, dict):
|
||||
raise RuntimeError("请求体必须是 JSON 对象。")
|
||||
schedule_config = update_schedule_config(req)
|
||||
self._send_json(
|
||||
{
|
||||
"message": "同步设置已保存。",
|
||||
"sync_schedule_enabled": schedule_config.get("enabled"),
|
||||
"sync_schedule_time": schedule_config.get("daily_time"),
|
||||
"sync_schedule_timezone": schedule_config.get("timezone"),
|
||||
"github_proxy_prefix": schedule_config.get("github_proxy_prefix"),
|
||||
"effective_upstream_repo_url": get_effective_repo_url(
|
||||
str(schedule_config.get("github_proxy_prefix") or "")
|
||||
),
|
||||
"sync_schedule_next_run": schedule_config.get("next_run_at"),
|
||||
"sync_schedule_last_run_time": schedule_config.get("last_run_time"),
|
||||
"sync_schedule_last_run_status": schedule_config.get("last_run_status"),
|
||||
"sync_schedule_last_run_message": schedule_config.get("last_run_message"),
|
||||
}
|
||||
)
|
||||
except RuntimeError as err:
|
||||
self._send_json({"error": str(err)}, status=HTTPStatus.BAD_REQUEST)
|
||||
except Exception as err:
|
||||
self._send_json({"error": str(err)}, status=HTTPStatus.INTERNAL_SERVER_ERROR)
|
||||
return
|
||||
|
||||
self._send_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
|
||||
|
||||
@@ -350,7 +623,11 @@ def parse_args() -> argparse.Namespace:
|
||||
|
||||
|
||||
def main() -> int:
|
||||
apply_timezone_from_env()
|
||||
write_schedule_config(read_schedule_config())
|
||||
args = parse_args()
|
||||
scheduler = threading.Thread(target=scheduler_loop, name="sync-scheduler", daemon=True)
|
||||
scheduler.start()
|
||||
server = ThreadingHTTPServer((args.host, args.port), MobileModelsHandler)
|
||||
print(f"Serving MobileModels on http://{args.host}:{args.port}")
|
||||
server.serve_forever()
|
||||
|
||||
Reference in New Issue
Block a user