Add manual catalog management

This commit is contained in:
2026-04-14 18:24:49 +08:00
parent 0c01b91fd7
commit 89bdf2ff61
7 changed files with 1062 additions and 15 deletions
+109
View File
@@ -85,6 +85,8 @@ FILE_DEFAULT_DEVICE_TYPE: Dict[str, str] = {
"zhixuan": "phone",
}
# Location of the hand-maintained catalog JSON, joined onto the repository
# root when loading; also recorded as the source file of manual records.
MANUAL_SOURCE_FILE = "local/manual_catalog.json"
BRAND_ALIASES: Dict[str, List[str]] = {
"360": ["360", "360手机", "奇酷", "qiku"],
@@ -239,6 +241,10 @@ class DeviceRecord:
section: str
# Extra aliases per brand taken from the manual catalog. Filled by
# apply_manual_brand_overrides(), merged into brand_aliases(), and emptied by
# reset_manual_overrides().
MANUAL_BRAND_ALIAS_OVERRIDES: Dict[str, List[str]] = {}
# Parent-brand assignments taken from the manual catalog. Consulted first by
# resolve_parent_brand(); filled and emptied alongside the alias table above.
MANUAL_PARENT_BRAND_OVERRIDES: Dict[str, str] = {}
def normalize_text(text: str) -> str:
    """Reduce *text* to a compact comparison key.

    The input is lowercased, then every run of characters other than ASCII
    digits, ASCII lowercase letters, and CJK ideographs in the range
    U+4E00 to U+9FFF is removed.
    """
    lowered = text.lower()
    # Characters outside the allowed classes are dropped, not replaced.
    disallowed = re.compile(r"[^0-9a-z\u4e00-\u9fff]+")
    return disallowed.sub("", lowered)
@@ -249,6 +255,7 @@ def canonical_brand(file_stem: str) -> str:
def brand_aliases(brand: str) -> List[str]:
    """Return the sorted, de-duplicated aliases for *brand*.

    The result combines the built-in alias table, any aliases registered
    from the manual catalog, and the brand name itself.
    """
    merged = {
        brand,
        *BRAND_ALIASES.get(brand, ()),
        *MANUAL_BRAND_ALIAS_OVERRIDES.get(brand, ()),
    }
    return sorted(merged)
@@ -265,9 +272,108 @@ def has_keyword(text: str, keywords: Iterable[str]) -> bool:
def resolve_parent_brand(manufacturer_brand: str) -> str:
    """Return the parent brand for *manufacturer_brand*.

    A manual override wins when one is registered; otherwise the built-in
    manufacturer-to-parent table is used, falling back to the brand itself.
    """
    overrides = MANUAL_PARENT_BRAND_OVERRIDES
    if manufacturer_brand not in overrides:
        return MANUFACTURER_PARENT_BRAND.get(manufacturer_brand, manufacturer_brand)
    return overrides[manufacturer_brand]
def reset_manual_overrides() -> None:
    """Empty both manual override tables in place."""
    for table in (MANUAL_BRAND_ALIAS_OVERRIDES, MANUAL_PARENT_BRAND_OVERRIDES):
        table.clear()
def normalize_alias_list(*groups: object) -> List[str]:
    """Flatten *groups* into an ordered list of unique alias strings.

    Each group may be ``None`` (ignored), a list/tuple/set of items, or a
    single item. Items are converted to stripped strings; falsy items and
    strings whose normalized form is empty are skipped. Two aliases count as
    duplicates when their ``normalize_text`` keys match, in which case the
    first spelling encountered is kept.
    """
    # Maps normalized key -> first spelling seen; dict order preserves the
    # order in which distinct aliases first appeared.
    first_spelling: dict[str, str] = {}
    for group in groups:
        if group is None:
            continue
        members = group if isinstance(group, (list, tuple, set)) else [group]
        for member in members:
            label = str(member or "").strip()
            key = normalize_text(label)
            if label and key:
                first_spelling.setdefault(key, label)
    return list(first_spelling.values())
def load_manual_catalog(repo_root: Path) -> dict[str, object]:
    """Read the manual catalog JSON found under *repo_root*.

    Returns a dict with exactly two keys, ``"brands"`` and ``"devices"``,
    each holding a list. A missing file yields two empty lists, and any
    section that is absent or not a list is replaced by an empty list.

    Raises:
        RuntimeError: if the file's top-level JSON value is not an object.
    """
    catalog: dict[str, object] = {"brands": [], "devices": []}
    manual_path = repo_root / MANUAL_SOURCE_FILE
    if manual_path.exists():
        payload = json.loads(manual_path.read_text(encoding="utf-8"))
        if not isinstance(payload, dict):
            raise RuntimeError(f"{MANUAL_SOURCE_FILE} 必须是 JSON 对象。")
        # Only accept sections that are real lists; anything else keeps the
        # empty default.
        for section_name in catalog:
            section_value = payload.get(section_name)
            if isinstance(section_value, list):
                catalog[section_name] = section_value
    return catalog
def apply_manual_brand_overrides(manual_catalog: dict[str, object]) -> dict[str, str]:
    """Rebuild the manual override tables from *manual_catalog*.

    Both override tables are cleared first. Every dict entry under
    ``"brands"`` with a non-empty ``"name"`` then registers its aliases
    (the name plus any ``"aliases"``) and its parent brand, which defaults
    to the brand's own name when ``"parent_brand"`` is missing or blank.

    Returns:
        A mapping of brand name to parent brand for the entries processed.
    """
    reset_manual_overrides()
    parents: dict[str, str] = {}
    brand_entries = (
        entry for entry in manual_catalog.get("brands", []) if isinstance(entry, dict)
    )
    for entry in brand_entries:
        name = str(entry.get("name") or "").strip()
        if name:
            alias_list = normalize_alias_list(name, entry.get("aliases"))
            declared_parent = str(entry.get("parent_brand") or name).strip()
            # A whitespace-only parent collapses to "", so fall back to name.
            parent = declared_parent if declared_parent else name
            MANUAL_BRAND_ALIAS_OVERRIDES[name] = alias_list
            MANUAL_PARENT_BRAND_OVERRIDES[name] = parent
            parents[name] = parent
    return parents
def parse_manual_catalog(repo_root: Path, manual_catalog: dict[str, object]) -> List[DeviceRecord]:
    """Convert the ``"devices"`` entries of *manual_catalog* into records.

    The brand overrides are (re)applied first. An entry is skipped when it is
    not a dict, lacks a brand or device name, or yields no usable aliases.
    Aliases are gathered from ``"models"``, the device name, and
    ``"aliases"``, in that order. The parent brand comes from the brand
    section when the brand is declared there, otherwise from the entry's own
    ``"parent_brand"``, otherwise from the brand itself.

    *repo_root* is accepted but not used by this function.
    """

    def cleaned(entry: dict, key: str, fallback: str = "") -> str:
        # Stringify a possibly missing or falsy value, trim it, and fall back
        # when nothing is left.
        return str(entry.get(key) or fallback).strip() or fallback

    parent_by_brand = apply_manual_brand_overrides(manual_catalog)
    parsed: List[DeviceRecord] = []
    for entry in manual_catalog.get("devices", []):
        if not isinstance(entry, dict):
            continue
        brand = cleaned(entry, "brand")
        device_name = cleaned(entry, "device_name")
        if not (brand and device_name):
            continue
        alias_list = normalize_alias_list(
            entry.get("models"),
            device_name,
            entry.get("aliases"),
        )
        if not alias_list:
            continue
        # Without an explicit id, derive one from the normalized names.
        record_id = cleaned(entry, "id") or (
            f"manual:{normalize_text(brand)}:{normalize_text(device_name)}"
        )
        if brand in parent_by_brand:
            parent_brand = parent_by_brand[brand]
        else:
            parent_brand = cleaned(entry, "parent_brand", brand)
        record = DeviceRecord(
            id=record_id,
            device_name=device_name,
            brand=brand,
            manufacturer_brand=brand,
            parent_brand=parent_brand,
            market_brand=brand,
            device_type=cleaned(entry, "device_type", "other"),
            aliases=alias_list,
            source_file=MANUAL_SOURCE_FILE,
            section=cleaned(entry, "section", "手动补录"),
        )
        parsed.append(record)
    return parsed
def infer_market_brand(
manufacturer_brand: str,
device_name: str,
@@ -647,10 +753,13 @@ class DeviceMapper:
def build_records(repo_root: Path) -> List[DeviceRecord]:
    """Collect every device record available under *repo_root*.

    The manual catalog is loaded and its brand overrides are applied before
    any brand file is parsed. Records from ``brands/*.md`` (in sorted path
    order) come first, followed by the records from the manual catalog.
    """
    manual_catalog = load_manual_catalog(repo_root)
    # Overrides must be in place before the brand files are parsed.
    apply_manual_brand_overrides(manual_catalog)
    brand_files = sorted((repo_root / "brands").glob("*.md"))
    collected: List[DeviceRecord] = [
        record for md_path in brand_files for record in parse_brand_file(md_path)
    ]
    collected.extend(parse_manual_catalog(repo_root, manual_catalog))
    return collected