Add manual catalog management
This commit is contained in:
@@ -85,6 +85,8 @@ FILE_DEFAULT_DEVICE_TYPE: Dict[str, str] = {
|
||||
"zhixuan": "phone",
|
||||
}
|
||||
|
||||
# Location of the manual catalog JSON file; load_manual_catalog() joins it onto
# the repository root, and parse_manual_catalog() records it as the source_file
# of every manually added device.
MANUAL_SOURCE_FILE = "local/manual_catalog.json"
|
||||
|
||||
|
||||
BRAND_ALIASES: Dict[str, List[str]] = {
|
||||
"360": ["360", "360手机", "奇酷", "qiku"],
|
||||
@@ -239,6 +241,10 @@ class DeviceRecord:
|
||||
section: str
|
||||
|
||||
|
||||
# Alias lists from the manual catalog, keyed by brand name. Filled by
# apply_manual_brand_overrides(), emptied by reset_manual_overrides(), and
# merged into the result of brand_aliases().
MANUAL_BRAND_ALIAS_OVERRIDES: Dict[str, List[str]] = {}
|
||||
# Parent brand from the manual catalog, keyed by brand name. Filled by
# apply_manual_brand_overrides(), emptied by reset_manual_overrides(), and
# consulted first by resolve_parent_brand().
MANUAL_PARENT_BRAND_OVERRIDES: Dict[str, str] = {}
|
||||
|
||||
|
||||
def normalize_text(text: str) -> str:
    """Return *text* reduced to a comparison key.

    The text is lower-cased, then every run of characters other than the
    digits 0-9, the letters a-z, and the code points U+4E00 through U+9FFF
    is removed.
    """
    lowered = text.lower()
    return re.sub(r"[^0-9a-z\u4e00-\u9fff]+", "", lowered)
|
||||
|
||||
@@ -249,6 +255,7 @@ def canonical_brand(file_stem: str) -> str:
|
||||
|
||||
def brand_aliases(brand: str) -> List[str]:
    """Return the sorted, de-duplicated aliases for *brand*.

    The result combines the ``BRAND_ALIASES`` entry for the brand (if any),
    the ``MANUAL_BRAND_ALIAS_OVERRIDES`` entry (if any), and the brand name
    itself.
    """
    builtin = BRAND_ALIASES.get(brand, [])
    manual = MANUAL_BRAND_ALIAS_OVERRIDES.get(brand, [])
    return sorted({*builtin, *manual, brand})
|
||||
|
||||
@@ -265,9 +272,108 @@ def has_keyword(text: str, keywords: Iterable[str]) -> bool:
|
||||
|
||||
|
||||
def resolve_parent_brand(manufacturer_brand: str) -> str:
    """Return the parent brand for *manufacturer_brand*.

    A manual override takes precedence; otherwise the
    ``MANUFACTURER_PARENT_BRAND`` table is used, and a brand found in neither
    is returned unchanged.
    """
    try:
        return MANUAL_PARENT_BRAND_OVERRIDES[manufacturer_brand]
    except KeyError:
        # No manual override registered for this brand.
        return MANUFACTURER_PARENT_BRAND.get(manufacturer_brand, manufacturer_brand)
|
||||
|
||||
|
||||
def reset_manual_overrides() -> None:
    """Empty both manual override tables in place."""
    for table in (MANUAL_BRAND_ALIAS_OVERRIDES, MANUAL_PARENT_BRAND_OVERRIDES):
        table.clear()
|
||||
|
||||
|
||||
def normalize_alias_list(*groups: object) -> List[str]:
    """Flatten *groups* into a list of unique, stripped alias strings.

    Each group may be ``None`` (skipped), a list/tuple/set of items, or a
    single item. Items are converted with ``str`` (falsy items become the
    empty string) and stripped. Two aliases count as duplicates when
    ``normalize_text`` gives them the same key; the first spelling seen is
    kept. Aliases that are empty, or whose key is empty, are dropped.
    """
    # Dicts preserve insertion order, so the values come back in first-seen order.
    first_spelling: Dict[str, str] = {}
    for group in groups:
        if group is None:
            continue
        if isinstance(group, (list, tuple, set)):
            candidates = group
        else:
            candidates = [group]
        for candidate in candidates:
            cleaned = str(candidate or "").strip()
            key = normalize_text(cleaned)
            if cleaned and key:
                first_spelling.setdefault(key, cleaned)
    return list(first_spelling.values())
|
||||
|
||||
|
||||
def load_manual_catalog(repo_root: Path) -> dict[str, object]:
    """Read the manual catalog JSON file found under *repo_root*.

    Returns:
        A dict with exactly the keys ``"brands"`` and ``"devices"``. Each
        value is the list stored in the file under that key, or an empty
        list when the file does not exist or the stored value is not a list.

    Raises:
        RuntimeError: If the top-level JSON value is not an object.
    """
    catalog: dict[str, object] = {"brands": [], "devices": []}
    catalog_path = repo_root / MANUAL_SOURCE_FILE
    if not catalog_path.exists():
        return catalog

    payload = json.loads(catalog_path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise RuntimeError(f"{MANUAL_SOURCE_FILE} 必须是 JSON 对象。")

    # Only list-valued sections are accepted; anything else keeps the empty default.
    for section_name in ("brands", "devices"):
        section_value = payload.get(section_name)
        if isinstance(section_value, list):
            catalog[section_name] = section_value
    return catalog
|
||||
|
||||
|
||||
def apply_manual_brand_overrides(manual_catalog: dict[str, object]) -> dict[str, str]:
    """Rebuild the manual override tables from the catalog's ``"brands"`` list.

    Both override tables are cleared first. Every brand entry that is a dict
    with a non-blank ``"name"`` then registers its aliases (the name plus the
    entry's ``"aliases"``) and its parent brand (the entry's
    ``"parent_brand"``, falling back to the name itself).

    Returns:
        A mapping of brand name to parent brand for the entries applied.
    """
    reset_manual_overrides()
    parents: dict[str, str] = {}
    for entry in manual_catalog.get("brands", []):
        name = str(entry.get("name") or "").strip() if isinstance(entry, dict) else ""
        if not name:
            continue
        alias_list = normalize_alias_list(name, entry.get("aliases"))
        parent = str(entry.get("parent_brand") or name).strip()
        if not parent:
            # A whitespace-only parent_brand falls back to the brand itself.
            parent = name
        MANUAL_BRAND_ALIAS_OVERRIDES[name] = alias_list
        MANUAL_PARENT_BRAND_OVERRIDES[name] = parent
        parents[name] = parent
    return parents
|
||||
|
||||
|
||||
def parse_manual_catalog(repo_root: Path, manual_catalog: dict[str, object]) -> List[DeviceRecord]:
    """Convert the catalog's ``"devices"`` entries into ``DeviceRecord`` objects.

    The manual brand overrides are (re)applied first. Entries are skipped when
    they are not dicts, when ``"brand"`` or ``"device_name"`` is blank, or
    when no usable alias remains after normalization. ``repo_root`` is
    accepted but not read by this function.

    Returns:
        One record per accepted device entry, in catalog order.
    """

    def text_of(entry: dict, key: str, fallback: str) -> str:
        # Stripped string form of entry[key]; *fallback* when missing or blank.
        return str(entry.get(key) or fallback).strip() or fallback

    parent_by_brand = apply_manual_brand_overrides(manual_catalog)
    parsed: List[DeviceRecord] = []

    for entry in manual_catalog.get("devices", []):
        if not isinstance(entry, dict):
            continue

        brand = text_of(entry, "brand", "")
        device_name = text_of(entry, "device_name", "")
        if not (brand and device_name):
            continue

        # Model codes come first, then the display name, then any extra aliases.
        aliases = normalize_alias_list(
            entry.get("models"),
            device_name,
            entry.get("aliases"),
        )
        if not aliases:
            continue

        record_id = text_of(entry, "id", "")
        if not record_id:
            record_id = f"manual:{normalize_text(brand)}:{normalize_text(device_name)}"

        # A parent registered through the catalog's brand list wins over the
        # device entry's own "parent_brand" field.
        entry_parent = text_of(entry, "parent_brand", brand)
        parent_brand = parent_by_brand.get(brand, entry_parent)

        record = DeviceRecord(
            id=record_id,
            device_name=device_name,
            brand=brand,
            manufacturer_brand=brand,
            parent_brand=parent_brand,
            market_brand=brand,
            device_type=text_of(entry, "device_type", "other"),
            aliases=aliases,
            source_file=MANUAL_SOURCE_FILE,
            section=text_of(entry, "section", "手动补录"),
        )
        parsed.append(record)

    return parsed
|
||||
|
||||
|
||||
def infer_market_brand(
|
||||
manufacturer_brand: str,
|
||||
device_name: str,
|
||||
@@ -647,10 +753,13 @@ class DeviceMapper:
|
||||
def build_records(repo_root: Path) -> List[DeviceRecord]:
    """Collect device records from the brand markdown files and the manual catalog.

    Records parsed from ``brands/*.md`` (in sorted path order) come first,
    followed by the records built from the manual catalog.
    """
    manual_catalog = load_manual_catalog(repo_root)
    # Overrides are registered before the markdown files are parsed —
    # presumably so parse_brand_file() sees them; confirm against that function.
    # parse_manual_catalog() applies them again below.
    apply_manual_brand_overrides(manual_catalog)

    markdown_files = sorted((repo_root / "brands").glob("*.md"))
    records: List[DeviceRecord] = [
        record
        for md_path in markdown_files
        for record in parse_brand_file(md_path)
    ]
    records += parse_manual_catalog(repo_root, manual_catalog)
    return records
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user