Files
MobileModels/tools/device_mapper.py
T
2026-04-24 10:15:55 +08:00

905 lines
29 KiB
Python

#!/usr/bin/env python3
"""Build and query a cross-platform device mapping index from MobileModels markdown data."""
from __future__ import annotations
import argparse
from collections import Counter
import json
import re
from dataclasses import asdict, dataclass
from datetime import date
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Set
from project_layout import PROJECT_ROOT, WORKSPACE_ROOT
# Entry heading: the whole line is a single bold span (**...**); group 1 is the raw title.
ENTRY_RE = re.compile(r"^\*\*(.+?)\*\*\s*$")
# Variant line: one or more backtick-quoted codes, a colon, then the variant name.
# Group 1 is the run of codes, group 2 the name.
VARIANT_RE = re.compile(r"^\s*((?:`[^`]+`\s*)+):\s*(.+?)\s*$")
# A single backtick-quoted code; group 1 is the text between the backticks.
BACKTICK_RE = re.compile(r"`([^`]+)`")
# Level-2 markdown heading ("## Title"); group 1 is the heading text.
SECTION_RE = re.compile(r"^##\s+(.+?)\s*$")
# Brand markdown file stem -> canonical brand name.
# Stems that are not listed here fall back to the stem itself (see canonical_brand).
FILE_BRAND_MAP: Dict[str, str] = {
    "360shouji": "360",
    # Apple
    "apple_all": "Apple",
    "apple_all_en": "Apple",
    "apple_cn": "Apple",
    # ASUS
    "asus_cn": "ASUS",
    "asus_en": "ASUS",
    # Black Shark
    "blackshark": "Black Shark",
    "blackshark_en": "Black Shark",
    "coolpad": "Coolpad",
    "google": "Google",
    # HONOR / HUAWEI
    "honor_cn": "HONOR",
    "honor_global_en": "HONOR",
    "huawei_cn": "HUAWEI",
    "huawei_global_en": "HUAWEI",
    "lenovo_cn": "Lenovo",
    "letv": "LeTV",
    # Meizu
    "meizu": "Meizu",
    "meizu_en": "Meizu",
    # Xiaomi TV files
    "mitv_cn": "Xiaomi",
    "mitv_global_en": "Xiaomi",
    "motorola_cn": "Motorola",
    "nokia_cn": "Nokia",
    "nothing": "Nothing",
    "nubia": "nubia",
    # OnePlus / OPPO / realme
    "oneplus": "OnePlus",
    "oneplus_en": "OnePlus",
    "oppo_cn": "OPPO",
    "oppo_global_en": "OPPO",
    "realme_cn": "realme",
    "realme_global_en": "realme",
    # Samsung
    "samsung_cn": "Samsung",
    "samsung_global_en": "Samsung",
    "smartisan": "Smartisan",
    # Sony
    "sony": "Sony",
    "sony_cn": "Sony",
    # vivo
    "vivo_cn": "vivo",
    "vivo_global_en": "vivo",
    # Xiaomi
    "xiaomi": "Xiaomi",
    "xiaomi_cn": "Xiaomi",
    "xiaomi_en": "Xiaomi",
    "xiaomi-wear": "Xiaomi",
    "zhixuan": "HUAWEI Smart Selection",
    "zte_cn": "ZTE",
}
# Brand markdown file stem -> device type used when no keyword decides the type.
# parse_brand_file falls back to "phone" for stems that are not listed.
FILE_DEFAULT_DEVICE_TYPE: Dict[str, str] = {
    # Non-phone catalogues
    "mitv_cn": "tv",
    "mitv_global_en": "tv",
    "xiaomi-wear": "wear",
    # Explicit phone defaults
    "apple_all": "phone",
    "apple_all_en": "phone",
    "apple_cn": "phone",
    "google": "phone",
    "honor_cn": "phone",
    "honor_global_en": "phone",
    "huawei_cn": "phone",
    "huawei_global_en": "phone",
    "xiaomi": "phone",
    "xiaomi_cn": "phone",
    "xiaomi_en": "phone",
    "zhixuan": "phone",
}
# Path of the hand-maintained catalogue, relative to the repo root (see
# load_manual_catalog). Also stored as `source_file` on manually added records.
MANUAL_SOURCE_FILE = "local/manual_catalog.json"
# Canonical brand name -> spellings accepted for that brand.
# Aliases are compared after normalize_text(), so case and punctuation are ignored.
BRAND_ALIASES: Dict[str, List[str]] = {
    "360": ["360", "360手机", "奇酷", "qiku"],
    "Apple": ["apple", "苹果", "iphone", "ipad", "ipod"],
    "ASUS": ["asus", "华硕", "rog", "zenfone"],
    "Black Shark": ["black shark", "blackshark", "黑鲨"],
    "Coolpad": ["coolpad", "酷派"],
    "Google": ["google", "pixel"],
    "HONOR": ["honor", "荣耀"],
    "HUAWEI": ["huawei", "华为"],
    "HUAWEI Smart Selection": [
        "华为智选",
        "zhixuan",
        "umagic",
        "wiko",
        "hi nova",
        "nzone",
    ],
    "Lenovo": ["lenovo", "联想", "zuk", "拯救者"],
    "LeTV": ["letv", "乐视"],
    "Meizu": ["meizu", "魅族"],
    "Motorola": ["motorola", "摩托罗拉", "moto"],
    "Nokia": ["nokia", "诺基亚"],
    "Nothing": ["nothing", "cmf"],
    "nubia": ["nubia", "努比亚", "红魔", "redmagic"],
    "iQOO": ["iqoo", "i qoo", "艾酷"],
    "OnePlus": ["oneplus", "一加"],
    "OPPO": ["oppo"],
    "POCO": ["poco"],
    "Redmi": ["redmi", "红米", "hongmi"],
    "realme": ["realme", "真我"],
    "Samsung": ["samsung", "三星", "galaxy"],
    "Smartisan": ["smartisan", "锤子", "坚果"],
    "Sony": ["sony", "索尼", "xperia"],
    "vivo": ["vivo"],
    "Xiaomi": ["xiaomi", "小米", "mi", "米家", "mipad"],
    "ZTE": ["zte", "中兴"],
}
# Manufacturer brand -> parent brand it is grouped under.
# Brands that are not listed are their own parent (see resolve_parent_brand).
MANUFACTURER_PARENT_BRAND: Dict[str, str] = {
    "Black Shark": "Xiaomi",
    "HUAWEI Smart Selection": "HUAWEI",
    "Motorola": "Lenovo",
    "iQOO": "vivo",
    "POCO": "Xiaomi",
    "Redmi": "Xiaomi",
    "OnePlus": "OPPO",
    "realme": "OPPO",
    "nubia": "ZTE",
}
# Market (sub-)brand -> spellings accepted for it.
# Used to infer a record's market brand and to parse brand filters.
MARKET_BRAND_ALIASES: Dict[str, List[str]] = {
    "iQOO": ["iqoo", "i qoo", "艾酷"],
    "POCO": ["poco"],
    "Redmi": ["redmi", "红米", "hongmi"],
    "Xiaomi": ["xiaomi", "小米", "mi", "mipad", "米家"],
}

# Market brand -> manufacturer brand whose records carry that market brand.
MARKET_BRAND_TO_MANUFACTURER: Dict[str, str] = {
    "iQOO": "vivo",
    "POCO": "Xiaomi",
    "Redmi": "Xiaomi",
    "Xiaomi": "Xiaomi",
}
# Keywords that classify a device as "tv" (checked first by infer_device_type).
TV_KEYWORDS = [
    "tv", "电视", "智慧屏", "smart tv",
    "机顶盒", "tv box", "stick", "dongle",
]
# Keywords that classify a device as "tablet".
TABLET_KEYWORDS = [
    "ipad", "tablet", "tab",
    "pad", "平板", "matepad",
]
# Keywords that classify a device as "wear" (watches, bands, earbuds, glasses).
WEAR_KEYWORDS = [
    # watches and bands
    "watch", "smartwatch", "手表", "手环", "band",
    "wear", "wearable",
    # audio
    "buds", "earbuds", "耳机", "tws",
    # glasses
    "eyewear", "glasses", "眼镜",
]
# Keywords that classify a device as "computer".
COMPUTER_KEYWORDS = [
    "matebook", "macbook", "笔记本", "电脑",
    "laptop", "notebook", "desktop", "workstation",
]
# Keywords that classify a device as "other".
OTHER_KEYWORDS = ["vision", "vr", "ipod", "airpods"]
# Keywords that classify a device as "phone" (checked last by infer_device_type).
PHONE_KEYWORDS = [
    # generic terms
    "iphone", "phone", "手机",
    # brand and series names
    "galaxy", "pixel", "xiaomi", "redmi", "poco",
    "honor", "huawei", "mate", "nova",
    "oppo", "vivo", "realme", "oneplus",
    "nokia", "nubia", "meizu", "lenovo", "motorola",
    "zte", "smartisan", "zenfone", "rog",
    # Chinese series names
    "麦芒", "畅享", "优畅享",
]
@dataclass
class DeviceRecord:
    """One device entry of the mapping index.

    Records are produced either from a brand markdown file
    (``parse_brand_file``) or from the manual catalogue
    (``parse_manual_catalog``) and are serialised with ``dataclasses.asdict``.
    """

    # Record key: "<file stem>:<n>" for markdown entries; for manual entries the
    # supplied id or "manual:<brand>:<device name>".
    id: str
    # Cleaned display name of the device.
    device_name: str
    # Canonical brand; both parsers store the same value in manufacturer_brand.
    brand: str
    manufacturer_brand: str
    # Brand the manufacturer is grouped under.
    parent_brand: str
    # Brand the device is marketed as (e.g. Redmi/POCO for Xiaomi records).
    market_brand: str
    # Device category such as "phone", "tablet", "tv", "wear", "computer", "other".
    device_type: str
    # Every name or code that should resolve to this record.
    aliases: List[str]
    # "brands/<file>.md" for markdown entries, MANUAL_SOURCE_FILE for manual ones.
    source_file: str
    # Markdown section heading (or manual section label) the entry belongs to.
    section: str
# Module-level state filled by apply_manual_brand_overrides() and emptied by
# reset_manual_overrides().
# Brand name -> extra aliases from the manual catalogue (merged in brand_aliases).
MANUAL_BRAND_ALIAS_OVERRIDES: Dict[str, List[str]] = {}
# Brand name -> parent brand from the manual catalogue; takes precedence over
# MANUFACTURER_PARENT_BRAND in resolve_parent_brand.
MANUAL_PARENT_BRAND_OVERRIDES: Dict[str, str] = {}
def normalize_text(text: str) -> str:
    """Return *text* lower-cased with everything but 0-9, a-z and CJK removed.

    Only characters in the ranges ``0-9``, ``a-z`` and ``U+4E00``-``U+9FFF``
    survive; the surviving runs are concatenated without separators.
    """
    lowered = text.lower()
    kept_runs = re.findall(r"[0-9a-z\u4e00-\u9fff]+", lowered)
    return "".join(kept_runs)
def canonical_brand(file_stem: str) -> str:
    """Return the canonical brand for a brand-file stem, or the stem if unmapped."""
    try:
        return FILE_BRAND_MAP[file_stem]
    except KeyError:
        return file_stem
def brand_aliases(brand: str) -> List[str]:
    """Return the sorted, de-duplicated aliases known for *brand*.

    Combines the built-in aliases, any manual alias overrides and the brand
    name itself.
    """
    builtin = BRAND_ALIASES.get(brand, [])
    manual = MANUAL_BRAND_ALIAS_OVERRIDES.get(brand, [])
    return sorted({brand, *builtin, *manual})
def has_keyword(text: str, keywords: Iterable[str]) -> bool:
    """Return True if any keyword occurs in *text* after normalisation.

    Both sides are lower-cased, every run of characters outside 0-9, a-z and
    CJK is turned into a single space, and the ends are trimmed. Matching is a
    plain substring test, so a keyword also matches inside a longer word.
    Keywords that normalise to an empty string are ignored.

    NOTE(review): substring matching means e.g. "pad" is found in "coolpad"
    and "tv" in "letv" - confirm this is intended by the callers.
    """

    def squash(value: str) -> str:
        spaced = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", value.lower())
        return " ".join(spaced.split())

    haystack = squash(text)
    return any(
        needle in haystack
        for needle in map(squash, keywords)
        if needle
    )
def resolve_parent_brand(manufacturer_brand: str) -> str:
    """Return the parent brand of *manufacturer_brand*.

    Manual overrides win over the built-in table; a brand found in neither is
    its own parent.
    """
    builtin_parent = MANUFACTURER_PARENT_BRAND.get(manufacturer_brand, manufacturer_brand)
    return MANUAL_PARENT_BRAND_OVERRIDES.get(manufacturer_brand, builtin_parent)
def reset_manual_overrides() -> None:
    """Empty both manual override tables in place."""
    for table in (MANUAL_BRAND_ALIAS_OVERRIDES, MANUAL_PARENT_BRAND_OVERRIDES):
        table.clear()
def normalize_alias_list(*groups: object) -> List[str]:
    """Flatten *groups* into a list of unique, non-empty alias strings.

    Each group may be ``None`` (skipped), a list/tuple/set of items, or a
    single item. Items are converted with ``str`` and stripped; falsy items
    become empty and are dropped. Two aliases are duplicates when their
    ``normalize_text`` forms are equal; the first spelling seen is kept and
    first-seen order is preserved.
    """
    # normalised key -> first spelling seen (dicts keep insertion order)
    first_spelling: Dict[str, str] = {}
    for group in groups:
        if group is None:
            continue
        if isinstance(group, (list, tuple, set)):
            members = group
        else:
            members = [group]
        for member in members:
            spelling = str(member or "").strip()
            key = normalize_text(spelling)
            if spelling and key and key not in first_spelling:
                first_spelling[key] = spelling
    return list(first_spelling.values())
def is_preferred_cn_source_file(source_file: str) -> bool:
    """Return True if *source_file* is a preferred (non-English) source.

    The manual catalogue always counts as preferred. An empty value is never
    preferred. Any other file is preferred unless its name ends with
    ``_en.md`` or ``_global_en.md``. The comparison is case-insensitive.
    """
    name = str(source_file or "").strip().lower()
    if not name:
        return False
    if name == MANUAL_SOURCE_FILE:
        return True
    is_english = name.endswith(("_en.md", "_global_en.md"))
    return not is_english
def first_preferred_match(records: List[DeviceRecord]) -> List[DeviceRecord]:
    """Return a list with at most one record: the best match in *records*.

    The first record from a preferred source file is chosen; if there is
    none, the first record overall. An empty input gives an empty list.
    """
    for record in records:
        if is_preferred_cn_source_file(record.source_file):
            return [record]
    # No preferred source present: fall back to the first record, if any.
    return records[:1]
def load_manual_catalog(repo_root: Path) -> dict[str, object]:
    """Load the manual catalogue JSON below *repo_root*.

    Returns a dict with exactly the keys ``"brands"`` and ``"devices"``. A
    missing file, or a key whose value is not a list, yields an empty list
    for that key.

    Raises:
        RuntimeError: if the file's top-level JSON value is not an object.
    """
    catalog_path = repo_root / MANUAL_SOURCE_FILE
    if not catalog_path.exists():
        return {"brands": [], "devices": []}

    payload = json.loads(catalog_path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise RuntimeError(f"{MANUAL_SOURCE_FILE} 必须是 JSON 对象。")

    catalog: dict[str, object] = {}
    for key in ("brands", "devices"):
        value = payload.get(key)
        catalog[key] = value if isinstance(value, list) else []
    return catalog
def apply_manual_brand_overrides(manual_catalog: dict[str, object]) -> dict[str, str]:
    """Rebuild the manual override tables from the catalogue's brand entries.

    Both override tables are cleared first. Entries that are not dicts or
    have no usable ``name`` are skipped. For each remaining brand the alias
    override (name plus listed aliases) and the parent override (the
    ``parent_brand`` value, defaulting to the brand itself) are stored.

    Returns:
        Mapping of brand name to the parent brand that was registered.
    """
    reset_manual_overrides()
    parents: dict[str, str] = {}
    for entry in manual_catalog.get("brands", []):
        if not isinstance(entry, dict):
            continue
        name = str(entry.get("name") or "").strip()
        if not name:
            continue
        parent = str(entry.get("parent_brand") or name).strip() or name
        MANUAL_BRAND_ALIAS_OVERRIDES[name] = normalize_alias_list(name, entry.get("aliases"))
        MANUAL_PARENT_BRAND_OVERRIDES[name] = parent
        parents[name] = parent
    return parents
def parse_manual_catalog(repo_root: Path, manual_catalog: dict[str, object]) -> List[DeviceRecord]:
    """Turn the manual catalogue's device entries into records.

    Re-applies the manual brand overrides first. Entries that are not dicts,
    lack a brand or device name, or end up without any alias are skipped.
    ``repo_root`` is accepted but not used by this function.

    Returns:
        Records in catalogue order, all with ``source_file`` set to
        ``MANUAL_SOURCE_FILE``.
    """
    parents_by_brand = apply_manual_brand_overrides(manual_catalog)

    def text_field(entry: dict, key: str) -> str:
        # Missing or falsy values become "".
        return str(entry.get(key) or "").strip()

    parsed: List[DeviceRecord] = []
    for entry in manual_catalog.get("devices", []):
        if not isinstance(entry, dict):
            continue
        brand = text_field(entry, "brand")
        device_name = text_field(entry, "device_name")
        if not (brand and device_name):
            continue

        # Model codes first, then the display name, then free-form aliases.
        aliases = normalize_alias_list(
            entry.get("models"),
            device_name,
            entry.get("aliases"),
        )
        if not aliases:
            continue

        # A parent registered for the brand wins over the per-device value.
        if brand in parents_by_brand:
            parent_brand = parents_by_brand[brand]
        else:
            parent_brand = str(entry.get("parent_brand") or brand).strip() or brand

        default_id = f"manual:{normalize_text(brand)}:{normalize_text(device_name)}"
        parsed.append(
            DeviceRecord(
                id=text_field(entry, "id") or default_id,
                device_name=device_name,
                brand=brand,
                manufacturer_brand=brand,
                parent_brand=parent_brand,
                market_brand=brand,
                device_type=text_field(entry, "device_type") or "other",
                aliases=aliases,
                source_file=MANUAL_SOURCE_FILE,
                section=str(entry.get("section") or "手动补录").strip() or "手动补录",
            )
        )
    return parsed
def infer_market_brand(
    manufacturer_brand: str,
    device_name: str,
    section: str,
    aliases: Iterable[str],
) -> str:
    """Return the market brand a record is sold under.

    The device name, section and aliases are joined and normalised; market
    brand aliases are then searched in that text as substrings. Xiaomi
    records become "POCO" or "Redmi" when such an alias is found (POCO is
    checked first), vivo records become "iQOO". Every other manufacturer is
    returned unchanged.
    """
    haystack = normalize_text(" ".join([device_name, section, *aliases]))

    def mentions(market_brand: str) -> bool:
        for alias in MARKET_BRAND_ALIASES[market_brand]:
            key = normalize_text(alias)
            if key and key in haystack:
                return True
        return False

    if manufacturer_brand == "Xiaomi":
        if mentions("POCO"):
            return "POCO"
        return "Redmi" if mentions("Redmi") else "Xiaomi"
    if manufacturer_brand == "vivo":
        return "iQOO" if mentions("iQOO") else "vivo"
    return manufacturer_brand
def infer_device_type(
    device_name: str,
    section: str,
    source_file: str,
    aliases: Iterable[str],
    default_type: str,
) -> str:
    """Classify a device by keyword.

    The name, section, aliases and source file name are joined into one text
    and tested against the keyword lists in a fixed order: tv, tablet, wear,
    computer, other, phone. The first list with a hit decides. Without any
    hit ``default_type`` is returned, or "other" when that is empty.

    NOTE(review): the source file name is part of the searched text and
    has_keyword matches substrings, so e.g. "coolpad.md" hits the tablet
    keyword "pad" - confirm this is intended.
    """
    corpus = " ".join([device_name, section, *aliases, source_file])
    ordered_rules = (
        (TV_KEYWORDS, "tv"),
        (TABLET_KEYWORDS, "tablet"),
        (WEAR_KEYWORDS, "wear"),
        (COMPUTER_KEYWORDS, "computer"),
        (OTHER_KEYWORDS, "other"),
        (PHONE_KEYWORDS, "phone"),
    )
    for keywords, device_type in ordered_rules:
        if has_keyword(corpus, keywords):
            return device_type
    return default_type or "other"
def clean_entry_title(raw_title: str) -> str:
    """Return the display name contained in a raw entry title.

    Steps, in order:
      1. trim, and drop one trailing ``:``;
      2. drop a leading bracket tag such as ``[`X1`]`` or ``[X1]``;
      3. drop trailing backticked codenames such as ``(`foo`) (`bar`)``;
      4. drop a trailing ``(codename ...)`` / ``(代号...)`` group;
      5. replace markdown links ``[Foo](url)`` with their text ``Foo``;
      6. collapse whitespace runs to single spaces.

    Args:
        raw_title: Title text taken from between the ``**`` markers.

    Returns:
        The cleaned title; may be empty if nothing but markup was present.
    """
    title = raw_title.strip()
    if title.endswith(":"):
        title = title[:-1].strip()
    # Remove a leading tag like [`X1`] or [X1]. The lookahead keeps a leading
    # markdown link ("[Foo](url) ...") intact: without it the link text would
    # be stripped as if it were a tag, leaving "(url) ..." behind.
    title = re.sub(r"^\[[^\]]+\](?!\()\s*", "", title)
    # Remove one or more trailing codenames like: (`foo`) (`bar`)
    title = re.sub(r"(?:\s*\(\s*`[^`]+`\s*\))+\s*$", "", title)
    title = re.sub(r"\s*\((?:codename|代号)[^)]*\)\s*$", "", title, flags=re.IGNORECASE)
    # Strip markdown links while keeping their text: [Foo](url) -> Foo
    title = re.sub(r"\[([^\]]+)\]\([^)]*\)", r"\1", title)
    return " ".join(title.split())
def extract_codes(text: str) -> List[str]:
    """Return every non-blank backtick-quoted code in *text*, stripped, in order."""
    stripped = (code.strip() for code in BACKTICK_RE.findall(text))
    return [code for code in stripped if code]
def parse_brand_file(path: Path) -> List[DeviceRecord]:
    """Parse one brand markdown file into device records.

    The file is read line by line (blank lines ignored):
      * a ``## heading`` closes the pending entry and starts a new section;
      * a bold ``**title**`` line closes the pending entry and opens a new
        one, seeded with the cleaned title and the codes in the raw title;
      * a variant line (``codes: name``) adds its codes and name as aliases
        of the open entry; any other line is ignored.
    The last open entry is closed at end of file. Record ids are
    ``"<file stem>:<n>"`` with ``n`` counting from 1 in file order.
    """
    stem = path.stem
    brand = canonical_brand(stem)
    fallback_type = FILE_DEFAULT_DEVICE_TYPE.get(stem, "phone")
    parsed: List[DeviceRecord] = []

    def emit(title: str, raw_aliases: Set[str], section_name: str) -> None:
        """Append a record for the pending entry; no-op without a title."""
        if not title:
            return
        aliases = sorted({alias.strip() for alias in raw_aliases if alias.strip()})
        device_type = infer_device_type(
            device_name=title,
            section=section_name,
            source_file=path.name,
            aliases=aliases,
            default_type=fallback_type,
        )
        market_brand = infer_market_brand(
            manufacturer_brand=brand,
            device_name=title,
            section=section_name,
            aliases=aliases,
        )
        parsed.append(
            DeviceRecord(
                id=f"{stem}:{len(parsed) + 1}",
                device_name=title,
                brand=brand,
                manufacturer_brand=brand,
                parent_brand=resolve_parent_brand(brand),
                market_brand=market_brand,
                device_type=device_type,
                aliases=aliases,
                source_file=f"brands/{path.name}",
                section=section_name,
            )
        )

    section = ""
    title = ""
    pending: Set[str] = set()

    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line:
            continue

        heading = SECTION_RE.match(line)
        if heading is not None:
            # The pending entry belongs to the section that is ending.
            emit(title, pending, section)
            title, pending = "", set()
            section = heading.group(1).strip()
            continue

        entry = ENTRY_RE.match(line)
        if entry is not None:
            emit(title, pending, section)
            raw_title = entry.group(1).strip()
            title = clean_entry_title(raw_title)
            pending = set(extract_codes(raw_title))
            pending.add(title)
            continue

        if title:
            variant = VARIANT_RE.match(line)
            if variant is not None:
                pending.update(extract_codes(variant.group(1)))
                pending.add(variant.group(2).strip())

    emit(title, pending, section)
    return parsed
class DeviceMapper:
    """Look up device records by alias, optionally narrowed by a brand string.

    Attributes built in ``__init__``:
        records / records_by_id: the input records and an id -> record map.
        alias_index: normalised device alias -> ids of records carrying it.
        manufacturer_alias_lookup / parent_alias_lookup / market_alias_lookup:
            normalised brand alias -> manufacturer / parent / market brand.
        parent_to_children: parent brand -> manufacturer brands under it.
    """

    # Keys shared by brand filters and DeviceRecord attributes.
    _FILTER_FIELDS = ("parent_brand", "manufacturer_brand", "market_brand")

    def __init__(self, records: List[DeviceRecord]) -> None:
        self.records = records
        self.records_by_id = {record.id: record for record in records}

        self.alias_index: Dict[str, Set[str]] = {}
        for record in records:
            for alias in record.aliases:
                key = normalize_text(alias)
                if key:
                    self.alias_index.setdefault(key, set()).add(record.id)

        manufacturers = sorted({record.manufacturer_brand for record in records})
        parents = sorted({record.parent_brand for record in records})
        self.manufacturer_alias_lookup: Dict[str, str] = self._alias_table(manufacturers)
        self.parent_alias_lookup: Dict[str, str] = self._alias_table(parents)

        self.parent_to_children: Dict[str, Set[str]] = {}
        for manufacturer in manufacturers:
            parent = resolve_parent_brand(manufacturer)
            self.parent_to_children.setdefault(parent, set()).add(manufacturer)

        self.market_alias_lookup: Dict[str, str] = {}
        for market_brand, aliases in MARKET_BRAND_ALIASES.items():
            for alias in {market_brand, *aliases}:
                key = normalize_text(alias)
                if key:
                    self.market_alias_lookup[key] = market_brand

    @staticmethod
    def _alias_table(brands: Iterable[str]) -> Dict[str, str]:
        """Map every normalised alias of each brand to that brand.

        Brands are processed in the given order; a later brand overwrites an
        earlier one that shares an alias.
        """
        table: Dict[str, str] = {}
        for brand in brands:
            for alias in brand_aliases(brand):
                key = normalize_text(alias)
                if key:
                    table[key] = brand
        return table

    @staticmethod
    def _make_filter(
        parent_brand: Optional[str],
        manufacturer_brand: Optional[str],
        market_brand: Optional[str],
        source: str,
    ) -> Dict[str, Optional[str]]:
        """Build a brand-filter dict with the fixed key order."""
        return {
            "parent_brand": parent_brand,
            "manufacturer_brand": manufacturer_brand,
            "market_brand": market_brand,
            "source": source,
        }

    def _manufacturer_filter(self, manufacturer_brand: str, match_kind: str) -> Dict[str, Optional[str]]:
        """Filter for a matched manufacturer alias.

        A manufacturer that is its own parent and has more than one child is
        treated as a parent brand (no manufacturer restriction).
        ``match_kind`` is "exact" or "contains" and only affects ``source``.
        """
        parent_brand = resolve_parent_brand(manufacturer_brand)
        children = self.parent_to_children.get(manufacturer_brand, set())
        if manufacturer_brand == parent_brand and len(children) > 1:
            return self._make_filter(parent_brand, None, None, f"parent_alias_{match_kind}")
        return self._make_filter(parent_brand, manufacturer_brand, None, f"manufacturer_alias_{match_kind}")

    def _parse_brand_filter(self, input_brand: Optional[str]) -> Dict[str, Optional[str]]:
        """Translate a raw brand string into a brand filter.

        Exact alias matches are tried first (market, manufacturer, parent),
        then substring matches in the same order. The returned dict has the
        keys parent_brand, manufacturer_brand, market_brand and source;
        unset restrictions are None and an unrecognised or empty input gives
        source "none".
        """
        input_norm = normalize_text(input_brand) if input_brand else ""
        if not input_norm:
            return self._make_filter(None, None, None, "none")

        # --- exact alias matches ---
        if input_norm in self.market_alias_lookup:
            market_brand = self.market_alias_lookup[input_norm]
            manufacturer_brand = MARKET_BRAND_TO_MANUFACTURER.get(market_brand, market_brand)
            parent_brand = resolve_parent_brand(manufacturer_brand)
            if market_brand == "Xiaomi":
                # "Xiaomi" is filtered at manufacturer level, without a
                # market-brand restriction.
                return self._make_filter(
                    parent_brand, manufacturer_brand, None, "manufacturer_alias_from_market"
                )
            return self._make_filter(parent_brand, manufacturer_brand, market_brand, "market_alias_exact")

        if input_norm in self.manufacturer_alias_lookup:
            return self._manufacturer_filter(self.manufacturer_alias_lookup[input_norm], "exact")

        if input_norm in self.parent_alias_lookup:
            return self._make_filter(self.parent_alias_lookup[input_norm], None, None, "parent_alias_exact")

        # --- substring matches: first alias contained in the input wins ---
        for alias_norm, market_brand in self.market_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                manufacturer_brand = MARKET_BRAND_TO_MANUFACTURER.get(market_brand, market_brand)
                return self._make_filter(
                    resolve_parent_brand(manufacturer_brand),
                    manufacturer_brand,
                    market_brand,
                    "market_alias_contains",
                )

        for alias_norm, manufacturer_brand in self.manufacturer_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                return self._manufacturer_filter(manufacturer_brand, "contains")

        for alias_norm, parent_brand in self.parent_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                return self._make_filter(parent_brand, None, None, "parent_alias_contains")

        return self._make_filter(None, None, None, "none")

    @staticmethod
    def _brand_match(
        brand_filter: Dict[str, Optional[str]],
        record: DeviceRecord,
    ) -> bool:
        """Return True if *record* satisfies every restriction set in *brand_filter*.

        Restrictions that are missing, None or empty are not checked.
        """
        for field in DeviceMapper._FILTER_FIELDS:
            wanted = brand_filter.get(field)
            if wanted and getattr(record, field) != wanted:
                return False
        return True

    def find(self, name: str, brand: Optional[str] = None, limit: int = 5) -> Dict[str, object]:
        """Find the record whose alias equals *name* after normalisation.

        When *brand* is given, candidates are narrowed by the parsed brand
        filter; if that leaves nothing and the filter names a manufacturer,
        the market-brand restriction is dropped and the filter is retried.
        Candidates are ordered preferred sources first, then by device name,
        source file and id, and only the first preferred match is kept.

        Returns:
            A dict with ``matched`` True plus ``best``/``candidates`` on
            success; otherwise ``matched`` False with a ``reason``, and, for a
            non-empty query, up to *limit* ``suggestions`` (normalised aliases
            that contain the query or are contained in it).
        """
        query = normalize_text(name)
        if not query:
            return {
                "matched": False,
                "reason": "Empty device name.",
                "query_name": name,
                "query_brand": brand,
                "candidates": [],
            }

        exact_hits = [self.records_by_id[rid] for rid in self.alias_index.get(query, set())]
        brand_filter = self._parse_brand_filter(brand)

        matched = exact_hits
        if brand:
            matched = [record for record in exact_hits if self._brand_match(brand_filter, record)]
            if not matched and brand_filter.get("manufacturer_brand"):
                # Retry without the market-brand restriction.
                relaxed_filter = {
                    "parent_brand": brand_filter.get("parent_brand"),
                    "manufacturer_brand": brand_filter.get("manufacturer_brand"),
                    "market_brand": None,
                }
                matched = [record for record in exact_hits if self._brand_match(relaxed_filter, record)]

        def rank(record: DeviceRecord):
            preferred_rank = 0 if is_preferred_cn_source_file(record.source_file) else 1
            return (preferred_rank, record.device_name, record.source_file, record.id)

        matched = first_preferred_match(sorted(matched, key=rank))

        if matched:
            return {
                "matched": True,
                "query_name": name,
                "query_brand": brand,
                "query_brand_parsed": brand_filter,
                "best": asdict(matched[0]),
                "candidates": [asdict(record) for record in matched[:limit]],
            }

        suggestions: List[str] = []
        for alias in self.alias_index:
            if query in alias or alias in query:
                suggestions.append(alias)
                if len(suggestions) >= limit:
                    break

        return {
            "matched": False,
            "query_name": name,
            "query_brand": brand,
            "query_brand_parsed": brand_filter,
            "reason": "No exact alias match.",
            "candidates": [],
            "suggestions": suggestions,
        }
def build_records(repo_root: Path) -> List[DeviceRecord]:
    """Collect all device records below *repo_root*.

    Manual brand overrides are applied before the markdown files are parsed,
    because parse_brand_file resolves parent brands through those overrides.
    Records from ``brands/*.md`` (in sorted file order) come first, followed
    by the manual catalogue's devices.
    """
    manual_catalog = load_manual_catalog(repo_root)
    apply_manual_brand_overrides(manual_catalog)

    collected: List[DeviceRecord] = []
    markdown_files = sorted((repo_root / "brands").glob("*.md"))
    for markdown_file in markdown_files:
        collected += parse_brand_file(markdown_file)
    collected += parse_manual_catalog(repo_root, manual_catalog)
    return collected
def export_index(records: List[DeviceRecord], output_path: Path) -> None:
    """Write the JSON index for *records* to *output_path*.

    The document contains the generation date, the record count, brand lists
    and alias tables, a ``brand_management`` section (brand relations and
    per-brand record counts), the alias ``lookup`` (normalised alias -> sorted
    record ids) and the records themselves. Parent directories are created as
    needed; the file is written as UTF-8 with 2-space indentation.

    Note that ``manufacturer_to_parent`` and ``brand_to_manufacturer`` hold
    the same mapping, as do ``brand_aliases`` and ``manufacturer_aliases``.
    """

    def distinct(field: str) -> List[str]:
        return sorted({getattr(record, field) for record in records})

    def tally(field: str) -> Dict[str, int]:
        counts = Counter(getattr(record, field) for record in records)
        return dict(sorted(counts.items()))

    manufacturers = distinct("manufacturer_brand")
    parents = distinct("parent_brand")
    markets = distinct("market_brand")
    every_brand = sorted({*manufacturers, *markets, *MARKET_BRAND_TO_MANUFACTURER})

    # Market brands map to their manufacturer, all others to their parent brand.
    brand_to_manufacturer = {
        brand: (
            MARKET_BRAND_TO_MANUFACTURER[brand]
            if brand in MARKET_BRAND_TO_MANUFACTURER
            else resolve_parent_brand(brand)
        )
        for brand in every_brand
    }

    grouped: Dict[str, List[str]] = {}
    for child, parent in brand_to_manufacturer.items():
        grouped.setdefault(parent, []).append(child)
    parent_to_children = {parent: sorted(children) for parent, children in grouped.items()}

    alias_table = {brand: brand_aliases(brand) for brand in every_brand}

    # Keys keep first-seen order; ids per key are unique and sorted.
    ids_by_alias: Dict[str, Set[str]] = {}
    for record in records:
        for alias in record.aliases:
            key = normalize_text(alias)
            if key:
                ids_by_alias.setdefault(key, set()).add(record.id)
    lookup = {key: sorted(ids) for key, ids in ids_by_alias.items()}

    brand_management = {
        "brands": every_brand,
        "manufacturers": sorted(set(brand_to_manufacturer.values())),
        "manufacturer_aliases": alias_table,
        "manufacturer_to_parent": brand_to_manufacturer,
        "brand_to_manufacturer": brand_to_manufacturer,
        "parent_to_children": parent_to_children,
        "parent_aliases": {brand: brand_aliases(brand) for brand in parents},
        "market_brand_aliases": MARKET_BRAND_ALIASES,
        "market_brand_to_manufacturer": MARKET_BRAND_TO_MANUFACTURER,
        "market_brands": markets,
        "parent_brands": parents,
        "stats": {
            "manufacturer_brand": tally("manufacturer_brand"),
            "parent_brand": tally("parent_brand"),
            "market_brand": tally("market_brand"),
        },
    }
    document = {
        "generated_on": date.today().isoformat(),
        "total_records": len(records),
        "brands": manufacturers,
        "brand_aliases": alias_table,
        "brand_management": brand_management,
        "lookup": lookup,
        "records": [asdict(record) for record in records],
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(document, ensure_ascii=False, indent=2)
    output_path.write_text(serialized, encoding="utf-8")
def main() -> None:
    """Command-line entry point.

    Sub-commands:
        build: write the JSON index (``--output``; a relative path is
            resolved against PROJECT_ROOT) and print a short summary.
        find:  look up ``--name`` (optionally with ``--brand``) and print the
            result as JSON.
    """
    parser = argparse.ArgumentParser(description="MobileModels device mapper")
    parser.add_argument("--repo-root", type=Path, default=WORKSPACE_ROOT, help="Path to workspace root")
    commands = parser.add_subparsers(dest="command", required=True)

    build_parser = commands.add_parser("build", help="Build JSON index")
    build_parser.add_argument(
        "--output",
        type=Path,
        default=Path("dist/device_index.json"),
        help="Output JSON path",
    )

    find_parser = commands.add_parser("find", help="Find a device by name + optional brand")
    find_parser.add_argument("--name", required=True, help="Raw device name from app")
    find_parser.add_argument("--brand", default=None, help="Optional raw brand from app")
    find_parser.add_argument("--limit", type=int, default=5, help="Max matched candidates")

    args = parser.parse_args()
    records = build_records(args.repo_root)
    mapper = DeviceMapper(records)

    if args.command == "build":
        target: Path = args.output
        if not target.is_absolute():
            target = PROJECT_ROOT / target
        export_index(records, target)
        print(f"Built index: {target}")
        print(f"Total records: {len(records)}")
    elif args.command == "find":
        outcome = mapper.find(name=args.name, brand=args.brand, limit=args.limit)
        print(json.dumps(outcome, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()