760 lines
24 KiB
Python
760 lines
24 KiB
Python
#!/usr/bin/env python3
|
|
"""Build and query a cross-platform device mapping index from MobileModels markdown data."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
from collections import Counter
|
|
import json
|
|
import re
|
|
from dataclasses import asdict, dataclass
|
|
from datetime import date
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Optional, Set
|
|
|
|
from project_layout import PROJECT_ROOT, WORKSPACE_ROOT
|
|
|
|
# Entry heading: a line consisting solely of bold markdown (**...**);
# group 1 is the raw title between the asterisks.
ENTRY_RE = re.compile(r"^\*\*(.+?)\*\*\s*$")
# Variant line: one or more backtick-quoted codes, a colon, then the variant
# name. Group 1 holds the run of codes, group 2 the name.
VARIANT_RE = re.compile(r"^\s*((?:`[^`]+`\s*)+):\s*(.+?)\s*$")
# A single backtick-quoted token; group 1 is the text between the backticks.
BACKTICK_RE = re.compile(r"`([^`]+)`")
# Level-2 markdown heading ("## Title"); group 1 is the heading text.
SECTION_RE = re.compile(r"^##\s+(.+?)\s*$")
|
|
|
|
|
|
# Maps the stem of a brand markdown file to its canonical brand name.
# Stems that are not listed fall back to the stem itself (see canonical_brand).
FILE_BRAND_MAP: Dict[str, str] = {
    "360shouji": "360",
    "apple_all": "Apple",
    "apple_all_en": "Apple",
    "apple_cn": "Apple",
    "asus_cn": "ASUS",
    "asus_en": "ASUS",
    "blackshark": "Black Shark",
    "blackshark_en": "Black Shark",
    "coolpad": "Coolpad",
    "google": "Google",
    "honor_cn": "HONOR",
    "honor_global_en": "HONOR",
    "huawei_cn": "HUAWEI",
    "huawei_global_en": "HUAWEI",
    "lenovo_cn": "Lenovo",
    "letv": "LeTV",
    "meizu": "Meizu",
    "meizu_en": "Meizu",
    "mitv_cn": "Xiaomi",
    "mitv_global_en": "Xiaomi",
    "motorola_cn": "Motorola",
    "nokia_cn": "Nokia",
    "nothing": "Nothing",
    "nubia": "nubia",
    "oneplus": "OnePlus",
    "oneplus_en": "OnePlus",
    "oppo_cn": "OPPO",
    "oppo_global_en": "OPPO",
    "realme_cn": "realme",
    "realme_global_en": "realme",
    "samsung_cn": "Samsung",
    "samsung_global_en": "Samsung",
    "smartisan": "Smartisan",
    "sony": "Sony",
    "sony_cn": "Sony",
    "vivo_cn": "vivo",
    "vivo_global_en": "vivo",
    "xiaomi": "Xiaomi",
    "xiaomi_cn": "Xiaomi",
    "xiaomi_en": "Xiaomi",
    "xiaomi-wear": "Xiaomi",
    "zhixuan": "HUAWEI Smart Selection",
    "zte_cn": "ZTE",
}
|
|
|
|
|
|
# Fallback device type per file stem, used by infer_device_type when no
# keyword list matches. parse_brand_file defaults unlisted stems to "phone".
FILE_DEFAULT_DEVICE_TYPE: Dict[str, str] = {
    "mitv_cn": "tv",
    "mitv_global_en": "tv",
    "xiaomi-wear": "wear",
    "apple_all": "phone",
    "apple_all_en": "phone",
    "apple_cn": "phone",
    "google": "phone",
    "honor_cn": "phone",
    "honor_global_en": "phone",
    "huawei_cn": "phone",
    "huawei_global_en": "phone",
    "xiaomi": "phone",
    "xiaomi_cn": "phone",
    "xiaomi_en": "phone",
    "zhixuan": "phone",
}
|
|
|
|
|
|
# Known spellings for each brand. brand_aliases() adds the brand name itself,
# and DeviceMapper compares these after passing them through normalize_text.
BRAND_ALIASES: Dict[str, List[str]] = {
    "360": ["360", "360手机", "奇酷", "qiku"],
    "Apple": ["apple", "苹果", "iphone", "ipad", "ipod"],
    "ASUS": ["asus", "华硕", "rog", "zenfone"],
    "Black Shark": ["black shark", "blackshark", "黑鲨"],
    "Coolpad": ["coolpad", "酷派"],
    "Google": ["google", "pixel"],
    "HONOR": ["honor", "荣耀"],
    "HUAWEI": ["huawei", "华为"],
    "HUAWEI Smart Selection": ["华为智选", "zhixuan", "umagic", "wiko", "hi nova", "nzone"],
    "Lenovo": ["lenovo", "联想", "zuk", "拯救者"],
    "LeTV": ["letv", "乐视"],
    "Meizu": ["meizu", "魅族"],
    "Motorola": ["motorola", "摩托罗拉", "moto"],
    "Nokia": ["nokia", "诺基亚"],
    "Nothing": ["nothing", "cmf"],
    "nubia": ["nubia", "努比亚", "红魔", "redmagic"],
    "iQOO": ["iqoo", "i qoo", "艾酷"],
    "OnePlus": ["oneplus", "一加"],
    "OPPO": ["oppo"],
    "POCO": ["poco"],
    "Redmi": ["redmi", "红米", "hongmi"],
    "realme": ["realme", "真我"],
    "Samsung": ["samsung", "三星", "galaxy"],
    "Smartisan": ["smartisan", "锤子", "坚果"],
    "Sony": ["sony", "索尼", "xperia"],
    "vivo": ["vivo"],
    "Xiaomi": ["xiaomi", "小米", "mi", "米家", "mipad"],
    "ZTE": ["zte", "中兴"],
}
|
|
|
|
# Maps a brand to its parent brand. Brands that are not listed are treated as
# their own parent (see resolve_parent_brand).
MANUFACTURER_PARENT_BRAND: Dict[str, str] = {
    "Black Shark": "Xiaomi",
    "HUAWEI Smart Selection": "HUAWEI",
    "Motorola": "Lenovo",
    "iQOO": "vivo",
    "POCO": "Xiaomi",
    "Redmi": "Xiaomi",
    "OnePlus": "OPPO",
    "realme": "OPPO",
    "nubia": "ZTE",
}
|
|
|
|
# Spellings of market (sub-)brands. infer_market_brand searches for them in a
# record's text, and DeviceMapper uses them to parse a brand query.
MARKET_BRAND_ALIASES: Dict[str, List[str]] = {
    "iQOO": ["iqoo", "i qoo", "艾酷"],
    "POCO": ["poco"],
    "Redmi": ["redmi", "红米", "hongmi"],
    "Xiaomi": ["xiaomi", "小米", "mi", "mipad", "米家"],
}
|
|
|
|
# Maps each market brand to the manufacturer brand whose records carry it.
MARKET_BRAND_TO_MANUFACTURER: Dict[str, str] = {
    "iQOO": "vivo",
    "POCO": "Xiaomi",
    "Redmi": "Xiaomi",
    "Xiaomi": "Xiaomi",
}
|
|
|
|
# Keyword lists consumed by infer_device_type. They are checked in the order
# TV -> tablet -> wear -> other -> phone, and has_keyword matches each keyword
# as a normalized substring of the record text.

# Keywords that classify a record as "tv".
TV_KEYWORDS = [
    "tv",
    "电视",
    "智慧屏",
    "smart tv",
    "机顶盒",
    "tv box",
    "stick",
    "dongle",
]
# Keywords that classify a record as "tablet".
TABLET_KEYWORDS = [
    "ipad",
    "tablet",
    "tab",
    "pad",
    "平板",
    "matepad",
]
# Keywords that classify a record as "wear".
WEAR_KEYWORDS = [
    "watch",
    "smartwatch",
    "手表",
    "手环",
    "band",
    "wear",
    "wearable",
    "buds",
    "earbuds",
    "耳机",
    "tws",
    "eyewear",
    "glasses",
    "眼镜",
]
# Keywords that classify a record as "other".
OTHER_KEYWORDS = [
    "matebook",
    "笔记本",
    "laptop",
    "notebook",
    "vision",
    "vr",
    "ipod",
    "airpods",
]
# Keywords that classify a record as "phone"; checked last.
PHONE_KEYWORDS = [
    "iphone",
    "phone",
    "手机",
    "galaxy",
    "pixel",
    "xiaomi",
    "redmi",
    "poco",
    "honor",
    "huawei",
    "mate",
    "nova",
    "oppo",
    "vivo",
    "realme",
    "oneplus",
    "nokia",
    "nubia",
    "meizu",
    "lenovo",
    "motorola",
    "zte",
    "smartisan",
    "zenfone",
    "rog",
    "麦芒",
    "畅享",
    "优畅享",
]
|
|
|
|
|
|
@dataclass
class DeviceRecord:
    """One device entry parsed from a brand markdown file."""

    # Identifier of the form "<file stem>:<1-based position within that file>".
    id: str
    # Entry title after clean_entry_title has removed tags, codenames and links.
    device_name: str
    # Canonical brand derived from the file stem (see canonical_brand).
    brand: str
    # parse_brand_file sets this to the same value as `brand`.
    manufacturer_brand: str
    # Result of resolve_parent_brand for the brand.
    parent_brand: str
    # Result of infer_market_brand (e.g. "Redmi" or "POCO" for Xiaomi records).
    market_brand: str
    # Result of infer_device_type: "tv", "tablet", "wear", "other", "phone",
    # or the file's default type.
    device_type: str
    # Sorted, de-duplicated titles, variant names and backticked codes.
    aliases: List[str]
    # Source path in the form "brands/<file name>".
    source_file: str
    # Text of the "## ..." heading the entry was recorded under ("" if none).
    section: str
|
|
|
|
|
|
def normalize_text(text: str) -> str:
    """Return *text* reduced to a comparison key.

    The text is lowercased and only ASCII digits, the letters a-z and
    characters in the range U+4E00-U+9FFF are kept; everything else
    (whitespace, punctuation, other scripts) is dropped.
    """
    lowered = text.lower()
    # Joining the runs of allowed characters is the same as deleting the
    # runs of disallowed ones.
    kept_runs = re.findall(r"[0-9a-z\u4e00-\u9fff]+", lowered)
    return "".join(kept_runs)
|
|
|
|
|
|
def canonical_brand(file_stem: str) -> str:
    """Return the canonical brand for a brand-file stem.

    Stems missing from FILE_BRAND_MAP are returned unchanged.
    """
    try:
        return FILE_BRAND_MAP[file_stem]
    except KeyError:
        return file_stem
|
|
|
|
|
|
def brand_aliases(brand: str) -> List[str]:
    """Return the sorted, de-duplicated aliases of *brand*.

    The result is the BRAND_ALIASES entry for the brand (if any) plus the
    brand name itself.
    """
    known = BRAND_ALIASES.get(brand, [])
    return sorted({brand, *known})
|
|
|
|
|
|
def has_keyword(text: str, keywords: Iterable[str]) -> bool:
    """Return True if any keyword occurs in *text* after normalization.

    Both sides go through normalize_text and are compared as substrings;
    keywords that normalize to an empty string are ignored.
    """
    haystack = normalize_text(text)
    needles = (normalize_text(candidate) for candidate in keywords)
    return any(needle != "" and needle in haystack for needle in needles)
|
|
|
|
|
|
def resolve_parent_brand(manufacturer_brand: str) -> str:
    """Return the parent brand of *manufacturer_brand*.

    Brands without an entry in MANUFACTURER_PARENT_BRAND are their own parent.
    """
    if manufacturer_brand in MANUFACTURER_PARENT_BRAND:
        return MANUFACTURER_PARENT_BRAND[manufacturer_brand]
    return manufacturer_brand
|
|
|
|
|
|
def infer_market_brand(
    manufacturer_brand: str,
    device_name: str,
    section: str,
    aliases: Iterable[str],
) -> str:
    """Guess the market (sub-)brand a record is sold under.

    The device name, section and aliases are joined and normalized into one
    search string. For Xiaomi records a POCO alias wins over a Redmi alias,
    otherwise the result is "Xiaomi". For vivo records an iQOO alias yields
    "iQOO", otherwise "vivo". Every other manufacturer is returned unchanged.
    """
    haystack = normalize_text(" ".join([device_name, section, *aliases]))

    def mentions(market_brand: str) -> bool:
        # True when any normalized alias of the market brand is a substring.
        for raw_alias in MARKET_BRAND_ALIASES[market_brand]:
            needle = normalize_text(raw_alias)
            if needle and needle in haystack:
                return True
        return False

    if manufacturer_brand == "Xiaomi":
        if mentions("POCO"):
            return "POCO"
        return "Redmi" if mentions("Redmi") else "Xiaomi"

    if manufacturer_brand == "vivo":
        return "iQOO" if mentions("iQOO") else "vivo"

    return manufacturer_brand
|
|
|
|
|
|
def infer_device_type(
    device_name: str,
    section: str,
    source_file: str,
    aliases: Iterable[str],
    default_type: str,
) -> str:
    """Classify a record as "tv", "tablet", "wear", "other" or "phone".

    The device name, section, aliases and source file name are joined into
    one search string, which is tested against the keyword lists in a fixed
    order; the first list with a hit decides. When nothing matches,
    *default_type* is returned, or "other" if that is empty.
    """
    haystack = " ".join([device_name, section, *aliases, source_file])

    # Order matters: earlier lists take precedence over later ones.
    ordered_checks = (
        (TV_KEYWORDS, "tv"),
        (TABLET_KEYWORDS, "tablet"),
        (WEAR_KEYWORDS, "wear"),
        (OTHER_KEYWORDS, "other"),
        (PHONE_KEYWORDS, "phone"),
    )
    for keyword_list, device_type in ordered_checks:
        if has_keyword(haystack, keyword_list):
            return device_type

    return default_type if default_type else "other"
|
|
|
|
|
|
def clean_entry_title(raw_title: str) -> str:
    """Return a display name for the raw text of a ``**...**`` entry heading.

    Steps, in order:

    1. Strip surrounding whitespace and one trailing ``:``.
    2. Drop a leading bracketed tag such as ``[`X1`]`` or ``[X1]``.
    3. Drop trailing backticked codenames such as ``(`foo`) (`bar`)`` and a
       trailing ``(codename ...)`` / ``(代号...)`` group.
    4. Replace markdown links ``[Foo](url)`` with their text ``Foo``.
    5. Collapse runs of whitespace into single spaces.

    A leading ``[text]`` that is immediately followed by a parenthesised
    link target is a markdown link, not a tag, so step 2 leaves it for step 4.
    Previously step 2 removed ``[text]`` and left the bare ``(url)`` behind.
    """
    title = raw_title.strip()
    if title.endswith(":"):
        title = title[:-1].strip()

    # Remove a leading tag like [`X1`] or [X1]. The negative lookahead skips
    # "[text](target)" where the target has no backticks, i.e. a markdown
    # link; a directly attached "(`code`)" is still treated as tag + codename.
    title = re.sub(r"^\[[^\]]+\](?!\([^`)]*\))\s*", "", title)

    # Remove one or more trailing codenames like: (`foo`) (`bar`)
    title = re.sub(r"(?:\s*\(\s*`[^`]+`\s*\))+\s*$", "", title)
    title = re.sub(r"\s*\((?:codename|代号)[^)]*\)\s*$", "", title, flags=re.IGNORECASE)

    # Strip markdown links while keeping their text: [Foo](url) -> Foo
    title = re.sub(r"\[([^\]]+)\]\([^)]*\)", r"\1", title)

    return " ".join(title.split())
|
|
|
|
|
|
def extract_codes(text: str) -> List[str]:
    """Return every non-blank backtick-quoted token in *text*, in order.

    Tokens are stripped of surrounding whitespace; tokens that are empty
    after stripping are skipped.
    """
    codes: List[str] = []
    for match in BACKTICK_RE.finditer(text):
        token = match.group(1).strip()
        if token:
            codes.append(token)
    return codes
|
|
|
|
|
|
def parse_brand_file(path: Path) -> List[DeviceRecord]:
    """Parse one brand markdown file into a list of device records.

    The file is scanned line by line (blank lines are ignored):

    * A ``## heading`` line closes the entry in progress and starts a new
      section.
    * A line that is entirely bold (``**title**``) closes the entry in
      progress and starts a new one. The cleaned title and any backticked
      codes in the raw title become aliases.
    * A variant line (backticked codes, a colon, a name) adds the codes and
      the name to the aliases of the entry in progress.
    * Any other line, and any variant line seen before the first entry of a
      section, is ignored.

    Args:
        path: Markdown file to read (UTF-8). Its stem selects the brand and
            the default device type.

    Returns:
        Records in file order, with ids ``"<stem>:<n>"`` numbered from 1.
    """
    file_stem = path.stem
    brand = canonical_brand(file_stem)
    default_type = FILE_DEFAULT_DEVICE_TYPE.get(file_stem, "phone")

    records: List[DeviceRecord] = []
    lines = path.read_text(encoding="utf-8").splitlines()

    section = ""
    current_title = ""
    current_aliases: Set[str] = set()

    def flush_current() -> None:
        """Turn the entry in progress (if any) into a record and reset it."""
        nonlocal current_title, current_aliases
        if not current_title:
            return

        aliases = sorted({alias.strip() for alias in current_aliases if alias.strip()})
        record_id = f"{file_stem}:{len(records) + 1}"
        device_type = infer_device_type(
            device_name=current_title,
            section=section,
            source_file=path.name,
            aliases=aliases,
            default_type=default_type,
        )
        records.append(
            DeviceRecord(
                id=record_id,
                device_name=current_title,
                brand=brand,
                manufacturer_brand=brand,
                parent_brand=resolve_parent_brand(brand),
                market_brand=infer_market_brand(
                    manufacturer_brand=brand,
                    device_name=current_title,
                    section=section,
                    aliases=aliases,
                ),
                device_type=device_type,
                aliases=aliases,
                source_file=f"brands/{path.name}",
                section=section,
            )
        )
        current_title = ""
        current_aliases = set()

    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        section_match = SECTION_RE.match(line)
        if section_match:
            # Flush BEFORE switching sections: flush_current reads `section`,
            # so without this the last entry of a section would be recorded
            # under (and classified by) the following section's heading.
            flush_current()
            section = section_match.group(1).strip()
            continue

        entry_match = ENTRY_RE.match(line)
        if entry_match:
            flush_current()
            raw_title = entry_match.group(1).strip()
            current_title = clean_entry_title(raw_title)
            current_aliases = set(extract_codes(raw_title))
            current_aliases.add(current_title)
            continue

        if not current_title:
            continue

        variant_match = VARIANT_RE.match(line)
        if variant_match:
            variant_codes = extract_codes(variant_match.group(1))
            variant_name = variant_match.group(2).strip()
            current_aliases.update(variant_codes)
            current_aliases.add(variant_name)

    # The last entry of the file has no following heading to trigger a flush.
    flush_current()
    return records
|
|
|
|
|
|
class DeviceMapper:
    """In-memory index for looking up device records by alias and brand.

    All lookups use keys produced by normalize_text, so matching ignores
    case, whitespace and punctuation.
    """

    def __init__(self, records: List[DeviceRecord]) -> None:
        """Build the alias index and the brand alias lookup tables.

        Args:
            records: Records to index; the list is stored as ``self.records``.
        """
        self.records = records
        self.records_by_id = {record.id: record for record in records}
        self.manufacturer_alias_lookup: Dict[str, str] = {}
        self.parent_alias_lookup: Dict[str, str] = {}
        self.market_alias_lookup: Dict[str, str] = {}
        self.parent_to_children: Dict[str, Set[str]] = {}

        # Normalized alias -> ids of every record carrying that alias.
        self.alias_index: Dict[str, Set[str]] = {}
        for record in records:
            for alias in record.aliases:
                key = normalize_text(alias)
                if not key:
                    continue
                self.alias_index.setdefault(key, set()).add(record.id)

        manufacturers = sorted({record.manufacturer_brand for record in records})
        parents = sorted({record.parent_brand for record in records})

        for brand in manufacturers:
            for alias in brand_aliases(brand):
                key = normalize_text(alias)
                if key:
                    self.manufacturer_alias_lookup[key] = brand

        for parent in parents:
            for alias in brand_aliases(parent):
                key = normalize_text(alias)
                if key:
                    self.parent_alias_lookup[key] = parent

        for manufacturer in manufacturers:
            parent = resolve_parent_brand(manufacturer)
            self.parent_to_children.setdefault(parent, set()).add(manufacturer)

        for market_brand, aliases in MARKET_BRAND_ALIASES.items():
            # Sorted so the insertion order does not depend on string hashing.
            for alias in sorted({market_brand, *aliases}):
                key = normalize_text(alias)
                if key:
                    self.market_alias_lookup[key] = market_brand

    @staticmethod
    def _make_filter(
        parent_brand: Optional[str],
        manufacturer_brand: Optional[str],
        market_brand: Optional[str],
        source: str,
    ) -> Dict[str, Optional[str]]:
        """Assemble a brand-filter dict with a fixed key order."""
        return {
            "parent_brand": parent_brand,
            "manufacturer_brand": manufacturer_brand,
            "market_brand": market_brand,
            "source": source,
        }

    def _manufacturer_filter(self, manufacturer_brand: str, match_kind: str) -> Dict[str, Optional[str]]:
        """Build the filter for a matched manufacturer alias.

        A manufacturer that is its own parent and has more than one child
        brand in the data is widened to a parent-level filter.
        *match_kind* is "exact" or "contains" and ends the ``source`` value.
        """
        parent_brand = resolve_parent_brand(manufacturer_brand)
        children = self.parent_to_children.get(manufacturer_brand, set())
        if manufacturer_brand == parent_brand and len(children) > 1:
            return self._make_filter(parent_brand, None, None, f"parent_alias_{match_kind}")
        return self._make_filter(parent_brand, manufacturer_brand, None, f"manufacturer_alias_{match_kind}")

    def _parse_brand_filter(self, input_brand: Optional[str]) -> Dict[str, Optional[str]]:
        """Translate a raw brand string into a brand filter.

        Exact matches of the normalized input are tried first, in the order
        market alias, manufacturer alias, parent alias. If none hits, the
        same three tables are scanned for an alias contained in the input,
        and the first hit in table order wins.

        Returns:
            A dict with keys "parent_brand", "manufacturer_brand",
            "market_brand" (each possibly None) and "source", which names
            the rule that matched ("none" when nothing matched).
        """
        input_norm = normalize_text(input_brand) if input_brand else ""
        if not input_norm:
            return self._make_filter(None, None, None, "none")

        # --- exact matches -------------------------------------------------
        if input_norm in self.market_alias_lookup:
            market_brand = self.market_alias_lookup[input_norm]
            manufacturer_brand = MARKET_BRAND_TO_MANUFACTURER.get(market_brand, market_brand)
            parent_brand = resolve_parent_brand(manufacturer_brand)
            if market_brand == "Xiaomi":
                # "Xiaomi" names the manufacturer, so its sub-brands
                # (records with another market_brand) are not filtered out.
                return self._make_filter(
                    parent_brand, manufacturer_brand, None, "manufacturer_alias_from_market"
                )
            return self._make_filter(parent_brand, manufacturer_brand, market_brand, "market_alias_exact")

        if input_norm in self.manufacturer_alias_lookup:
            return self._manufacturer_filter(self.manufacturer_alias_lookup[input_norm], "exact")

        if input_norm in self.parent_alias_lookup:
            return self._make_filter(self.parent_alias_lookup[input_norm], None, None, "parent_alias_exact")

        # --- substring matches ---------------------------------------------
        for alias_norm, market_brand in self.market_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                manufacturer_brand = MARKET_BRAND_TO_MANUFACTURER.get(market_brand, market_brand)
                return self._make_filter(
                    resolve_parent_brand(manufacturer_brand),
                    manufacturer_brand,
                    market_brand,
                    "market_alias_contains",
                )

        for alias_norm, manufacturer_brand in self.manufacturer_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                return self._manufacturer_filter(manufacturer_brand, "contains")

        for alias_norm, parent_brand in self.parent_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                return self._make_filter(parent_brand, None, None, "parent_alias_contains")

        return self._make_filter(None, None, None, "none")

    @staticmethod
    def _brand_match(
        brand_filter: Dict[str, Optional[str]],
        record: DeviceRecord,
    ) -> bool:
        """Return True if *record* satisfies every non-empty filter field."""
        parent = brand_filter.get("parent_brand")
        manufacturer = brand_filter.get("manufacturer_brand")
        market = brand_filter.get("market_brand")

        if parent and record.parent_brand != parent:
            return False
        if manufacturer and record.manufacturer_brand != manufacturer:
            return False
        if market and record.market_brand != market:
            return False
        return True

    def find(self, name: str, brand: Optional[str] = None, limit: int = 5) -> Dict[str, object]:
        """Look up a device by exact normalized alias, optionally by brand.

        Args:
            name: Raw device name or model code.
            brand: Optional raw brand string; parsed by _parse_brand_filter.
            limit: Maximum number of candidates (on a match) or suggestions
                (on a miss). Values below zero are treated as zero.

        Returns:
            A dict with "matched" plus the query echo. On a match it holds
            "best" (first record by device name, source file, id) and
            "candidates". On a miss it holds "reason", an empty "candidates"
            list and "suggestions" (aliases that contain, or are contained
            in, the query).
        """
        # A negative limit would otherwise slice from the tail, and
        # limit <= 0 would still produce one suggestion.
        limit = max(limit, 0)

        query = normalize_text(name)
        if not query:
            return {
                "matched": False,
                "reason": "Empty device name.",
                "query_name": name,
                "query_brand": brand,
                "candidates": [],
            }

        alias_hits = [self.records_by_id[rid] for rid in self.alias_index.get(query, set())]
        matched_records = list(alias_hits)
        brand_filter = self._parse_brand_filter(brand)

        if brand:
            matched_records = [r for r in alias_hits if self._brand_match(brand_filter, r)]
            if not matched_records and brand_filter.get("manufacturer_brand"):
                # Retry without the market brand: the record may be filed
                # under a different sub-brand of the same manufacturer.
                fallback_filter = {
                    "parent_brand": brand_filter.get("parent_brand"),
                    "manufacturer_brand": brand_filter.get("manufacturer_brand"),
                    "market_brand": None,
                }
                matched_records = [r for r in alias_hits if self._brand_match(fallback_filter, r)]

        matched_records.sort(key=lambda r: (r.device_name, r.source_file, r.id))

        if matched_records:
            best = matched_records[0]
            return {
                "matched": True,
                "query_name": name,
                "query_brand": brand,
                "query_brand_parsed": brand_filter,
                "best": asdict(best),
                "candidates": [asdict(r) for r in matched_records[:limit]],
            }

        suggestions: List[str] = []
        if limit > 0:
            for alias in self.alias_index:
                if query in alias or alias in query:
                    suggestions.append(alias)
                    if len(suggestions) >= limit:
                        break

        return {
            "matched": False,
            "query_name": name,
            "query_brand": brand,
            "query_brand_parsed": brand_filter,
            "reason": "No exact alias match.",
            "candidates": [],
            "suggestions": suggestions,
        }
|
|
|
|
|
|
def build_records(repo_root: Path) -> List[DeviceRecord]:
    """Parse every ``*.md`` file directly under ``<repo_root>/brands``.

    Files are processed in sorted path order and their records concatenated.
    """
    collected: List[DeviceRecord] = []
    for markdown_file in sorted((repo_root / "brands").glob("*.md")):
        collected += parse_brand_file(markdown_file)
    return collected
|
|
|
|
|
|
def export_index(records: List[DeviceRecord], output_path: Path) -> None:
    """Write the JSON index for *records* to *output_path*.

    The document contains the generation date, the brand tables (aliases,
    brand-to-manufacturer mapping, per-brand record counts), a lookup from
    normalized alias to sorted record ids, and every record as a dict.
    Missing parent directories of *output_path* are created.
    """
    manufacturer_counts = Counter(record.manufacturer_brand for record in records)
    parent_counts = Counter(record.parent_brand for record in records)
    market_counts = Counter(record.market_brand for record in records)

    # The distinct brands are exactly the counter keys.
    manufacturer_brands_in_data = sorted(manufacturer_counts)
    parent_brands_in_data = sorted(parent_counts)
    market_brands_in_data = sorted(market_counts)
    all_brands_in_data = sorted(
        {*manufacturer_brands_in_data, *market_brands_in_data, *MARKET_BRAND_TO_MANUFACTURER}
    )

    manufacturer_stats = {name: manufacturer_counts[name] for name in manufacturer_brands_in_data}
    parent_stats = {name: parent_counts[name] for name in parent_brands_in_data}
    market_brand_stats = {name: market_counts[name] for name in market_brands_in_data}

    # Market brands map through the explicit table; all others via the parent map.
    brand_to_manufacturer = {
        brand: (
            MARKET_BRAND_TO_MANUFACTURER[brand]
            if brand in MARKET_BRAND_TO_MANUFACTURER
            else resolve_parent_brand(brand)
        )
        for brand in all_brands_in_data
    }

    children_by_parent: Dict[str, List[str]] = {}
    for child, parent in brand_to_manufacturer.items():
        children_by_parent.setdefault(parent, []).append(child)
    parent_to_children: Dict[str, List[str]] = {
        parent: sorted(children) for parent, children in children_by_parent.items()
    }

    all_aliases = {brand: brand_aliases(brand) for brand in all_brands_in_data}

    # Normalized alias -> ids of the records that carry it.
    ids_by_alias: Dict[str, Set[str]] = {}
    for record in records:
        for alias in record.aliases:
            key = normalize_text(alias)
            if key:
                ids_by_alias.setdefault(key, set()).add(record.id)
    lookup: Dict[str, List[str]] = {key: sorted(ids) for key, ids in ids_by_alias.items()}

    brand_management = {
        "brands": all_brands_in_data,
        "manufacturers": sorted(set(brand_to_manufacturer.values())),
        "manufacturer_aliases": all_aliases,
        "manufacturer_to_parent": brand_to_manufacturer,
        "brand_to_manufacturer": brand_to_manufacturer,
        "parent_to_children": parent_to_children,
        "parent_aliases": {brand: brand_aliases(brand) for brand in parent_brands_in_data},
        "market_brand_aliases": MARKET_BRAND_ALIASES,
        "market_brand_to_manufacturer": MARKET_BRAND_TO_MANUFACTURER,
        "market_brands": market_brands_in_data,
        "parent_brands": parent_brands_in_data,
        "stats": {
            "manufacturer_brand": manufacturer_stats,
            "parent_brand": parent_stats,
            "market_brand": market_brand_stats,
        },
    }
    output = {
        "generated_on": date.today().isoformat(),
        "total_records": len(records),
        "brands": manufacturer_brands_in_data,
        "brand_aliases": all_aliases,
        "brand_management": brand_management,
        "lookup": lookup,
        "records": [asdict(record) for record in records],
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(output, ensure_ascii=False, indent=2)
    output_path.write_text(serialized, encoding="utf-8")
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point.

    ``build`` writes the JSON index (relative output paths are resolved
    against PROJECT_ROOT); ``find`` prints the lookup result for a device
    name and optional brand as JSON. Records are parsed from the brand
    files under ``--repo-root`` on every invocation.
    """
    cli = argparse.ArgumentParser(description="MobileModels device mapper")
    cli.add_argument("--repo-root", type=Path, default=WORKSPACE_ROOT, help="Path to workspace root")

    commands = cli.add_subparsers(dest="command", required=True)

    build_parser = commands.add_parser("build", help="Build JSON index")
    build_parser.add_argument(
        "--output",
        type=Path,
        default=Path("dist/device_index.json"),
        help="Output JSON path",
    )

    find_parser = commands.add_parser("find", help="Find a device by name + optional brand")
    find_parser.add_argument("--name", required=True, help="Raw device name from app")
    find_parser.add_argument("--brand", default=None, help="Optional raw brand from app")
    find_parser.add_argument("--limit", type=int, default=5, help="Max matched candidates")

    options = cli.parse_args()

    records = build_records(options.repo_root)
    mapper = DeviceMapper(records)

    if options.command == "build":
        requested: Path = options.output
        destination = requested if requested.is_absolute() else PROJECT_ROOT / requested
        export_index(records, destination)
        print(f"Built index: {destination}")
        print(f"Total records: {len(records)}")
    elif options.command == "find":
        outcome = mapper.find(name=options.name, brand=options.brand, limit=options.limit)
        print(json.dumps(outcome, ensure_ascii=False, indent=2))
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|