#!/usr/bin/env python3
"""Build and query a cross-platform device mapping index from MobileModels markdown data."""

from __future__ import annotations

import argparse
from collections import Counter
import json
import re
from dataclasses import asdict, dataclass
from datetime import date
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Set

from project_layout import PROJECT_ROOT, WORKSPACE_ROOT

# A device entry heading: a line consisting solely of **bold text**.
ENTRY_RE = re.compile(r"^\*\*(.+?)\*\*\s*$")
# A variant line: one or more `code` spans, a colon, then the variant name.
VARIANT_RE = re.compile(r"^\s*((?:`[^`]+`\s*)+):\s*(.+?)\s*$")
# Any single `code` span.
BACKTICK_RE = re.compile(r"`([^`]+)`")
# A level-2 markdown heading ("## Section").
SECTION_RE = re.compile(r"^##\s+(.+?)\s*$")

# Markdown file stem -> canonical brand name used in the index.
FILE_BRAND_MAP: Dict[str, str] = {
    "360shouji": "360",
    "apple_all": "Apple",
    "apple_all_en": "Apple",
    "apple_cn": "Apple",
    "asus_cn": "ASUS",
    "asus_en": "ASUS",
    "blackshark": "Black Shark",
    "blackshark_en": "Black Shark",
    "coolpad": "Coolpad",
    "google": "Google",
    "honor_cn": "HONOR",
    "honor_global_en": "HONOR",
    "huawei_cn": "HUAWEI",
    "huawei_global_en": "HUAWEI",
    "lenovo_cn": "Lenovo",
    "letv": "LeTV",
    "meizu": "Meizu",
    "meizu_en": "Meizu",
    "mitv_cn": "Xiaomi",
    "mitv_global_en": "Xiaomi",
    "motorola_cn": "Motorola",
    "nokia_cn": "Nokia",
    "nothing": "Nothing",
    "nubia": "nubia",
    "oneplus": "OnePlus",
    "oneplus_en": "OnePlus",
    "oppo_cn": "OPPO",
    "oppo_global_en": "OPPO",
    "realme_cn": "realme",
    "realme_global_en": "realme",
    "samsung_cn": "Samsung",
    "samsung_global_en": "Samsung",
    "smartisan": "Smartisan",
    "sony": "Sony",
    "sony_cn": "Sony",
    "vivo_cn": "vivo",
    "vivo_global_en": "vivo",
    "xiaomi": "Xiaomi",
    "xiaomi_cn": "Xiaomi",
    "xiaomi_en": "Xiaomi",
    "xiaomi-wear": "Xiaomi",
    "zhixuan": "HUAWEI Smart Selection",
    "zte_cn": "ZTE",
}

# Markdown file stem -> device type used when no keyword matches.
# Stems missing from this table fall back to "phone" in parse_brand_file().
FILE_DEFAULT_DEVICE_TYPE: Dict[str, str] = {
    "mitv_cn": "tv",
    "mitv_global_en": "tv",
    "xiaomi-wear": "wear",
    "apple_all": "phone",
    "apple_all_en": "phone",
    "apple_cn": "phone",
    "google": "phone",
    "honor_cn": "phone",
    "honor_global_en": "phone",
    "huawei_cn": "phone",
    "huawei_global_en": "phone",
    "xiaomi": "phone",
    "xiaomi_cn": "phone",
    "xiaomi_en": "phone",
    "zhixuan": "phone",
}

# Canonical brand -> raw spellings that should resolve to it.
BRAND_ALIASES: Dict[str, List[str]] = {
    "360": ["360", "360手机", "奇酷", "qiku"],
    "Apple": ["apple", "苹果", "iphone", "ipad", "ipod"],
    "ASUS": ["asus", "华硕", "rog", "zenfone"],
    "Black Shark": ["black shark", "blackshark", "黑鲨"],
    "Coolpad": ["coolpad", "酷派"],
    "Google": ["google", "pixel"],
    "HONOR": ["honor", "荣耀"],
    "HUAWEI": ["huawei", "华为"],
    "HUAWEI Smart Selection": ["华为智选", "zhixuan", "umagic", "wiko", "hi nova", "nzone"],
    "Lenovo": ["lenovo", "联想", "zuk", "拯救者"],
    "LeTV": ["letv", "乐视"],
    "Meizu": ["meizu", "魅族"],
    "Motorola": ["motorola", "摩托罗拉", "moto"],
    "Nokia": ["nokia", "诺基亚"],
    "Nothing": ["nothing", "cmf"],
    "nubia": ["nubia", "努比亚", "红魔", "redmagic"],
    "iQOO": ["iqoo", "i qoo", "艾酷"],
    "OnePlus": ["oneplus", "一加"],
    "OPPO": ["oppo"],
    "POCO": ["poco"],
    "Redmi": ["redmi", "红米", "hongmi"],
    "realme": ["realme", "真我"],
    "Samsung": ["samsung", "三星", "galaxy"],
    "Smartisan": ["smartisan", "锤子", "坚果"],
    "Sony": ["sony", "索尼", "xperia"],
    "vivo": ["vivo"],
    "Xiaomi": ["xiaomi", "小米", "mi", "米家", "mipad"],
    "ZTE": ["zte", "中兴"],
}

# Brand -> parent brand. Brands missing from this table are their own parent.
MANUFACTURER_PARENT_BRAND: Dict[str, str] = {
    "Black Shark": "Xiaomi",
    "HUAWEI Smart Selection": "HUAWEI",
    "Motorola": "Lenovo",
    "iQOO": "vivo",
    "POCO": "Xiaomi",
    "Redmi": "Xiaomi",
    "OnePlus": "OPPO",
    "realme": "OPPO",
    "nubia": "ZTE",
}

# Market (sub-)brand -> raw spellings. Exported verbatim into the JSON index.
MARKET_BRAND_ALIASES: Dict[str, List[str]] = {
    "iQOO": ["iqoo", "i qoo", "艾酷"],
    "POCO": ["poco"],
    "Redmi": ["redmi", "红米", "hongmi"],
    "Xiaomi": ["xiaomi", "小米", "mi", "mipad", "米家"],
}

# Market brand -> manufacturer whose markdown file lists its devices.
MARKET_BRAND_TO_MANUFACTURER: Dict[str, str] = {
    "iQOO": "vivo",
    "POCO": "Xiaomi",
    "Redmi": "Xiaomi",
    "Xiaomi": "Xiaomi",
}

# Keyword tables for infer_device_type(); matched as normalized substrings.
TV_KEYWORDS = [
    "tv",
    "电视",
    "智慧屏",
    "smart tv",
    "机顶盒",
    "tv box",
    "stick",
    "dongle",
]

TABLET_KEYWORDS = [
    "ipad",
    "tablet",
    "tab",
    "pad",
    "平板",
    "matepad",
]

WEAR_KEYWORDS = [
    "watch",
    "smartwatch",
    "手表",
    "手环",
    "band",
    "wear",
    "wearable",
    "buds",
    "earbuds",
    "耳机",
    "tws",
    "eyewear",
    "glasses",
    "眼镜",
]

OTHER_KEYWORDS = [
    "matebook",
    "笔记本",
    "laptop",
    "notebook",
    "vision",
    "vr",
    "ipod",
    "airpods",
]

PHONE_KEYWORDS = [
    "iphone",
    "phone",
    "手机",
    "galaxy",
    "pixel",
    "xiaomi",
    "redmi",
    "poco",
    "honor",
    "huawei",
    "mate",
    "nova",
    "oppo",
    "vivo",
    "realme",
    "oneplus",
    "nokia",
    "nubia",
    "meizu",
    "lenovo",
    "motorola",
    "zte",
    "smartisan",
    "zenfone",
    "rog",
    "麦芒",
    "畅享",
    "优畅享",
]


@dataclass
class DeviceRecord:
    """One device entry parsed from a brand markdown file."""

    # "<file stem>:<1-based position within that file>"
    id: str
    # Cleaned entry title.
    device_name: str
    # Canonical brand of the source file; parse_brand_file() stores the same
    # value in both `brand` and `manufacturer_brand`.
    brand: str
    manufacturer_brand: str
    # Result of resolve_parent_brand(manufacturer_brand).
    parent_brand: str
    # Result of infer_market_brand(), e.g. "Redmi" under manufacturer "Xiaomi".
    market_brand: str
    # Result of infer_device_type().
    device_type: str
    # Sorted, de-duplicated names and model codes that identify this device.
    aliases: List[str]
    # "brands/<file name>"
    source_file: str
    # Text of the most recent "## ..." heading, or "" if none was seen.
    section: str


def normalize_text(text: str) -> str:
    """Lower-case *text* and drop everything except 0-9, a-z and U+4E00..U+9FFF."""
    return re.sub(r"[^0-9a-z\u4e00-\u9fff]+", "", text.lower())


def canonical_brand(file_stem: str) -> str:
    """Return the canonical brand for a markdown file stem (the stem itself if unmapped)."""
    return FILE_BRAND_MAP.get(file_stem, file_stem)


def brand_aliases(brand: str) -> List[str]:
    """Return the sorted alias list for *brand*, always including *brand* itself."""
    aliases = set(BRAND_ALIASES.get(brand, []))
    aliases.add(brand)
    return sorted(aliases)


def has_keyword(text: str, keywords: Iterable[str]) -> bool:
    """Return True if any normalized keyword is a substring of normalized *text*.

    Keywords that normalize to an empty string are ignored, since the empty
    string is a substring of everything.
    """
    norm_text = normalize_text(text)
    for kw in keywords:
        norm_kw = normalize_text(kw)
        if norm_kw and norm_kw in norm_text:
            return True
    return False


def resolve_parent_brand(manufacturer_brand: str) -> str:
    """Return the parent brand of *manufacturer_brand* (itself if it has no parent)."""
    return MANUFACTURER_PARENT_BRAND.get(manufacturer_brand, manufacturer_brand)


def _mentions_market_brand(corpus: str, market_brand: str) -> bool:
    """Return True if normalized *corpus* contains any alias of *market_brand*."""
    for alias in MARKET_BRAND_ALIASES[market_brand]:
        key = normalize_text(alias)
        if key and key in corpus:
            return True
    return False


def infer_market_brand(
    manufacturer_brand: str,
    device_name: str,
    section: str,
    aliases: Iterable[str],
) -> str:
    """Infer the consumer-facing brand of a device.

    Only "Xiaomi" and "vivo" have sub-brands here: Xiaomi devices mentioning a
    POCO alias become "POCO" (checked first), those mentioning a Redmi alias
    become "Redmi"; vivo devices mentioning an iQOO alias become "iQOO". Every
    other manufacturer is returned unchanged.
    """
    corpus = normalize_text(" ".join([device_name, section, *aliases]))

    if manufacturer_brand == "Xiaomi":
        if _mentions_market_brand(corpus, "POCO"):
            return "POCO"
        if _mentions_market_brand(corpus, "Redmi"):
            return "Redmi"
        return "Xiaomi"

    if manufacturer_brand == "vivo":
        if _mentions_market_brand(corpus, "iQOO"):
            return "iQOO"
        return "vivo"

    return manufacturer_brand


def infer_device_type(
    device_name: str,
    section: str,
    source_file: str,
    aliases: Iterable[str],
    default_type: str,
) -> str:
    """Classify a device as "tv", "tablet", "wear", "other" or "phone".

    The keyword tables are tried in that fixed order against the combined
    device name, section, aliases and source file name; the first table with a
    hit wins. If nothing matches, *default_type* is returned, or "other" when
    *default_type* is empty.
    """
    corpus = " ".join([device_name, section, *aliases, source_file])
    if has_keyword(corpus, TV_KEYWORDS):
        return "tv"
    if has_keyword(corpus, TABLET_KEYWORDS):
        return "tablet"
    if has_keyword(corpus, WEAR_KEYWORDS):
        return "wear"
    if has_keyword(corpus, OTHER_KEYWORDS):
        return "other"
    if has_keyword(corpus, PHONE_KEYWORDS):
        return "phone"
    return default_type or "other"


def clean_entry_title(raw_title: str) -> str:
    """Reduce a raw ``**...**`` entry title to a plain device name.

    Strips, in order: a trailing colon, a leading ``[tag]``, trailing
    ``(`codename`)`` groups, a trailing ``(codename ...)`` / ``(代号 ...)``
    note, and markdown link syntax (keeping the link text). Runs of whitespace
    are collapsed to single spaces.
    """
    title = raw_title.strip()
    if title.endswith(":"):
        title = title[:-1].strip()

    # Remove a leading tag like [`X1`] or [X1]. The negative lookahead leaves a
    # leading markdown link "[Foo](url)" alone so the link rule below can keep
    # its text; without it the link text was deleted and "(url)" was left.
    title = re.sub(r"^\[[^\]]+\](?!\()\s*", "", title)

    # Remove one or more trailing codenames like: (`foo`) (`bar`)
    title = re.sub(r"(?:\s*\(\s*`[^`]+`\s*\))+\s*$", "", title)
    title = re.sub(r"\s*\((?:codename|代号)[^)]*\)\s*$", "", title, flags=re.IGNORECASE)

    # Strip markdown links while keeping text: [Foo](url) -> Foo
    title = re.sub(r"\[([^\]]+)\]\([^)]*\)", r"\1", title)
    title = " ".join(title.split())
    return title


def extract_codes(text: str) -> List[str]:
    """Return the stripped, non-empty contents of every `backtick` span in *text*."""
    return [code.strip() for code in BACKTICK_RE.findall(text) if code.strip()]


def parse_brand_file(path: Path) -> List[DeviceRecord]:
    """Parse one brand markdown file into device records.

    Recognized line shapes (after stripping surrounding whitespace):

    * ``## Section`` sets the section recorded on following entries.
    * ``**Title**`` starts a new device entry; the previous one is emitted.
    * ```` `CODE` `CODE2`: Variant name ```` adds the codes and the variant
      name as aliases of the current entry.

    Blank lines, and any line before the first entry that is not a section
    heading, are ignored. The file is read as UTF-8.
    """
    file_stem = path.stem
    brand = canonical_brand(file_stem)
    default_type = FILE_DEFAULT_DEVICE_TYPE.get(file_stem, "phone")

    records: List[DeviceRecord] = []
    lines = path.read_text(encoding="utf-8").splitlines()

    section = ""
    current_title = ""
    current_aliases: Set[str] = set()

    def flush_current() -> None:
        """Emit the entry being accumulated (if any) and reset the accumulator."""
        nonlocal current_title, current_aliases
        if not current_title:
            return

        aliases = sorted({alias.strip() for alias in current_aliases if alias.strip()})
        record_id = f"{file_stem}:{len(records) + 1}"
        device_type = infer_device_type(
            device_name=current_title,
            section=section,
            source_file=path.name,
            aliases=aliases,
            default_type=default_type,
        )
        records.append(
            DeviceRecord(
                id=record_id,
                device_name=current_title,
                brand=brand,
                manufacturer_brand=brand,
                parent_brand=resolve_parent_brand(brand),
                market_brand=infer_market_brand(
                    manufacturer_brand=brand,
                    device_name=current_title,
                    section=section,
                    aliases=aliases,
                ),
                device_type=device_type,
                aliases=aliases,
                source_file=f"brands/{path.name}",
                section=section,
            )
        )
        current_title = ""
        current_aliases = set()

    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        section_match = SECTION_RE.match(line)
        if section_match:
            # The pending entry is flushed later and therefore records the
            # section value current at flush time.
            section = section_match.group(1).strip()
            continue

        entry_match = ENTRY_RE.match(line)
        if entry_match:
            flush_current()
            raw_title = entry_match.group(1).strip()
            current_title = clean_entry_title(raw_title)
            current_aliases = set(extract_codes(raw_title))
            current_aliases.add(current_title)
            continue

        if not current_title:
            continue

        variant_match = VARIANT_RE.match(line)
        if variant_match:
            variant_codes = extract_codes(variant_match.group(1))
            variant_name = variant_match.group(2).strip()
            current_aliases.update(variant_codes)
            current_aliases.add(variant_name)

    flush_current()
    return records


class DeviceMapper:
    """In-memory alias index over device records with brand-aware lookup."""

    def __init__(self, records: List[DeviceRecord]) -> None:
        """Build the alias and brand lookup tables from *records*."""
        self.records = records
        self.records_by_id = {record.id: record for record in records}
        self.manufacturer_alias_lookup: Dict[str, str] = {}
        self.parent_alias_lookup: Dict[str, str] = {}
        self.market_alias_lookup: Dict[str, str] = {}
        self.parent_to_children: Dict[str, Set[str]] = {}
        # normalized alias -> ids of every record carrying that alias
        self.alias_index: Dict[str, Set[str]] = {}

        for record in records:
            for alias in record.aliases:
                key = normalize_text(alias)
                if not key:
                    continue
                self.alias_index.setdefault(key, set()).add(record.id)

        # Only brands that actually occur in the data get lookup entries.
        manufacturers = sorted({record.manufacturer_brand for record in records})
        parents = sorted({record.parent_brand for record in records})

        for brand in manufacturers:
            for alias in brand_aliases(brand):
                key = normalize_text(alias)
                if key:
                    self.manufacturer_alias_lookup[key] = brand

        for parent in parents:
            for alias in brand_aliases(parent):
                key = normalize_text(alias)
                if key:
                    self.parent_alias_lookup[key] = parent

        for manufacturer in manufacturers:
            parent = resolve_parent_brand(manufacturer)
            self.parent_to_children.setdefault(parent, set()).add(manufacturer)

        for market_brand, aliases in MARKET_BRAND_ALIASES.items():
            for alias in {market_brand, *aliases}:
                key = normalize_text(alias)
                if key:
                    self.market_alias_lookup[key] = market_brand

    @staticmethod
    def _make_brand_filter(
        parent_brand: Optional[str],
        manufacturer_brand: Optional[str],
        market_brand: Optional[str],
        source: str,
    ) -> Dict[str, Optional[str]]:
        """Assemble a brand-filter dict with the fixed key order callers serialize."""
        return {
            "parent_brand": parent_brand,
            "manufacturer_brand": manufacturer_brand,
            "market_brand": market_brand,
            "source": source,
        }

    def _manufacturer_filter(
        self,
        manufacturer_brand: str,
        match_kind: str,
    ) -> Dict[str, Optional[str]]:
        """Build the filter for a manufacturer alias hit ("exact" or "contains").

        A manufacturer that is its own parent and has more than one child
        manufacturer in the data is widened to a parent-level filter.
        """
        parent_brand = resolve_parent_brand(manufacturer_brand)
        children = self.parent_to_children.get(manufacturer_brand, set())
        if manufacturer_brand == parent_brand and len(children) > 1:
            return self._make_brand_filter(
                parent_brand, None, None, f"parent_alias_{match_kind}"
            )
        return self._make_brand_filter(
            parent_brand, manufacturer_brand, None, f"manufacturer_alias_{match_kind}"
        )

    def _parse_brand_filter(self, input_brand: Optional[str]) -> Dict[str, Optional[str]]:
        """Translate a raw brand string into a parent/manufacturer/market filter.

        Exact matches of the normalized input are tried first (market, then
        manufacturer, then parent aliases); after that the same three tables
        are scanned for an alias that is a substring of the input. The
        ``"source"`` entry names the rule that fired, or ``"none"``.

        NOTE(review): the substring pass accepts short aliases such as "mi"
        anywhere in the input, and unlike the exact pass it does not drop the
        market filter for "Xiaomi" - confirm both are intended.
        """
        no_filter = self._make_brand_filter(None, None, None, "none")
        if not input_brand:
            return no_filter

        input_norm = normalize_text(input_brand)
        if not input_norm:
            return no_filter

        # --- exact matches -------------------------------------------------
        if input_norm in self.market_alias_lookup:
            market_brand = self.market_alias_lookup[input_norm]
            manufacturer_brand = MARKET_BRAND_TO_MANUFACTURER.get(market_brand, market_brand)
            parent_brand = resolve_parent_brand(manufacturer_brand)
            if market_brand == "Xiaomi":
                # "Xiaomi" is treated as the manufacturer here, so the filter
                # carries no market brand and sub-brand records still match.
                return self._make_brand_filter(
                    parent_brand, manufacturer_brand, None, "manufacturer_alias_from_market"
                )
            return self._make_brand_filter(
                parent_brand, manufacturer_brand, market_brand, "market_alias_exact"
            )

        if input_norm in self.manufacturer_alias_lookup:
            return self._manufacturer_filter(
                self.manufacturer_alias_lookup[input_norm], "exact"
            )

        if input_norm in self.parent_alias_lookup:
            return self._make_brand_filter(
                self.parent_alias_lookup[input_norm], None, None, "parent_alias_exact"
            )

        # --- substring matches (first alias in table order wins) ------------
        for alias_norm, market_brand in self.market_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                manufacturer_brand = MARKET_BRAND_TO_MANUFACTURER.get(market_brand, market_brand)
                return self._make_brand_filter(
                    resolve_parent_brand(manufacturer_brand),
                    manufacturer_brand,
                    market_brand,
                    "market_alias_contains",
                )

        for alias_norm, manufacturer_brand in self.manufacturer_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                return self._manufacturer_filter(manufacturer_brand, "contains")

        for alias_norm, parent_brand in self.parent_alias_lookup.items():
            if alias_norm and alias_norm in input_norm:
                return self._make_brand_filter(
                    parent_brand, None, None, "parent_alias_contains"
                )

        return no_filter

    @staticmethod
    def _brand_match(
        brand_filter: Dict[str, Optional[str]],
        record: DeviceRecord,
    ) -> bool:
        """Return True if *record* satisfies every non-empty field of *brand_filter*."""
        parent = brand_filter.get("parent_brand")
        manufacturer = brand_filter.get("manufacturer_brand")
        market = brand_filter.get("market_brand")

        if parent and record.parent_brand != parent:
            return False
        if manufacturer and record.manufacturer_brand != manufacturer:
            return False
        if market and record.market_brand != market:
            return False
        return True

    def find(self, name: str, brand: Optional[str] = None, limit: int = 5) -> Dict[str, object]:
        """Look up a device by exact (normalized) alias, optionally narrowed by brand.

        Args:
            name: Raw device name or model code.
            brand: Optional raw brand string; parsed by ``_parse_brand_filter``.
            limit: Maximum number of candidates (on a hit) or suggestions (on a
                miss). Values below zero are treated as zero.

        Returns:
            A JSON-serializable dict. ``"matched"`` tells whether any record
            matched; on a hit ``"best"`` is the first candidate after sorting
            by (device_name, source_file, id); on a miss ``"suggestions"``
            lists indexed aliases that contain, or are contained in, the query.
        """
        query = normalize_text(name)
        if not query:
            return {
                "matched": False,
                "reason": "Empty device name.",
                "query_name": name,
                "query_brand": brand,
                "candidates": [],
            }

        # A non-positive limit must yield no candidates and no suggestions;
        # unclamped, a negative slice bound would drop items from the end and
        # the suggestion loop would still emit one entry.
        max_results = max(limit, 0)

        candidates = [self.records_by_id[rid] for rid in self.alias_index.get(query, ())]
        matched_records = candidates
        brand_filter = self._parse_brand_filter(brand)

        if brand:
            matched_records = [r for r in candidates if self._brand_match(brand_filter, r)]
            if not matched_records and brand_filter.get("manufacturer_brand"):
                # Retry without the market-brand constraint.
                fallback_filter = {
                    "parent_brand": brand_filter.get("parent_brand"),
                    "manufacturer_brand": brand_filter.get("manufacturer_brand"),
                    "market_brand": None,
                }
                matched_records = [
                    r for r in candidates if self._brand_match(fallback_filter, r)
                ]

        matched_records = sorted(
            matched_records, key=lambda r: (r.device_name, r.source_file, r.id)
        )

        if matched_records:
            best = matched_records[0]
            return {
                "matched": True,
                "query_name": name,
                "query_brand": brand,
                "query_brand_parsed": brand_filter,
                "best": asdict(best),
                "candidates": [asdict(r) for r in matched_records[:max_results]],
            }

        suggestions: List[str] = []
        if max_results:
            for alias in self.alias_index:
                if query in alias or alias in query:
                    suggestions.append(alias)
                    if len(suggestions) >= max_results:
                        break

        return {
            "matched": False,
            "query_name": name,
            "query_brand": brand,
            "query_brand_parsed": brand_filter,
            "reason": "No exact alias match.",
            "candidates": [],
            "suggestions": suggestions,
        }


def build_records(repo_root: Path) -> List[DeviceRecord]:
    """Parse every ``*.md`` file under ``<repo_root>/brands`` in sorted path order."""
    brands_dir = repo_root / "brands"
    records: List[DeviceRecord] = []
    for md_path in sorted(brands_dir.glob("*.md")):
        records.extend(parse_brand_file(md_path))
    return records


def export_index(records: List[DeviceRecord], output_path: Path) -> None:
    """Write the JSON index for *records* to *output_path* (UTF-8, indent 2).

    Parent directories are created as needed. In the output,
    ``brand_management.manufacturer_to_parent`` and
    ``brand_management.brand_to_manufacturer`` carry the same mapping, as do
    ``brand_aliases`` and ``brand_management.manufacturer_aliases``.
    """
    manufacturer_brands_in_data = sorted({record.manufacturer_brand for record in records})
    parent_brands_in_data = sorted({record.parent_brand for record in records})
    market_brands_in_data = sorted({record.market_brand for record in records})
    all_brands_in_data = sorted(
        set(manufacturer_brands_in_data)
        | set(market_brands_in_data)
        | set(MARKET_BRAND_TO_MANUFACTURER.keys())
    )

    manufacturer_stats = dict(
        sorted(Counter(record.manufacturer_brand for record in records).items())
    )
    parent_stats = dict(sorted(Counter(record.parent_brand for record in records).items()))
    market_brand_stats = dict(
        sorted(Counter(record.market_brand for record in records).items())
    )

    # Market brands use the explicit table; everything else resolves through
    # the parent-brand table.
    brand_to_manufacturer: Dict[str, str] = {}
    for brand in all_brands_in_data:
        if brand in MARKET_BRAND_TO_MANUFACTURER:
            brand_to_manufacturer[brand] = MARKET_BRAND_TO_MANUFACTURER[brand]
        else:
            brand_to_manufacturer[brand] = resolve_parent_brand(brand)

    parent_to_children: Dict[str, List[str]] = {}
    for child, parent in brand_to_manufacturer.items():
        parent_to_children.setdefault(parent, []).append(child)
    for parent in parent_to_children:
        parent_to_children[parent] = sorted(parent_to_children[parent])

    all_aliases = {brand: brand_aliases(brand) for brand in all_brands_in_data}

    # normalized alias -> sorted unique record ids
    lookup: Dict[str, List[str]] = {}
    for record in records:
        for alias in record.aliases:
            key = normalize_text(alias)
            if not key:
                continue
            lookup.setdefault(key, []).append(record.id)
    for key, ids in lookup.items():
        lookup[key] = sorted(set(ids))

    output = {
        "generated_on": date.today().isoformat(),
        "total_records": len(records),
        "brands": manufacturer_brands_in_data,
        "brand_aliases": all_aliases,
        "brand_management": {
            "brands": all_brands_in_data,
            "manufacturers": sorted(set(brand_to_manufacturer.values())),
            "manufacturer_aliases": all_aliases,
            "manufacturer_to_parent": brand_to_manufacturer,
            "brand_to_manufacturer": brand_to_manufacturer,
            "parent_to_children": parent_to_children,
            "parent_aliases": {
                brand: brand_aliases(brand) for brand in parent_brands_in_data
            },
            "market_brand_aliases": MARKET_BRAND_ALIASES,
            "market_brand_to_manufacturer": MARKET_BRAND_TO_MANUFACTURER,
            "market_brands": market_brands_in_data,
            "parent_brands": parent_brands_in_data,
            "stats": {
                "manufacturer_brand": manufacturer_stats,
                "parent_brand": parent_stats,
                "market_brand": market_brand_stats,
            },
        },
        "lookup": lookup,
        "records": [asdict(r) for r in records],
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(output, ensure_ascii=False, indent=2), encoding="utf-8")


def main() -> None:
    """Command-line entry point: ``build`` the JSON index or ``find`` a device."""
    parser = argparse.ArgumentParser(description="MobileModels device mapper")
    parser.add_argument(
        "--repo-root",
        type=Path,
        default=WORKSPACE_ROOT,
        help="Path to workspace root",
    )

    subparsers = parser.add_subparsers(dest="command", required=True)

    build_cmd = subparsers.add_parser("build", help="Build JSON index")
    build_cmd.add_argument(
        "--output",
        type=Path,
        default=Path("dist/device_index.json"),
        help="Output JSON path",
    )

    find_cmd = subparsers.add_parser("find", help="Find a device by name + optional brand")
    find_cmd.add_argument("--name", required=True, help="Raw device name from app")
    find_cmd.add_argument("--brand", default=None, help="Optional raw brand from app")
    find_cmd.add_argument("--limit", type=int, default=5, help="Max matched candidates")

    args = parser.parse_args()

    # Globbing a missing directory yields nothing, which would otherwise
    # produce an empty index or a misleading "no match" without any hint.
    brands_dir = args.repo_root / "brands"
    if not brands_dir.is_dir():
        parser.error(f"brands directory not found: {brands_dir}")

    records = build_records(args.repo_root)

    if args.command == "build":
        output_path: Path = args.output
        if not output_path.is_absolute():
            # Relative output paths are anchored at the project root, not the CWD.
            output_path = PROJECT_ROOT / output_path
        export_index(records, output_path)
        print(f"Built index: {output_path}")
        print(f"Total records: {len(records)}")
        return

    if args.command == "find":
        # The mapper is only needed for lookups, so it is not built for "build".
        mapper = DeviceMapper(records)
        result = mapper.find(name=args.name, brand=args.brand, limit=args.limit)
        print(json.dumps(result, ensure_ascii=False, indent=2))
        return


if __name__ == "__main__":
    main()