Derive variant display names from upstream data

This commit is contained in:
2026-04-24 10:50:55 +08:00
parent 89b89d4f19
commit 3fe6876ef0
6 changed files with 17961 additions and 17152 deletions
+10813 -10090
View File
File diff suppressed because it is too large Load Diff
+7074 -6978
View File
File diff suppressed because it is too large Load Diff
+50 -6
View File
@@ -463,6 +463,40 @@ def extract_codes(text: str) -> List[str]:
return [code.strip() for code in BACKTICK_RE.findall(text) if code.strip()]
def infer_base_variant_name(variant_name: str, entry_title: str) -> Optional[str]:
    """Reduce a variant label to a base device name found in the entry title.

    Only the part of ``variant_name`` before the first whitespace-delimited
    ``/`` is considered. One trailing Chinese region suffix and then one
    trailing English region/carrier suffix are removed, and runs of
    whitespace are collapsed to single spaces.

    Returns:
        The cleaned base name, or ``None`` when nothing is left or when the
        normalized base name does not occur inside the normalized
        ``entry_title`` (normalization is delegated to ``normalize_text``).
    """
    leading_part = re.split(r"\s+/\s+", variant_name.strip(), maxsplit=1)[0].strip()
    if leading_part == "":
        return None

    # Trailing Chinese market marker, e.g. "国行版" / "印度版".
    without_cn_region = re.sub(
        r"\s*(?:国行版|国内版|中国版|印度版|欧洲版|国际版|北美版|日本版|韩国版|港版|台版|海外版)\s*$",
        "",
        leading_part,
    )
    # Trailing English market or carrier marker, matched case-insensitively.
    without_en_region = re.sub(
        r"\s+(?:China|Chinese|India|Europe|European|Global|International|North America|North American|Japan|Korea|Hong Kong|Taiwan|US|USA|T-Mobile|Verizon|AT&T|SIM Free|SoftBank)\s*$",
        "",
        without_cn_region,
        flags=re.IGNORECASE,
    )

    candidate = " ".join(without_en_region.split())
    # The candidate only counts when the entry title itself mentions it.
    if candidate and normalize_text(candidate) in normalize_text(entry_title):
        return candidate
    return None
def split_variant_groups(
    entry_title: str,
    title_codes: Iterable[str],
    variants: list[tuple[list[str], str]],
) -> list[tuple[str, Set[str]]]:
    """Partition an entry's variants into per-device alias groups.

    Each variant is mapped to a base device name via
    ``infer_base_variant_name``. Variants sharing a base name are merged into
    one group whose alias set starts from ``title_codes`` and accumulates the
    base name, the raw variant name and the variant's codes.

    Args:
        entry_title: Title of the entry the variants belong to.
        title_codes: Codes taken from the entry title; copied into every group.
        variants: ``(variant_codes, variant_name)`` pairs.

    Returns:
        ``(base_name, aliases)`` pairs in first-seen order, or an empty list
        when any variant has no inferable base name or when fewer than two
        distinct base names were found (i.e. there is nothing to split).
    """
    # Materialize once: title_codes is only promised to be an Iterable, so a
    # one-shot iterator would otherwise be exhausted by the first group and
    # leave every later group without the title codes. This also avoids
    # building a throwaway set for each variant.
    shared_codes = tuple(title_codes)

    groups: dict[str, Set[str]] = {}
    for variant_codes, variant_name in variants:
        base_name = infer_base_variant_name(variant_name, entry_title)
        if not base_name:
            # A single unclassifiable variant makes the whole split unreliable.
            return []
        aliases = groups.get(base_name)
        if aliases is None:
            # Fresh, independent copy per group so groups never share state.
            aliases = groups[base_name] = set(shared_codes)
        aliases.add(base_name)
        aliases.add(variant_name)
        aliases.update(variant_codes)

    if len(groups) < 2:
        return []
    return list(groups.items())
def parse_brand_file(path: Path) -> List[DeviceRecord]:
file_stem = path.stem
brand = canonical_brand(file_stem)
@@ -473,17 +507,22 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
section = ""
current_title = ""
current_title_codes: List[str] = []
current_aliases: Set[str] = set()
current_variants: list[tuple[list[str], str]] = []
def flush_current() -> None:
nonlocal current_title, current_aliases
nonlocal current_title, current_title_codes, current_aliases, current_variants
if not current_title:
return
aliases = sorted({alias.strip() for alias in current_aliases if alias.strip()})
split_groups = split_variant_groups(current_title, current_title_codes, current_variants)
record_groups = split_groups or [(current_title, current_aliases)]
for device_name, raw_aliases in record_groups:
aliases = sorted({alias.strip() for alias in raw_aliases if alias.strip()})
record_id = f"{file_stem}:{len(records) + 1}"
device_type = infer_device_type(
device_name=current_title,
device_name=device_name,
section=section,
source_file=path.name,
aliases=aliases,
@@ -492,13 +531,13 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
records.append(
DeviceRecord(
id=record_id,
device_name=current_title,
device_name=device_name,
brand=brand,
manufacturer_brand=brand,
parent_brand=resolve_parent_brand(brand),
market_brand=infer_market_brand(
manufacturer_brand=brand,
device_name=current_title,
device_name=device_name,
section=section,
aliases=aliases,
),
@@ -509,7 +548,9 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
)
)
current_title = ""
current_title_codes = []
current_aliases = set()
current_variants = []
for raw in lines:
line = raw.strip()
@@ -527,8 +568,10 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
flush_current()
raw_title = entry_match.group(1).strip()
current_title = clean_entry_title(raw_title)
current_aliases = set(extract_codes(raw_title))
current_title_codes = extract_codes(raw_title)
current_aliases = set(current_title_codes)
current_aliases.add(current_title)
current_variants = []
continue
if not current_title:
@@ -538,6 +581,7 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
if variant_match:
variant_codes = extract_codes(variant_match.group(1))
variant_name = variant_match.group(2).strip()
current_variants.append((variant_codes, variant_name))
current_aliases.update(variant_codes)
current_aliases.add(variant_name)
-50
View File
@@ -23,41 +23,6 @@ SYNC_PATHS = [
"LICENSE.txt",
]
# Local text corrections keyed by file path. Each value is a list of
# (old_text, new_text) pairs; apply_local_corrections() resolves the key
# against WORKSPACE_ROOT and substitutes old_text with new_text via str.replace.
# NOTE(review): old_text has to match the file content exactly for the
# replacement to take effect, so do not reformat the multi-line literals below.
LOCAL_TEXT_REPLACEMENTS = {
# Chinese OnePlus list: splits the combined Ace 3 / 12R heading into two headings.
"brands/oneplus.md": [
(
"""**一加 Ace 3 / 一加 12R (`aston`) / 一加 Ace 3 原神刻晴定制机 / 一加 12R Genshin Impact Edition (`martin`):**
`PJE110`: 一加 Ace 3 国行版 / 原神刻晴定制机
`CPH2585`: 一加 12R 印度版 / 原神刻晴定制机""",
"""**一加 Ace 3 (`aston`):**
`PJE110`: 一加 Ace 3 国行版 / 原神刻晴定制机
**一加 12R (`aston`):**
`CPH2585`: 一加 12R 印度版 / 原神刻晴定制机""",
),
],
# English OnePlus list: same split as above.
"brands/oneplus_en.md": [
(
"""**OnePlus Ace 3 / OnePlus 12R (`aston`) / OnePlus Ace 3 Genshin Impact Edition / OnePlus 12R Genshin Impact Edition (`martin`):**
`PJE110`: OnePlus Ace 3 China / Genshin Impact Edition
`CPH2585`: OnePlus 12R India / Genshin Impact Edition""",
"""**OnePlus Ace 3 (`aston`):**
`PJE110`: OnePlus Ace 3 China / Genshin Impact Edition
**OnePlus 12R (`aston`):**
`CPH2585`: OnePlus 12R India / Genshin Impact Edition""",
),
],
}
def run(cmd: list[str], cwd: Path | None = None) -> None:
    """Execute ``cmd`` as a subprocess and raise if it exits non-zero.

    Args:
        cmd: Argument vector handed to ``subprocess.run``.
        cwd: Working directory for the command; ``PROJECT_ROOT`` is used
            when this is omitted or falsy.

    Raises:
        subprocess.CalledProcessError: The command returned a non-zero status.
    """
    working_dir = cwd if cwd else PROJECT_ROOT
    subprocess.run(cmd, cwd=working_dir, check=True)
@@ -97,21 +62,6 @@ def sync_selected_paths(upstream_root: Path) -> None:
raise FileNotFoundError(f"Missing upstream path: {relative_path}")
sync_path(src, dst)
apply_local_corrections()
def apply_local_corrections() -> None:
    """Apply every entry of ``LOCAL_TEXT_REPLACEMENTS`` to the workspace files.

    Each key is resolved against ``WORKSPACE_ROOT``. Files that do not exist
    are skipped, and a file is rewritten (as UTF-8) only when at least one
    replacement actually changed its content.
    """
    for rel_path, pairs in LOCAL_TEXT_REPLACEMENTS.items():
        target = WORKSPACE_ROOT / rel_path
        if target.exists():
            original = target.read_text(encoding="utf-8")
            patched = original
            for needle, substitute in pairs:
                patched = patched.replace(needle, substitute)
            if patched == original:
                # Nothing matched; leave the file untouched.
                continue
            target.write_text(patched, encoding="utf-8")
def build_index(output_path: str) -> None:
run(
+1 -3
View File
@@ -280,12 +280,10 @@
`CPH2583`: 一加 12 北美版
**一加 Ace 3 (`aston`):**
**一加 Ace 3 / 一加 12R (`aston`) / 一加 Ace 3 原神刻晴定制机 / 一加 12R Genshin Impact Edition (`martin`):**
`PJE110`: 一加 Ace 3 国行版 / 原神刻晴定制机
**一加 12R (`aston`):**
`CPH2585`: 一加 12R 印度版 / 原神刻晴定制机
`CPH2609`: 一加 12R 欧洲版 / 国际版 / 原神刻晴定制机
+1 -3
View File
@@ -279,12 +279,10 @@
`CPH2583`: OnePlus 12 North America
**OnePlus Ace 3 (`aston`):**
**OnePlus Ace 3 / OnePlus 12R (`aston`) / OnePlus Ace 3 Genshin Impact Edition / OnePlus 12R Genshin Impact Edition (`martin`):**
`PJE110`: OnePlus Ace 3 China / Genshin Impact Edition
**OnePlus 12R (`aston`):**
`CPH2585`: OnePlus 12R India / Genshin Impact Edition
`CPH2609`: OnePlus 12R Europe / Global / Genshin Impact Edition