Derive variant display names from upstream data
This commit is contained in:
Vendored
+10813
-10090
File diff suppressed because it is too large
Load Diff
Vendored
+7074
-6978
File diff suppressed because it is too large
Load Diff
+50
-6
@@ -463,6 +463,40 @@ def extract_codes(text: str) -> List[str]:
|
|||||||
return [code.strip() for code in BACKTICK_RE.findall(text) if code.strip()]
|
return [code.strip() for code in BACKTICK_RE.findall(text) if code.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def infer_base_variant_name(variant_name: str, entry_title: str) -> Optional[str]:
    """Reduce a variant label to the model name it shares with the entry title.

    Only the text before the first whitespace-delimited ``/`` in
    ``variant_name`` is considered.  A trailing Chinese market suffix (for
    example ``国行版``) and then a trailing English qualifier word (for example
    ``India`` or ``Verizon``, matched case-insensitively) are removed, and
    runs of whitespace are collapsed to single spaces.

    Returns:
        The remaining name when it is non-empty and its ``normalize_text``
        form occurs inside the ``normalize_text`` form of ``entry_title``;
        otherwise ``None``.
    """
    chinese_suffix = r"\s*(?:国行版|国内版|中国版|印度版|欧洲版|国际版|北美版|日本版|韩国版|港版|台版|海外版)\s*$"
    english_suffix = r"\s+(?:China|Chinese|India|Europe|European|Global|International|North America|North American|Japan|Korea|Hong Kong|Taiwan|US|USA|T-Mobile|Verizon|AT&T|SIM Free|SoftBank)\s*$"

    # Everything after the first " / " separator is treated as a secondary label.
    leading_label = re.split(r"\s+/\s+", variant_name.strip(), maxsplit=1)[0].strip()
    if not leading_label:
        return None

    without_chinese = re.sub(chinese_suffix, "", leading_label)
    without_qualifier = re.sub(english_suffix, "", without_chinese, flags=re.IGNORECASE)
    candidate = " ".join(without_qualifier.split())

    # The candidate is only accepted when the entry title actually contains it.
    if candidate and normalize_text(candidate) in normalize_text(entry_title):
        return candidate
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def split_variant_groups(entry_title: str, title_codes: Iterable[str], variants: list[tuple[list[str], str]]) -> list[tuple[str, Set[str]]]:
    """Partition an entry's variants into alias groups keyed by base name.

    Each variant is assigned to the base name that
    ``infer_base_variant_name`` derives from its label and ``entry_title``.
    Every group's alias set starts from ``title_codes`` and then gains the
    base name, the variant label and the variant's codes.

    Args:
        entry_title: Title of the entry the variants belong to.
        title_codes: Codes taken from the entry title; copied into every group.
        variants: ``(codes, label)`` pairs, one per variant line.

    Returns:
        ``(base_name, aliases)`` pairs in first-seen order, or an empty list
        when any variant has no inferable base name or when fewer than two
        distinct base names were found.
    """
    # Materialise once: ``title_codes`` is only promised to be iterable, so a
    # one-shot iterator would otherwise seed the first group and leave every
    # later group without the title codes.
    shared_codes = frozenset(title_codes)

    groups: dict[str, Set[str]] = {}
    for variant_codes, variant_name in variants:
        base_name = infer_base_variant_name(variant_name, entry_title)
        if not base_name:
            # A single unattributable variant aborts the split entirely.
            return []
        aliases = groups.get(base_name)
        if aliases is None:
            aliases = groups[base_name] = set(shared_codes)
        aliases.add(base_name)
        aliases.add(variant_name)
        aliases.update(variant_codes)

    # With fewer than two distinct base names there is nothing to split.
    if len(groups) < 2:
        return []
    return list(groups.items())
|
||||||
|
|
||||||
|
|
||||||
def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
||||||
file_stem = path.stem
|
file_stem = path.stem
|
||||||
brand = canonical_brand(file_stem)
|
brand = canonical_brand(file_stem)
|
||||||
@@ -473,17 +507,22 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
|||||||
|
|
||||||
section = ""
|
section = ""
|
||||||
current_title = ""
|
current_title = ""
|
||||||
|
current_title_codes: List[str] = []
|
||||||
current_aliases: Set[str] = set()
|
current_aliases: Set[str] = set()
|
||||||
|
current_variants: list[tuple[list[str], str]] = []
|
||||||
|
|
||||||
def flush_current() -> None:
|
def flush_current() -> None:
|
||||||
nonlocal current_title, current_aliases
|
nonlocal current_title, current_title_codes, current_aliases, current_variants
|
||||||
if not current_title:
|
if not current_title:
|
||||||
return
|
return
|
||||||
|
|
||||||
aliases = sorted({alias.strip() for alias in current_aliases if alias.strip()})
|
split_groups = split_variant_groups(current_title, current_title_codes, current_variants)
|
||||||
|
record_groups = split_groups or [(current_title, current_aliases)]
|
||||||
|
for device_name, raw_aliases in record_groups:
|
||||||
|
aliases = sorted({alias.strip() for alias in raw_aliases if alias.strip()})
|
||||||
record_id = f"{file_stem}:{len(records) + 1}"
|
record_id = f"{file_stem}:{len(records) + 1}"
|
||||||
device_type = infer_device_type(
|
device_type = infer_device_type(
|
||||||
device_name=current_title,
|
device_name=device_name,
|
||||||
section=section,
|
section=section,
|
||||||
source_file=path.name,
|
source_file=path.name,
|
||||||
aliases=aliases,
|
aliases=aliases,
|
||||||
@@ -492,13 +531,13 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
|||||||
records.append(
|
records.append(
|
||||||
DeviceRecord(
|
DeviceRecord(
|
||||||
id=record_id,
|
id=record_id,
|
||||||
device_name=current_title,
|
device_name=device_name,
|
||||||
brand=brand,
|
brand=brand,
|
||||||
manufacturer_brand=brand,
|
manufacturer_brand=brand,
|
||||||
parent_brand=resolve_parent_brand(brand),
|
parent_brand=resolve_parent_brand(brand),
|
||||||
market_brand=infer_market_brand(
|
market_brand=infer_market_brand(
|
||||||
manufacturer_brand=brand,
|
manufacturer_brand=brand,
|
||||||
device_name=current_title,
|
device_name=device_name,
|
||||||
section=section,
|
section=section,
|
||||||
aliases=aliases,
|
aliases=aliases,
|
||||||
),
|
),
|
||||||
@@ -509,7 +548,9 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
current_title = ""
|
current_title = ""
|
||||||
|
current_title_codes = []
|
||||||
current_aliases = set()
|
current_aliases = set()
|
||||||
|
current_variants = []
|
||||||
|
|
||||||
for raw in lines:
|
for raw in lines:
|
||||||
line = raw.strip()
|
line = raw.strip()
|
||||||
@@ -527,8 +568,10 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
|||||||
flush_current()
|
flush_current()
|
||||||
raw_title = entry_match.group(1).strip()
|
raw_title = entry_match.group(1).strip()
|
||||||
current_title = clean_entry_title(raw_title)
|
current_title = clean_entry_title(raw_title)
|
||||||
current_aliases = set(extract_codes(raw_title))
|
current_title_codes = extract_codes(raw_title)
|
||||||
|
current_aliases = set(current_title_codes)
|
||||||
current_aliases.add(current_title)
|
current_aliases.add(current_title)
|
||||||
|
current_variants = []
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not current_title:
|
if not current_title:
|
||||||
@@ -538,6 +581,7 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
|||||||
if variant_match:
|
if variant_match:
|
||||||
variant_codes = extract_codes(variant_match.group(1))
|
variant_codes = extract_codes(variant_match.group(1))
|
||||||
variant_name = variant_match.group(2).strip()
|
variant_name = variant_match.group(2).strip()
|
||||||
|
current_variants.append((variant_codes, variant_name))
|
||||||
current_aliases.update(variant_codes)
|
current_aliases.update(variant_codes)
|
||||||
current_aliases.add(variant_name)
|
current_aliases.add(variant_name)
|
||||||
|
|
||||||
|
|||||||
@@ -23,41 +23,6 @@ SYNC_PATHS = [
|
|||||||
"LICENSE.txt",
|
"LICENSE.txt",
|
||||||
]
|
]
|
||||||
|
|
||||||
LOCAL_TEXT_REPLACEMENTS = {
|
|
||||||
"brands/oneplus.md": [
|
|
||||||
(
|
|
||||||
"""**一加 Ace 3 / 一加 12R (`aston`) / 一加 Ace 3 原神刻晴定制机 / 一加 12R Genshin Impact Edition (`martin`):**
|
|
||||||
|
|
||||||
`PJE110`: 一加 Ace 3 国行版 / 原神刻晴定制机
|
|
||||||
|
|
||||||
`CPH2585`: 一加 12R 印度版 / 原神刻晴定制机""",
|
|
||||||
"""**一加 Ace 3 (`aston`):**
|
|
||||||
|
|
||||||
`PJE110`: 一加 Ace 3 国行版 / 原神刻晴定制机
|
|
||||||
|
|
||||||
**一加 12R (`aston`):**
|
|
||||||
|
|
||||||
`CPH2585`: 一加 12R 印度版 / 原神刻晴定制机""",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
"brands/oneplus_en.md": [
|
|
||||||
(
|
|
||||||
"""**OnePlus Ace 3 / OnePlus 12R (`aston`) / OnePlus Ace 3 Genshin Impact Edition / OnePlus 12R Genshin Impact Edition (`martin`):**
|
|
||||||
|
|
||||||
`PJE110`: OnePlus Ace 3 China / Genshin Impact Edition
|
|
||||||
|
|
||||||
`CPH2585`: OnePlus 12R India / Genshin Impact Edition""",
|
|
||||||
"""**OnePlus Ace 3 (`aston`):**
|
|
||||||
|
|
||||||
`PJE110`: OnePlus Ace 3 China / Genshin Impact Edition
|
|
||||||
|
|
||||||
**OnePlus 12R (`aston`):**
|
|
||||||
|
|
||||||
`CPH2585`: OnePlus 12R India / Genshin Impact Edition""",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def run(cmd: list[str], cwd: Path | None = None) -> None:
    """Execute ``cmd`` as a subprocess and fail loudly on a non-zero exit.

    Args:
        cmd: Argument vector handed to ``subprocess.run``.
        cwd: Working directory for the command; ``PROJECT_ROOT`` is used
            when this is ``None`` (or otherwise falsy).

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status (``check=True``).
    """
    working_dir = cwd if cwd else PROJECT_ROOT
    subprocess.run(cmd, cwd=working_dir, check=True)
|
||||||
@@ -97,21 +62,6 @@ def sync_selected_paths(upstream_root: Path) -> None:
|
|||||||
raise FileNotFoundError(f"Missing upstream path: {relative_path}")
|
raise FileNotFoundError(f"Missing upstream path: {relative_path}")
|
||||||
sync_path(src, dst)
|
sync_path(src, dst)
|
||||||
|
|
||||||
apply_local_corrections()
|
|
||||||
|
|
||||||
|
|
||||||
def apply_local_corrections() -> None:
    """Apply the substitutions in ``LOCAL_TEXT_REPLACEMENTS`` to workspace files.

    For each relative path in ``LOCAL_TEXT_REPLACEMENTS`` that exists under
    ``WORKSPACE_ROOT``, the file is read as UTF-8, every ``(old, new)`` pair
    is applied in order with ``str.replace``, and the file is written back
    only when the text actually changed.  Missing files are skipped.
    """
    for relative_path, replacements in LOCAL_TEXT_REPLACEMENTS.items():
        target = WORKSPACE_ROOT / relative_path
        if not target.exists():
            continue

        original = target.read_text(encoding="utf-8")
        patched = original
        for needle, substitute in replacements:
            patched = patched.replace(needle, substitute)

        # Leave the file alone when no replacement matched.
        if patched == original:
            continue
        target.write_text(patched, encoding="utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def build_index(output_path: str) -> None:
|
def build_index(output_path: str) -> None:
|
||||||
run(
|
run(
|
||||||
|
|||||||
@@ -280,12 +280,10 @@
|
|||||||
|
|
||||||
`CPH2583`: 一加 12 北美版
|
`CPH2583`: 一加 12 北美版
|
||||||
|
|
||||||
**一加 Ace 3 (`aston`):**
|
**一加 Ace 3 / 一加 12R (`aston`) / 一加 Ace 3 原神刻晴定制机 / 一加 12R Genshin Impact Edition (`martin`):**
|
||||||
|
|
||||||
`PJE110`: 一加 Ace 3 国行版 / 原神刻晴定制机
|
`PJE110`: 一加 Ace 3 国行版 / 原神刻晴定制机
|
||||||
|
|
||||||
**一加 12R (`aston`):**
|
|
||||||
|
|
||||||
`CPH2585`: 一加 12R 印度版 / 原神刻晴定制机
|
`CPH2585`: 一加 12R 印度版 / 原神刻晴定制机
|
||||||
|
|
||||||
`CPH2609`: 一加 12R 欧洲版 / 国际版 / 原神刻晴定制机
|
`CPH2609`: 一加 12R 欧洲版 / 国际版 / 原神刻晴定制机
|
||||||
|
|||||||
@@ -279,12 +279,10 @@
|
|||||||
|
|
||||||
`CPH2583`: OnePlus 12 North America
|
`CPH2583`: OnePlus 12 North America
|
||||||
|
|
||||||
**OnePlus Ace 3 (`aston`):**
|
**OnePlus Ace 3 / OnePlus 12R (`aston`) / OnePlus Ace 3 Genshin Impact Edition / OnePlus 12R Genshin Impact Edition (`martin`):**
|
||||||
|
|
||||||
`PJE110`: OnePlus Ace 3 China / Genshin Impact Edition
|
`PJE110`: OnePlus Ace 3 China / Genshin Impact Edition
|
||||||
|
|
||||||
**OnePlus 12R (`aston`):**
|
|
||||||
|
|
||||||
`CPH2585`: OnePlus 12R India / Genshin Impact Edition
|
`CPH2585`: OnePlus 12R India / Genshin Impact Edition
|
||||||
|
|
||||||
`CPH2609`: OnePlus 12R Europe / Global / Genshin Impact Edition
|
`CPH2609`: OnePlus 12R Europe / Global / Genshin Impact Edition
|
||||||
|
|||||||
Reference in New Issue
Block a user