Files
MobileModels/delivery/tools/export_mysql_seed.py
2026-03-19 13:00:40 +08:00

282 lines
9.1 KiB
Python

#!/usr/bin/env python3
"""Export MobileModels records into MySQL-friendly seed SQL."""
from __future__ import annotations
import argparse
import re
from pathlib import Path
from typing import Iterable
from device_mapper import (
MARKET_BRAND_ALIASES,
MARKET_BRAND_TO_MANUFACTURER,
build_records,
brand_aliases,
normalize_text,
resolve_parent_brand,
)
from project_layout import DELIVERY_ROOT, WORKSPACE_ROOT
# Shape accepted as a "legacy code" alias: 2-64 characters in total, the first
# an ASCII letter or digit, the rest ASCII letters, digits, or any of
# `,` `.` `_` `/` `+` `\` `-` (the `\\` in the class is a literal backslash).
LEGACY_CODE_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9,._/+\\-]{1,63}$")
def is_cn_source_file(source_file: str) -> bool:
    """Tell whether *source_file* is named with the ``_cn.md`` suffix."""
    cn_suffix = "_cn.md"
    return source_file.endswith(cn_suffix)
def build_source_order(records: list[object]) -> list[str]:
    """Return the distinct source files of *records*, ``_cn.md`` files first.

    Each group (CN files, then everything else) is in ascending string order.

    Args:
        records: Objects exposing a ``source_file`` attribute.

    Returns:
        The de-duplicated source file names in priority order.
    """
    cn_sources: list[str] = []
    other_sources: list[str] = []
    # Walking the names in sorted order keeps both buckets sorted as they fill.
    for name in sorted({entry.source_file for entry in records}):
        bucket = cn_sources if is_cn_source_file(name) else other_sources
        bucket.append(name)
    return cn_sources + other_sources
def build_source_weights(records: list[object]) -> tuple[dict[str, int], dict[str, float]]:
    """Derive a rank and a weight for every source file found in *records*.

    Files are taken in the order produced by ``build_source_order``. The rank
    is the 1-based position in that order. The weight is
    ``(total - index) / total * 6`` rounded to three decimals, or ``6`` when
    there is only a single source file.

    Args:
        records: Objects exposing a ``source_file`` attribute.

    Returns:
        A ``(rank_map, weight_map)`` pair, both keyed by source file name.
    """
    ordered = build_source_order(records)
    count = len(ordered)
    rank_map: dict[str, int] = {
        name: position for position, name in enumerate(ordered, start=1)
    }
    weight_map: dict[str, float] = {
        name: round((((count - offset) / count) * 6) if count > 1 else 6, 3)
        for offset, name in enumerate(ordered)
    }
    return rank_map, weight_map
# Single-pass escape table for MySQL single-quoted string literals.
# Besides backslash and the quote itself, NUL and Ctrl-Z are escaped because
# the mysql client mishandles them when left raw, and LF/CR are escaped so
# that text-mode newline translation of the seed file cannot alter the data.
_SQL_STRING_ESCAPES = str.maketrans({
    "\\": "\\\\",
    "'": "\\'",
    "\0": "\\0",
    "\n": "\\n",
    "\r": "\\r",
    "\x1a": "\\Z",
})


def sql_quote(value: object | None) -> str:
    """Render *value* as a MySQL literal suitable for a seed ``INSERT``.

    Args:
        value: ``None``, a bool, an int/float, or any object whose ``str()``
            form should be stored as text.

    Returns:
        ``NULL`` for ``None``, ``1``/``0`` for booleans, ``str(value)`` for
        ints and floats, and otherwise a single-quoted string with
        backslash-style escapes applied.
    """
    if value is None:
        return "NULL"
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(value, bool):
        return "1" if value else "0"
    if isinstance(value, (int, float)):
        return str(value)
    escaped = str(value).translate(_SQL_STRING_ESCAPES)
    return f"'{escaped}'"
def batched(items: list[tuple[str, ...]], batch_size: int) -> Iterable[list[tuple[str, ...]]]:
    """Yield consecutive slices of *items*, each at most *batch_size* long.

    Args:
        items: The rows to split up.
        batch_size: Maximum number of rows per yielded slice.

    Yields:
        Lists of rows in their original order; the last may be shorter.
    """
    yield from (
        items[lower:lower + batch_size]
        for lower in range(0, len(items), batch_size)
    )
def build_catalog_rows(records: list[object]) -> list[tuple[str, ...]]:
    """Build the quoted value tuples for the device catalog, one per alias.

    For each record the stripped, de-duplicated aliases are sorted and split
    into legacy codes (per ``is_legacy_code_alias``) and version names. The
    first code becomes the ``code`` column; the remaining codes and all
    version names are joined with ``" | "`` into ``code_alias`` and
    ``ver_name``. One row is emitted per alias whose normalized form is
    non-empty, skipping repeats of the same ``(record.id, alias_norm)`` pair.

    Args:
        records: Objects exposing ``id``, ``aliases``, ``device_name``,
            ``brand``, ``manufacturer_brand``, ``parent_brand``,
            ``market_brand``, ``device_type``, ``source_file`` and
            ``section`` attributes.

    Returns:
        Rows of 16 SQL literals each, ordered by normalized alias, numeric
        source rank, record id, and model.
    """
    rank_map, weight_map = build_source_weights(records)
    rows: list[tuple[str, ...]] = []
    seen_keys: set[tuple[str, str]] = set()
    for record in records:
        aliases = sorted({alias.strip() for alias in record.aliases if alias.strip()})
        if not aliases:
            # No aliases means no rows; skip the per-record quoting below.
            continue

        # Classify each alias once instead of filtering the list twice.
        code_aliases: list[str] = []
        version_names: list[str] = []
        for alias in aliases:
            bucket = code_aliases if is_legacy_code_alias(alias) else version_names
            bucket.append(alias)
        primary_code = code_aliases[0] if code_aliases else None
        # Aliases are unique, so everything after the first is "other".
        other_codes = code_aliases[1:]
        code_alias = " | ".join(other_codes) if other_codes else None
        ver_name = " | ".join(version_names) if version_names else None

        # Only `model` and `alias_norm` vary between a record's rows; quote
        # the remaining columns once per record rather than once per alias.
        record_id_sql = sql_quote(record.id)
        shared_columns = (
            sql_quote(record.device_name),
            sql_quote(record.brand),
            sql_quote(record.manufacturer_brand),
            sql_quote(record.parent_brand),
            sql_quote(record.market_brand),
            sql_quote(record.device_type),
            sql_quote(primary_code),
            sql_quote(code_alias),
            sql_quote(ver_name),
            sql_quote(record.source_file),
            sql_quote(record.section),
            sql_quote(rank_map[record.source_file]),
            sql_quote(f"{weight_map[record.source_file]:.3f}"),
        )

        for alias in aliases:
            alias_norm = normalize_text(alias)
            if not alias_norm:
                continue
            dedupe_key = (record.id, alias_norm)
            if dedupe_key in seen_keys:
                continue
            seen_keys.add(dedupe_key)
            rows.append((
                record_id_sql,
                sql_quote(alias),
                sql_quote(alias_norm),
                *shared_columns,
            ))

    # Column 14 is source_rank, already rendered as text by sql_quote. Compare
    # it as an integer: as text, rank "10" would sort ahead of rank "2".
    rows.sort(key=lambda item: (item[2], int(item[14]), item[0], item[1]))
    return rows
def build_brand_rows(records: list[object]) -> list[tuple[str, ...]]:
    """Build the quoted value tuples for the brand lookup table.

    Three alias families are registered, in this order: manufacturer brands
    seen in *records*, parent brands seen in *records*, and every entry of
    ``MARKET_BRAND_ALIASES``. Rows are keyed by ``(alias_norm, alias_type)``,
    so a later alias that normalizes to an existing key replaces it. Aliases
    whose normalized form is empty are skipped.

    Args:
        records: Objects exposing ``manufacturer_brand`` and ``parent_brand``.

    Returns:
        Rows of six SQL literals each, ordered by ``(alias_norm, alias_type)``.
    """
    lookup: dict[tuple[str, str], tuple[str, ...]] = {}

    def register(alias, alias_type, canonical, manufacturer, parent, market) -> None:
        # Store (or overwrite) the row for this normalized alias and type.
        alias_norm = normalize_text(alias)
        if not alias_norm:
            return
        lookup[(alias_norm, alias_type)] = (
            sql_quote(alias_norm),
            sql_quote(alias_type),
            sql_quote(canonical),
            sql_quote(manufacturer),
            sql_quote(parent),
            sql_quote(market),
        )

    manufacturer_brands = sorted({entry.manufacturer_brand for entry in records})
    parent_brands = sorted({entry.parent_brand for entry in records})

    for manufacturer in manufacturer_brands:
        owner = resolve_parent_brand(manufacturer)
        for alias in brand_aliases(manufacturer):
            register(alias, "manufacturer", manufacturer, manufacturer, owner, None)

    for owner in parent_brands:
        for alias in brand_aliases(owner):
            register(alias, "parent", owner, None, owner, None)

    for market_brand, aliases in MARKET_BRAND_ALIASES.items():
        manufacturer = MARKET_BRAND_TO_MANUFACTURER.get(market_brand, market_brand)
        owner = resolve_parent_brand(manufacturer)
        # The market brand's own name counts as one of its aliases.
        for alias in sorted({market_brand, *aliases}):
            register(alias, "market", market_brand, manufacturer, owner, market_brand)

    # Keys are unique, so sorting the items orders purely by key.
    return [row for _, row in sorted(lookup.items())]
def is_legacy_code_alias(text: str) -> bool:
    """Decide whether *text* looks like a legacy model code.

    After stripping whitespace the value must match ``LEGACY_CODE_RE`` and
    contain at least one digit. Empty or ``None`` input yields ``False``.
    """
    candidate = (text or "").strip()
    if candidate and LEGACY_CODE_RE.match(candidate):
        return any(map(str.isdigit, candidate))
    return False
def append_insert_block(lines: list[str], table_name: str, columns: list[str], rows: list[tuple[str, ...]], batch_size: int = 500) -> None:
    """Append multi-row ``INSERT`` statements for *rows* to *lines*.

    Rows are split into chunks of at most *batch_size*; each chunk becomes one
    statement, and a single empty line is appended after the last statement.
    Nothing is appended when *rows* is empty.

    Args:
        lines: Output buffer, mutated in place.
        table_name: Target table, emitted inside backticks.
        columns: Column names, each emitted inside backticks.
        rows: Tuples of already-rendered SQL literals.
        batch_size: Maximum number of rows per statement.
    """
    if not rows:
        return

    column_sql = ", ".join(f"`{column}`" for column in columns)

    def render(chunk: list[tuple[str, ...]]) -> str:
        # One statement: the shared header followed by this chunk's tuples.
        values_sql = ",\n".join(f" ({', '.join(row)})" for row in chunk)
        return f"INSERT INTO `{table_name}` ({column_sql}) VALUES\n{values_sql};"

    lines.extend(render(chunk) for chunk in batched(rows, batch_size))
    lines.append("")
def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        A namespace with ``repo_root`` (defaults to ``WORKSPACE_ROOT``) and
        ``output`` (defaults to ``dist/mobilemodels_mysql_seed.sql``), both
        converted to ``Path``.
    """
    cli = argparse.ArgumentParser(description="Export MobileModels MySQL seed SQL.")
    path_options = (
        ("--repo-root", WORKSPACE_ROOT, "Path to workspace root"),
        ("--output", Path("dist/mobilemodels_mysql_seed.sql"), "Output SQL path"),
    )
    for flag, fallback, description in path_options:
        cli.add_argument(flag, type=Path, default=fallback, help=description)
    return cli.parse_args()
def main() -> int:
    """Generate the seed SQL file and print a short summary.

    The records are loaded from the repository root given on the command line
    and rendered as a single transaction that empties and refills
    ``mm_device_catalog`` and ``mm_brand_lookup``. A relative ``--output`` is
    resolved against ``DELIVERY_ROOT``.

    Returns:
        ``0``, used as the process exit status.
    """
    options = parse_args()
    workspace = options.repo_root.resolve()
    output_path = options.output
    if not output_path.is_absolute():
        output_path = DELIVERY_ROOT / output_path

    records = build_records(workspace)
    device_record_count = len(records)
    catalog_rows = build_catalog_rows(records)
    brand_rows = build_brand_rows(records)
    catalog_row_count = len(catalog_rows)
    brand_row_count = len(brand_rows)

    catalog_columns = [
        "record_id",
        "model",
        "alias_norm",
        "device_name",
        "brand",
        "manufacturer_brand",
        "parent_brand",
        "market_brand",
        "device_type",
        "code",
        "code_alias",
        "ver_name",
        "source_file",
        "section",
        "source_rank",
        "source_weight",
    ]
    brand_columns = [
        "alias_norm",
        "alias_type",
        "canonical_brand",
        "manufacturer_brand",
        "parent_brand",
        "market_brand",
    ]

    # Preamble: select the schema and clear both tables inside the transaction.
    lines = [
        "-- MobileModels MySQL seed",
        "-- Generated by tools/export_mysql_seed.py",
        "USE `mobilemodels`;",
        "",
        "START TRANSACTION;",
        "",
        "DELETE FROM `mm_device_catalog`;",
        "DELETE FROM `mm_brand_lookup`;",
        "",
    ]
    append_insert_block(lines, "mm_device_catalog", catalog_columns, catalog_rows)
    append_insert_block(lines, "mm_brand_lookup", brand_columns, brand_rows)

    # Trailer: commit, then record the counts as SQL comments.
    lines += [
        "COMMIT;",
        "",
        f"-- device_records: {device_record_count}",
        f"-- device_catalog_rows: {catalog_row_count}",
        f"-- device_lookup_rows: {catalog_row_count}",
        f"-- brand_lookup_rows: {brand_row_count}",
        f"-- legacy_models_rows: {catalog_row_count}",
        "",
    ]

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(lines), encoding="utf-8")

    print(f"Exported MySQL seed: {output_path}")
    print(f"device_records={device_record_count}")
    print(f"device_catalog_rows={catalog_row_count}")
    print(f"device_lookup_rows={catalog_row_count}")
    print(f"brand_lookup_rows={brand_row_count}")
    print(f"legacy_models_rows={catalog_row_count}")
    return 0
if __name__ == "__main__":
    # Run the exporter and hand its return value to the interpreter as the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)