Fix device type inference and add computer category

This commit is contained in:
2026-04-10 17:54:04 +08:00
parent 0cb08642aa
commit b76f51a1aa
5 changed files with 2920 additions and 2877 deletions
+499 -499
View File
File diff suppressed because it is too large. Load Diff
+2373 -2373
View File
File diff suppressed because it is too large. Load Diff
+2 -1
View File
@@ -25,7 +25,7 @@ python3 tools/device_mapper.py find --name 'L55M5-AD' --brand Xiaomi
- `brand`: normalized brand - `brand`: normalized brand
- `manufacturer_brand`: manufacturer-level brand - `manufacturer_brand`: manufacturer-level brand
- `market_brand`: market sub-brand - `market_brand`: market sub-brand
- `device_type`: `phone | tablet | wear | tv | other` - `device_type`: `phone | tablet | wear | tv | computer | other`
- `aliases`: all searchable aliases - `aliases`: all searchable aliases
- `lookup`: normalized alias -> candidate `record.id[]` - `lookup`: normalized alias -> candidate `record.id[]`
- `brand_aliases`: normalized brand aliases to filter by app-provided brand - `brand_aliases`: normalized brand aliases to filter by app-provided brand
@@ -54,4 +54,5 @@ Supported categories:
- `tablet` - `tablet`
- `wear` - `wear`
- `tv` - `tv`
- `computer`
- `other` - `other`
+31 -1
View File
@@ -23,6 +23,36 @@ DEALLOCATE PREPARE stmt;
USE `mobilemodels`; USE `mobilemodels`;
SET @drop_stmt = (
SELECT CASE `TABLE_TYPE`
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_catalog`'
WHEN 'VIEW' THEN 'DROP VIEW `mm_device_catalog`'
ELSE 'DO 0'
END
FROM `information_schema`.`TABLES`
WHERE `TABLE_SCHEMA` = 'mobilemodels' AND `TABLE_NAME` = 'mm_device_catalog'
LIMIT 1
);
SET @drop_stmt = COALESCE(@drop_stmt, 'DO 0');
PREPARE stmt FROM @drop_stmt;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
SET @drop_stmt = (
SELECT CASE `TABLE_TYPE`
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_brand_lookup`'
WHEN 'VIEW' THEN 'DROP VIEW `mm_brand_lookup`'
ELSE 'DO 0'
END
FROM `information_schema`.`TABLES`
WHERE `TABLE_SCHEMA` = 'mobilemodels' AND `TABLE_NAME` = 'mm_brand_lookup'
LIMIT 1
);
SET @drop_stmt = COALESCE(@drop_stmt, 'DO 0');
PREPARE stmt FROM @drop_stmt;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
SET @drop_stmt = ( SET @drop_stmt = (
SELECT CASE `TABLE_TYPE` SELECT CASE `TABLE_TYPE`
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_record`' WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_record`'
@@ -107,7 +137,7 @@ CREATE TABLE IF NOT EXISTS `mm_device_catalog` (
`manufacturer_brand` varchar(64) NOT NULL, `manufacturer_brand` varchar(64) NOT NULL,
`parent_brand` varchar(64) NOT NULL, `parent_brand` varchar(64) NOT NULL,
`market_brand` varchar(64) NOT NULL, `market_brand` varchar(64) NOT NULL,
`device_type` enum('phone','tablet','wear','tv','other') NOT NULL, `device_type` enum('phone','tablet','wear','tv','computer','other') NOT NULL,
`code` varchar(64) DEFAULT NULL, `code` varchar(64) DEFAULT NULL,
`code_alias` varchar(255) DEFAULT NULL, `code_alias` varchar(255) DEFAULT NULL,
`ver_name` text DEFAULT NULL, `ver_name` text DEFAULT NULL,
+15 -3
View File
@@ -177,11 +177,17 @@ WEAR_KEYWORDS = [
"glasses", "glasses",
"眼镜", "眼镜",
] ]
OTHER_KEYWORDS = [ COMPUTER_KEYWORDS = [
"matebook", "matebook",
"macbook",
"笔记本", "笔记本",
"电脑",
"laptop", "laptop",
"notebook", "notebook",
"desktop",
"workstation",
]
OTHER_KEYWORDS = [
"vision", "vision",
"vr", "vr",
"ipod", "ipod",
@@ -248,9 +254,12 @@ def brand_aliases(brand: str) -> List[str]:
def has_keyword(text: str, keywords: Iterable[str]) -> bool: def has_keyword(text: str, keywords: Iterable[str]) -> bool:
norm_text = normalize_text(text) norm_text = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", text.lower())
norm_text = " ".join(norm_text.split())
for kw in keywords: for kw in keywords:
if normalize_text(kw) and normalize_text(kw) in norm_text: kw_norm = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", kw.lower())
kw_norm = " ".join(kw_norm.split())
if kw_norm and kw_norm in norm_text:
return True return True
return False return False
@@ -300,6 +309,8 @@ def infer_device_type(
return "tablet" return "tablet"
if has_keyword(corpus, WEAR_KEYWORDS): if has_keyword(corpus, WEAR_KEYWORDS):
return "wear" return "wear"
if has_keyword(corpus, COMPUTER_KEYWORDS):
return "computer"
if has_keyword(corpus, OTHER_KEYWORDS): if has_keyword(corpus, OTHER_KEYWORDS):
return "other" return "other"
if has_keyword(corpus, PHONE_KEYWORDS): if has_keyword(corpus, PHONE_KEYWORDS):
@@ -385,6 +396,7 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
section_match = SECTION_RE.match(line) section_match = SECTION_RE.match(line)
if section_match: if section_match:
flush_current()
section = section_match.group(1).strip() section = section_match.group(1).strip()
continue continue