Fix device type inference and add computer category
This commit is contained in:
Vendored
+499
-499
File diff suppressed because it is too large
Load Diff
Vendored
+2373
-2373
File diff suppressed because it is too large
Load Diff
@@ -25,7 +25,7 @@ python3 tools/device_mapper.py find --name 'L55M5-AD' --brand Xiaomi
|
||||
- `brand`: normalized brand
|
||||
- `manufacturer_brand`: manufacturer-level brand
|
||||
- `market_brand`: market sub-brand
|
||||
- `device_type`: `phone | tablet | wear | tv | other`
|
||||
- `device_type`: `phone | tablet | wear | tv | computer | other`
|
||||
- `aliases`: all searchable aliases
|
||||
- `lookup`: normalized alias -> candidate `record.id[]`
|
||||
- `brand_aliases`: normalized brand aliases to filter by app-provided brand
|
||||
@@ -54,4 +54,5 @@ Supported categories:
|
||||
- `tablet`
|
||||
- `wear`
|
||||
- `tv`
|
||||
- `computer`
|
||||
- `other`
|
||||
|
||||
@@ -23,6 +23,36 @@ DEALLOCATE PREPARE stmt;
|
||||
|
||||
USE `mobilemodels`;
|
||||
|
||||
SET @drop_stmt = (
|
||||
SELECT CASE `TABLE_TYPE`
|
||||
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_catalog`'
|
||||
WHEN 'VIEW' THEN 'DROP VIEW `mm_device_catalog`'
|
||||
ELSE 'DO 0'
|
||||
END
|
||||
FROM `information_schema`.`TABLES`
|
||||
WHERE `TABLE_SCHEMA` = 'mobilemodels' AND `TABLE_NAME` = 'mm_device_catalog'
|
||||
LIMIT 1
|
||||
);
|
||||
SET @drop_stmt = COALESCE(@drop_stmt, 'DO 0');
|
||||
PREPARE stmt FROM @drop_stmt;
|
||||
EXECUTE stmt;
|
||||
DEALLOCATE PREPARE stmt;
|
||||
|
||||
SET @drop_stmt = (
|
||||
SELECT CASE `TABLE_TYPE`
|
||||
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_brand_lookup`'
|
||||
WHEN 'VIEW' THEN 'DROP VIEW `mm_brand_lookup`'
|
||||
ELSE 'DO 0'
|
||||
END
|
||||
FROM `information_schema`.`TABLES`
|
||||
WHERE `TABLE_SCHEMA` = 'mobilemodels' AND `TABLE_NAME` = 'mm_brand_lookup'
|
||||
LIMIT 1
|
||||
);
|
||||
SET @drop_stmt = COALESCE(@drop_stmt, 'DO 0');
|
||||
PREPARE stmt FROM @drop_stmt;
|
||||
EXECUTE stmt;
|
||||
DEALLOCATE PREPARE stmt;
|
||||
|
||||
SET @drop_stmt = (
|
||||
SELECT CASE `TABLE_TYPE`
|
||||
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_record`'
|
||||
@@ -107,7 +137,7 @@ CREATE TABLE IF NOT EXISTS `mm_device_catalog` (
|
||||
`manufacturer_brand` varchar(64) NOT NULL,
|
||||
`parent_brand` varchar(64) NOT NULL,
|
||||
`market_brand` varchar(64) NOT NULL,
|
||||
`device_type` enum('phone','tablet','wear','tv','other') NOT NULL,
|
||||
`device_type` enum('phone','tablet','wear','tv','computer','other') NOT NULL,
|
||||
`code` varchar(64) DEFAULT NULL,
|
||||
`code_alias` varchar(255) DEFAULT NULL,
|
||||
`ver_name` text DEFAULT NULL,
|
||||
|
||||
+15
-3
@@ -177,11 +177,17 @@ WEAR_KEYWORDS = [
|
||||
"glasses",
|
||||
"眼镜",
|
||||
]
|
||||
OTHER_KEYWORDS = [
|
||||
COMPUTER_KEYWORDS = [
|
||||
"matebook",
|
||||
"macbook",
|
||||
"笔记本",
|
||||
"电脑",
|
||||
"laptop",
|
||||
"notebook",
|
||||
"desktop",
|
||||
"workstation",
|
||||
]
|
||||
OTHER_KEYWORDS = [
|
||||
"vision",
|
||||
"vr",
|
||||
"ipod",
|
||||
@@ -248,9 +254,12 @@ def brand_aliases(brand: str) -> List[str]:
|
||||
|
||||
|
||||
# Anything that is not an ASCII digit, a lower-case ASCII letter, or a CJK
# ideograph in U+4E00..U+9FFF is treated as a separator between words.
_KEYWORD_SEPARATOR_RE = re.compile(r"[^0-9a-z\u4e00-\u9fff]+")


def _keyword_match_form(value: str) -> str:
    """Lower-case *value* and collapse every separator run to one space."""
    return " ".join(_KEYWORD_SEPARATOR_RE.sub(" ", value.lower()).split())


def has_keyword(text: str, keywords: Iterable[str]) -> bool:
    """Return True if any keyword occurs in *text* after normalisation.

    Both *text* and each keyword are lower-cased, and every run of characters
    outside ``0-9``, ``a-z`` and U+4E00..U+9FFF is replaced by a single space
    (leading/trailing spaces removed). Separators are kept as spaces rather
    than dropped, so a multi-word keyword such as ``"mi pad"`` matches
    ``"Mi-Pad 5"`` but not ``"MiPad"``.

    Matching is a plain substring test on the normalised strings, so a short
    keyword can still match inside a longer word.

    Args:
        text: Text to search. Empty (or otherwise falsy) text never matches.
        keywords: Candidate keywords. Keywords that normalise to the empty
            string are ignored.

    Returns:
        True on the first keyword found, otherwise False.
    """
    if not text:
        # Nothing to search; also avoids calling .lower() on a missing value.
        return False

    norm_text = _keyword_match_form(text)
    for kw in keywords:
        kw_norm = _keyword_match_form(kw)
        # An empty needle is a substring of everything, so skip it explicitly.
        if kw_norm and kw_norm in norm_text:
            return True
    return False
|
||||
|
||||
@@ -300,6 +309,8 @@ def infer_device_type(
|
||||
return "tablet"
|
||||
if has_keyword(corpus, WEAR_KEYWORDS):
|
||||
return "wear"
|
||||
if has_keyword(corpus, COMPUTER_KEYWORDS):
|
||||
return "computer"
|
||||
if has_keyword(corpus, OTHER_KEYWORDS):
|
||||
return "other"
|
||||
if has_keyword(corpus, PHONE_KEYWORDS):
|
||||
@@ -385,6 +396,7 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
||||
|
||||
section_match = SECTION_RE.match(line)
|
||||
if section_match:
|
||||
flush_current()
|
||||
section = section_match.group(1).strip()
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user