Fix device type inference and add computer category
This commit is contained in:
Vendored
+499
-499
File diff suppressed because it is too large
Load Diff
Vendored
+2373
-2373
File diff suppressed because it is too large
Load Diff
@@ -25,7 +25,7 @@ python3 tools/device_mapper.py find --name 'L55M5-AD' --brand Xiaomi
|
|||||||
- `brand`: normalized brand
|
- `brand`: normalized brand
|
||||||
- `manufacturer_brand`: manufacturer-level brand
|
- `manufacturer_brand`: manufacturer-level brand
|
||||||
- `market_brand`: market sub-brand
|
- `market_brand`: market sub-brand
|
||||||
- `device_type`: `phone | tablet | wear | tv | other`
|
- `device_type`: `phone | tablet | wear | tv | computer | other`
|
||||||
- `aliases`: all searchable aliases
|
- `aliases`: all searchable aliases
|
||||||
- `lookup`: normalized alias -> candidate `record.id[]`
|
- `lookup`: normalized alias -> candidate `record.id[]`
|
||||||
- `brand_aliases`: normalized brand aliases to filter by app-provided brand
|
- `brand_aliases`: normalized brand aliases to filter by app-provided brand
|
||||||
@@ -54,4 +54,5 @@ Supported categories:
|
|||||||
- `tablet`
|
- `tablet`
|
||||||
- `wear`
|
- `wear`
|
||||||
- `tv`
|
- `tv`
|
||||||
|
- `computer`
|
||||||
- `other`
|
- `other`
|
||||||
|
|||||||
@@ -23,6 +23,36 @@ DEALLOCATE PREPARE stmt;
|
|||||||
|
|
||||||
USE `mobilemodels`;
|
USE `mobilemodels`;
|
||||||
|
|
||||||
|
SET @drop_stmt = (
|
||||||
|
SELECT CASE `TABLE_TYPE`
|
||||||
|
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_catalog`'
|
||||||
|
WHEN 'VIEW' THEN 'DROP VIEW `mm_device_catalog`'
|
||||||
|
ELSE 'DO 0'
|
||||||
|
END
|
||||||
|
FROM `information_schema`.`TABLES`
|
||||||
|
WHERE `TABLE_SCHEMA` = 'mobilemodels' AND `TABLE_NAME` = 'mm_device_catalog'
|
||||||
|
LIMIT 1
|
||||||
|
);
|
||||||
|
SET @drop_stmt = COALESCE(@drop_stmt, 'DO 0');
|
||||||
|
PREPARE stmt FROM @drop_stmt;
|
||||||
|
EXECUTE stmt;
|
||||||
|
DEALLOCATE PREPARE stmt;
|
||||||
|
|
||||||
|
SET @drop_stmt = (
|
||||||
|
SELECT CASE `TABLE_TYPE`
|
||||||
|
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_brand_lookup`'
|
||||||
|
WHEN 'VIEW' THEN 'DROP VIEW `mm_brand_lookup`'
|
||||||
|
ELSE 'DO 0'
|
||||||
|
END
|
||||||
|
FROM `information_schema`.`TABLES`
|
||||||
|
WHERE `TABLE_SCHEMA` = 'mobilemodels' AND `TABLE_NAME` = 'mm_brand_lookup'
|
||||||
|
LIMIT 1
|
||||||
|
);
|
||||||
|
SET @drop_stmt = COALESCE(@drop_stmt, 'DO 0');
|
||||||
|
PREPARE stmt FROM @drop_stmt;
|
||||||
|
EXECUTE stmt;
|
||||||
|
DEALLOCATE PREPARE stmt;
|
||||||
|
|
||||||
SET @drop_stmt = (
|
SET @drop_stmt = (
|
||||||
SELECT CASE `TABLE_TYPE`
|
SELECT CASE `TABLE_TYPE`
|
||||||
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_record`'
|
WHEN 'BASE TABLE' THEN 'DROP TABLE `mm_device_record`'
|
||||||
@@ -107,7 +137,7 @@ CREATE TABLE IF NOT EXISTS `mm_device_catalog` (
|
|||||||
`manufacturer_brand` varchar(64) NOT NULL,
|
`manufacturer_brand` varchar(64) NOT NULL,
|
||||||
`parent_brand` varchar(64) NOT NULL,
|
`parent_brand` varchar(64) NOT NULL,
|
||||||
`market_brand` varchar(64) NOT NULL,
|
`market_brand` varchar(64) NOT NULL,
|
||||||
`device_type` enum('phone','tablet','wear','tv','other') NOT NULL,
|
`device_type` enum('phone','tablet','wear','tv','computer','other') NOT NULL,
|
||||||
`code` varchar(64) DEFAULT NULL,
|
`code` varchar(64) DEFAULT NULL,
|
||||||
`code_alias` varchar(255) DEFAULT NULL,
|
`code_alias` varchar(255) DEFAULT NULL,
|
||||||
`ver_name` text DEFAULT NULL,
|
`ver_name` text DEFAULT NULL,
|
||||||
|
|||||||
+15
-3
@@ -177,11 +177,17 @@ WEAR_KEYWORDS = [
|
|||||||
"glasses",
|
"glasses",
|
||||||
"眼镜",
|
"眼镜",
|
||||||
]
|
]
|
||||||
OTHER_KEYWORDS = [
|
COMPUTER_KEYWORDS = [
|
||||||
"matebook",
|
"matebook",
|
||||||
|
"macbook",
|
||||||
"笔记本",
|
"笔记本",
|
||||||
|
"电脑",
|
||||||
"laptop",
|
"laptop",
|
||||||
"notebook",
|
"notebook",
|
||||||
|
"desktop",
|
||||||
|
"workstation",
|
||||||
|
]
|
||||||
|
OTHER_KEYWORDS = [
|
||||||
"vision",
|
"vision",
|
||||||
"vr",
|
"vr",
|
||||||
"ipod",
|
"ipod",
|
||||||
@@ -248,9 +254,12 @@ def brand_aliases(brand: str) -> List[str]:
|
|||||||
|
|
||||||
|
|
||||||
def has_keyword(text: str, keywords: Iterable[str]) -> bool:
|
def has_keyword(text: str, keywords: Iterable[str]) -> bool:
|
||||||
norm_text = normalize_text(text)
|
norm_text = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", text.lower())
|
||||||
|
norm_text = " ".join(norm_text.split())
|
||||||
for kw in keywords:
|
for kw in keywords:
|
||||||
if normalize_text(kw) and normalize_text(kw) in norm_text:
|
kw_norm = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", kw.lower())
|
||||||
|
kw_norm = " ".join(kw_norm.split())
|
||||||
|
if kw_norm and kw_norm in norm_text:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -300,6 +309,8 @@ def infer_device_type(
|
|||||||
return "tablet"
|
return "tablet"
|
||||||
if has_keyword(corpus, WEAR_KEYWORDS):
|
if has_keyword(corpus, WEAR_KEYWORDS):
|
||||||
return "wear"
|
return "wear"
|
||||||
|
if has_keyword(corpus, COMPUTER_KEYWORDS):
|
||||||
|
return "computer"
|
||||||
if has_keyword(corpus, OTHER_KEYWORDS):
|
if has_keyword(corpus, OTHER_KEYWORDS):
|
||||||
return "other"
|
return "other"
|
||||||
if has_keyword(corpus, PHONE_KEYWORDS):
|
if has_keyword(corpus, PHONE_KEYWORDS):
|
||||||
@@ -385,6 +396,7 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
|||||||
|
|
||||||
section_match = SECTION_RE.match(line)
|
section_match = SECTION_RE.match(line)
|
||||||
if section_match:
|
if section_match:
|
||||||
|
flush_current()
|
||||||
section = section_match.group(1).strip()
|
section = section_match.group(1).strip()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user