Fix device type inference and add computer category

This commit is contained in:
2026-04-10 17:54:04 +08:00
parent 0cb08642aa
commit b76f51a1aa
5 changed files with 2920 additions and 2877 deletions
+15 -3
View File
@@ -177,11 +177,17 @@ WEAR_KEYWORDS = [
"glasses",
"眼镜",
]
OTHER_KEYWORDS = [
# Keywords that classify a device as a computer. infer_device_type passes this
# list to has_keyword and returns "computer" on a hit; the check runs after
# WEAR_KEYWORDS and before OTHER_KEYWORDS.
COMPUTER_KEYWORDS = [
"matebook",
"macbook",
"笔记本",  # Chinese: notebook / laptop
"电脑",  # Chinese: computer
"laptop",
"notebook",
"desktop",
"workstation",
]
OTHER_KEYWORDS = [
"vision",
"vr",
"ipod",
@@ -248,9 +254,12 @@ def brand_aliases(brand: str) -> List[str]:
def _keyword_match_form(value: str) -> str:
    """Return *value* in the canonical form used for keyword matching.

    The value is lowercased, every run of characters other than ASCII digits,
    ``a``-``z`` and CJK ideographs in U+4E00..U+9FFF is replaced by one space,
    and leading/trailing whitespace is removed.
    """
    spaced = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", value.lower())
    # split()/join collapses any remaining whitespace runs and trims the ends.
    return " ".join(spaced.split())


def has_keyword(text: str, keywords: Iterable[str]) -> bool:
    """Return True if any keyword occurs in *text* after normalization.

    Both *text* and each keyword are reduced to the same canonical form
    (see ``_keyword_match_form``) and compared with a plain substring test,
    so ``"Vision-Pro"`` matches the keyword ``"vision pro"``.

    Args:
        text: Free-form text to search.
        keywords: Candidate keywords; any iterable of strings.

    Returns:
        True on the first keyword whose normalized form is non-empty and is
        contained in the normalized text, otherwise False.
    """
    norm_text = _keyword_match_form(text)
    for kw in keywords:
        kw_norm = _keyword_match_form(kw)
        # A keyword that normalizes to "" would be a substring of every
        # text, so it must never count as a match.
        if kw_norm and kw_norm in norm_text:
            return True
    return False
@@ -300,6 +309,8 @@ def infer_device_type(
return "tablet"
if has_keyword(corpus, WEAR_KEYWORDS):
return "wear"
if has_keyword(corpus, COMPUTER_KEYWORDS):
return "computer"
if has_keyword(corpus, OTHER_KEYWORDS):
return "other"
if has_keyword(corpus, PHONE_KEYWORDS):
@@ -385,6 +396,7 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
section_match = SECTION_RE.match(line)
if section_match:
flush_current()
section = section_match.group(1).strip()
continue