Fix device type inference and add computer category
This commit is contained in:
+15
-3
@@ -177,11 +177,17 @@ WEAR_KEYWORDS = [
|
||||
"glasses",
|
||||
"眼镜",
|
||||
]
|
||||
OTHER_KEYWORDS = [
|
||||
COMPUTER_KEYWORDS = [
|
||||
"matebook",
|
||||
"macbook",
|
||||
"笔记本",
|
||||
"电脑",
|
||||
"laptop",
|
||||
"notebook",
|
||||
"desktop",
|
||||
"workstation",
|
||||
]
|
||||
OTHER_KEYWORDS = [
|
||||
"vision",
|
||||
"vr",
|
||||
"ipod",
|
||||
@@ -248,9 +254,12 @@ def brand_aliases(brand: str) -> List[str]:
|
||||
|
||||
|
||||
def has_keyword(text: str, keywords: Iterable[str]) -> bool:
|
||||
norm_text = normalize_text(text)
|
||||
norm_text = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", text.lower())
|
||||
norm_text = " ".join(norm_text.split())
|
||||
for kw in keywords:
|
||||
if normalize_text(kw) and normalize_text(kw) in norm_text:
|
||||
kw_norm = re.sub(r"[^0-9a-z\u4e00-\u9fff]+", " ", kw.lower())
|
||||
kw_norm = " ".join(kw_norm.split())
|
||||
if kw_norm and kw_norm in norm_text:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -300,6 +309,8 @@ def infer_device_type(
|
||||
return "tablet"
|
||||
if has_keyword(corpus, WEAR_KEYWORDS):
|
||||
return "wear"
|
||||
if has_keyword(corpus, COMPUTER_KEYWORDS):
|
||||
return "computer"
|
||||
if has_keyword(corpus, OTHER_KEYWORDS):
|
||||
return "other"
|
||||
if has_keyword(corpus, PHONE_KEYWORDS):
|
||||
@@ -385,6 +396,7 @@ def parse_brand_file(path: Path) -> List[DeviceRecord]:
|
||||
|
||||
section_match = SECTION_RE.match(line)
|
||||
if section_match:
|
||||
flush_current()
|
||||
section = section_match.group(1).strip()
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user