添加文件类型处理

2026-04-02 16:02:35 +08:00
parent ff0993e431
commit b9fa4a257b
1 changed files with 7 additions and 3 deletions
@@ -24,7 +24,11 @@ def parse_pdf(content: bytes) -> str:
 def parse_docx(content: bytes) -> str:
    """解析 Word (.docx) 文件，提取段落和表格文本"""
-    doc = Document(io.BytesIO(content))
+    try:
+        doc = Document(io.BytesIO(content))
+    except Exception:
+        raise ValueError("无法解析该 Word 文件，如果是旧版 .doc 格式，请另存为 .docx 后重试")
    text_parts: list[str] = []
    # 段落
@@ -60,9 +64,9 @@ def parse_to_text(filename: str, content: bytes) -> str:
    if suffix == ".pdf":
        return parse_pdf(content)
-    elif suffix == ".docx":
+    elif suffix in (".docx", ".doc"):
        return parse_docx(content)
    elif suffix == ".txt":
        return parse_txt(content)
    else:
-        raise ValueError(f"不支持的文件类型: {suffix}，支持: .pdf, .docx, .txt")
+        raise ValueError(f"不支持的文件类型: {suffix}，支持: .pdf, .docx, .doc, .txt")