From b9fa4a257b9b92bc40d86980c226035fe83514bb Mon Sep 17 00:00:00 2001
From: zk <sim18502043706@163.com>
Date: Thu, 2 Apr 2026 16:02:35 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=96=87=E4=BB=B6=E7=B1=BB?=
 =?UTF-8?q?=E5=9E=8B=E5=A4=84=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/tool/file_parser.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/app/tool/file_parser.py b/app/tool/file_parser.py
index 04a2f9b..2920d35 100644
--- a/app/tool/file_parser.py
+++ b/app/tool/file_parser.py
@@ -24,7 +24,11 @@ def parse_pdf(content: bytes) -> str:
 
 def parse_docx(content: bytes) -> str:
     """解析 Word (.docx) 文件，提取段落和表格文本"""
-    doc = Document(io.BytesIO(content))
+    try:
+        doc = Document(io.BytesIO(content))
+    except Exception:
+        raise ValueError("无法解析该 Word 文件，如果是旧版 .doc 格式，请另存为 .docx 后重试")
+
     text_parts: list[str] = []
 
     # 段落
@@ -60,9 +64,9 @@ def parse_to_text(filename: str, content: bytes) -> str:
 
     if suffix == ".pdf":
         return parse_pdf(content)
-    elif suffix == ".docx":
+    elif suffix in (".docx", ".doc"):
         return parse_docx(content)
     elif suffix == ".txt":
         return parse_txt(content)
     else:
-        raise ValueError(f"不支持的文件类型: {suffix}，支持: .pdf, .docx, .txt")
+        raise ValueError(f"不支持的文件类型: {suffix}，支持: .pdf, .docx, .doc, .txt")