Coreference-Bot / app.py
JJS341's picture
Update app.py
ba1029c verified
import os
import sys
import warnings
import logging
# 1. Silence noisy asyncio / resource-cleanup warnings (the original note
#    says this targets "ValueError: Invalid file descriptor: -1").
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", message="Exception ignored in")
logging.getLogger("asyncio").setLevel(logging.ERROR)

# 2. Download the spaCy English model before it is loaded further down.
#    The command is passed as an argument list (no shell), so an interpreter
#    path containing spaces or shell metacharacters is handled correctly.
import subprocess  # only needed for this one-off setup step

_spacy_download = subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
    check=False,  # best effort, same as the previous os.system call
)
if _spacy_download.returncode != 0:
    # Do not abort here: the model may already be installed, in which case
    # the later spacy.load() call still succeeds.
    logging.getLogger(__name__).warning(
        "spaCy model download exited with status %s",
        _spacy_download.returncode,
    )
# 3. Compatibility shim between old and new huggingface_hub releases:
#    when the installed package has no ``HfFolder`` attribute, install a
#    minimal stand-in so code that still looks it up keeps working.
try:
    import huggingface_hub

    if not hasattr(huggingface_hub, 'HfFolder'):
        class MockHfFolder:
            """Minimal replacement for the missing ``huggingface_hub.HfFolder``."""

            @staticmethod
            def get_token():
                """Return the value of the HF_TOKEN environment variable, or None."""
                return os.getenv("HF_TOKEN")

            @staticmethod
            def save_token(token):
                """Accept *token* and discard it; nothing is persisted."""

        huggingface_hub.HfFolder = MockHfFolder
except Exception as exc:
    # The shim is optional, so a failure must not stop startup. Catching
    # Exception (instead of a bare except) lets KeyboardInterrupt/SystemExit
    # propagate, and logging keeps the reason visible.
    logging.getLogger(__name__).warning(
        "huggingface_hub compatibility shim skipped: %s", exc
    )
import gradio as gr
import spacy
from fastcoref import FCoref
from deep_translator import GoogleTranslator
# 4. Load the spaCy pipeline (used for POS tags and lemmas) and the
#    F-Coref coreference model, both on CPU.
print("🚀 [System] 正在初始化 NLP 詞性解析器...")
nlp = spacy.load("en_core_web_sm")
print("🚀 [System] 正在初始化 F-Coref 核心大腦...")
try:
    model = FCoref(model_name_or_path='biu-nlp/f-coref', device='cpu')
    print("✅ [System] 所有模型加載成功!")
except Exception as exc:
    # Retry with the model name passed positionally. The first failure is
    # logged rather than discarded so a real loading problem stays diagnosable;
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    logging.getLogger(__name__).warning(
        "FCoref keyword-argument load failed, retrying positionally: %s", exc
    )
    model = FCoref('biu-nlp/f-coref', device='cpu')
    print("✅ [System] 備用路徑加載成功!")
# 5. Core processing logic
def _build_vocab_booklet(working_text, to_zh):
    """Return the vocabulary list for the nouns found in *working_text*.

    Nouns longer than two characters are collected as lower-cased lemmas,
    sorted, and each is translated with *to_zh*. A word whose translation
    raises is skipped (and logged) so one failure does not lose the rest.
    """
    lemmas = {
        token.lemma_.lower()
        for token in nlp(working_text)
        if token.pos_ == "NOUN" and len(token.text) > 2
    }
    if not lemmas:
        return "ℹ️ 未偵測到適合學習的核心英文單字。"

    entries = []
    for word in sorted(lemmas):
        try:
            word_zh = to_zh.translate(word)
        except Exception as exc:
            logging.getLogger(__name__).warning(
                "Vocabulary translation failed for %r: %s", word, exc
            )
            continue
        entries.append(f"🔸 {word.capitalize()}{word_zh}\n")
    return "".join(entries)


def _build_coref_report(mode_notice, working_text, clusters, to_zh):
    """Return the coreference report text for *clusters*.

    Each cluster is printed twice: translated with *to_zh* and in English.
    If translating a cluster fails, the English chain is shown on both lines.
    """
    parts = [
        f"✨ 系統狀態:{mode_notice}\n",
        f"📝 英文運算空間: {working_text}\n",
        "-----------------------------------------\n",
    ]
    if not clusters:
        parts.append("🔍 分析結果:指代關係明確,無需額外消解。")
        return "".join(parts)

    parts.append("🎯【實體連連看鏈結 (Entity Chains)】:\n")
    for index, cluster in enumerate(clusters, start=1):
        cluster_str_en = ' ↔ '.join(cluster)
        try:
            cluster_str_zh = ' ↔ '.join(to_zh.translate(item) for item in cluster)
        except Exception as exc:
            logging.getLogger(__name__).warning(
                "Cluster translation failed, showing English instead: %s", exc
            )
            cluster_str_zh = cluster_str_en
        parts.append(f" 🔗 鏈結 {index} (中): {cluster_str_zh}\n")
        parts.append(f" └─ (英): {cluster_str_en}\n")
    return "".join(parts)


def coref_learning_pipeline(user_input):
    """Translate the input, extract noun vocabulary and report coreference chains.

    Args:
        user_input: Text typed by the user, Chinese or English. ``None`` and
            blank strings are treated as "no input yet".

    Returns:
        A 3-tuple of strings in the order the UI expects them:
        (translation text, vocabulary booklet, coreference report).
        Any exception raised during processing is caught and reported
        through the returned strings instead of being propagated.
    """
    # Guard against None as well as blank text: this check runs outside the
    # try block, so calling .strip() on None would otherwise raise.
    if not user_input or not user_input.strip():
        return "等待輸入...", "等待輸入...", "等待輸入..."
    try:
        # One English -> Traditional Chinese translator is reused for the
        # full text, every vocabulary word and every cluster mention.
        to_zh = GoogleTranslator(source='en', target='zh-TW')

        # A. Detect Chinese input (CJK Unified Ideographs range) and bridge
        #    it to English, which is what the NLP models operate on.
        has_chinese = any('\u4e00' <= char <= '\u9fff' for char in user_input)
        if has_chinese:
            working_text = GoogleTranslator(source='zh-CN', target='en').translate(user_input)
            mode_notice = "中文輸入模式(已啟動 AI 跨語言橋接)"
            # Chinese input: the translation box shows the English text.
            translation_text = f"【英文對照】\n{working_text}"
        else:
            working_text = user_input
            mode_notice = "英文原語模式"
            # English input: the translation box shows Traditional Chinese.
            translated_zh = to_zh.translate(working_text)
            translation_text = f"【中文翻譯】\n{translated_zh}"

        # B. Run coreference resolution on the English text.
        preds = model.predict(texts=[working_text])
        clusters = preds[0].get_clusters()

        # C. Vocabulary booklet, D. coreference report.
        vocab_output = _build_vocab_booklet(working_text, to_zh)
        report_text = _build_coref_report(mode_notice, working_text, clusters, to_zh)

        # Order matters: it must match the three output boxes in the UI.
        return translation_text, vocab_output, report_text
    except Exception as e:
        logging.getLogger(__name__).exception("coref_learning_pipeline failed")
        return f"錯誤: {str(e)}", "無法整合單字", f"運行異常: {str(e)}"
# 6. UI layout (the theme is kept out of the Blocks constructor).
with gr.Blocks() as demo:
    # Page title and subtitle.
    gr.Markdown("# 🤖 AI 跨語言智慧語意學習終端")
    gr.Markdown("### 🚀 專題亮點:結合核心指代消解 (Coreference Resolution) 與 NLP 智慧名詞提取技術")
    with gr.Row():
        # First column: the input text area and the submit button.
        with gr.Column(scale=1):
            txt_input = gr.Textbox(
                label="📥 請輸入中文或英文段落 (Input Text)",
                placeholder="例如:Mary is a Farmer. Mary had a little lamb.",
                lines=5
            )
            btn_submit = gr.Button("🔥 執行多維度 AI 語意解析", variant="primary")
        # Second column: the three result boxes, in the same order as the
        # tuple returned by coref_learning_pipeline.
        with gr.Column(scale=1):
            out_translation = gr.Textbox(label="📖 完整文本翻譯/對照 (Translation/Context)", lines=3)
            out_vocab = gr.Textbox(label="📚 AI 智慧單字本 (Vocabulary Booklet)", lines=5)
            out_report = gr.Textbox(label="🎯 AI 語意消解報告 (Coreference Report)", lines=5)
    # Clicking the button runs the pipeline on the input text and writes its
    # three return values to the three output boxes.
    btn_submit.click(
        fn=coref_learning_pipeline,
        inputs=txt_input,
        outputs=[out_translation, out_vocab, out_report]
    )
if __name__ == "__main__":
    # The theme is supplied to launch() rather than to the Blocks constructor
    # (the original note describes this as the current Gradio convention).
    soft_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
    demo.launch(theme=soft_theme)