"""Gradio app: cross-lingual coreference resolution and vocabulary helper.

The script loads a spaCy English pipeline and an F-Coref model, then serves a
Gradio UI in which a Chinese or English paragraph is translated, its nouns are
listed with Chinese glosses, and its coreference clusters are reported.

NOTE: import order matters in this file. The spaCy model is installed and the
``huggingface_hub`` compatibility patch is applied *before* the third-party
libraries that depend on them are imported.
"""
import functools
import importlib.util
import logging
import os
import subprocess
import sys
import warnings

# 1. Silence noisy asyncio / resource-cleanup warnings
#    (works around the "ValueError: Invalid file descriptor: -1" noise).
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", message="Exception ignored in")
logging.getLogger("asyncio").setLevel(logging.ERROR)

_logger = logging.getLogger(__name__)

_SPACY_MODEL = "en_core_web_sm"

# 2. Make sure the spaCy English model is installed. The command is passed as
#    an argument list (no shell), so an interpreter path containing spaces is
#    handled correctly, and the download is skipped when the model package is
#    already importable. A failed download is not fatal here; spacy.load()
#    below will report the missing model.
if importlib.util.find_spec(_SPACY_MODEL) is None:
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", _SPACY_MODEL],
        check=False,
    )

# 3. Compatibility patch: provide a stand-in when the installed
#    huggingface_hub does not expose ``HfFolder``.
try:
    import huggingface_hub

    if not hasattr(huggingface_hub, 'HfFolder'):

        class MockHfFolder:
            """Minimal replacement exposing the two static token helpers."""

            @staticmethod
            def get_token():
                """Return the token stored in the ``HF_TOKEN`` env variable."""
                return os.getenv("HF_TOKEN")

            @staticmethod
            def save_token(token):
                """Accept and discard *token* (nothing is persisted)."""

        huggingface_hub.HfFolder = MockHfFolder
except Exception:  # best-effort patch: never block start-up on it
    _logger.warning("huggingface_hub compatibility patch skipped", exc_info=True)

import gradio as gr  # noqa: E402  (must follow the patch above)
import spacy  # noqa: E402
from fastcoref import FCoref  # noqa: E402
from deep_translator import GoogleTranslator  # noqa: E402

# 4. Initialise the spaCy POS tagger and the coreference model.
print("🚀 [System] 正在初始化 NLP 詞性解析器...")
nlp = spacy.load(_SPACY_MODEL)

print("🚀 [System] 正在初始化 F-Coref 核心大腦...")
try:
    model = FCoref(model_name_or_path='biu-nlp/f-coref', device='cpu')
    print("✅ [System] 所有模型加載成功!")
except Exception:
    # Retry passing the model id positionally.
    model = FCoref('biu-nlp/f-coref', device='cpu')
    print("✅ [System] 備用路徑加載成功!")


@functools.lru_cache(maxsize=2048)
def _translate_en_to_zh(text):
    """Translate English *text* to Traditional Chinese (results are cached).

    Exceptions raised by the translator propagate to the caller and are not
    cached, so a failed lookup is retried on the next call.
    """
    return GoogleTranslator(source='en', target='zh-TW').translate(text)


def _contains_chinese(text):
    """Return True if *text* has a character in U+4E00..U+9FFF."""
    return any('\u4e00' <= char <= '\u9fff' for char in text)


def _build_vocab(working_text):
    """Return the vocabulary list for the nouns found in *working_text*.

    Tokens tagged ``NOUN`` whose text is longer than two characters are
    collected as lower-cased lemmas, sorted, and each is translated. A word
    whose translation fails is skipped (and logged) instead of aborting.
    """
    doc = nlp(working_text)
    lemmas = {
        token.lemma_.lower()
        for token in doc
        if token.pos_ == "NOUN" and len(token.text) > 2
    }
    if not lemmas:
        return "ℹ️ 未偵測到適合學習的核心英文單字。"

    entries = []
    for word in sorted(lemmas):
        try:
            word_zh = _translate_en_to_zh(word)
        except Exception:
            _logger.warning("Vocabulary translation failed for %r", word, exc_info=True)
            continue
        entries.append(f"🔸 {word.capitalize()} ➔ {word_zh}\n")
    return "".join(entries)


def _build_report(mode_notice, working_text, clusters):
    """Return the coreference report text for *clusters*.

    Each cluster is a sequence of mention strings. Its Chinese rendering falls
    back to the English chain when any mention cannot be translated.
    """
    parts = [
        f"✨ 系統狀態:{mode_notice}\n",
        f"📝 英文運算空間: {working_text}\n",
        "-----------------------------------------\n",
    ]
    if not clusters:
        parts.append("🔍 分析結果:指代關係明確,無需額外消解。")
        return "".join(parts)

    parts.append("🎯【實體連連看鏈結 (Entity Chains)】:\n")
    for index, cluster in enumerate(clusters, start=1):
        cluster_str_en = ' ↔ '.join(cluster)
        try:
            cluster_str_zh = ' ↔ '.join(_translate_en_to_zh(item) for item in cluster)
        except Exception:
            _logger.warning("Cluster translation failed; using English", exc_info=True)
            cluster_str_zh = cluster_str_en
        parts.append(f" 🔗 鏈結 {index} (中): {cluster_str_zh}\n")
        parts.append(f" └─ (英): {cluster_str_en}\n")
    return "".join(parts)


# 5. Core pipeline.
def coref_learning_pipeline(user_input):
    """Run translation, noun extraction and coreference on *user_input*.

    Args:
        user_input: Text typed by the user (Chinese or English). ``None``,
            empty and whitespace-only input are treated as "no input".

    Returns:
        A 3-tuple of strings, in the order expected by the UI outputs:
        ``(translation_text, vocab_output, report_text)``. On failure the
        tuple carries error messages instead; no exception is raised.
    """
    if not user_input or not user_input.strip():
        return "等待輸入...", "等待輸入...", "等待輸入..."

    try:
        # A. Detect the language; Chinese input is bridged to English first.
        if _contains_chinese(user_input):
            working_text = GoogleTranslator(source='zh-CN', target='en').translate(user_input)
            mode_notice = "中文輸入模式(已啟動 AI 跨語言橋接)"
            # Chinese in -> the translation box shows the English rendering.
            translation_text = f"【英文對照】\n{working_text}"
        else:
            working_text = user_input
            mode_notice = "英文原語模式"
            # English in -> the translation box shows Traditional Chinese.
            translated_zh = _translate_en_to_zh(working_text)
            translation_text = f"【中文翻譯】\n{translated_zh}"

        # B. Coreference resolution on the English working text.
        preds = model.predict(texts=[working_text])
        clusters = preds[0].get_clusters()

        # C. Vocabulary booklet.
        vocab_output = _build_vocab(working_text)

        # D. Coreference report.
        report_text = _build_report(mode_notice, working_text, clusters)

        # Order matters: it maps onto the three output boxes of the UI.
        return translation_text, vocab_output, report_text
    except Exception as e:
        _logger.exception("coref_learning_pipeline failed")
        return f"錯誤: {str(e)}", "無法整合單字", f"運行異常: {str(e)}"


# 6. UI layout (the theme is passed to launch(), not to the Blocks constructor).
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 AI 跨語言智慧語意學習終端")
    gr.Markdown("### 🚀 專題亮點:結合核心指代消解 (Coreference Resolution) 與 NLP 智慧名詞提取技術")

    with gr.Row():
        with gr.Column(scale=1):
            txt_input = gr.Textbox(
                label="📥 請輸入中文或英文段落 (Input Text)",
                placeholder="例如:Mary is a Farmer. Mary had a little lamb.",
                lines=5
            )
            btn_submit = gr.Button("🔥 執行多維度 AI 語意解析", variant="primary")

        with gr.Column(scale=1):
            out_translation = gr.Textbox(label="📖 完整文本翻譯/對照 (Translation/Context)", lines=3)
            out_vocab = gr.Textbox(label="📚 AI 智慧單字本 (Vocabulary Booklet)", lines=5)
            out_report = gr.Textbox(label="🎯 AI 語意消解報告 (Coreference Report)", lines=5)

    btn_submit.click(
        fn=coref_learning_pipeline,
        inputs=txt_input,
        outputs=[out_translation, out_vocab, out_report]
    )

if __name__ == "__main__":
    # The theme is supplied at launch time, per the Gradio API this file targets.
    demo.launch(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo"))