Spaces:

JJS341
/

Coreference-Bot

Sleeping

App Files Files Community

Coreference-Bot / app.py

JJS341

Update app.py

ba1029c verified 9 days ago

raw

history blame contribute delete

5.95 kB

	import importlib.util
	import logging
	import os
	import subprocess
	import sys
	import warnings

	# 1. Silence noisy asyncio / resource-cleanup warnings
	#    (works around "ValueError: Invalid file descriptor: -1").
	warnings.filterwarnings("ignore", category=RuntimeWarning)
	# NOTE(review): "Exception ignored in ..." text is normally written by
	# sys.unraisablehook rather than the warnings module, so this filter may
	# never match anything -- confirm it is actually needed.
	warnings.filterwarnings("ignore", message="Exception ignored in")
	logging.getLogger("asyncio").setLevel(logging.ERROR)

	# 2. Make sure the spaCy English model is installed.
	#    The download is skipped when the model package is already importable,
	#    so restarts do not pay for a reinstall.
	if importlib.util.find_spec("en_core_web_sm") is None:
	    # Argument list (no shell): an interpreter path containing spaces or
	    # shell metacharacters cannot break or alter the command.
	    _spacy_download = subprocess.run(
	        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
	        check=False,
	    )
	    if _spacy_download.returncode != 0:
	        # Keep going (best effort, as before) but say why the later
	        # spacy.load() call is likely to fail.
	        print(
	            "⚠️ [System] en_core_web_sm 下載失敗 "
	            f"(exit code {_spacy_download.returncode})"
	        )

	# 3. Compatibility shim for huggingface_hub releases that lack `HfFolder`.
	class MockHfFolder:
	    """Minimal stand-in installed as ``huggingface_hub.HfFolder`` when missing.

	    Only the two static methods below are provided.
	    """

	    @staticmethod
	    def get_token():
	        """Return the value of the ``HF_TOKEN`` environment variable, or None."""
	        return os.getenv("HF_TOKEN")

	    @staticmethod
	    def save_token(token):
	        """Accept and discard *token*; nothing is stored."""


	try:
	    import huggingface_hub

	    if not hasattr(huggingface_hub, 'HfFolder'):
	        huggingface_hub.HfFolder = MockHfFolder
	except Exception as exc:
	    # Best-effort patch: never block startup, but record why it was skipped
	    # instead of silently swallowing everything (including Ctrl-C).
	    logging.getLogger(__name__).warning(
	        "Skipping huggingface_hub.HfFolder compatibility patch: %s", exc
	    )

	import gradio as gr
	import spacy
	from fastcoref import FCoref
	from deep_translator import GoogleTranslator

	# 4. Load the spaCy pipeline and the F-Coref coreference model.
	print("🚀 [System] 正在初始化 NLP 詞性解析器...")
	nlp = spacy.load("en_core_web_sm")

	print("🚀 [System] 正在初始化 F-Coref 核心大腦...")
	try:
	    model = FCoref(model_name_or_path='biu-nlp/f-coref', device='cpu')
	    print("✅ [System] 所有模型加載成功！")
	except Exception as exc:
	    # Report why the keyword form failed before retrying, so a real
	    # problem (network, missing weights) is visible in the logs.
	    # NOTE(review): the fallback differs only in passing the model name
	    # positionally -- presumably for a release whose first parameter has a
	    # different name; confirm it is still needed.
	    print(f"⚠️ [System] 主要路徑加載失敗，改用備用路徑：{exc!r}")
	    model = FCoref('biu-nlp/f-coref', device='cpu')
	    print("✅ [System] 備用路徑加載成功！")

	# 5. Core pipeline logic
	_pipeline_log = logging.getLogger(__name__)


	def _has_chinese(text):
	    """Return True if *text* contains a character in U+4E00..U+9FFF."""
	    return any('\u4e00' <= ch <= '\u9fff' for ch in text)


	def _build_vocab(doc, translate):
	    """Build the vocabulary booklet text from the nouns found in *doc*.

	    *doc* is iterated for tokens exposing ``pos_``, ``text`` and ``lemma_``;
	    *translate* maps one English word to its translation. Words whose
	    translation raises or comes back empty are skipped.
	    """
	    lemmas = sorted({
	        tok.lemma_.lower()
	        for tok in doc
	        if tok.pos_ == "NOUN" and len(tok.text) > 2
	    })
	    if not lemmas:
	        return "ℹ️ 未偵測到適合學習的核心英文單字。"

	    entries = []
	    for lemma in lemmas:
	        try:
	            meaning = translate(lemma)
	        except Exception as exc:
	            # One failed lookup must not cost the whole booklet.
	            _pipeline_log.warning("Vocabulary translation failed for %r: %s", lemma, exc)
	            continue
	        if not meaning:
	            # Avoid printing the literal text "None" for an empty result.
	            continue
	        entries.append(f"🔸 {lemma.capitalize()} ➔ {meaning}\n")
	    return "".join(entries)


	def _build_report(mode_notice, working_text, clusters, translate):
	    """Build the coreference report text.

	    Each cluster is a sequence of English mention strings. A cluster whose
	    translation fails is shown in English on both lines.
	    """
	    parts = [
	        f"✨ 系統狀態：{mode_notice}\n",
	        f"📝 英文運算空間: {working_text}\n",
	        "-----------------------------------------\n",
	    ]
	    if not clusters:
	        parts.append("🔍 分析結果：指代關係明確，無需額外消解。")
	        return "".join(parts)

	    parts.append("🎯【實體連連看鏈結 (Entity Chains)】:\n")
	    for number, cluster in enumerate(clusters, start=1):
	        chain_en = ' ↔ '.join(cluster)
	        try:
	            chain_zh = ' ↔ '.join([translate(mention) for mention in cluster])
	        except Exception as exc:
	            # Covers both a failed request and a None result (join raises).
	            _pipeline_log.warning("Cluster translation failed for %r: %s", chain_en, exc)
	            chain_zh = chain_en
	        parts.append(f" 🔗 鏈結 {number} (中): {chain_zh}\n")
	        parts.append(f" └─ (英): {chain_en}\n")
	    return "".join(parts)


	def coref_learning_pipeline(user_input):
	    """Translate *user_input*, extract nouns and report coreference chains.

	    Input containing Chinese characters is first translated to English;
	    all analysis then runs on the English text.

	    Args:
	        user_input: Text from the UI; ``None``, empty or whitespace-only
	            input yields the three "waiting for input" placeholders.

	    Returns:
	        A 3-tuple ``(translation_text, vocab_output, report_text)`` in the
	        order the UI output boxes expect. On any failure the tuple holds
	        error messages instead; no exception propagates to the caller.
	    """
	    if not user_input or not user_input.strip():
	        return "等待輸入...", "等待輸入...", "等待輸入..."

	    try:
	        # One English -> Traditional Chinese translator reused for the full
	        # text, every vocabulary word and every cluster mention.
	        en_to_zh = GoogleTranslator(source='en', target='zh-TW').translate

	        # A. Detect the language and bridge Chinese input into English.
	        if _has_chinese(user_input):
	            working_text = GoogleTranslator(source='zh-CN', target='en').translate(user_input)
	            mode_notice = "中文輸入模式（已啟動 AI 跨語言橋接）"
	            # Chinese input: the translation box shows the English text.
	            translation_text = f"【英文對照】\n{working_text}"
	        else:
	            working_text = user_input
	            mode_notice = "英文原語模式"
	            # English input: the translation box shows Traditional Chinese.
	            translation_text = f"【中文翻譯】\n{en_to_zh(working_text)}"

	        # B. Run coreference resolution on the English text.
	        preds = model.predict(texts=[working_text])
	        clusters = preds[0].get_clusters()

	        # C. Vocabulary booklet, then D. coreference report.
	        vocab_output = _build_vocab(nlp(working_text), en_to_zh)
	        report_text = _build_report(mode_notice, working_text, clusters, en_to_zh)

	        # Order matters: it must match the three UI output boxes.
	        return translation_text, vocab_output, report_text

	    except Exception as e:
	        # UI boundary: show the error in the boxes and keep the traceback
	        # in the server log.
	        _pipeline_log.exception("coref_learning_pipeline failed")
	        return f"錯誤: {str(e)}", "無法整合單字", f"運行異常: {str(e)}"

	# 6. Gradio UI layout (the theme is passed to launch(), not to gr.Blocks()).
	with gr.Blocks() as demo:
	    # Page heading and sub-heading.
	    gr.Markdown(value="# 🤖 AI 跨語言智慧語意學習終端")
	    gr.Markdown(value="### 🚀 專題亮點：結合核心指代消解 (Coreference Resolution) 與 NLP 智慧名詞提取技術")

	    with gr.Row():
	        # Left column: text input and the submit button.
	        with gr.Column(scale=1):
	            txt_input = gr.Textbox(
	                lines=5,
	                placeholder="例如：Mary is a Farmer. Mary had a little lamb.",
	                label="📥 請輸入中文或英文段落 (Input Text)",
	            )
	            btn_submit = gr.Button(value="🔥 執行多維度 AI 語意解析", variant="primary")

	        # Right column: the three result boxes.
	        with gr.Column(scale=1):
	            out_translation = gr.Textbox(lines=3, label="📖 完整文本翻譯/對照 (Translation/Context)")
	            out_vocab = gr.Textbox(lines=5, label="📚 AI 智慧單字本 (Vocabulary Booklet)")
	            out_report = gr.Textbox(lines=5, label="🎯 AI 語意消解報告 (Coreference Report)")

	    # Wire the button to the pipeline; the output order matches the
	    # pipeline's 3-tuple return value.
	    btn_submit.click(
	        coref_learning_pipeline,
	        txt_input,
	        [out_translation, out_vocab, out_report],
	    )

	if __name__ == "__main__":
	    # The theme is handed to launch() rather than to the Blocks constructor.
	    soft_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
	    demo.launch(theme=soft_theme)