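# app.py - captured from the Hugging Face file viewer.
# Last commit: "Update app.py" by TensorVizion (10f2c4b, verified); file size 14.6 kB.
# Viewer page chrome ("Raw", "History", "Blame", "Contribute", "Delete") was removed from this copy.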
import gradio as gr
import json
import os
import csv
import tempfile
from huggingface_hub import InferenceClient
# Hugging Face repo ID of the chat model used for extraction; edit this value
# to point the app at a different model.
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

# Access token read from the HF_TOKEN environment variable (set it as a Space
# secret). The value is None when the variable is not defined.
hf_token = os.getenv("HF_TOKEN")

# Module-level inference client bound to the model and token above.
client = InferenceClient(token=hf_token, model=MODEL_ID)
# -------------------------
# Custom CSS Styling
# -------------------------
# Stylesheet passed to gr.Blocks(css=...). It defines the classes that the UI
# attaches through `elem_classes`: the gradient hero header
# (.hero-container), the primary and secondary buttons (.primary-btn,
# .secondary-btn) and an accent card style (.feedback-card).
custom_css = """
.hero-container {
background: linear-gradient(135deg, #6366f1 0%, #14b8a6 100%);
padding: 2.5rem;
border-radius: 20px;
color: white;
margin-bottom: 2rem;
box-shadow: 0 10px 25px -5px rgba(99, 102, 241, 0.2);
}
.hero-container h1 {
color: white !important;
font-size: 2.5rem !important;
font-weight: 800 !important;
margin-bottom: 0.5rem;
text-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.hero-container p {
color: rgba(255, 255, 255, 0.9) !important;
font-size: 1.1rem !important;
}
.primary-btn {
background: linear-gradient(90deg, #6366f1 0%, #14b8a6 100%) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
border-radius: 10px !important;
transition: all 0.3s ease !important;
padding: 12px 24px !important;
}
.primary-btn:hover {
transform: translateY(-2px);
box-shadow: 0 8px 20px -5px rgba(99, 102, 241, 0.4);
}
.secondary-btn {
border-radius: 10px !important;
font-weight: 600 !important;
}
.feedback-card {
border-left: 4px solid #6366f1;
background-color: rgba(99, 102, 241, 0.05);
}
"""
# -------------------------
# Helper & Extraction Logic
# -------------------------
def generate_kpi_html(structured_data):
    """Build the HTML for the KPI metric cards shown above the result tabs.

    Args:
        structured_data: The parsed extraction result. A dict produces one
            card per field (first four fields only); a list produces a single
            "Total Records Found" card holding its length.

    Returns:
        An HTML string. A dashed placeholder box is returned when the data is
        empty, is a dict carrying an ``"error"`` key, or is neither a dict
        nor a list.
    """
    # Local import: the file's import header does not bring in `html`.
    import html

    placeholder = """
<div style='display: flex; justify-content: center; align-items: center; height: 100px; border: 2px dashed var(--border-color-primary, #e5e7eb); border-radius: 12px; color: var(--text-color-subdued, #9ca3af);'>
Await extraction to generate KPI metrics...
</div>
"""

    # Only a dict can be an error payload; a list that merely contains the
    # string "error" is ordinary data, and scalars do not support `in`.
    is_error = isinstance(structured_data, dict) and "error" in structured_data
    if not structured_data or is_error or not isinstance(structured_data, (dict, list)):
        return placeholder

    # Keyword groups that select the card's accent colour; first match wins.
    accent_rules = (
        (("price", "total", "amount", "cost", "revenue", "budget"), "#10b981"),  # Emerald for cash/costs
        (("date", "deadline", "due", "time"), "#f59e0b"),  # Amber for dates/reminders
        (("status", "priority", "importance"), "#ef4444"),  # Crimson for status/alerts
    )

    if isinstance(structured_data, dict):
        cards = []
        # Pick the top 4 attributes to show as metrics.
        for key, val in list(structured_data.items())[:4]:
            display_key = str(key).replace("_", " ").replace("-", " ").title()

            if isinstance(val, list):
                display_val = ", ".join(map(str, val))
            else:
                display_val = str(val)
            # Truncate before escaping so an HTML entity is never cut in half.
            if len(display_val) > 40:
                display_val = display_val[:37] + "..."

            lowered = display_key.lower()
            accent_color = "#6366f1"  # default Indigo
            for keywords, color in accent_rules:
                if any(word in lowered for word in keywords):
                    accent_color = color
                    break

            # Keys and values originate from model output over user-pasted
            # text, so they are escaped before being placed into markup.
            safe_key = html.escape(display_key)
            safe_val = html.escape(display_val)
            cards.append(f"""
<div style='background: var(--body-background-fill, #ffffff); padding: 1rem; border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05); border: 1px solid var(--border-color-primary, #e5e7eb); border-left: 5px solid {accent_color}; min-width: 140px; flex: 1;'>
<div style='font-size: 0.7rem; color: var(--text-color-subdued, #6b7280); text-transform: uppercase; font-weight: 700; letter-spacing: 0.05em; margin-bottom: 0.25rem;'>{safe_key}</div>
<div style='font-size: 1.05rem; color: var(--body-text-color, #111827); font-weight: 800; word-break: break-word;'>{safe_val}</div>
</div>
""")
        cards_html = "".join(cards)
    else:
        # Summary KPI for array data structures.
        cards_html = f"""
<div style='background: var(--body-background-fill, #ffffff); padding: 1rem; border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05); border: 1px solid var(--border-color-primary, #e5e7eb); border-left: 5px solid #6366f1; min-width: 140px; flex: 1;'>
<div style='font-size: 0.7rem; color: var(--text-color-subdued, #6b7280); text-transform: uppercase; font-weight: 700; letter-spacing: 0.05em; margin-bottom: 0.25rem;'>Total Records Found</div>
<div style='font-size: 1.5rem; color: var(--body-text-color, #111827); font-weight: 800;'>{len(structured_data)}</div>
</div>
"""

    return f"""
<div style='display: flex; flex-wrap: wrap; gap: 0.75rem; margin-bottom: 1rem; width: 100%;'>
{cards_html}
</div>
"""
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    lines = stripped.splitlines()
    if len(lines) < 2:
        # Whole reply on one line, e.g. ```{"a": 1}``` - drop the backticks.
        return stripped.strip("`").strip()
    # The first line is the opening fence (possibly with a language tag).
    lines = lines[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "\n".join(lines).strip()


def _parse_model_json(text):
    """Parse the model reply as JSON, tolerating fences and surrounding prose.

    Raises json.JSONDecodeError when no parseable JSON can be recovered.
    """
    cleaned = _strip_code_fence(text)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Fall back to the outermost bracketed span so replies such as
        # 'Here is the JSON: {...}' still parse.
        starts = [i for i in (cleaned.find("{"), cleaned.find("[")) if i != -1]
        end = max(cleaned.rfind("}"), cleaned.rfind("]"))
        if not starts or end <= min(starts):
            raise
        return json.loads(cleaned[min(starts):end + 1])


def _to_table_rows(structured_data):
    """Flatten a dict or list result into [label, value] rows for the table."""
    if isinstance(structured_data, dict):
        return [
            [key, ", ".join(map(str, value)) if isinstance(value, list) else str(value)]
            for key, value in structured_data.items()
        ]
    return [
        [f"Item {position}", str(item)]
        for position, item in enumerate(structured_data, start=1)
    ]


def extract_data(raw_text, fields_to_extract):
    """Ask the model to extract the requested fields from free-form text.

    Args:
        raw_text: The unstructured text pasted by the user.
        fields_to_extract: Free-form description of the fields to pull out.

    Returns:
        A 3-tuple ``(json_payload, table_rows, kpi_html)`` matching the three
        output components wired to the extract button. On any failure the
        payload is a dict with an ``"error"`` key, the table holds a single
        explanatory row, and the KPI HTML is the placeholder.
    """
    if not hf_token:
        err_state = {"error": "HF_TOKEN secret is missing. Please add your Hugging Face Access Token to the Space Secrets."}
        return err_state, [["Error", "HF_TOKEN missing"]], generate_kpi_html(err_state)

    # Treat a missing value the same as an empty box instead of crashing on
    # None.strip().
    raw_text = raw_text or ""
    fields_to_extract = fields_to_extract or ""
    if not raw_text.strip() or not fields_to_extract.strip():
        err_state = {"error": "Please provide both raw text and fields to extract."}
        return err_state, [["Error", "Incomplete inputs"]], generate_kpi_html(err_state)

    # Construct the system instruction.
    system_prompt = (
        "You are an expert data extraction assistant. Your job is to extract specific "
        "information from messy, unstructured text and output it as clean, valid JSON.\n"
        "Rules:\n"
        "1. Only extract the fields requested.\n"
        "2. If a field is not found in the text, return 'null' for that field.\n"
        "3. Output ONLY a raw JSON object. Do not include markdown formatting, backticks, or conversational text."
    )
    user_prompt = f"Fields to extract:\n{fields_to_extract}\n\nUnstructured Text:\n{raw_text}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        # Call the model via the chat completion API.
        response = client.chat_completion(
            messages=messages,
            max_tokens=1024,
            temperature=0.1,
        )
        # The message content may be absent; normalise it to a string.
        output_text = (response.choices[0].message.content or "").strip()
    except Exception as e:  # UI boundary: surface every API failure to the user
        error_msg = str(e)
        if "model_not_found" in error_msg or "does not exist" in error_msg:
            err_dict = {
                "error": f"The model '{MODEL_ID}' was not found on Hugging Face.",
                "troubleshooting": [
                    "1. Check your Hugging Face repo for typos (case-sensitive).",
                    "2. Verify HF_TOKEN secret read permissions.",
                    "3. GGUF or LoRA adapter models are not directly supported by the Serverless API.",
                ],
            }
            return err_dict, [["Connection Error", "Model Not Found"]], generate_kpi_html(err_dict)
        err_state = {"error": error_msg}
        return err_state, [["Error", error_msg]], generate_kpi_html(err_state)

    try:
        structured_data = _parse_model_json(output_text)
    except json.JSONDecodeError:
        structured_data = None

    # The table, KPI and CSV views only understand objects and arrays, so a
    # bare scalar or null is reported the same way as unparseable output.
    if not isinstance(structured_data, (dict, list)):
        error_dict = {
            "error": "The model failed to return valid JSON. It returned this instead:",
            "raw_output": output_text,
        }
        return error_dict, [["Error", "Invalid JSON parsed"]], generate_kpi_html(error_dict)

    return structured_data, _to_table_rows(structured_data), generate_kpi_html(structured_data)
def generate_csv(json_data):
    """Write the extraction result to a CSV file and return its path.

    Args:
        json_data: The parsed extraction result. A dict becomes a single
            row; a list contributes one row per dict element (non-dict
            elements are skipped).

    Returns:
        Path to ``extracted_data.csv`` inside a fresh temporary directory, or
        ``None`` when there is nothing to export (empty input, an error
        payload, no dict rows, no columns) or the file cannot be written.
    """
    # Local import: the file's import header does not bring in `shutil`.
    import shutil

    if not json_data:
        return None
    if isinstance(json_data, dict):
        # Only a dict can be an error payload from the extraction step.
        if "error" in json_data:
            return None
        records = [json_data]
    elif isinstance(json_data, list):
        records = [item for item in json_data if isinstance(item, dict)]
    else:
        return None

    # dict.fromkeys keeps first-seen order, so columns are stable between
    # runs (a set would shuffle them).
    headers = list(dict.fromkeys(key for record in records for key in record))
    if not headers:
        return None

    # Create the temporary directory only once we know a file will be written.
    temp_dir = tempfile.mkdtemp()
    csv_path = os.path.join(temp_dir, "extracted_data.csv")
    try:
        with open(csv_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()
            writer.writerows(
                {
                    # Nested values are stored as JSON text so the cell can
                    # be parsed back, rather than as a Python repr.
                    key: json.dumps(value, ensure_ascii=False)
                    if isinstance(value, (list, dict))
                    else value
                    for key, value in record.items()
                }
                for record in records
            )
    except (OSError, csv.Error, ValueError):
        # Best-effort export: report "no file" and do not leave a partial
        # file or empty directory behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None
    return csv_path
# -------------------------
# Build the Gradio UI
# -------------------------
# Two-column layout: inputs on the left, KPI cards plus table/JSON views and
# the CSV export on the right. Event handlers are registered inside the
# Blocks context so they are attached to `demo`.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    # Styled header block.
    # NOTE(review): gr.HTML is a leaf component (not a layout container) in
    # Gradio releases before 6, so it cannot be used in a `with` statement
    # there. A Column carries the same CSS class and works as a container.
    with gr.Column(elem_classes="hero-container"):
        gr.Markdown(
            "# 🛟 The Data Rescuer\n"
            "Turn messy logs, disorganized lists, automated transcripts, and raw OCR scripts "
            f"into highly structured business-ready assets — powered by `{MODEL_ID}`."
        )

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            raw_input = gr.Textbox(
                label="1. Paste Unstructured Text",
                placeholder="Paste your messy meeting notes, emails, or raw text here...",
                lines=12,
            )
            schema_input = gr.Textbox(
                label="2. What fields do you want to extract?",
                placeholder="e.g., Company Name, Contact Person, Deadline, Action Items (list)",
                lines=3,
            )
            extract_btn = gr.Button(
                "🚀 Extract Structured Data",
                variant="primary",
                elem_classes="primary-btn",
            )

        # Right column: multi-view output panels.
        with gr.Column(scale=1):
            # Dashboard-style summary cards. The initial value is the same
            # placeholder generate_kpi_html returns for empty data, so the
            # markup is defined in one place only.
            kpi_output = gr.HTML(value=generate_kpi_html(None))

            with gr.Tabs():
                with gr.TabItem("📊 Structured Table"):
                    table_output = gr.Dataframe(
                        headers=["Field Name", "Extracted Value"],
                        datatype=["str", "str"],
                        interactive=False,
                        wrap=True,
                    )
                with gr.TabItem("🔍 Raw JSON Tree"):
                    json_output = gr.JSON(label="JSON Object")

            # Action controls below the outputs.
            with gr.Row():
                export_btn = gr.Button(
                    "💾 Build Export File",
                    variant="secondary",
                    elem_classes="secondary-btn",
                )
                csv_output = gr.File(label="Ready for Download", interactive=False)

    # -------------------------
    # Examples Panel
    # -------------------------
    gr.Markdown("### Try it out with these examples:")
    gr.Examples(
        examples=[
            [
                "Hey guys, quick recap of today's sync. Sarah is going to handle the frontend React components by next Tuesday. John, you need to fix the database migration issue before Friday. Also, our client 'Acme Corp' wants the final delivery by October 15th.",
                "Task Owner, Task Description, Deadline, Client Name",
            ],
            [
                "Invoice #99214. From: BlueTech Software. To: Jane Doe. Items: 1x Server Maintenance ($500), 2x Cloud Storage ($100 each). Total due: $700. Please pay by end of month.",
                "Invoice Number, Sender, Recipient, Items (list of names and prices), Total Amount",
            ],
        ],
        inputs=[raw_input, schema_input],
        label="Click an example to populate the inputs",
    )

    # -------------------------
    # Event Connections
    # -------------------------
    # 1. The extract button fills the JSON tree, the table view and the KPI
    #    cards (in that order, matching extract_data's return tuple).
    extract_btn.click(
        fn=extract_data,
        inputs=[raw_input, schema_input],
        outputs=[json_output, table_output, kpi_output],
    )
    # 2. The export button turns the current JSON payload into a CSV file.
    export_btn.click(
        fn=generate_csv,
        inputs=[json_output],
        outputs=[csv_output],
    )

# Launch the app only when run as a script.
if __name__ == "__main__":
    demo.launch()