from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
from gradio import Server
from fastapi.responses import HTMLResponse, PlainTextResponse, FileResponse # frontend + traces + vendored static assets
import mimetypes
from typing import Any, cast # to resolve PyLance freaking out over llama-cpp-python in the generate_flowchart function
from textwrap import dedent
from pathlib import Path # load the custom frontend from disk
import re # remove thinking tag from response
import json, time, uuid # agent-trace logging
from datetime import datetime, timezone

# ----- Get Model ----- #
# Download the UD-Q3_K_XL GGUF file from the repo (the filename below is the
# source of truth for the quantization; keep MODEL_NAME in sync with it).
model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
    filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf" # fallback: Q2_K_XL
)

# Initialize llama.cpp with the local cached path.
# This runs once at import time; the resulting module-level `llm` is the single
# model instance that generate_flowchart resets and calls on every request.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,   # the trace record written by generate_flowchart reports this same value
    n_threads=2
)

# ----- Init App ----- #
app = Server(title="Code-to-Flowchart Generator")

# ----- Agent traces ----- #
# One JSON line is appended per generation, recording the whole LLM call:
# the submitted code, the model's reasoning, the resulting Mermaid + linemap,
# token usage and latency.
# The complete log can be downloaded from the running app at  /traces .
MODEL_NAME = "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:UD-Q3_K_XL"
TRACE_PATH = Path(__file__).with_name("agent_traces.jsonl")  # sits next to this file

def write_trace(record: dict) -> None:
    """Append *record* to the trace log as a single JSON line (best effort).

    Args:
        record: JSON-serializable mapping describing one generation.

    Returns:
        None. Any failure (unserializable record, unwritable file, ...) is
        logged as a warning and swallowed, because tracing must never break
        generation.
    """
    try:
        # Serialize first so an unserializable record never touches the file.
        line = json.dumps(record, ensure_ascii=False)
        with open(TRACE_PATH, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except Exception:
        # tracing must never break generation — but don't fail silently either,
        # otherwise a full disk or bad permissions goes unnoticed indefinitely.
        import logging
        logging.getLogger(__name__).warning("could not write agent trace", exc_info=True)

# ----- Functions ----- #

# This is a cleaning function to resolve common syntax errors.
def quote_labels(text: str) -> str:
    """Quote-wrap every Mermaid node label in *text* and escape leaked code characters.

    Each line is scanned for ``Id[label]`` and ``Id{label}`` nodes. The label
    body is wrapped in double quotes; inside it, double quotes become single
    quotes and square/curly brackets become numeric entities. A label that is
    already wrapped in one pair of double quotes is unwrapped first, so quoted
    model output is not nested into ``["'label'"]``.

    Args:
        text: Mermaid source, possibly with unquoted or already-quoted labels.

    Returns:
        The same source with every recognized label quoted and escaped. Lines
        without a recognized label are returned unchanged.
    """
    # Mermaid node labels can't hold raw code characters, so quote-wrap each label body
    # A label's real closing bracket is followed by a Mermaid connector, edge-label, pipe, statement end, or EOL
    # so operators after a subscript (== < <= > >= != %) are not mistaken for a close.
    end = r'(?=\s*(?:[-<][-.>xo]|==[>=xo]|\||;|$))'
    # Compile once per call instead of re-resolving the pattern for every line.
    square = re.compile(r'(?<=\w)\[(.*?)\]' + end)
    curly = re.compile(r'(?<=\w)\{(.*?)\}' + end)

    def esc(body: str) -> str:
        # The model sometimes quotes a label itself; drop that one enclosing pair
        # (only when no other double quote is inside) before re-wrapping.
        if len(body) >= 2 and body[0] == body[-1] == '"' and '"' not in body[1:-1]:
            body = body[1:-1]
        return (body.replace('"', "'")
                    .replace('[', '&#91;').replace(']', '&#93;')
                    .replace('{', '&#123;').replace('}', '&#125;'))

    def requote(line: str) -> str:
        # Square labels first: escaping their braces keeps the curly pass from
        # matching inside an already-processed label.
        line = square.sub(lambda m: '["' + esc(m.group(1)) + '"]', line)
        return curly.sub(lambda m: '{"' + esc(m.group(1)) + '"}', line)

    return '\n'.join(requote(line) for line in text.split('\n'))

# Turn the text inside the model's <linemap> block into {nodeId: [startLine, endLine]}.
# Lines that don't look like an entry are skipped, and so is any entry that
# points outside the source.
def parse_linemap(block: str, num_lines: int) -> dict:
    """Parse ``NodeId: N`` / ``NodeId: start-end`` lines into a node→line-range map.

    Args:
        block: Raw text found between the linemap tags.
        num_lines: Number of lines in the submitted source; entries must fall
            within ``1..num_lines``.

    Returns:
        Dict mapping each node id to ``[start, end]`` (1-based, inclusive,
        ``start <= end``). A later entry for the same id replaces an earlier one.
    """
    entry = re.compile(r'\s*([A-Za-z]\w*)\s*:\s*(\d+)(?:\s*-\s*(\d+))?\s*$')
    mapping: dict = {}
    for candidate in block.strip().splitlines():
        hit = entry.match(candidate)
        if hit is None:
            continue  # junk line
        node_id, first, last = hit.groups()
        start = int(first)
        end = int(last) if last else start  # single line: range of one
        lo, hi = min(start, end), max(start, end)  # tolerate reversed ranges
        if not num_lines or lo < 1 or hi > num_lines:
            continue  # points outside the source
        mapping[node_id] = [lo, hi]
    return mapping

# Split one raw model reply into (reasoning, mermaid, linemap).
def _split_model_output(raw: str, num_lines: int) -> tuple:
    """Separate the hidden reasoning, the Mermaid source and the node→line map.

    Tolerates replies that were cut off at the token limit: an opening
    <thinking> or <linemap> tag without its closing tag is treated as running
    to the end of the reply, so its text never leaks into the Mermaid source.

    Args:
        raw: The model's complete reply text.
        num_lines: Number of lines in the submitted source (for linemap validation).

    Returns:
        ``(reasoning, mermaid, linemap)`` where ``reasoning`` is the text of the
        first thinking block (or ""), ``mermaid`` is the cleaned diagram source
        and ``linemap`` is the validated ``{nodeId: [start, end]}`` dict.
    """
    # `\Z` lets the tag match through to the end of the reply when generation
    # stopped before the closing tag was emitted.
    think = re.search(r'<thinking>(.*?)(?:</thinking>|\Z)', raw, flags=re.DOTALL)
    reasoning = think.group(1).strip() if think else ""
    content = re.sub(r'<thinking>.*?(?:</thinking>|\Z)', '', raw, flags=re.DOTALL)

    # Extract + strip the node→line map, then validate it against the source length
    linemap: dict = {}
    lm = re.search(r'<linemap>(.*?)(</linemap>|\Z)', content, flags=re.DOTALL)
    if lm:
        entries = lm.group(1)
        if not lm.group(2):
            # Truncated block: the final line may be cut mid-number (12 → 1),
            # which would silently map a node to the wrong line, so drop it.
            entries = entries.rpartition("\n")[0]
        linemap = parse_linemap(entries, num_lines)
        content = content[:lm.start()] + content[lm.end():]

    # The prompt bans markdown fences, but drop any fence line that slips through anyway
    content = re.sub(r'^[ \t]*```.*$', '', content, flags=re.MULTILINE)

    # Quote-wrap each node label and escape any leaked code characters
    mermaid = quote_labels(content).strip() # and remove excess whitespace
    return reasoning, mermaid, linemap


@app.api(name="generate_flowchart")
def generate_flowchart(src_code: str) -> dict:
    """Generate a Mermaid flowchart and a node→source-line map for *src_code*.

    The source is line-numbered, sent to the local model with a fixed system
    prompt, and the reply is cleaned into renderable Mermaid. One trace record
    is appended per successful call.

    Args:
        src_code: The source code to diagram.

    Returns:
        ``{"mermaid": str, "linemap": {nodeId: [startLine, endLine]}}``.
        Both are empty when *src_code* is empty or whitespace-only.
    """
    # check if src_code is empty
    if not src_code.strip():
        return {"mermaid": "", "linemap": {}}

    # Sampling settings live in one place so the trace records what was actually used
    temperature = 0.1 # Keep it quite deterministic for now
    max_tokens = 1024

    # Number the source lines so the model can cite them in the <linemap> block.
    src_lines = src_code.splitlines()
    num_lines = len(src_lines)
    numbered = "\n".join(f"{i}| {ln}" for i, ln in enumerate(src_lines, 1))

    # Set system prompt
    # NOTE: the few-shot labels are paraphrased on purpose — constraint 4 forbids
    # raw operators and quotes in labels, and the example must not contradict it.
    system_prompt = dedent("""
    ## Role/Persona
    You are a senior staff software architect and compiler engineer specializing in visual control-flow mapping. Your philosophy is pure utility: you translate raw execution logic into highly accurate, scannable, structural diagrams without any conversational filler, meta-commentary, or stylistic fluff.

    ## Context/Objective
    The user will provide source code files or logic snippets. Your sole objective is to parse the syntax and output a corresponding, valid Mermaid.js flowchart graph. This graph will be rendered natively in a production UI to help developers audit execution paths at a glance.

    ## Strict Constraints
    <constraints>
    1. OUTPUT FORMAT: Output valid, raw Mermaid.js syntax, immediately followed by the required <linemap> block (constraint 5). Nothing else.
    2. NO MARKDOWN FENCING: Do not wrap the output in ```mermaid or ``` blocks. Start directly with the Mermaid graph definition, for example: graph TD.
    3. NO PROSE: Do not include introductory text, explanations, or concluding remarks. If the code cannot be parsed, output an isolated error node.
    4. NODE NAMING: Paraphrase conditions into plain words — never put raw code, operators, quotes, parentheses, or square brackets/subscripts inside labels (write Index in bounds?, not i < len(nums); write Element is even?, not nums[i] % 2 == 0)
    5. SOURCE MAP: The user's code is prefixed with `N| ` line numbers (these are references, never copy the `N| ` prefix into a label). After the diagram, output a <linemap> block: one `NodeId: N` per node, where N is the 1-based source line that node represents (use `NodeId: start-end` for a multi-line construct). Omit purely structural Start/End nodes that correspond to no source line.
    </constraints>

    <banned_vocabulary>
    - Here is the flowchart
    - ```mermaid
    - ```
    - Note:
    - Explanation:
    - In this diagram
    - As requested
    </banned_vocabulary>

    ## Response Workflow
    Before outputting the final diagram syntax, perform structural parsing inside a hidden <thinking> tag according to these steps:
    1. Identify all conditional branches, including if/else, loops, including for/while, and termination points, including return/throw.
    2. Map out the execution flow nodes chronologically.
    3. Verify that every opening bracket and node label matching syntax, including [ ], ( ), and { }, is perfectly balanced and closed according to Mermaid specifications.
    4. Ensure no markdown formatting tags leak past the closing </thinking> tag.

    ## Few-Shot Examples

    Input:
    1| def check_status(val):
    2|     if val > 10:
    3|         return "Active"
    4|     else:
    5|         return "Inactive"

    Output:
    <thinking>
    1. Control structures: One conditional check, two return branches.
    2. Nodes: A Start, B Conditional, C Active return, D Inactive return.
    3. Source lines: def is line 1, the if is line 2, Active return is line 3, Inactive return is line 5.
    </thinking>
    graph TD
        A[Start: check_status] --> B{Value greater than ten?}
        B -- True --> C[Return Active]
        B -- False --> D[Return Inactive]
    <linemap>
    A: 1
    B: 2
    C: 3
    D: 5
    </linemap>
    """).strip()

    # Reset the cache per request so no cross-request bleeding
    llm.reset()

    # Casting else PyLance gets mad
    t0 = time.perf_counter()
    response = cast(Any, llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": numbered}
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False
    ))
    latency_ms = round((time.perf_counter() - t0) * 1000)

    # Guard against a null content field so the regex work below can't raise
    raw = response["choices"][0]["message"]["content"] or ""
    usage = response.get("usage", {}) or {}

    # Capture the model's hidden reasoning for the trace, then clean up the diagram
    reasoning, mermaid, linemap = _split_model_output(raw, num_lines)

    # ----- Agent trace (append-only JSONL; downloadable at /traces) -----
    write_trace({
        "id": uuid.uuid4().hex,
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "generate_flowchart",
        "model": MODEL_NAME,
        # n_ctx mirrors the value passed to Llama() at module load
        "params": {"temperature": temperature, "max_tokens": max_tokens, "n_ctx": 4096},
        "input": {"src_code": src_code, "num_lines": num_lines},
        "reasoning": reasoning,
        "output": {"raw": raw, "mermaid": mermaid, "linemap": linemap},
        "usage": {
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
        },
        "latency_ms": latency_ms,
        "status": "ok",
    })

    return {"mermaid": mermaid, "linemap": linemap}

# ----- Custom Frontend ----- #
# The UI lives in frontend.html next to this file, so that same file can be
# opened straight from disk (file://) to preview the UI without loading the model.
# It is read once, at import time.
index_html = Path(__file__).with_name("frontend.html").read_text(encoding="utf-8")

# Serve the custom HTML
# / takes precedence over the default Blocks UI
@app.get("/")
def index():
    """Return the custom frontend page loaded from frontend.html."""
    return HTMLResponse(content=index_html)

# Serve the vendored frontend assets (Mermaid, CodeMirror bundle, Gradio client,
# fonts) locally so the app needs NO external CDN/API at runtime.
STATIC_DIR = (Path(__file__).parent / "static").resolve()
mimetypes.add_type("text/javascript", ".js")
mimetypes.add_type("font/woff2", ".woff2")

@app.get("/static/{fname:path}")
def static_files(fname: str):
    """Serve one file from STATIC_DIR, or a 404 for anything else.

    Args:
        fname: Path below /static/ as requested by the client (untrusted).

    Returns:
        A FileResponse with a guessed media type when *fname* resolves to a
        regular file strictly inside STATIC_DIR; otherwise a plain-text 404.
    """
    try:
        fp = (STATIC_DIR / fname).resolve()
        # contain to STATIC_DIR (no path traversal) and require a real file.
        # Comparing path components instead of a "/"-joined string prefix keeps
        # the check correct on platforms whose separator is not "/".
        servable = STATIC_DIR in fp.parents and fp.is_file()
    except (OSError, ValueError):
        # e.g. an embedded NUL byte or an over-long name: not servable, not a crash
        servable = False
    if not servable:
        return PlainTextResponse("not found", status_code=404)
    mt, _ = mimetypes.guess_type(str(fp))
    return FileResponse(fp, media_type=mt or "application/octet-stream")

# Download the agent-trace log (one JSON object per line).
#   curl https://<your-space>/traces > agent_traces.jsonl
@app.get("/traces")
def traces():
    """Return the contents of the trace log as a downloadable NDJSON attachment.

    The body is empty when the log file does not exist yet.
    """
    if TRACE_PATH.exists():
        body = TRACE_PATH.read_text(encoding="utf-8")
    else:
        body = ""
    download_headers = {"Content-Disposition": 'attachment; filename="agent_traces.jsonl"'}
    return PlainTextResponse(
        body,
        media_type="application/x-ndjson",
        headers=download_headers,
    )

# Launch the app now that every route and API endpoint above has been registered.
app.launch(share=False)   # no external gradio.live tunnel — fully self-hosted