"""Code-to-Flowchart generator.

Serves a custom frontend plus one API endpoint that asks a local llama.cpp
model to turn source code into a Mermaid flowchart and a node -> source-line
map. Every generation is appended to a JSONL trace log that can be downloaded
from the running app at /traces.
"""

import json  # agent-trace logging
import logging
import mimetypes
import re  # strip the reasoning / line-map sections from the response
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path  # load the custom frontend from disk
from textwrap import dedent
from typing import Any, cast  # keeps PyLance quiet about llama-cpp-python's return type

import gradio as gr
from fastapi.responses import FileResponse, HTMLResponse, PlainTextResponse  # frontend + traces + vendored static assets
from gradio import Server
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

logger = logging.getLogger(__name__)

# ----- Get Model ----- #

# Generation settings live in one place so the values recorded in each trace
# are always the values that were actually used.
N_CTX = 4096
TEMPERATURE = 0.1  # keep it quite deterministic for now
MAX_TOKENS = 1024

# Download the UD-Q3_K_XL GGUF file from the repo (served from the local
# Hugging Face cache on later runs).
model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
    filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf",  # fallback: Q2_K_XL
)

# Initialize llama.cpp with the local cached path
llm = Llama(
    model_path=model_path,
    n_ctx=N_CTX,
    n_threads=2,
)

# ----- Init App ----- #
app = gr.Server(title="Code-to-Flowchart Generator")

# ----- Agent traces ----- #
# Each generation appends one JSON line capturing the full LLM call (input code,
# the model's reasoning, output Mermaid + linemap, token usage, latency).
# Download the whole log from the running app at /traces .
MODEL_NAME = "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:UD-Q3_K_XL"
TRACE_PATH = Path(__file__).parent / "agent_traces.jsonl"


def write_trace(record: dict) -> None:
    """Append ``record`` to the trace log as a single JSON line.

    Tracing is best-effort: any failure (unwritable file, unserialisable
    value, ...) is logged as a warning and swallowed so that it can never
    break a generation request.
    """
    try:
        with open(TRACE_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except Exception:  # boundary: tracing must never break generation
        logger.warning("could not append agent trace to %s", TRACE_PATH, exc_info=True)


# ----- Functions ----- #

# A label's real closing bracket is followed by a Mermaid connector, an
# edge-label pipe, a statement end, or EOL. Requiring that lookahead means
# operators after a subscript (== < <= > >= != %) are never mistaken for a close.
_LABEL_END = r'(?=\s*(?:[-<][-.>xo]|==[>=xo]|\||;|$))'
_SQUARE_LABEL_RE = re.compile(r'(?<=\w)\[(.*?)\]' + _LABEL_END)
_CURLY_LABEL_RE = re.compile(r'(?<=\w)\{(.*?)\}' + _LABEL_END)

# Characters that must not survive inside a quoted label. Double quotes would
# terminate the label early; brackets/braces are swapped for Mermaid entity
# codes so the curly-label pass cannot latch onto a brace that sits inside an
# already quoted square label (and vice versa).
_LABEL_ESCAPES = str.maketrans({
    '"': "'",
    '[': '#91;',
    ']': '#93;',
    '{': '#123;',
    '}': '#125;',
})


def quote_labels(text: str) -> str:
    """Quote-wrap every node label so leaked code characters cannot break Mermaid.

    Mermaid node labels can't hold raw code characters, so each ``id[...]``
    and ``id{...}`` label body is wrapped in double quotes and its quotes,
    brackets and braces are escaped. Lines are processed independently.

    Args:
        text: Mermaid source as produced by the model.

    Returns:
        The same diagram with every recognised label quoted and escaped.
    """
    def esc(body: str) -> str:
        return body.translate(_LABEL_ESCAPES)

    out = []
    for line in text.split('\n'):
        line = _SQUARE_LABEL_RE.sub(lambda m: '["' + esc(m.group(1)) + '"]', line)
        line = _CURLY_LABEL_RE.sub(lambda m: '{"' + esc(m.group(1)) + '"}', line)
        out.append(line)
    return '\n'.join(out)


# One line-map entry: "NodeId: N" or "NodeId: start-end".
_LINEMAP_ENTRY_RE = re.compile(r'\s*([A-Za-z]\w*)\s*:\s*(\d+)(?:\s*-\s*(\d+))?\s*$')


def parse_linemap(block: str, num_lines: int) -> dict[str, list[int]]:
    """Parse the model's <linemap> block into ``{nodeId: [startLine, endLine]}``.

    Tolerant of junk lines, which are skipped. A reversed range is swapped
    into ascending order. Any entry whose line(s) fall outside
    ``1..num_lines`` is dropped, so ``num_lines == 0`` yields an empty map.

    Args:
        block: Text found between the line-map tags.
        num_lines: Number of lines in the submitted source.

    Returns:
        Mapping of node id to an inclusive ``[start, end]`` line pair.
    """
    out: dict[str, list[int]] = {}
    for raw in block.strip().splitlines():
        m = _LINEMAP_ENTRY_RE.match(raw)
        if not m:
            continue
        a = int(m.group(2))
        b = int(m.group(3)) if m.group(3) else a
        if a > b:
            a, b = b, a
        if num_lines and 1 <= a <= num_lines and 1 <= b <= num_lines:
            out[m.group(1)] = [a, b]
    return out


# Delimiters the system prompt asks the model to use. Without explicit
# delimiters a pattern such as r'(.*?)' matches the empty string, so the
# reasoning would always be "" and the line map always {}.
_THINK_RE = re.compile(r'<think>(.*?)</think>', re.DOTALL)
_LINEMAP_RE = re.compile(r'<linemap>(.*?)</linemap>', re.DOTALL)

# NOTE(review): the <think>/<linemap> tag names and the "Forbidden Output"
# heading were reconstructed during review (the prompt referred to "a hidden
# tag" / "the required block" without naming them, and the phrase list had no
# heading) - confirm they match the intended prompt.
# NOTE(review): the few-shot labels still contain raw operators and quotes
# (val > 10, 'Active'), which contradicts constraint 4; quote_labels() makes
# that harmless, but consider paraphrasing the example.
SYSTEM_PROMPT = dedent("""
    ## Role/Persona
    You are a senior staff software architect and compiler engineer specializing in visual control-flow mapping. Your philosophy is pure utility: you translate raw execution logic into highly accurate, scannable, structural diagrams without any conversational filler, meta-commentary, or stylistic fluff.

    ## Context/Objective
    The user will provide source code files or logic snippets. Your sole objective is to parse the syntax and output a corresponding, valid Mermaid.js flowchart graph. This graph will be rendered natively in a production UI to help developers audit execution paths at a glance.

    ## Strict Constraints
    1. OUTPUT FORMAT: Output valid, raw Mermaid.js syntax, immediately followed by the required <linemap> block (constraint 5). Nothing else.
    2. NO MARKDOWN FENCING: Do not wrap the output in ```mermaid or ``` blocks. Start directly with the Mermaid graph definition, for example: graph TD.
    3. NO PROSE: Do not include introductory text, explanations, or concluding remarks. If the code cannot be parsed, output an isolated error node.
    4. NODE NAMING: Paraphrase conditions into plain words — never put raw code, operators, quotes, parentheses, or square brackets/subscripts inside labels (write Index in bounds?, not i < len(nums); write Element is even?, not nums[i] % 2 == 0)
    5. SOURCE MAP: The user's code is prefixed with `N| ` line numbers (these are references, never copy the `N| ` prefix into a label). After the diagram, output a <linemap> block: one `NodeId: N` per node, where N is the 1-based source line that node represents (use `NodeId: start-end` for a multi-line construct). Omit purely structural Start/End nodes that correspond to no source line.

    ## Forbidden Output
    - Here is the flowchart
    - ```mermaid
    - ```
    - Note:
    - Explanation:
    - In this diagram
    - As requested

    ## Response Workflow
    Before outputting the final diagram syntax, perform structural parsing inside a hidden <think> tag according to these steps:
    1. Identify all conditional branches, including if/else, loops, including for/while, and termination points, including return/throw.
    2. Map out the execution flow nodes chronologically.
    3. Verify that every opening bracket and node label matching syntax, including [ ], ( ), and { }, is perfectly balanced and closed according to Mermaid specifications.
    4. Ensure no markdown formatting tags leak past the closing </think> tag.

    ## Few-Shot Examples
    Input:
    1| def check_status(val):
    2|     if val > 10:
    3|         return "Active"
    4|     else:
    5|         return "Inactive"

    Output:
    <think>
    1. Control structures: One conditional check, two return branches.
    2. Nodes: A Start, B Conditional, C Active return, D Inactive return.
    3. Source lines: def is line 1, the if is line 2, Active return is line 3, Inactive return is line 5.
    </think>
    graph TD
        A[Start: check_status] --> B{val > 10}
        B -- True --> C[Return 'Active']
        B -- False --> D[Return 'Inactive']
    <linemap>
    A: 1
    B: 2
    C: 3
    D: 5
    </linemap>
""").strip()


def _split_response(raw: str, num_lines: int) -> tuple[str, str, dict]:
    """Split a raw model reply into ``(reasoning, mermaid, linemap)``."""
    # Capture the model's hidden reasoning for the trace, then strip the tags
    think = _THINK_RE.search(raw)
    reasoning = think.group(1).strip() if think else ""
    content = _THINK_RE.sub('', raw)

    # Extract + strip the node -> line map, then validate it against the source length
    linemap: dict = {}
    lm = _LINEMAP_RE.search(content)
    if lm:
        linemap = parse_linemap(lm.group(1), num_lines)
        content = content[:lm.start()] + content[lm.end():]

    # Quote-wrap each node label, escape any leaked code characters, and
    # remove excess surrounding whitespace
    mermaid = quote_labels(content).strip()
    return reasoning, mermaid, linemap


@app.api(name="generate_flowchart")
def generate_flowchart(src_code: str) -> dict:
    """Generate a Mermaid flowchart and node -> line map for ``src_code``.

    Args:
        src_code: Source code to diagram.

    Returns:
        ``{"mermaid": str, "linemap": {nodeId: [startLine, endLine]}}``.
        Blank input returns an empty diagram and map without calling the model.
    """
    if not src_code.strip():
        return {"mermaid": "", "linemap": {}}

    # Number the source lines so the model can cite them in the <linemap> block.
    src_lines = src_code.splitlines()
    num_lines = len(src_lines)
    numbered = "\n".join(f"{i}| {ln}" for i, ln in enumerate(src_lines, 1))

    # Reset the cache per request so no cross-request bleeding
    llm.reset()

    t0 = time.perf_counter()
    # Casting else PyLance gets mad
    response = cast(Any, llm.create_chat_completion(
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": numbered},
        ],
        temperature=TEMPERATURE,
        max_tokens=MAX_TOKENS,
        stream=False,
    ))
    latency_ms = round((time.perf_counter() - t0) * 1000)

    # A missing/None content is treated as an empty reply rather than crashing in re.
    raw = response["choices"][0]["message"]["content"] or ""
    usage = response.get("usage", {}) or {}

    reasoning, mermaid, linemap = _split_response(raw, num_lines)

    # ----- Agent trace (append-only JSONL; downloadable at /traces) -----
    write_trace({
        "id": uuid.uuid4().hex,
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "generate_flowchart",
        "model": MODEL_NAME,
        "params": {"temperature": TEMPERATURE, "max_tokens": MAX_TOKENS, "n_ctx": N_CTX},
        "input": {"src_code": src_code, "num_lines": num_lines},
        "reasoning": reasoning,
        "output": {"raw": raw, "mermaid": mermaid, "linemap": linemap},
        "usage": {
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
        },
        "latency_ms": latency_ms,
        "status": "ok",
    })

    return {"mermaid": mermaid, "linemap": linemap}


# ----- Custom Frontend ----- #
# Served from frontend.html so the same file can be opened directly in a
# browser (file://) to preview the UI without loading the model.
index_html = (Path(__file__).parent / "frontend.html").read_text(encoding="utf-8")  # Load the custom HTML


# / takes precedence over the default Blocks UI
@app.get("/")
def index():
    """Serve the custom frontend page."""
    return HTMLResponse(index_html)


# Serve the vendored frontend assets (Mermaid, CodeMirror bundle, Gradio client,
# fonts) locally so the app needs NO external CDN/API at runtime.
STATIC_DIR = (Path(__file__).parent / "static").resolve()
mimetypes.add_type("text/javascript", ".js")
mimetypes.add_type("font/woff2", ".woff2")


@app.get("/static/{fname:path}")
def static_files(fname: str):
    """Serve one file from STATIC_DIR, answering 404 for anything else."""
    fp = (STATIC_DIR / fname).resolve()
    # Contain to STATIC_DIR (no path traversal) and require a real file.
    # Comparing path components instead of a string prefix keeps the check
    # independent of the platform's path separator.
    if STATIC_DIR not in fp.parents or not fp.is_file():
        return PlainTextResponse("not found", status_code=404)
    mt, _ = mimetypes.guess_type(str(fp))
    return FileResponse(fp, media_type=mt or "application/octet-stream")


# Download every agent trace collected this run (one JSON object per line).
#   curl https://<host>/traces > agent_traces.jsonl
@app.get("/traces")
def traces():
    """Return the whole trace log as a downloadable NDJSON attachment."""
    text = TRACE_PATH.read_text(encoding="utf-8") if TRACE_PATH.exists() else ""
    return PlainTextResponse(
        text,
        media_type="application/x-ndjson",
        headers={"Content-Disposition": 'attachment; filename="agent_traces.jsonl"'},
    )


app.launch(share=False)  # no external gradio.live tunnel — fully self-hosted