Spaces:
Running
Running
Updated README, added tracing capabilities, changed size of flowchart, vendored the animation assets so this meets off the grid.
1433b16 | from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| import gradio as gr | |
| from gradio import Server | |
| from fastapi.responses import HTMLResponse, PlainTextResponse, FileResponse # frontend + traces + vendored static assets | |
| import mimetypes | |
| from typing import Any, cast # to resolve PyLance freaking out over llama-cpp-python in the generate_flowchart function | |
| from textwrap import dedent | |
| from pathlib import Path # load the custom frontend from disk | |
| import re # remove thinking tag from response | |
| import json, time, uuid # agent-trace logging | |
| from datetime import datetime, timezone | |
# ----- Get Model ----- #
# Download the UD-Q3_K_XL GGUF quantization named in `filename` from the repo
# (the trailing comment records Q2_K_XL as the fallback quantization).
model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
    filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf"  # fallback: Q2_K_XL
)
# Initialize llama.cpp with the local cached path.
# n_ctx=4096 is also written as a literal into the "params" of every trace
# record in generate_flowchart, so change both together.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2
)
# ----- Init App ----- #
app = gr.Server(title="Code-to-Flowchart Generator")
# ----- Agent traces ----- #
# Each generation appends one JSON line capturing the full LLM call (input code,
# the model's reasoning, output Mermaid + linemap, token usage, latency).
# Download the whole log from the running app at /traces.
# MODEL_NAME is the label stored in each trace record's "model" field. It is a
# separate literal from the hf_hub_download arguments, so update both together.
MODEL_NAME = "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:UD-Q3_K_XL"
# The trace log is created next to this source file.
TRACE_PATH = Path(__file__).parent / "agent_traces.jsonl"
def write_trace(record: dict) -> None:
    """Append ``record`` to the trace log as one JSON line (best effort).

    Tracing must never break generation, so every failure is caught here.
    Instead of being dropped silently, the failure is reported as a warning
    through ``logging`` so a broken trace log can actually be noticed.

    Args:
        record: JSON-serializable mapping describing one generation.
    """
    import logging  # local import keeps this block self-contained

    try:
        # Serialize first: a record that cannot be encoded then never opens
        # (or creates) the log file.
        line = json.dumps(record, ensure_ascii=False) + "\n"
        with open(TRACE_PATH, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception:
        # Deliberately broad: tracing is optional, generation is not.
        logging.getLogger(__name__).warning(
            "could not append agent trace", exc_info=True
        )
# ----- Functions ----- #
# This is a cleaning function to resolve common syntax errors.
def quote_labels(text: str) -> str:
    """Quote-wrap every Mermaid node label and escape code characters inside it.

    Mermaid node labels can't hold raw code characters, so each ``[...]`` and
    ``{...}`` label body is wrapped in double quotes. Inside the body, double
    quotes become single quotes and brackets/braces become Mermaid entity
    codes (``#91;`` ``#93;`` ``#123;`` ``#125;``). Escaping the braces also
    keeps the second (brace) pass from re-matching text that the first
    (bracket) pass has already quoted.

    Args:
        text: Mermaid source, possibly spanning several lines.

    Returns:
        The same source with every recognised label quoted and escaped.
        Lines without labels are returned unchanged.
    """
    # A label's real closing bracket is followed by a Mermaid connector,
    # edge-label pipe, statement end, or EOL; operators after a subscript
    # (== < <= > >= != %) are never mistaken for a close.
    end = r'(?=\s*(?:[-<][-.>xo]|==[>=xo]|\||;|$))'
    square = re.compile(r'(?<=\w)\[(.*?)\]' + end)
    curly = re.compile(r'(?<=\w)\{(.*?)\}' + end)
    # One translation pass instead of a chain of str.replace calls.
    escapes = str.maketrans({
        '"': "'",
        '[': '#91;',
        ']': '#93;',
        '{': '#123;',
        '}': '#125;',
    })

    def esc(body: str) -> str:
        return body.translate(escapes)

    out = []
    for line in text.split('\n'):
        line = square.sub(lambda m: '["' + esc(m.group(1)) + '"]', line)
        line = curly.sub(lambda m: '{"' + esc(m.group(1)) + '"}', line)
        out.append(line)
    return '\n'.join(out)
# Turn the model's <linemap> block into {nodeId: [startLine, endLine]}.
# Lines that are not `NodeId: N` / `NodeId: N-M` are skipped, and so is any
# entry that points outside the numbered source.
def parse_linemap(block: str, num_lines: int) -> dict:
    """Parse ``NodeId: N`` / ``NodeId: start-end`` lines into a node-to-lines map.

    Args:
        block: Text found between the <linemap> tags.
        num_lines: Number of lines in the source the map refers to.

    Returns:
        ``{nodeId: [first, last]}`` with ``first <= last``. Only entries whose
        lines all lie within ``1..num_lines`` are kept; a later entry for the
        same node id replaces an earlier one.
    """
    entry = re.compile(r'\s*([A-Za-z]\w*)\s*:\s*(\d+)(?:\s*-\s*(\d+))?\s*$')
    mapping: dict = {}
    for candidate in block.strip().splitlines():
        hit = entry.match(candidate)
        if hit is None:
            continue
        node_id, first, last = hit.groups()
        # A single number stands for a one-line range; reversed ranges are normalised.
        low, high = sorted((int(first), int(last or first)))
        if num_lines and low >= 1 and high <= num_lines:
            mapping[node_id] = [low, high]
    return mapping
def generate_flowchart(src_code: str) -> dict:
    """Turn source code into a Mermaid flowchart plus a node-to-source-line map.

    The source is numbered line by line and sent to the model together with
    the system prompt below. The reply is then post-processed: the <thinking>
    block is removed (kept only for the trace), the <linemap> block is parsed
    and removed, and the labels of the remaining Mermaid text are quoted.
    One trace record is written per generation.

    Args:
        src_code: Raw source code to diagram.

    Returns:
        ``{"mermaid": <Mermaid source>, "linemap": {nodeId: [start, end]}}``.
        Both values are empty when ``src_code`` is blank.
    """
    # check if src_code is empty
    if not src_code.strip():
        return {"mermaid": "", "linemap": {}}

    # Sampling settings are defined once so the request and the trace record
    # cannot drift apart.
    temperature = 0.1  # Keep it quite deterministic for now
    max_tokens = 1024

    # Number the source lines so the model can cite them in the <linemap> block.
    src_lines = src_code.splitlines()
    num_lines = len(src_lines)
    numbered = "\n".join(f"{i}| {ln}" for i, ln in enumerate(src_lines, 1))

    # Set system prompt
    # NOTE(review): the few-shot output below puts raw operators/quotes in labels
    # (`val > 10`, `Return 'Active'`), which constraint 4 forbids. Consider
    # paraphrasing the example; left unchanged here because it alters model behaviour.
    system_prompt = dedent("""
        ## Role/Persona
        You are a senior staff software architect and compiler engineer specializing in visual control-flow mapping. Your philosophy is pure utility: you translate raw execution logic into highly accurate, scannable, structural diagrams without any conversational filler, meta-commentary, or stylistic fluff.
        ## Context/Objective
        The user will provide source code files or logic snippets. Your sole objective is to parse the syntax and output a corresponding, valid Mermaid.js flowchart graph. This graph will be rendered natively in a production UI to help developers audit execution paths at a glance.
        ## Strict Constraints
        <constraints>
        1. OUTPUT FORMAT: Output valid, raw Mermaid.js syntax, immediately followed by the required <linemap> block (constraint 5). Nothing else.
        2. NO MARKDOWN FENCING: Do not wrap the output in ```mermaid or ``` blocks. Start directly with the Mermaid graph definition, for example: graph TD.
        3. NO PROSE: Do not include introductory text, explanations, or concluding remarks. If the code cannot be parsed, output an isolated error node.
        4. NODE NAMING: Paraphrase conditions into plain words — never put raw code, operators, quotes, parentheses, or square brackets/subscripts inside labels (write Index in bounds?, not i < len(nums); write Element is even?, not nums[i] % 2 == 0)
        5. SOURCE MAP: The user's code is prefixed with `N| ` line numbers (these are references, never copy the `N| ` prefix into a label). After the diagram, output a <linemap> block: one `NodeId: N` per node, where N is the 1-based source line that node represents (use `NodeId: start-end` for a multi-line construct). Omit purely structural Start/End nodes that correspond to no source line.
        </constraints>
        <banned_vocabulary>
        - Here is the flowchart
        - ```mermaid
        - ```
        - Note:
        - Explanation:
        - In this diagram
        - As requested
        </banned_vocabulary>
        ## Response Workflow
        Before outputting the final diagram syntax, perform structural parsing inside a hidden <thinking> tag according to these steps:
        1. Identify all conditional branches, including if/else, loops, including for/while, and termination points, including return/throw.
        2. Map out the execution flow nodes chronologically.
        3. Verify that every opening bracket and node label matching syntax, including [ ], ( ), and { }, is perfectly balanced and closed according to Mermaid specifications.
        4. Ensure no markdown formatting tags leak past the closing </thinking> tag.
        ## Few-Shot Examples
        Input:
        1| def check_status(val):
        2| if val > 10:
        3| return "Active"
        4| else:
        5| return "Inactive"
        Output:
        <thinking>
        1. Control structures: One conditional check, two return branches.
        2. Nodes: A Start, B Conditional, C Active return, D Inactive return.
        3. Source lines: def is line 1, the if is line 2, Active return is line 3, Inactive return is line 5.
        </thinking>
        graph TD
        A[Start: check_status] --> B{val > 10}
        B -- True --> C[Return 'Active']
        B -- False --> D[Return 'Inactive']
        <linemap>
        A: 1
        B: 2
        C: 3
        D: 5
        </linemap>
    """).strip()

    # Reset the cache per request so no cross-request bleeding
    llm.reset()

    # Casting else PyLance gets mad
    t0 = time.perf_counter()
    response = cast(Any, llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": numbered}
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False
    ))
    latency_ms = round((time.perf_counter() - t0) * 1000)

    # Guard against a missing/None content so the regex calls below never see a non-string.
    raw = response["choices"][0]["message"]["content"] or ""
    usage = response.get("usage", {}) or {}

    # Capture the model's hidden reasoning for the trace, then strip the tags
    think = re.search(r'<thinking>(.*?)</thinking>', raw, flags=re.DOTALL)
    reasoning = think.group(1).strip() if think else ""
    content = re.sub(r'<thinking>.*?</thinking>', '', raw, flags=re.DOTALL)

    # Extract + strip the node→line map, then validate it against the source length.
    # The closing tag is optional: a reply cut off by max_tokens would otherwise
    # leave the whole <linemap> text inside the Mermaid source.
    linemap: dict = {}
    lm = re.search(r'<linemap>(.*?)(</linemap>|\Z)', content, flags=re.DOTALL)
    if lm:
        block = lm.group(1)
        if not lm.group(2):
            # Unterminated block: its last line may be truncated mid-number, so drop it.
            block = block.rpartition("\n")[0]
        linemap = parse_linemap(block, num_lines)
        content = content[:lm.start()] + content[lm.end():]

    # Quote-wrap each node label and escape any leaked code characters
    mermaid = quote_labels(content).strip()  # and remove excess whitespace

    # ----- Agent trace (append-only JSONL; downloadable at /traces) -----
    write_trace({
        "id": uuid.uuid4().hex,
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "generate_flowchart",
        "model": MODEL_NAME,
        "params": {"temperature": temperature, "max_tokens": max_tokens, "n_ctx": 4096},
        "input": {"src_code": src_code, "num_lines": num_lines},
        "reasoning": reasoning,
        "output": {"raw": raw, "mermaid": mermaid, "linemap": linemap},
        "usage": {
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
        },
        "latency_ms": latency_ms,
        "status": "ok",
    })
    return {"mermaid": mermaid, "linemap": linemap}
# ----- Custom Frontend ----- #
# The page lives in frontend.html so that very file can also be opened straight
# from disk (file://) to preview the UI without loading the model.
# It is read once, when this module is loaded.
index_html = Path(__file__).with_name("frontend.html").read_text(encoding="utf-8")
# Load the custom HTML
# / takes precedence over the default Blocks UI
# NOTE(review): no route registration for this handler is visible in this view —
# confirm how it is attached to `app`.
def index():
    """Return the custom frontend page as an HTML response."""
    page = HTMLResponse(index_html)
    return page
# Serve the vendored frontend assets (Mermaid, CodeMirror bundle, Gradio client,
# fonts) locally so the app needs NO external CDN/API at runtime.
STATIC_DIR = (Path(__file__).parent / "static").resolve()
# Register the content types explicitly instead of relying on the host's mimetypes table.
mimetypes.add_type("text/javascript", ".js")
mimetypes.add_type("font/woff2", ".woff2")
# NOTE(review): no route registration for this handler is visible in this view —
# confirm how it is attached to `app`.
def static_files(fname: str):
    """Serve one file from STATIC_DIR.

    Args:
        fname: Path of the requested asset, relative to STATIC_DIR.

    Returns:
        A FileResponse with a guessed media type (falling back to
        ``application/octet-stream``), or a plain-text 404 when the resolved
        path is outside STATIC_DIR, is not a regular file, or cannot be resolved.
    """
    try:
        fp = (STATIC_DIR / fname).resolve()
    except (OSError, ValueError):
        # Malformed names (e.g. an embedded NUL byte) are just "not found", not a server error.
        return PlainTextResponse("not found", status_code=404)
    # contain to STATIC_DIR (no path traversal) and require a real file.
    # Compare path components rather than string prefixes: a hard-coded "/"
    # separator never matches on Windows, which made every request a 404 there.
    if STATIC_DIR not in fp.parents or not fp.is_file():
        return PlainTextResponse("not found", status_code=404)
    mt, _ = mimetypes.guess_type(str(fp))
    return FileResponse(fp, media_type=mt or "application/octet-stream")
# Hand out every agent trace collected so far (one JSON object per line).
# curl https://<your-space>/traces > agent_traces.jsonl
# NOTE(review): no route registration for this handler is visible in this view —
# confirm how it is attached to `app`.
def traces():
    """Return the trace log as a downloadable NDJSON attachment (empty if no log exists)."""
    body = ""
    if TRACE_PATH.exists():
        body = TRACE_PATH.read_text(encoding="utf-8")
    download_headers = {"Content-Disposition": 'attachment; filename="agent_traces.jsonl"'}
    return PlainTextResponse(
        body,
        media_type="application/x-ndjson",
        headers=download_headers,
    )
# Start the app. This runs at module top level, after every definition above.
app.launch(share=False)  # no external gradio.live tunnel — fully self-hosted