import time
from typing import Optional, Tuple

import cv2
import gradio as gr
import numpy as np

# Frontal-face Haar cascade from the data directory bundled with OpenCV.
# The classifier is built once at import time and reused for every frame.
_FACE_CASCADE_PATH = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
FACE_CASCADE = cv2.CascadeClassifier(_FACE_CASCADE_PATH)

# Stylesheet handed to the Gradio interface through its ``css`` argument.
# It sets a dark gradient page background, rounded panels, pill-shaped bold
# buttons, a large centred H1, and tighter padding / capped media height on
# viewports up to 760px wide.
# The ``#status_box`` rules only take effect on a component that is created
# with ``elem_id="status_box"``.
CUSTOM_CSS = """
:root { --radius-xl: 22px; }
.gradio-container {
    max-width: 1500px !important;
    margin: auto !important;
    background: radial-gradient(circle at 15% 15%, rgba(38, 166, 191, .22), transparent 30%),
                linear-gradient(135deg, #101827 0%, #273142 48%, #10232b 100%) !important;
    color: #eef6ff !important;
}
#component-0, .contain, .block, .panel {
    border-radius: var(--radius-xl) !important;
}
.prose h1, h1 {
    font-size: clamp(2.2rem, 5vw, 4.4rem) !important;
    text-align: center !important;
    letter-spacing: -0.04em !important;
}
.prose p, .prose li { color: #c9d6e8 !important; }
label, .label-wrap span { font-weight: 800 !important; }
button {
    border-radius: 999px !important;
    font-weight: 800 !important;
}
.image-container, .wrap, .block {
    overflow: hidden !important;
}
#status_box textarea, #status_box .prose {
    font-size: 1.05rem !important;
}
@media (max-width: 760px) {
    .gradio-container { padding: 8px !important; }
    h1 { font-size: 2.35rem !important; }
    .image-container img, .image-container video { max-height: 62vh !important; object-fit: contain !important; }
}
"""

# Markdown used as the interface description (heading plus usage hints).
# The two trailing spaces after the first sentence are a Markdown hard line
# break, so they must survive editing and whitespace trimming.
HEADER = """
# FaceSense Live
Real-time face boxes for desktop and mobile webcam testing.  
Click **Record** on the camera panel to start live analysis. On phones, allow camera permission; your browser may offer front/rear camera selection.
"""


def draw_corner_box(img: np.ndarray, x: int, y: int, w: int, h: int) -> None:
    """Draw a corner-accented box and a "Face detected" caption onto ``img``.

    The image is modified in place. A dark full rectangle is drawn first as
    a shadow, then a short bright stroke is drawn along both edges at each
    of the four corners, and finally a filled caption tag is added.

    Args:
        img: Image to draw on. Colour tuples are written as given, in the
            image's own channel order; ``process_frame`` passes RGB frames.
        x: Left edge of the box in pixels.
        y: Top edge of the box in pixels.
        w: Box width in pixels.
        h: Box height in pixels.
    """
    color = (0, 238, 255)
    shadow = (6, 18, 28)
    thickness = 4
    # Corner strokes scale with the box but never drop below 18 px.
    line = max(18, int(min(w, h) * 0.22))
    right, bottom = x + w, y + h

    # Shadow first so the bright strokes stay readable on light backgrounds.
    cv2.rectangle(img, (x, y), (right, bottom), shadow, 7)

    # Each entry: corner position plus the direction (towards the box
    # interior) in which its horizontal and vertical strokes extend.
    corners = (
        (x, y, 1, 1),
        (right, y, -1, 1),
        (x, bottom, 1, -1),
        (right, bottom, -1, -1),
    )
    for cx, cy, dx, dy in corners:
        cv2.line(img, (cx, cy), (cx + dx * line, cy), color, thickness)
        cv2.line(img, (cx, cy), (cx, cy + dy * line), color, thickness)

    label = "Face detected"
    label_height = 38
    # The tag normally sits directly above the box. When the box is too
    # close to the top of the image for that, place the tag just inside the
    # box instead, so the text always has its full background behind it.
    if y >= label_height:
        label_top = y - label_height
    else:
        label_top = max(y, 0)
    label_bottom = label_top + label_height
    cv2.rectangle(img, (x, label_top), (x + 185, label_bottom), (0, 145, 175), -1)
    cv2.putText(
        img,
        label,
        (x + 10, label_bottom - 12),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (255, 255, 255),
        2,
    )


def process_frame(frame: Optional[np.ndarray]) -> Tuple[Optional[np.ndarray], str]:
    """Detect frontal faces in one frame and return it annotated with a status.

    Args:
        frame: Input image as a NumPy array, or ``None`` when no frame is
            available. A 2-D array is treated as grayscale and a 4-channel
            array as RGBA; any other input is handled as RGB.

    Returns:
        An ``(annotated, status)`` pair. ``annotated`` is an RGB copy of the
        frame with a corner box drawn around every detection (the input
        array itself is never modified), or ``None`` when there was nothing
        to process. ``status`` is a Markdown summary; for processed frames
        it includes the processing time and the frame rate derived from it.
    """
    # A missing or zero-sized frame cannot be colour-converted or scanned,
    # so both are reported as "waiting" rather than passed on to OpenCV.
    if frame is None or frame.size == 0:
        return None, "### Waiting for camera\nClick **Record** on the webcam panel to start live analysis."

    start = time.perf_counter()

    # Work on a copy so the caller's array is left untouched, and normalise
    # it to three-channel RGB before drawing.
    output = frame.copy()
    if output.ndim == 2:
        output = cv2.cvtColor(output, cv2.COLOR_GRAY2RGB)
    if output.shape[-1] == 4:
        output = cv2.cvtColor(output, cv2.COLOR_RGBA2RGB)

    # Detection runs on a histogram-equalised grayscale version of the frame.
    gray = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)
    gray = cv2.equalizeHist(gray)
    faces = FACE_CASCADE.detectMultiScale(
        gray,
        scaleFactor=1.08,
        minNeighbors=5,
        minSize=(55, 55),
        flags=cv2.CASCADE_SCALE_IMAGE,
    )

    for (x, y, w, h) in faces:
        draw_corner_box(output, int(x), int(y), int(w), int(h))

    elapsed_ms = (time.perf_counter() - start) * 1000
    fps = 1000 / elapsed_ms if elapsed_ms > 0 else 0

    # Both status variants end with the same timing footer.
    timing = (
        f"**Processing:** {elapsed_ms:.1f} ms  \n"
        f"**Approx FPS:** {fps:.1f}"
    )
    # The length is compared explicitly: the detector result may be a NumPy
    # array, whose truth value is ambiguous.
    if len(faces) == 0:
        summary = (
            "No frontal face detected yet. Move your face toward the center, improve lighting, and keep the camera steady.\n\n"
        )
    else:
        summary = (
            f"**Faces detected:** {len(faces)}  \n"
            "**Phase 1:** bounding boxes only  \n"
            "**Next phase:** facial expression + apparent age range + optional presentation estimate  \n\n"
        )
    status = "### Live status\n" + summary + timing
    return output, status


# Streaming webcam interface: every frame from the camera input is passed to
# process_frame, whose two return values fill the annotated image and the
# Markdown status panel.
# NOTE(review): HEADER already starts with a "# FaceSense Live" heading, so
# together with ``title`` the heading may appear twice on the page — confirm
# in the rendered UI.
demo = gr.Interface(
    fn=process_frame,
    inputs=gr.Image(
        label="Camera input",
        sources=["webcam"],
        type="numpy",
        streaming=True,
        mirror_webcam=True,
        height=520,
    ),
    outputs=[
        gr.Image(label="Annotated output", type="numpy", height=520),
        # The id connects this panel to the ``#status_box`` rules in
        # CUSTOM_CSS; without it those rules match nothing on the page.
        gr.Markdown(label="Status", elem_id="status_box"),
    ],
    title="FaceSense Live",
    description=HEADER,
    live=True,
    css=CUSTOM_CSS,
    allow_flagging="never",
    api_name="predict",
)

if __name__ == "__main__":
    # Enable the request queue first, then start the web server.
    # NOTE(review): process_frame uses the single module-level FACE_CASCADE;
    # with a concurrency limit of 4, confirm the classifier is safe to call
    # from parallel requests.
    queued_demo = demo.queue(default_concurrency_limit=4)
    queued_demo.launch()