Upload folder using huggingface_hub
#11
by petter2025 - opened
- Dockerfile +1 -5
- README.md +3 -8
- app/api/routes_governance.py +193 -30
- app/api/routes_incidents.py +1 -1
- app/api/routes_users.py +70 -15
- app/core/usage_tracker.py +92 -159
- app/database/models_intents.py +194 -36
- app/services/intent_store.py +76 -6
- app/services/risk_service.py +34 -25
Dockerfile
CHANGED
|
@@ -1,11 +1,7 @@
|
|
| 1 |
FROM python:3.12-slim
|
| 2 |
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
| 3 |
WORKDIR /app
|
| 4 |
-
|
| 5 |
-
ARG CLASSIC_TOKEN=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
|
| 6 |
-
RUN git config --global url."https://x-access-token:${CLASSIC_TOKEN}@github.com/".insteadOf "https://github.com/"
|
| 7 |
-
|
| 8 |
COPY requirements.txt .
|
| 9 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 10 |
COPY . .
|
| 11 |
-
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
FROM python:3.12-slim
|
| 2 |
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
| 3 |
WORKDIR /app
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
COPY requirements.txt .
|
| 5 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 6 |
COPY . .
|
| 7 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,9 +1,3 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: ARF API Control Plane
|
| 3 |
-
sdk: docker
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: green
|
| 6 |
-
---
|
| 7 |
# arf-api
|
| 8 |
|
| 9 |
ARF API Control Plane (FastAPI)
|
|
@@ -87,7 +81,7 @@ curl -X POST "http://localhost:8000/api/v1/v1/incidents/evaluate" -H "Content-
|
|
| 87 |
"justification": "Causal: If we apply restart_container instead of no_action, latency would change from 600.00 to 510.00 (Δ = -90.00). Based on heuristic causal model.",
|
| 88 |
"confidence": 0.85,
|
| 89 |
"risk_score": 0.54,
|
| 90 |
-
"status": "
|
| 91 |
},
|
| 92 |
"causal_explanation": {
|
| 93 |
"factual_outcome": 600,
|
|
@@ -123,4 +117,5 @@ Notes
|
|
| 123 |
-----
|
| 124 |
|
| 125 |
- The governance endpoints use an in-process `RiskEngine` initialized at startup.
|
| 126 |
-
- The outcome recording endpoint is not implemented in this repository and returns HTTP 501.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# arf-api
|
| 2 |
|
| 3 |
ARF API Control Plane (FastAPI)
|
|
|
|
| 81 |
"justification": "Causal: If we apply restart_container instead of no_action, latency would change from 600.00 to 510.00 (Δ = -90.00). Based on heuristic causal model.",
|
| 82 |
"confidence": 0.85,
|
| 83 |
"risk_score": 0.54,
|
| 84 |
+
"status": "success"
|
| 85 |
},
|
| 86 |
"causal_explanation": {
|
| 87 |
"factual_outcome": 600,
|
|
|
|
| 117 |
-----
|
| 118 |
|
| 119 |
- The governance endpoints use an in-process `RiskEngine` initialized at startup.
|
| 120 |
+
- The outcome recording endpoint is not implemented in this repository and returns HTTP 501.
|
| 121 |
+
|
app/api/routes_governance.py
CHANGED
|
@@ -1,25 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from fastapi import APIRouter, Depends, HTTPException, Request, BackgroundTasks, Header
|
| 2 |
from fastapi.encoders import jsonable_encoder
|
| 3 |
from sqlalchemy.orm import Session
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from app.models.infrastructure_intents import InfrastructureIntentRequest
|
| 5 |
from app.services.intent_adapter import to_oss_intent
|
| 6 |
from app.services.risk_service import evaluate_intent, evaluate_healing_decision
|
| 7 |
from app.services.intent_store import save_evaluated_intent
|
| 8 |
from app.services.outcome_service import record_outcome
|
| 9 |
from app.api.deps import get_db
|
| 10 |
-
from
|
| 11 |
-
import uuid
|
| 12 |
-
import logging
|
| 13 |
-
import time
|
| 14 |
-
from typing import Optional
|
| 15 |
-
|
| 16 |
from agentic_reliability_framework.core.models.event import ReliabilityEvent
|
|
|
|
| 17 |
|
| 18 |
-
# ===== USAGE TRACKER
|
| 19 |
import app.core.usage_tracker
|
| 20 |
from app.core.usage_tracker import UsageRecord
|
| 21 |
|
| 22 |
-
# ===== PRICING CALCULATOR
|
| 23 |
try:
|
| 24 |
from arf_pricing_calculator.storage.buffer import add_event
|
| 25 |
PRICING_AVAILABLE = True
|
|
@@ -27,7 +43,15 @@ except ImportError:
|
|
| 27 |
PRICING_AVAILABLE = False
|
| 28 |
add_event = None
|
| 29 |
|
| 30 |
-
# =====
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
try:
|
| 32 |
from opentelemetry import trace
|
| 33 |
from opentelemetry.trace import Status, StatusCode
|
|
@@ -52,6 +76,86 @@ class HealingDecisionRequest(BaseModel):
|
|
| 52 |
event: ReliabilityEvent
|
| 53 |
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
@router.post("/intents/evaluate")
|
| 56 |
async def evaluate_intent_endpoint(
|
| 57 |
request: Request,
|
|
@@ -61,9 +165,8 @@ async def evaluate_intent_endpoint(
|
|
| 61 |
idempotency_key: Optional[str] = Header(None, alias="Idempotency-Key"),
|
| 62 |
):
|
| 63 |
"""
|
| 64 |
-
Evaluate an infrastructure intent with idempotency
|
| 65 |
"""
|
| 66 |
-
# ── optional trace ──────────────────────────────────────
|
| 67 |
span = None
|
| 68 |
if OTEL_AVAILABLE and _tracer:
|
| 69 |
span = _tracer.start_span("governance.evaluate_intent")
|
|
@@ -75,13 +178,20 @@ async def evaluate_intent_endpoint(
|
|
| 75 |
if not api_key:
|
| 76 |
api_key = request.query_params.get("api_key", "unknown")
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
current_tracker = app.core.usage_tracker.tracker
|
| 79 |
if current_tracker is None:
|
| 80 |
if span:
|
| 81 |
span.set_status(Status(StatusCode.ERROR, "tracker unavailable"))
|
| 82 |
span.end()
|
| 83 |
-
raise HTTPException(status_code=503,
|
| 84 |
-
detail="Usage tracking service unavailable")
|
| 85 |
|
| 86 |
record = UsageRecord(
|
| 87 |
api_key=api_key,
|
|
@@ -102,22 +212,25 @@ async def evaluate_intent_endpoint(
|
|
| 102 |
if existing_response:
|
| 103 |
return existing_response
|
| 104 |
else:
|
| 105 |
-
raise HTTPException(status_code=429,
|
| 106 |
-
detail="Monthly evaluation quota exceeded")
|
| 107 |
|
| 108 |
try:
|
| 109 |
oss_intent = to_oss_intent(intent_req)
|
| 110 |
risk_engine = request.app.state.risk_engine
|
|
|
|
|
|
|
|
|
|
| 111 |
result = evaluate_intent(
|
| 112 |
engine=risk_engine,
|
| 113 |
intent=oss_intent,
|
| 114 |
cost_estimate=intent_req.estimated_cost,
|
| 115 |
-
policy_violations=intent_req.policy_violations
|
|
|
|
| 116 |
)
|
| 117 |
|
| 118 |
if span:
|
| 119 |
span.set_attribute("risk_score", result["risk_score"])
|
| 120 |
-
span.set_attribute("deterministic_id", str(uuid.uuid4()))
|
| 121 |
|
| 122 |
deterministic_id = str(uuid.uuid4())
|
| 123 |
api_payload = jsonable_encoder(intent_req.model_dump())
|
|
@@ -136,6 +249,19 @@ async def evaluate_intent_endpoint(
|
|
| 136 |
result["intent_id"] = deterministic_id
|
| 137 |
response_data = result
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
if current_tracker:
|
| 140 |
background_tasks.add_task(
|
| 141 |
current_tracker._insert_audit_log,
|
|
@@ -172,6 +298,9 @@ async def evaluate_intent_endpoint(
|
|
| 172 |
raise HTTPException(status_code=500, detail=error_msg)
|
| 173 |
|
| 174 |
|
|
|
|
|
|
|
|
|
|
| 175 |
@router.post("/intents/outcome")
|
| 176 |
async def record_outcome_endpoint(
|
| 177 |
request: Request,
|
|
@@ -181,7 +310,6 @@ async def record_outcome_endpoint(
|
|
| 181 |
):
|
| 182 |
"""
|
| 183 |
Record an outcome for a previously evaluated intent.
|
| 184 |
-
Idempotent based on deterministic_id and success value (handled in service).
|
| 185 |
Also updates the pricing calculator's calibration buffer if available.
|
| 186 |
"""
|
| 187 |
try:
|
|
@@ -205,19 +333,18 @@ async def record_outcome_endpoint(
|
|
| 205 |
"source": "arf_api_outcome"
|
| 206 |
}
|
| 207 |
add_event(event)
|
| 208 |
-
logger.info(
|
| 209 |
-
f"Added outcome to pricing buffer for intent {
|
| 210 |
-
outcome.deterministic_id}")
|
| 211 |
except Exception as e:
|
| 212 |
-
logger.warning(
|
| 213 |
-
f"Failed to update pricing buffer for intent {
|
| 214 |
-
outcome.deterministic_id}: {e}")
|
| 215 |
|
| 216 |
return {"message": "Outcome recorded", "outcome_id": outcome_record.id}
|
| 217 |
except Exception as e:
|
| 218 |
raise HTTPException(status_code=500, detail=str(e))
|
| 219 |
|
| 220 |
|
|
|
|
|
|
|
|
|
|
| 221 |
@router.post("/healing/evaluate")
|
| 222 |
async def evaluate_healing_decision_endpoint(
|
| 223 |
request: Request,
|
|
@@ -226,9 +353,8 @@ async def evaluate_healing_decision_endpoint(
|
|
| 226 |
idempotency_key: Optional[str] = Header(None, alias="Idempotency-Key"),
|
| 227 |
):
|
| 228 |
"""
|
| 229 |
-
Evaluate a healing decision
|
| 230 |
"""
|
| 231 |
-
# ── optional trace ──────────────────────────────────────
|
| 232 |
span = None
|
| 233 |
if OTEL_AVAILABLE and _tracer:
|
| 234 |
span = _tracer.start_span("governance.evaluate_healing")
|
|
@@ -239,13 +365,19 @@ async def evaluate_healing_decision_endpoint(
|
|
| 239 |
if not api_key:
|
| 240 |
api_key = request.query_params.get("api_key", "unknown")
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
current_tracker = app.core.usage_tracker.tracker
|
| 243 |
if current_tracker is None:
|
| 244 |
if span:
|
| 245 |
span.set_status(Status(StatusCode.ERROR, "tracker unavailable"))
|
| 246 |
span.end()
|
| 247 |
-
raise HTTPException(status_code=503,
|
| 248 |
-
detail="Usage tracking service unavailable")
|
| 249 |
|
| 250 |
record = UsageRecord(
|
| 251 |
api_key=api_key,
|
|
@@ -266,8 +398,7 @@ async def evaluate_healing_decision_endpoint(
|
|
| 266 |
if existing_response:
|
| 267 |
return existing_response
|
| 268 |
else:
|
| 269 |
-
raise HTTPException(status_code=429,
|
| 270 |
-
detail="Monthly evaluation quota exceeded")
|
| 271 |
|
| 272 |
try:
|
| 273 |
policy_engine = request.app.state.policy_engine
|
|
@@ -284,6 +415,38 @@ async def evaluate_healing_decision_endpoint(
|
|
| 284 |
tokenizer=tokenizer,
|
| 285 |
)
|
| 286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
if span:
|
| 288 |
span.set_attribute("risk_score", response_data.get("risk_score", 0.0))
|
| 289 |
span.set_attribute("selected_action", response_data.get("selected_action", "unknown"))
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Routes for governance evaluation – tenant‑aware, audited, and Rust‑enforced.
|
| 3 |
+
|
| 4 |
+
This module provides the primary API endpoints for evaluating infrastructure
|
| 5 |
+
intents and healing decisions. It integrates:
|
| 6 |
+
|
| 7 |
+
- Idempotent quota consumption (usage tracker)
|
| 8 |
+
- Tenant isolation (tenant_id from request.state)
|
| 9 |
+
- Auditable decision logging (DecisionAuditLogDB)
|
| 10 |
+
- Pricing telemetry (optional, to arf‑pricing‑calculator)
|
| 11 |
+
- OpenTelemetry tracing
|
| 12 |
+
- Optional Rust execution ladder for mechanical enforcement
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
from fastapi import APIRouter, Depends, HTTPException, Request, BackgroundTasks, Header
|
| 16 |
from fastapi.encoders import jsonable_encoder
|
| 17 |
from sqlalchemy.orm import Session
|
| 18 |
+
from pydantic import BaseModel
|
| 19 |
+
import uuid
|
| 20 |
+
import logging
|
| 21 |
+
import time
|
| 22 |
+
from typing import Optional, Dict, Any
|
| 23 |
+
|
| 24 |
from app.models.infrastructure_intents import InfrastructureIntentRequest
|
| 25 |
from app.services.intent_adapter import to_oss_intent
|
| 26 |
from app.services.risk_service import evaluate_intent, evaluate_healing_decision
|
| 27 |
from app.services.intent_store import save_evaluated_intent
|
| 28 |
from app.services.outcome_service import record_outcome
|
| 29 |
from app.api.deps import get_db
|
| 30 |
+
from app.database.models_intents import DecisionAuditLogDB, TenantDB # <-- NEW
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
from agentic_reliability_framework.core.models.event import ReliabilityEvent
|
| 32 |
+
from agentic_reliability_framework.core.governance.healing_intent import HealingIntent
|
| 33 |
|
| 34 |
+
# ===== USAGE TRACKER =====
|
| 35 |
import app.core.usage_tracker
|
| 36 |
from app.core.usage_tracker import UsageRecord
|
| 37 |
|
| 38 |
+
# ===== PRICING CALCULATOR =====
|
| 39 |
try:
|
| 40 |
from arf_pricing_calculator.storage.buffer import add_event
|
| 41 |
PRICING_AVAILABLE = True
|
|
|
|
| 43 |
PRICING_AVAILABLE = False
|
| 44 |
add_event = None
|
| 45 |
|
| 46 |
+
# ===== RUST EXECUTION LADDER (optional) =====
|
| 47 |
+
try:
|
| 48 |
+
from arf_enterprise.execution_ladder import ExecutionLadder
|
| 49 |
+
RUST_AVAILABLE = True
|
| 50 |
+
except ImportError:
|
| 51 |
+
RUST_AVAILABLE = False
|
| 52 |
+
ExecutionLadder = None
|
| 53 |
+
|
| 54 |
+
# ===== OPEN TELEMETRY =====
|
| 55 |
try:
|
| 56 |
from opentelemetry import trace
|
| 57 |
from opentelemetry.trace import Status, StatusCode
|
|
|
|
| 76 |
event: ReliabilityEvent
|
| 77 |
|
| 78 |
|
| 79 |
+
# --------------------------------------------------------------------------
|
| 80 |
+
# Helper: write audit log (idempotent)
|
| 81 |
+
# --------------------------------------------------------------------------
|
| 82 |
+
async def write_audit_log(
|
| 83 |
+
db: Session,
|
| 84 |
+
tenant_id: str,
|
| 85 |
+
deterministic_id: str,
|
| 86 |
+
healing_intent: Dict[str, Any],
|
| 87 |
+
trace_id: Optional[str] = None,
|
| 88 |
+
idempotency_key: Optional[str] = None,
|
| 89 |
+
) -> None:
|
| 90 |
+
"""
|
| 91 |
+
Store a governance decision in the immutable audit log.
|
| 92 |
+
Idempotent on (tenant_id, deterministic_id) – if already exists, skip.
|
| 93 |
+
"""
|
| 94 |
+
# Check if already logged (idempotency)
|
| 95 |
+
existing = db.query(DecisionAuditLogDB).filter(
|
| 96 |
+
DecisionAuditLogDB.tenant_id == tenant_id,
|
| 97 |
+
DecisionAuditLogDB.deterministic_id == deterministic_id
|
| 98 |
+
).first()
|
| 99 |
+
if existing:
|
| 100 |
+
logger.info(f"Audit log already exists for {deterministic_id}, skipping.")
|
| 101 |
+
return
|
| 102 |
+
|
| 103 |
+
# Extract fields from HealingIntent (or result dict)
|
| 104 |
+
risk_score = healing_intent.get("risk_score", 0.5)
|
| 105 |
+
action = healing_intent.get("recommended_action", "deny") # approve/deny/escalate
|
| 106 |
+
justification = healing_intent.get("justification", "")
|
| 107 |
+
confidence = healing_intent.get("confidence", 0.85)
|
| 108 |
+
confidence_dist = healing_intent.get("confidence_distribution", {})
|
| 109 |
+
confidence_lower = confidence_dist.get("p5", confidence - 0.1)
|
| 110 |
+
confidence_upper = confidence_dist.get("p95", confidence + 0.1)
|
| 111 |
+
cost_projection = healing_intent.get("cost_projection")
|
| 112 |
+
policy_violations = healing_intent.get("policy_violations", [])
|
| 113 |
+
source = healing_intent.get("source", "advisory_analysis")
|
| 114 |
+
parent_intent_id = healing_intent.get("parent_intent_id")
|
| 115 |
+
root_intent_id = healing_intent.get("root_intent_id")
|
| 116 |
+
ancestor_chain = healing_intent.get("ancestor_chain", [])
|
| 117 |
+
|
| 118 |
+
# Memory and causal fields (usually in metadata)
|
| 119 |
+
metadata = healing_intent.get("metadata", {})
|
| 120 |
+
memory_success_rate = metadata.get("memory_success_rate")
|
| 121 |
+
memory_weight = metadata.get("memory_weight")
|
| 122 |
+
counterfactual = metadata.get("counterfactual")
|
| 123 |
+
epistemic_uncertainty = metadata.get("epistemic_uncertainty") # could be derived from risk_factors
|
| 124 |
+
causal_effect = metadata.get("causal_effect")
|
| 125 |
+
|
| 126 |
+
# Build audit entry
|
| 127 |
+
audit_entry = DecisionAuditLogDB(
|
| 128 |
+
tenant_id=tenant_id,
|
| 129 |
+
deterministic_id=deterministic_id,
|
| 130 |
+
timestamp=datetime.datetime.utcnow(),
|
| 131 |
+
risk_score=risk_score,
|
| 132 |
+
action=action,
|
| 133 |
+
justification=justification,
|
| 134 |
+
recommended_action=action, # same as action for now
|
| 135 |
+
confidence=confidence,
|
| 136 |
+
confidence_lower=confidence_lower,
|
| 137 |
+
confidence_upper=confidence_upper,
|
| 138 |
+
memory_success_rate=memory_success_rate,
|
| 139 |
+
memory_weight=memory_weight,
|
| 140 |
+
counterfactual=counterfactual,
|
| 141 |
+
epistemic_uncertainty=epistemic_uncertainty,
|
| 142 |
+
causal_effect=causal_effect,
|
| 143 |
+
cost_projection=cost_projection,
|
| 144 |
+
policy_violations=policy_violations,
|
| 145 |
+
source=source,
|
| 146 |
+
parent_intent_id=parent_intent_id,
|
| 147 |
+
root_intent_id=root_intent_id,
|
| 148 |
+
ancestor_chain=ancestor_chain,
|
| 149 |
+
trace_id=trace_id,
|
| 150 |
+
)
|
| 151 |
+
db.add(audit_entry)
|
| 152 |
+
db.commit()
|
| 153 |
+
logger.info(f"Audit log written for {deterministic_id}")
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# --------------------------------------------------------------------------
|
| 157 |
+
# Endpoint: evaluate infrastructure intent
|
| 158 |
+
# --------------------------------------------------------------------------
|
| 159 |
@router.post("/intents/evaluate")
|
| 160 |
async def evaluate_intent_endpoint(
|
| 161 |
request: Request,
|
|
|
|
| 165 |
idempotency_key: Optional[str] = Header(None, alias="Idempotency-Key"),
|
| 166 |
):
|
| 167 |
"""
|
| 168 |
+
Evaluate an infrastructure intent with idempotency, tenant isolation, and audit logging.
|
| 169 |
"""
|
|
|
|
| 170 |
span = None
|
| 171 |
if OTEL_AVAILABLE and _tracer:
|
| 172 |
span = _tracer.start_span("governance.evaluate_intent")
|
|
|
|
| 178 |
if not api_key:
|
| 179 |
api_key = request.query_params.get("api_key", "unknown")
|
| 180 |
|
| 181 |
+
# Get tenant_id from request.state (set by enforce_quota)
|
| 182 |
+
tenant_id = getattr(request.state, "tenant_id", None)
|
| 183 |
+
if not tenant_id:
|
| 184 |
+
if span:
|
| 185 |
+
span.set_status(Status(StatusCode.ERROR, "Missing tenant_id"))
|
| 186 |
+
span.end()
|
| 187 |
+
raise HTTPException(status_code=403, detail="Tenant not identified")
|
| 188 |
+
|
| 189 |
current_tracker = app.core.usage_tracker.tracker
|
| 190 |
if current_tracker is None:
|
| 191 |
if span:
|
| 192 |
span.set_status(Status(StatusCode.ERROR, "tracker unavailable"))
|
| 193 |
span.end()
|
| 194 |
+
raise HTTPException(status_code=503, detail="Usage tracking service unavailable")
|
|
|
|
| 195 |
|
| 196 |
record = UsageRecord(
|
| 197 |
api_key=api_key,
|
|
|
|
| 212 |
if existing_response:
|
| 213 |
return existing_response
|
| 214 |
else:
|
| 215 |
+
raise HTTPException(status_code=429, detail="Monthly evaluation quota exceeded")
|
|
|
|
| 216 |
|
| 217 |
try:
|
| 218 |
oss_intent = to_oss_intent(intent_req)
|
| 219 |
risk_engine = request.app.state.risk_engine
|
| 220 |
+
|
| 221 |
+
# TODO: Modify risk_service.evaluate_intent to accept tenant_id
|
| 222 |
+
# and pass it down to RiskEngine (which will select the correct BetaStore)
|
| 223 |
result = evaluate_intent(
|
| 224 |
engine=risk_engine,
|
| 225 |
intent=oss_intent,
|
| 226 |
cost_estimate=intent_req.estimated_cost,
|
| 227 |
+
policy_violations=intent_req.policy_violations,
|
| 228 |
+
# tenant_id=tenant_id # after modification
|
| 229 |
)
|
| 230 |
|
| 231 |
if span:
|
| 232 |
span.set_attribute("risk_score", result["risk_score"])
|
| 233 |
+
span.set_attribute("deterministic_id", str(uuid.uuid4()))
|
| 234 |
|
| 235 |
deterministic_id = str(uuid.uuid4())
|
| 236 |
api_payload = jsonable_encoder(intent_req.model_dump())
|
|
|
|
| 249 |
result["intent_id"] = deterministic_id
|
| 250 |
response_data = result
|
| 251 |
|
| 252 |
+
# ---- Write audit log (asynchronously) ----
|
| 253 |
+
# Extract the HealingIntent dictionary from result (if not present, construct minimal)
|
| 254 |
+
healing_intent_dict = result.get("healing_intent", result)
|
| 255 |
+
background_tasks.add_task(
|
| 256 |
+
write_audit_log,
|
| 257 |
+
db=db,
|
| 258 |
+
tenant_id=tenant_id,
|
| 259 |
+
deterministic_id=deterministic_id,
|
| 260 |
+
healing_intent=healing_intent_dict,
|
| 261 |
+
trace_id=span.get_span_context().trace_id if span else None,
|
| 262 |
+
idempotency_key=idempotency_key,
|
| 263 |
+
)
|
| 264 |
+
|
| 265 |
if current_tracker:
|
| 266 |
background_tasks.add_task(
|
| 267 |
current_tracker._insert_audit_log,
|
|
|
|
| 298 |
raise HTTPException(status_code=500, detail=error_msg)
|
| 299 |
|
| 300 |
|
| 301 |
+
# --------------------------------------------------------------------------
|
| 302 |
+
# Endpoint: record outcome (idempotent, pricing)
|
| 303 |
+
# --------------------------------------------------------------------------
|
| 304 |
@router.post("/intents/outcome")
|
| 305 |
async def record_outcome_endpoint(
|
| 306 |
request: Request,
|
|
|
|
| 310 |
):
|
| 311 |
"""
|
| 312 |
Record an outcome for a previously evaluated intent.
|
|
|
|
| 313 |
Also updates the pricing calculator's calibration buffer if available.
|
| 314 |
"""
|
| 315 |
try:
|
|
|
|
| 333 |
"source": "arf_api_outcome"
|
| 334 |
}
|
| 335 |
add_event(event)
|
| 336 |
+
logger.info(f"Added outcome to pricing buffer for intent {outcome.deterministic_id}")
|
|
|
|
|
|
|
| 337 |
except Exception as e:
|
| 338 |
+
logger.warning(f"Failed to update pricing buffer for intent {outcome.deterministic_id}: {e}")
|
|
|
|
|
|
|
| 339 |
|
| 340 |
return {"message": "Outcome recorded", "outcome_id": outcome_record.id}
|
| 341 |
except Exception as e:
|
| 342 |
raise HTTPException(status_code=500, detail=str(e))
|
| 343 |
|
| 344 |
|
| 345 |
+
# --------------------------------------------------------------------------
|
| 346 |
+
# Endpoint: evaluate healing decision (with optional Rust enforcement)
|
| 347 |
+
# --------------------------------------------------------------------------
|
| 348 |
@router.post("/healing/evaluate")
|
| 349 |
async def evaluate_healing_decision_endpoint(
|
| 350 |
request: Request,
|
|
|
|
| 353 |
idempotency_key: Optional[str] = Header(None, alias="Idempotency-Key"),
|
| 354 |
):
|
| 355 |
"""
|
| 356 |
+
Evaluate a healing decision, audit it, and optionally enforce via Rust ladder.
|
| 357 |
"""
|
|
|
|
| 358 |
span = None
|
| 359 |
if OTEL_AVAILABLE and _tracer:
|
| 360 |
span = _tracer.start_span("governance.evaluate_healing")
|
|
|
|
| 365 |
if not api_key:
|
| 366 |
api_key = request.query_params.get("api_key", "unknown")
|
| 367 |
|
| 368 |
+
tenant_id = getattr(request.state, "tenant_id", None)
|
| 369 |
+
if not tenant_id:
|
| 370 |
+
if span:
|
| 371 |
+
span.set_status(Status(StatusCode.ERROR, "Missing tenant_id"))
|
| 372 |
+
span.end()
|
| 373 |
+
raise HTTPException(status_code=403, detail="Tenant not identified")
|
| 374 |
+
|
| 375 |
current_tracker = app.core.usage_tracker.tracker
|
| 376 |
if current_tracker is None:
|
| 377 |
if span:
|
| 378 |
span.set_status(Status(StatusCode.ERROR, "tracker unavailable"))
|
| 379 |
span.end()
|
| 380 |
+
raise HTTPException(status_code=503, detail="Usage tracking service unavailable")
|
|
|
|
| 381 |
|
| 382 |
record = UsageRecord(
|
| 383 |
api_key=api_key,
|
|
|
|
| 398 |
if existing_response:
|
| 399 |
return existing_response
|
| 400 |
else:
|
| 401 |
+
raise HTTPException(status_code=429, detail="Monthly evaluation quota exceeded")
|
|
|
|
| 402 |
|
| 403 |
try:
|
| 404 |
policy_engine = request.app.state.policy_engine
|
|
|
|
| 415 |
tokenizer=tokenizer,
|
| 416 |
)
|
| 417 |
|
| 418 |
+
# ---- Optional Rust enforcement ----
|
| 419 |
+
if RUST_AVAILABLE and response_data.get("recommended_action") == "approve":
|
| 420 |
+
try:
|
| 421 |
+
# Convert response_data to a HealingIntent dict (or use the actual HealingIntent object)
|
| 422 |
+
# For simplicity, assume response_data contains the same fields as HealingIntent.to_enterprise_request()
|
| 423 |
+
intent_dict = response_data.get("healing_intent", response_data)
|
| 424 |
+
ladder = ExecutionLadder()
|
| 425 |
+
rust_result = ladder.evaluate(intent_dict)
|
| 426 |
+
if not rust_result.get("allowed", False):
|
| 427 |
+
# Override decision
|
| 428 |
+
response_data["recommended_action"] = "escalate"
|
| 429 |
+
response_data["justification"] = (
|
| 430 |
+
f"Rust enforcement blocked: {rust_result.get('reason', 'gate failure')}"
|
| 431 |
+
)
|
| 432 |
+
response_data["rust_result"] = rust_result
|
| 433 |
+
logger.warning(f"Rust enforcement overrode approval: {rust_result}")
|
| 434 |
+
except Exception as e:
|
| 435 |
+
logger.warning(f"Rust enforcement failed: {e}")
|
| 436 |
+
|
| 437 |
+
# ---- Write audit log ----
|
| 438 |
+
deterministic_id = response_data.get("intent_id", str(uuid.uuid4()))
|
| 439 |
+
healing_intent_dict = response_data.get("healing_intent", response_data)
|
| 440 |
+
background_tasks.add_task(
|
| 441 |
+
write_audit_log,
|
| 442 |
+
db=db,
|
| 443 |
+
tenant_id=tenant_id,
|
| 444 |
+
deterministic_id=deterministic_id,
|
| 445 |
+
healing_intent=healing_intent_dict,
|
| 446 |
+
trace_id=span.get_span_context().trace_id if span else None,
|
| 447 |
+
idempotency_key=idempotency_key,
|
| 448 |
+
)
|
| 449 |
+
|
| 450 |
if span:
|
| 451 |
span.set_attribute("risk_score", response_data.get("risk_score", 0.0))
|
| 452 |
span.set_attribute("selected_action", response_data.get("selected_action", "unknown"))
|
app/api/routes_incidents.py
CHANGED
|
@@ -198,7 +198,7 @@ async def evaluate_incident(
|
|
| 198 |
),
|
| 199 |
"confidence": 1.0 - result.get("uncertainty", 0.0),
|
| 200 |
"risk_score": result["risk_score"],
|
| 201 |
-
"status": "
|
| 202 |
}
|
| 203 |
|
| 204 |
response_data = {
|
|
|
|
| 198 |
),
|
| 199 |
"confidence": 1.0 - result.get("uncertainty", 0.0),
|
| 200 |
"risk_score": result["risk_score"],
|
| 201 |
+
"status": "success",
|
| 202 |
}
|
| 203 |
|
| 204 |
response_data = {
|
app/api/routes_users.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
| 1 |
"""
|
| 2 |
-
User endpoints – registration
|
| 3 |
"""
|
| 4 |
|
| 5 |
import uuid
|
| 6 |
-
from
|
|
|
|
|
|
|
| 7 |
from slowapi import Limiter
|
| 8 |
from slowapi.util import get_remote_address
|
|
|
|
| 9 |
from app.core.usage_tracker import tracker, enforce_quota, Tier
|
|
|
|
|
|
|
| 10 |
|
| 11 |
router = APIRouter(prefix="/users", tags=["users"])
|
| 12 |
|
|
@@ -16,43 +21,93 @@ limiter = Limiter(key_func=get_remote_address, default_limits=["5/hour"])
|
|
| 16 |
|
| 17 |
@router.post("/register")
|
| 18 |
@limiter.limit("5/hour")
|
| 19 |
-
async def register_user(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
"""
|
| 21 |
-
Public endpoint to create a new free‑tier API key.
|
| 22 |
Rate‑limited to 5 requests per hour per IP address.
|
| 23 |
"""
|
| 24 |
if tracker is None:
|
| 25 |
-
raise HTTPException(
|
| 26 |
-
status_code=503,
|
| 27 |
-
detail="Usage tracking not available")
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
#
|
| 33 |
-
|
|
|
|
| 34 |
if not success:
|
|
|
|
|
|
|
|
|
|
| 35 |
raise HTTPException(status_code=500, detail="Failed to create API key")
|
| 36 |
|
| 37 |
return {
|
| 38 |
"api_key": new_key,
|
|
|
|
| 39 |
"tier": "free",
|
| 40 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
@router.get("/quota")
|
| 44 |
async def get_user_quota(
|
| 45 |
-
|
| 46 |
-
|
|
|
|
| 47 |
"""
|
| 48 |
-
Return the current user's tier
|
| 49 |
Requires API key in Authorization header.
|
| 50 |
"""
|
| 51 |
tier = quota["tier"]
|
| 52 |
remaining = quota["remaining"]
|
| 53 |
limit = tier.monthly_evaluation_limit if tier else None
|
|
|
|
| 54 |
|
| 55 |
return {
|
|
|
|
| 56 |
"tier": tier.value,
|
| 57 |
"remaining": remaining,
|
| 58 |
"limit": limit,
|
|
|
|
| 1 |
"""
|
| 2 |
+
User endpoints – registration, tenant creation, quota information.
|
| 3 |
"""
|
| 4 |
|
| 5 |
import uuid
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from fastapi import APIRouter, Depends, HTTPException, Request, Query
|
| 8 |
+
from sqlalchemy.orm import Session
|
| 9 |
from slowapi import Limiter
|
| 10 |
from slowapi.util import get_remote_address
|
| 11 |
+
|
| 12 |
from app.core.usage_tracker import tracker, enforce_quota, Tier
|
| 13 |
+
from app.api.deps import get_db
|
| 14 |
+
from app.database.models_intents import TenantDB # <-- NEW
|
| 15 |
|
| 16 |
router = APIRouter(prefix="/users", tags=["users"])
|
| 17 |
|
|
|
|
| 21 |
|
| 22 |
@router.post("/register")
|
| 23 |
@limiter.limit("5/hour")
|
| 24 |
+
async def register_user(
|
| 25 |
+
request: Request,
|
| 26 |
+
db: Session = Depends(get_db),
|
| 27 |
+
org_name: str = Query(None, description="Optional organisation name for the new tenant"),
|
| 28 |
+
):
|
| 29 |
"""
|
| 30 |
+
Public endpoint to create a new free‑tier API key and a new tenant.
|
| 31 |
Rate‑limited to 5 requests per hour per IP address.
|
| 32 |
"""
|
| 33 |
if tracker is None:
|
| 34 |
+
raise HTTPException(status_code=503, detail="Usage tracking service not initialised")
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
# 1. Create a new tenant in the main database
|
| 37 |
+
tenant_id = str(uuid.uuid4())
|
| 38 |
+
name = org_name or "Default Organization"
|
| 39 |
+
new_tenant = TenantDB(
|
| 40 |
+
id=tenant_id,
|
| 41 |
+
name=name,
|
| 42 |
+
created_at=datetime.utcnow(),
|
| 43 |
+
created_by="self_service"
|
| 44 |
+
)
|
| 45 |
+
db.add(new_tenant)
|
| 46 |
+
db.commit()
|
| 47 |
+
db.refresh(new_tenant)
|
| 48 |
|
| 49 |
+
# 2. Generate a new API key for this tenant
|
| 50 |
+
new_key = f"sk_free_{uuid.uuid4().hex[:24]}"
|
| 51 |
+
success = tracker.get_or_create_api_key(api_key=new_key, tenant_id=tenant_id, tier=Tier.FREE)
|
| 52 |
if not success:
|
| 53 |
+
# Rollback tenant creation if key creation fails
|
| 54 |
+
db.delete(new_tenant)
|
| 55 |
+
db.commit()
|
| 56 |
raise HTTPException(status_code=500, detail="Failed to create API key")
|
| 57 |
|
| 58 |
return {
|
| 59 |
"api_key": new_key,
|
| 60 |
+
"tenant_id": tenant_id,
|
| 61 |
"tier": "free",
|
| 62 |
+
"organization": name,
|
| 63 |
+
"message": "API key and tenant created. Store the key securely – you won't see it again."
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
@router.get("/me")
|
| 68 |
+
async def get_current_user_info(
|
| 69 |
+
request: Request,
|
| 70 |
+
quota: dict = Depends(enforce_quota),
|
| 71 |
+
db: Session = Depends(get_db),
|
| 72 |
+
):
|
| 73 |
+
"""
|
| 74 |
+
Return information about the current user's tenant and quota.
|
| 75 |
+
Requires API key in Authorization header.
|
| 76 |
+
"""
|
| 77 |
+
tenant_id = quota.get("tenant_id")
|
| 78 |
+
if not tenant_id:
|
| 79 |
+
raise HTTPException(status_code=403, detail="No tenant associated with this API key")
|
| 80 |
+
|
| 81 |
+
tenant = db.query(TenantDB).filter(TenantDB.id == tenant_id).first()
|
| 82 |
+
if not tenant:
|
| 83 |
+
raise HTTPException(status_code=404, detail="Tenant not found")
|
| 84 |
+
|
| 85 |
+
return {
|
| 86 |
+
"tenant_id": tenant_id,
|
| 87 |
+
"organization": tenant.name,
|
| 88 |
+
"created_at": tenant.created_at.isoformat() if tenant.created_at else None,
|
| 89 |
+
"tier": quota["tier"].value,
|
| 90 |
+
"remaining": quota["remaining"],
|
| 91 |
+
"limit": quota["limit"],
|
| 92 |
+
}
|
| 93 |
|
| 94 |
|
| 95 |
@router.get("/quota")
|
| 96 |
async def get_user_quota(
|
| 97 |
+
request: Request,
|
| 98 |
+
quota: dict = Depends(enforce_quota),
|
| 99 |
+
):
|
| 100 |
"""
|
| 101 |
+
Return the current user's tier, remaining quota, and tenant ID.
|
| 102 |
Requires API key in Authorization header.
|
| 103 |
"""
|
| 104 |
tier = quota["tier"]
|
| 105 |
remaining = quota["remaining"]
|
| 106 |
limit = tier.monthly_evaluation_limit if tier else None
|
| 107 |
+
tenant_id = quota.get("tenant_id")
|
| 108 |
|
| 109 |
return {
|
| 110 |
+
"tenant_id": tenant_id,
|
| 111 |
"tier": tier.value,
|
| 112 |
"remaining": remaining,
|
| 113 |
"limit": limit,
|
app/core/usage_tracker.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
"""
|
| 2 |
Usage Tracker for ARF API – quotas, tiers, and audit logging.
|
| 3 |
Thread‑safe, atomic quota consumption, idempotent, fail‑closed.
|
| 4 |
-
"""
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
import json
|
| 7 |
import sqlite3
|
| 8 |
import threading
|
|
@@ -10,7 +12,7 @@ import time
|
|
| 10 |
from contextlib import contextmanager
|
| 11 |
from datetime import datetime, timedelta
|
| 12 |
from dataclasses import dataclass
|
| 13 |
-
from typing import Dict, Any, Optional, List, Tuple
|
| 14 |
from enum import Enum
|
| 15 |
from fastapi import BackgroundTasks, HTTPException, Request
|
| 16 |
|
|
@@ -24,6 +26,7 @@ except ImportError:
|
|
| 24 |
|
| 25 |
|
| 26 |
class Tier(str, Enum):
|
|
|
|
| 27 |
FREE = "free"
|
| 28 |
PRO = "pro"
|
| 29 |
PREMIUM = "premium"
|
|
@@ -31,16 +34,18 @@ class Tier(str, Enum):
|
|
| 31 |
|
| 32 |
@property
|
| 33 |
def monthly_evaluation_limit(self) -> Optional[int]:
|
|
|
|
| 34 |
limits = {
|
| 35 |
Tier.FREE: 1000,
|
| 36 |
Tier.PRO: 10_000,
|
| 37 |
Tier.PREMIUM: 50_000,
|
| 38 |
-
Tier.ENTERPRISE: None,
|
| 39 |
}
|
| 40 |
return limits[self]
|
| 41 |
|
| 42 |
@property
|
| 43 |
def audit_log_retention_days(self) -> int:
|
|
|
|
| 44 |
retention = {
|
| 45 |
Tier.FREE: 7,
|
| 46 |
Tier.PRO: 30,
|
|
@@ -52,7 +57,7 @@ class Tier(str, Enum):
|
|
| 52 |
|
| 53 |
@dataclass
|
| 54 |
class UsageRecord:
|
| 55 |
-
"""Single
|
| 56 |
api_key: str
|
| 57 |
tier: Tier
|
| 58 |
timestamp: float
|
|
@@ -66,6 +71,7 @@ class UsageRecord:
|
|
| 66 |
class UsageTracker:
|
| 67 |
"""
|
| 68 |
Thread‑safe usage tracker with atomic quota consumption and idempotency.
|
|
|
|
| 69 |
"""
|
| 70 |
|
| 71 |
def __init__(self, db_path: str = "arf_usage.db",
|
|
@@ -78,12 +84,11 @@ class UsageTracker:
|
|
| 78 |
if redis_url and REDIS_AVAILABLE:
|
| 79 |
self._redis_client = redis.from_url(redis_url)
|
| 80 |
elif redis_url:
|
| 81 |
-
raise ImportError(
|
| 82 |
-
"Redis client not installed. Run: pip install redis")
|
| 83 |
|
| 84 |
@contextmanager
|
| 85 |
def _get_conn(self):
|
| 86 |
-
"""Get a thread‑local SQLite connection with
|
| 87 |
if not hasattr(self._local, "conn"):
|
| 88 |
self._local.conn = sqlite3.connect(
|
| 89 |
self.db_path, check_same_thread=False, isolation_level=None)
|
|
@@ -92,10 +97,13 @@ class UsageTracker:
|
|
| 92 |
yield self._local.conn
|
| 93 |
|
| 94 |
def _init_db(self):
|
|
|
|
| 95 |
with self._get_conn() as conn:
|
|
|
|
| 96 |
conn.execute("""
|
| 97 |
CREATE TABLE IF NOT EXISTS api_keys (
|
| 98 |
key TEXT PRIMARY KEY,
|
|
|
|
| 99 |
tier TEXT NOT NULL,
|
| 100 |
created_at REAL NOT NULL,
|
| 101 |
last_used_at REAL,
|
|
@@ -139,16 +147,31 @@ class UsageTracker:
|
|
| 139 |
def _get_month_key(self) -> str:
|
| 140 |
return datetime.now().strftime("%Y-%m")
|
| 141 |
|
| 142 |
-
def get_or_create_api_key(self, key: str, tier: Tier = Tier.FREE) -> bool:
|
| 143 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
with self._get_conn() as conn:
|
| 145 |
row = conn.execute(
|
| 146 |
"SELECT key FROM api_keys WHERE key = ?", (key,)).fetchone()
|
| 147 |
if row:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
return True
|
| 149 |
conn.execute(
|
| 150 |
-
"INSERT INTO api_keys (key, tier, created_at, is_active) VALUES (?, ?, ?, ?)",
|
| 151 |
-
(key, tier.value, time.time(), 1)
|
| 152 |
)
|
| 153 |
conn.commit()
|
| 154 |
return True
|
|
@@ -164,6 +187,17 @@ class UsageTracker:
|
|
| 164 |
return None
|
| 165 |
return Tier(row["tier"])
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
def update_api_key_tier(self, api_key: str, new_tier: Tier) -> bool:
|
| 168 |
"""Update the tier of an existing API key. Returns True if successful."""
|
| 169 |
with self._get_conn() as conn:
|
|
@@ -173,41 +207,28 @@ class UsageTracker:
|
|
| 173 |
return False
|
| 174 |
conn.execute(
|
| 175 |
"UPDATE api_keys SET tier = ? WHERE key = ?",
|
| 176 |
-
(new_tier.value,
|
| 177 |
-
api_key))
|
| 178 |
conn.commit()
|
| 179 |
return True
|
| 180 |
|
| 181 |
# --------------------------------------------------------------------------
|
| 182 |
-
# Atomic quota consumption
|
| 183 |
# --------------------------------------------------------------------------
|
| 184 |
-
def _consume_quota_atomic_sqlite(
|
| 185 |
-
self,
|
| 186 |
-
api_key: str,
|
| 187 |
-
tier: Tier,
|
| 188 |
-
month: str) -> bool: # noqa: E501
|
| 189 |
-
"""
|
| 190 |
-
Atomically increment counter only if under limit.
|
| 191 |
-
Returns True if quota was consumed, False if limit reached.
|
| 192 |
-
"""
|
| 193 |
limit = tier.monthly_evaluation_limit
|
| 194 |
if limit is None:
|
| 195 |
-
# Unlimited – still increment for tracking but always succeed
|
| 196 |
with self._get_conn() as conn:
|
| 197 |
conn.execute(
|
| 198 |
-
"
|
| 199 |
-
|
| 200 |
-
ON CONFLICT(api_key, year_month) DO UPDATE SET count = count + 1""",
|
| 201 |
(api_key, month)
|
| 202 |
)
|
| 203 |
conn.commit()
|
| 204 |
return True
|
| 205 |
|
| 206 |
-
# Use BEGIN IMMEDIATE to lock the database for the transaction
|
| 207 |
with self._get_conn() as conn:
|
| 208 |
conn.execute("BEGIN IMMEDIATE")
|
| 209 |
try:
|
| 210 |
-
# Get current count (or 0)
|
| 211 |
row = conn.execute(
|
| 212 |
"SELECT count FROM monthly_counts WHERE api_key = ? AND year_month = ?",
|
| 213 |
(api_key, month)
|
|
@@ -216,11 +237,9 @@ class UsageTracker:
|
|
| 216 |
if current >= limit:
|
| 217 |
conn.rollback()
|
| 218 |
return False
|
| 219 |
-
# Increment
|
| 220 |
conn.execute(
|
| 221 |
-
"
|
| 222 |
-
|
| 223 |
-
ON CONFLICT(api_key, year_month) DO UPDATE SET count = count + 1""",
|
| 224 |
(api_key, month)
|
| 225 |
)
|
| 226 |
conn.commit()
|
|
@@ -229,15 +248,9 @@ class UsageTracker:
|
|
| 229 |
conn.rollback()
|
| 230 |
raise
|
| 231 |
|
| 232 |
-
def _consume_quota_atomic_redis(
|
| 233 |
-
self,
|
| 234 |
-
api_key: str,
|
| 235 |
-
tier: Tier,
|
| 236 |
-
month: str) -> bool:
|
| 237 |
-
"""Atomic Lua script for Redis: INCR only if below limit."""
|
| 238 |
limit = tier.monthly_evaluation_limit
|
| 239 |
if limit is None:
|
| 240 |
-
# Unlimited – just increment and return True
|
| 241 |
redis_key = f"arf:quota:{api_key}:{month}"
|
| 242 |
self._redis_client.incr(redis_key)
|
| 243 |
self._redis_client.expire(redis_key, timedelta(days=31))
|
|
@@ -251,7 +264,7 @@ class UsageTracker:
|
|
| 251 |
return 0
|
| 252 |
end
|
| 253 |
local new = redis.call('INCR', key)
|
| 254 |
-
redis.call('EXPIRE', key, 2678400)
|
| 255 |
return 1
|
| 256 |
"""
|
| 257 |
redis_key = f"arf:quota:{api_key}:{month}"
|
|
@@ -259,144 +272,83 @@ class UsageTracker:
|
|
| 259 |
return result == 1
|
| 260 |
|
| 261 |
# --------------------------------------------------------------------------
|
| 262 |
-
# Idempotency handling
|
| 263 |
# --------------------------------------------------------------------------
|
| 264 |
def _is_idempotent_key_used(self, key: str) -> bool:
|
| 265 |
-
"""Check if idempotency key already processed."""
|
| 266 |
with self._get_conn() as conn:
|
| 267 |
row = conn.execute(
|
| 268 |
"SELECT 1 FROM idempotency_keys WHERE key = ?", (key,)).fetchone()
|
| 269 |
return row is not None
|
| 270 |
|
| 271 |
def _mark_idempotent_key_used(self, key: str, ttl_seconds: int = 86400):
|
| 272 |
-
"""Store idempotency key with expiration (cleanup later)."""
|
| 273 |
with self._get_conn() as conn:
|
| 274 |
conn.execute(
|
| 275 |
"INSERT INTO idempotency_keys (key, consumed_at) VALUES (?, ?)",
|
| 276 |
(key, time.time())
|
| 277 |
)
|
| 278 |
conn.commit()
|
| 279 |
-
# Optionally schedule cleanup of old keys (can be done in a background
|
| 280 |
-
# thread)
|
| 281 |
|
| 282 |
# --------------------------------------------------------------------------
|
| 283 |
-
# Core usage recording (atomic + idempotent)
|
| 284 |
# --------------------------------------------------------------------------
|
| 285 |
-
def consume_quota_and_log(
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
| 290 |
-
"""
|
| 291 |
-
Atomically consume quota and insert audit log.
|
| 292 |
-
Returns (success, existing_response) where existing_response is not None
|
| 293 |
-
only when idempotency_key matched a previous successful call.
|
| 294 |
-
"""
|
| 295 |
-
# Idempotency check (if key provided)
|
| 296 |
-
if idempotency_key:
|
| 297 |
-
if self._is_idempotent_key_used(idempotency_key):
|
| 298 |
-
# Retrieve previous response from audit log (simplified – you may cache full response)
|
| 299 |
-
# For full idempotency, we would store the response body in idempotency table.
|
| 300 |
-
# Here we return a marker that caller should use cached
|
| 301 |
-
# response.
|
| 302 |
-
return False, {"idempotent": True,
|
| 303 |
-
"message": "Already processed"}
|
| 304 |
|
| 305 |
month = self._get_month_key()
|
| 306 |
-
# Atomic quota consumption
|
| 307 |
if self._redis_client:
|
| 308 |
-
quota_ok = self._consume_quota_atomic_redis(
|
| 309 |
-
record.api_key, record.tier, month)
|
| 310 |
else:
|
| 311 |
-
quota_ok = self._consume_quota_atomic_sqlite(
|
| 312 |
-
record.api_key, record.tier, month)
|
| 313 |
|
| 314 |
if not quota_ok:
|
| 315 |
return False, None
|
| 316 |
|
| 317 |
-
# Insert audit log (with idempotency key as unique constraint)
|
| 318 |
try:
|
| 319 |
with self._get_conn() as conn:
|
| 320 |
conn.execute(
|
| 321 |
"""INSERT INTO usage_log
|
| 322 |
-
(api_key, tier, timestamp, endpoint,
|
| 323 |
-
request_body, response, error, processing_ms,
|
| 324 |
-
idempotency_key)
|
| 325 |
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
| 326 |
-
(record.api_key,
|
| 327 |
-
record.
|
| 328 |
-
record.
|
| 329 |
-
record.
|
| 330 |
-
|
| 331 |
-
record.request_body) if record.request_body else None,
|
| 332 |
-
json.dumps(
|
| 333 |
-
record.response) if record.response else None,
|
| 334 |
-
record.error,
|
| 335 |
-
record.processing_ms,
|
| 336 |
-
idempotency_key,
|
| 337 |
-
))
|
| 338 |
conn.commit()
|
| 339 |
except sqlite3.IntegrityError as e:
|
| 340 |
-
# Duplicate idempotency_key – already inserted by another
|
| 341 |
-
# concurrent request
|
| 342 |
if "UNIQUE constraint failed: usage_log.idempotency_key" in str(e):
|
| 343 |
-
return False, {"idempotent": True,
|
| 344 |
-
"message": "Already processed"}
|
| 345 |
raise
|
| 346 |
|
| 347 |
if idempotency_key:
|
| 348 |
self._mark_idempotent_key_used(idempotency_key)
|
| 349 |
-
# Removed stray # noqa: E501 comment that was wrongly indented here
|
| 350 |
return True, None
|
| 351 |
|
| 352 |
# --------------------------------------------------------------------------
|
| 353 |
-
# Legacy interface (kept for compatibility
|
| 354 |
# --------------------------------------------------------------------------
|
| 355 |
-
def increment_usage_sync(
|
| 356 |
-
self,
|
| 357 |
-
record: UsageRecord,
|
| 358 |
-
idempotency_key: Optional[str] = None) -> bool:
|
| 359 |
-
"""
|
| 360 |
-
Synchronously record usage and increment counter.
|
| 361 |
-
Returns True if within quota and recorded, False otherwise.
|
| 362 |
-
This method now uses the atomic implementation.
|
| 363 |
-
"""
|
| 364 |
success, _ = self.consume_quota_and_log(record, idempotency_key)
|
| 365 |
return success
|
| 366 |
|
| 367 |
-
async def increment_usage_async(
|
| 368 |
-
|
| 369 |
-
record: UsageRecord,
|
| 370 |
-
background_tasks: BackgroundTasks,
|
| 371 |
-
idempotency_key: Optional[str] = None
|
| 372 |
-
) -> bool:
|
| 373 |
-
"""
|
| 374 |
-
Asynchronously record usage using FastAPI BackgroundTasks.
|
| 375 |
-
Still does the atomic check synchronously, then schedules the insert.
|
| 376 |
-
"""
|
| 377 |
-
# First, do atomic quota check (synchronous) – we must ensure we don't double-consume.
|
| 378 |
-
# Because background tasks may run later, we still need to reserve quota now.
|
| 379 |
-
# Simplified: we call consume_quota_and_log synchronously – that defeats async benefit.
|
| 380 |
-
# Better to use a queue or Redis with background processing.
|
| 381 |
-
# For this fix, we'll use the sync method (blocking) but still support
|
| 382 |
-
# idempotency.
|
| 383 |
return self.increment_usage_sync(record, idempotency_key)
|
| 384 |
|
| 385 |
# --------------------------------------------------------------------------
|
| 386 |
-
# Quota inspection
|
| 387 |
# --------------------------------------------------------------------------
|
| 388 |
def get_remaining_quota(self, api_key: str, tier: Tier) -> Optional[int]:
|
| 389 |
-
"""Return remaining evaluations for the month (non‑atomic, for info only)."""
|
| 390 |
limit = tier.monthly_evaluation_limit
|
| 391 |
if limit is None:
|
| 392 |
return None
|
| 393 |
-
|
| 394 |
month = self._get_month_key()
|
| 395 |
if self._redis_client:
|
| 396 |
redis_key = f"arf:quota:{api_key}:{month}"
|
| 397 |
count = int(self._redis_client.get(redis_key) or 0)
|
| 398 |
return max(0, limit - count)
|
| 399 |
-
|
| 400 |
with self._get_conn() as conn:
|
| 401 |
row = conn.execute(
|
| 402 |
"SELECT count FROM monthly_counts WHERE api_key = ? AND year_month = ?",
|
|
@@ -406,16 +358,10 @@ class UsageTracker:
|
|
| 406 |
return max(0, limit - count)
|
| 407 |
|
| 408 |
# --------------------------------------------------------------------------
|
| 409 |
-
# Audit and maintenance
|
| 410 |
# --------------------------------------------------------------------------
|
| 411 |
-
def get_audit_logs(
|
| 412 |
-
|
| 413 |
-
api_key: str,
|
| 414 |
-
start_date: Optional[datetime] = None,
|
| 415 |
-
end_date: Optional[datetime] = None,
|
| 416 |
-
limit: int = 100,
|
| 417 |
-
) -> List[Dict[str, Any]]:
|
| 418 |
-
"""Retrieve audit logs for a given API key."""
|
| 419 |
query = "SELECT * FROM usage_log WHERE api_key = ?"
|
| 420 |
params = [api_key]
|
| 421 |
if start_date:
|
|
@@ -426,47 +372,36 @@ class UsageTracker:
|
|
| 426 |
params.append(end_date.timestamp())
|
| 427 |
query += " ORDER BY timestamp DESC LIMIT ?"
|
| 428 |
params.append(limit)
|
| 429 |
-
|
| 430 |
with self._get_conn() as conn:
|
| 431 |
rows = conn.execute(query, params).fetchall()
|
| 432 |
return [dict(row) for row in rows]
|
| 433 |
|
| 434 |
def clean_old_logs(self):
|
| 435 |
-
"""Delete logs older than retention period for each tier, and old idempotency keys."""
|
| 436 |
with self._get_conn() as conn:
|
| 437 |
-
# Delete old usage logs
|
| 438 |
for tier in Tier:
|
| 439 |
retention_days = tier.audit_log_retention_days
|
| 440 |
-
if retention_days is None:
|
| 441 |
-
continue
|
| 442 |
cutoff = time.time() - retention_days * 86400
|
| 443 |
conn.execute(
|
| 444 |
"DELETE FROM usage_log WHERE tier = ? AND timestamp < ?",
|
| 445 |
(tier.value, cutoff)
|
| 446 |
)
|
| 447 |
-
# Delete idempotency keys older than 7 days
|
| 448 |
cutoff = time.time() - 7 * 86400
|
| 449 |
-
conn.execute(
|
| 450 |
-
"DELETE FROM idempotency_keys WHERE consumed_at < ?", (cutoff,))
|
| 451 |
conn.commit()
|
| 452 |
|
| 453 |
|
| 454 |
# --------------------------------------------------------------------------
|
| 455 |
-
# Global instance and FastAPI dependency
|
| 456 |
# --------------------------------------------------------------------------
|
| 457 |
tracker: Optional[UsageTracker] = None
|
| 458 |
|
| 459 |
|
| 460 |
-
def init_tracker(
|
| 461 |
-
db_path: str = "arf_usage.db",
|
| 462 |
-
redis_url: Optional[str] = None):
|
| 463 |
-
"""Initialize the global tracker. Must be called before enforce_quota."""
|
| 464 |
global tracker
|
| 465 |
tracker = UsageTracker(db_path, redis_url)
|
| 466 |
|
| 467 |
|
| 468 |
def update_key_tier(api_key: str, new_tier: Tier) -> bool:
|
| 469 |
-
"""Globally accessible helper to update API key tier."""
|
| 470 |
if tracker is None:
|
| 471 |
return False
|
| 472 |
return tracker.update_api_key_tier(api_key, new_tier)
|
|
@@ -474,16 +409,11 @@ def update_key_tier(api_key: str, new_tier: Tier) -> bool:
|
|
| 474 |
|
| 475 |
async def enforce_quota(request: Request, api_key: str = None):
|
| 476 |
"""
|
| 477 |
-
|
| 478 |
-
FAILS CLOSED: if tracker not initialised, raises HTTP 503.
|
| 479 |
"""
|
| 480 |
-
# P0 fix: No fallback that allows all requests
|
| 481 |
if tracker is None:
|
| 482 |
-
raise HTTPException(
|
| 483 |
-
status_code=503,
|
| 484 |
-
detail="Usage tracking service not initialised. Please contact administrator.")
|
| 485 |
|
| 486 |
-
# Extract API key from header or query
|
| 487 |
if api_key is None:
|
| 488 |
auth_header = request.headers.get("Authorization")
|
| 489 |
if auth_header and auth_header.startswith("Bearer "):
|
|
@@ -496,16 +426,19 @@ async def enforce_quota(request: Request, api_key: str = None):
|
|
| 496 |
|
| 497 |
tier = tracker.get_tier(api_key)
|
| 498 |
if tier is None:
|
| 499 |
-
raise HTTPException(
|
| 500 |
-
status_code=403,
|
| 501 |
-
detail="Invalid or inactive API key")
|
| 502 |
|
| 503 |
remaining = tracker.get_remaining_quota(api_key, tier)
|
| 504 |
if remaining is not None and remaining <= 0:
|
| 505 |
-
raise HTTPException(status_code=429,
|
| 506 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
-
# Store in request state for later logging (optional)
|
| 509 |
request.state.api_key = api_key
|
| 510 |
request.state.tier = tier
|
| 511 |
-
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Usage Tracker for ARF API – quotas, tiers, and audit logging.
|
| 3 |
Thread‑safe, atomic quota consumption, idempotent, fail‑closed.
|
|
|
|
| 4 |
|
| 5 |
+
Extended for multi‑tenancy: each API key is linked to a tenant ID.
|
| 6 |
+
Tenant ID is stored in the `api_keys` table and used for resource isolation.
|
| 7 |
+
"""
|
| 8 |
import json
|
| 9 |
import sqlite3
|
| 10 |
import threading
|
|
|
|
| 12 |
from contextlib import contextmanager
|
| 13 |
from datetime import datetime, timedelta
|
| 14 |
from dataclasses import dataclass
|
| 15 |
+
from typing import Dict, Any, Optional, List, Tuple, Callable
|
| 16 |
from enum import Enum
|
| 17 |
from fastapi import BackgroundTasks, HTTPException, Request
|
| 18 |
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
class Tier(str, Enum):
|
| 29 |
+
"""Pricing tiers with associated quota limits and audit retention."""
|
| 30 |
FREE = "free"
|
| 31 |
PRO = "pro"
|
| 32 |
PREMIUM = "premium"
|
|
|
|
| 34 |
|
| 35 |
@property
|
| 36 |
def monthly_evaluation_limit(self) -> Optional[int]:
|
| 37 |
+
"""Monthly evaluation quota. None = unlimited."""
|
| 38 |
limits = {
|
| 39 |
Tier.FREE: 1000,
|
| 40 |
Tier.PRO: 10_000,
|
| 41 |
Tier.PREMIUM: 50_000,
|
| 42 |
+
Tier.ENTERPRISE: None,
|
| 43 |
}
|
| 44 |
return limits[self]
|
| 45 |
|
| 46 |
@property
|
| 47 |
def audit_log_retention_days(self) -> int:
|
| 48 |
+
"""How many days to keep usage and decision audit logs."""
|
| 49 |
retention = {
|
| 50 |
Tier.FREE: 7,
|
| 51 |
Tier.PRO: 30,
|
|
|
|
| 57 |
|
| 58 |
@dataclass
|
| 59 |
class UsageRecord:
|
| 60 |
+
"""Single API call usage record (for quota and debugging)."""
|
| 61 |
api_key: str
|
| 62 |
tier: Tier
|
| 63 |
timestamp: float
|
|
|
|
| 71 |
class UsageTracker:
|
| 72 |
"""
|
| 73 |
Thread‑safe usage tracker with atomic quota consumption and idempotency.
|
| 74 |
+
Extended to support tenant isolation: each API key is linked to a tenant.
|
| 75 |
"""
|
| 76 |
|
| 77 |
def __init__(self, db_path: str = "arf_usage.db",
|
|
|
|
| 84 |
if redis_url and REDIS_AVAILABLE:
|
| 85 |
self._redis_client = redis.from_url(redis_url)
|
| 86 |
elif redis_url:
|
| 87 |
+
raise ImportError("Redis client not installed. Run: pip install redis")
|
|
|
|
| 88 |
|
| 89 |
@contextmanager
|
| 90 |
def _get_conn(self):
|
| 91 |
+
"""Get a thread‑local SQLite connection with WAL and immediate transactions."""
|
| 92 |
if not hasattr(self._local, "conn"):
|
| 93 |
self._local.conn = sqlite3.connect(
|
| 94 |
self.db_path, check_same_thread=False, isolation_level=None)
|
|
|
|
| 97 |
yield self._local.conn
|
| 98 |
|
| 99 |
def _init_db(self):
|
| 100 |
+
"""Initialise SQLite tables with tenant_id support."""
|
| 101 |
with self._get_conn() as conn:
|
| 102 |
+
# Modified: api_keys now has tenant_id column
|
| 103 |
conn.execute("""
|
| 104 |
CREATE TABLE IF NOT EXISTS api_keys (
|
| 105 |
key TEXT PRIMARY KEY,
|
| 106 |
+
tenant_id TEXT NOT NULL,
|
| 107 |
tier TEXT NOT NULL,
|
| 108 |
created_at REAL NOT NULL,
|
| 109 |
last_used_at REAL,
|
|
|
|
| 147 |
def _get_month_key(self) -> str:
|
| 148 |
return datetime.now().strftime("%Y-%m")
|
| 149 |
|
| 150 |
+
def get_or_create_api_key(self, key: str, tenant_id: str, tier: Tier = Tier.FREE) -> bool:
|
| 151 |
+
"""
|
| 152 |
+
Register a new API key for a given tenant.
|
| 153 |
+
|
| 154 |
+
Args:
|
| 155 |
+
key: The API key (plain text, will be hashed in production).
|
| 156 |
+
tenant_id: UUID of the tenant (must already exist in main DB).
|
| 157 |
+
tier: Initial tier for the key.
|
| 158 |
+
|
| 159 |
+
Returns:
|
| 160 |
+
True if key was created (or already exists for the same tenant).
|
| 161 |
+
"""
|
| 162 |
with self._get_conn() as conn:
|
| 163 |
row = conn.execute(
|
| 164 |
"SELECT key FROM api_keys WHERE key = ?", (key,)).fetchone()
|
| 165 |
if row:
|
| 166 |
+
# Key already exists – ensure it belongs to the same tenant
|
| 167 |
+
existing_tenant = conn.execute(
|
| 168 |
+
"SELECT tenant_id FROM api_keys WHERE key = ?", (key,)).fetchone()
|
| 169 |
+
if existing_tenant["tenant_id"] != tenant_id:
|
| 170 |
+
raise ValueError(f"Key {key[:8]}... already belongs to a different tenant.")
|
| 171 |
return True
|
| 172 |
conn.execute(
|
| 173 |
+
"INSERT INTO api_keys (key, tenant_id, tier, created_at, is_active) VALUES (?, ?, ?, ?, ?)",
|
| 174 |
+
(key, tenant_id, tier.value, time.time(), 1)
|
| 175 |
)
|
| 176 |
conn.commit()
|
| 177 |
return True
|
|
|
|
| 187 |
return None
|
| 188 |
return Tier(row["tier"])
|
| 189 |
|
| 190 |
+
def get_tenant_id(self, api_key: str) -> Optional[str]:
|
| 191 |
+
"""Return the tenant ID associated with the API key, or None if key invalid."""
|
| 192 |
+
with self._get_conn() as conn:
|
| 193 |
+
row = conn.execute(
|
| 194 |
+
"SELECT tenant_id FROM api_keys WHERE key = ? AND is_active = 1",
|
| 195 |
+
(api_key,)
|
| 196 |
+
).fetchone()
|
| 197 |
+
if not row:
|
| 198 |
+
return None
|
| 199 |
+
return row["tenant_id"]
|
| 200 |
+
|
| 201 |
def update_api_key_tier(self, api_key: str, new_tier: Tier) -> bool:
|
| 202 |
"""Update the tier of an existing API key. Returns True if successful."""
|
| 203 |
with self._get_conn() as conn:
|
|
|
|
| 207 |
return False
|
| 208 |
conn.execute(
|
| 209 |
"UPDATE api_keys SET tier = ? WHERE key = ?",
|
| 210 |
+
(new_tier.value, api_key))
|
|
|
|
| 211 |
conn.commit()
|
| 212 |
return True
|
| 213 |
|
| 214 |
# --------------------------------------------------------------------------
|
| 215 |
+
# Atomic quota consumption (unchanged, but uses api_key which links to tenant)
|
| 216 |
# --------------------------------------------------------------------------
|
| 217 |
+
def _consume_quota_atomic_sqlite(self, api_key: str, tier: Tier, month: str) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
limit = tier.monthly_evaluation_limit
|
| 219 |
if limit is None:
|
|
|
|
| 220 |
with self._get_conn() as conn:
|
| 221 |
conn.execute(
|
| 222 |
+
"INSERT INTO monthly_counts (api_key, year_month, count) VALUES (?, ?, 1) "
|
| 223 |
+
"ON CONFLICT(api_key, year_month) DO UPDATE SET count = count + 1",
|
|
|
|
| 224 |
(api_key, month)
|
| 225 |
)
|
| 226 |
conn.commit()
|
| 227 |
return True
|
| 228 |
|
|
|
|
| 229 |
with self._get_conn() as conn:
|
| 230 |
conn.execute("BEGIN IMMEDIATE")
|
| 231 |
try:
|
|
|
|
| 232 |
row = conn.execute(
|
| 233 |
"SELECT count FROM monthly_counts WHERE api_key = ? AND year_month = ?",
|
| 234 |
(api_key, month)
|
|
|
|
| 237 |
if current >= limit:
|
| 238 |
conn.rollback()
|
| 239 |
return False
|
|
|
|
| 240 |
conn.execute(
|
| 241 |
+
"INSERT INTO monthly_counts (api_key, year_month, count) VALUES (?, ?, 1) "
|
| 242 |
+
"ON CONFLICT(api_key, year_month) DO UPDATE SET count = count + 1",
|
|
|
|
| 243 |
(api_key, month)
|
| 244 |
)
|
| 245 |
conn.commit()
|
|
|
|
| 248 |
conn.rollback()
|
| 249 |
raise
|
| 250 |
|
| 251 |
+
def _consume_quota_atomic_redis(self, api_key: str, tier: Tier, month: str) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
limit = tier.monthly_evaluation_limit
|
| 253 |
if limit is None:
|
|
|
|
| 254 |
redis_key = f"arf:quota:{api_key}:{month}"
|
| 255 |
self._redis_client.incr(redis_key)
|
| 256 |
self._redis_client.expire(redis_key, timedelta(days=31))
|
|
|
|
| 264 |
return 0
|
| 265 |
end
|
| 266 |
local new = redis.call('INCR', key)
|
| 267 |
+
redis.call('EXPIRE', key, 2678400)
|
| 268 |
return 1
|
| 269 |
"""
|
| 270 |
redis_key = f"arf:quota:{api_key}:{month}"
|
|
|
|
| 272 |
return result == 1
|
| 273 |
|
| 274 |
# --------------------------------------------------------------------------
|
| 275 |
+
# Idempotency handling (unchanged)
|
| 276 |
# --------------------------------------------------------------------------
|
| 277 |
def _is_idempotent_key_used(self, key: str) -> bool:
|
|
|
|
| 278 |
with self._get_conn() as conn:
|
| 279 |
row = conn.execute(
|
| 280 |
"SELECT 1 FROM idempotency_keys WHERE key = ?", (key,)).fetchone()
|
| 281 |
return row is not None
|
| 282 |
|
| 283 |
def _mark_idempotent_key_used(self, key: str, ttl_seconds: int = 86400):
|
|
|
|
| 284 |
with self._get_conn() as conn:
|
| 285 |
conn.execute(
|
| 286 |
"INSERT INTO idempotency_keys (key, consumed_at) VALUES (?, ?)",
|
| 287 |
(key, time.time())
|
| 288 |
)
|
| 289 |
conn.commit()
|
|
|
|
|
|
|
| 290 |
|
| 291 |
# --------------------------------------------------------------------------
|
| 292 |
+
# Core usage recording (atomic + idempotent) – unchanged
|
| 293 |
# --------------------------------------------------------------------------
|
| 294 |
+
def consume_quota_and_log(self, record: UsageRecord, idempotency_key: Optional[str] = None
|
| 295 |
+
) -> Tuple[bool, Optional[Dict[str, Any]]]:
|
| 296 |
+
if idempotency_key and self._is_idempotent_key_used(idempotency_key):
|
| 297 |
+
return False, {"idempotent": True, "message": "Already processed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
month = self._get_month_key()
|
|
|
|
| 300 |
if self._redis_client:
|
| 301 |
+
quota_ok = self._consume_quota_atomic_redis(record.api_key, record.tier, month)
|
|
|
|
| 302 |
else:
|
| 303 |
+
quota_ok = self._consume_quota_atomic_sqlite(record.api_key, record.tier, month)
|
|
|
|
| 304 |
|
| 305 |
if not quota_ok:
|
| 306 |
return False, None
|
| 307 |
|
|
|
|
| 308 |
try:
|
| 309 |
with self._get_conn() as conn:
|
| 310 |
conn.execute(
|
| 311 |
"""INSERT INTO usage_log
|
| 312 |
+
(api_key, tier, timestamp, endpoint, request_body, response, error, processing_ms, idempotency_key)
|
|
|
|
|
|
|
| 313 |
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
| 314 |
+
(record.api_key, record.tier.value, record.timestamp, record.endpoint,
|
| 315 |
+
json.dumps(record.request_body) if record.request_body else None,
|
| 316 |
+
json.dumps(record.response) if record.response else None,
|
| 317 |
+
record.error, record.processing_ms, idempotency_key)
|
| 318 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
conn.commit()
|
| 320 |
except sqlite3.IntegrityError as e:
|
|
|
|
|
|
|
| 321 |
if "UNIQUE constraint failed: usage_log.idempotency_key" in str(e):
|
| 322 |
+
return False, {"idempotent": True, "message": "Already processed"}
|
|
|
|
| 323 |
raise
|
| 324 |
|
| 325 |
if idempotency_key:
|
| 326 |
self._mark_idempotent_key_used(idempotency_key)
|
|
|
|
| 327 |
return True, None
|
| 328 |
|
| 329 |
# --------------------------------------------------------------------------
|
| 330 |
+
# Legacy interface (kept for compatibility)
|
| 331 |
# --------------------------------------------------------------------------
|
| 332 |
+
def increment_usage_sync(self, record: UsageRecord, idempotency_key: Optional[str] = None) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
success, _ = self.consume_quota_and_log(record, idempotency_key)
|
| 334 |
return success
|
| 335 |
|
| 336 |
+
async def increment_usage_async(self, record: UsageRecord, background_tasks: BackgroundTasks,
|
| 337 |
+
idempotency_key: Optional[str] = None) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
return self.increment_usage_sync(record, idempotency_key)
|
| 339 |
|
| 340 |
# --------------------------------------------------------------------------
|
| 341 |
+
# Quota inspection
|
| 342 |
# --------------------------------------------------------------------------
|
| 343 |
def get_remaining_quota(self, api_key: str, tier: Tier) -> Optional[int]:
|
|
|
|
| 344 |
limit = tier.monthly_evaluation_limit
|
| 345 |
if limit is None:
|
| 346 |
return None
|
|
|
|
| 347 |
month = self._get_month_key()
|
| 348 |
if self._redis_client:
|
| 349 |
redis_key = f"arf:quota:{api_key}:{month}"
|
| 350 |
count = int(self._redis_client.get(redis_key) or 0)
|
| 351 |
return max(0, limit - count)
|
|
|
|
| 352 |
with self._get_conn() as conn:
|
| 353 |
row = conn.execute(
|
| 354 |
"SELECT count FROM monthly_counts WHERE api_key = ? AND year_month = ?",
|
|
|
|
| 358 |
return max(0, limit - count)
|
| 359 |
|
| 360 |
# --------------------------------------------------------------------------
|
| 361 |
+
# Audit and maintenance (kept for usage_log)
|
| 362 |
# --------------------------------------------------------------------------
|
| 363 |
+
def get_audit_logs(self, api_key: str, start_date: Optional[datetime] = None,
|
| 364 |
+
end_date: Optional[datetime] = None, limit: int = 100) -> List[Dict[str, Any]]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
query = "SELECT * FROM usage_log WHERE api_key = ?"
|
| 366 |
params = [api_key]
|
| 367 |
if start_date:
|
|
|
|
| 372 |
params.append(end_date.timestamp())
|
| 373 |
query += " ORDER BY timestamp DESC LIMIT ?"
|
| 374 |
params.append(limit)
|
|
|
|
| 375 |
with self._get_conn() as conn:
|
| 376 |
rows = conn.execute(query, params).fetchall()
|
| 377 |
return [dict(row) for row in rows]
|
| 378 |
|
| 379 |
def clean_old_logs(self):
|
|
|
|
| 380 |
with self._get_conn() as conn:
|
|
|
|
| 381 |
for tier in Tier:
|
| 382 |
retention_days = tier.audit_log_retention_days
|
|
|
|
|
|
|
| 383 |
cutoff = time.time() - retention_days * 86400
|
| 384 |
conn.execute(
|
| 385 |
"DELETE FROM usage_log WHERE tier = ? AND timestamp < ?",
|
| 386 |
(tier.value, cutoff)
|
| 387 |
)
|
|
|
|
| 388 |
cutoff = time.time() - 7 * 86400
|
| 389 |
+
conn.execute("DELETE FROM idempotency_keys WHERE consumed_at < ?", (cutoff,))
|
|
|
|
| 390 |
conn.commit()
|
| 391 |
|
| 392 |
|
| 393 |
# --------------------------------------------------------------------------
|
| 394 |
+
# Global instance and FastAPI dependency
|
| 395 |
# --------------------------------------------------------------------------
|
| 396 |
tracker: Optional[UsageTracker] = None
|
| 397 |
|
| 398 |
|
| 399 |
+
def init_tracker(db_path: str = "arf_usage.db", redis_url: Optional[str] = None):
|
|
|
|
|
|
|
|
|
|
| 400 |
global tracker
|
| 401 |
tracker = UsageTracker(db_path, redis_url)
|
| 402 |
|
| 403 |
|
| 404 |
def update_key_tier(api_key: str, new_tier: Tier) -> bool:
|
|
|
|
| 405 |
if tracker is None:
|
| 406 |
return False
|
| 407 |
return tracker.update_api_key_tier(api_key, new_tier)
|
|
|
|
| 409 |
|
| 410 |
async def enforce_quota(request: Request, api_key: str = None):
|
| 411 |
"""
|
| 412 |
+
FastAPI dependency that enforces quota and attaches tenant_id to request state.
|
|
|
|
| 413 |
"""
|
|
|
|
| 414 |
if tracker is None:
|
| 415 |
+
raise HTTPException(status_code=503, detail="Usage tracking service not initialised.")
|
|
|
|
|
|
|
| 416 |
|
|
|
|
| 417 |
if api_key is None:
|
| 418 |
auth_header = request.headers.get("Authorization")
|
| 419 |
if auth_header and auth_header.startswith("Bearer "):
|
|
|
|
| 426 |
|
| 427 |
tier = tracker.get_tier(api_key)
|
| 428 |
if tier is None:
|
| 429 |
+
raise HTTPException(status_code=403, detail="Invalid or inactive API key")
|
|
|
|
|
|
|
| 430 |
|
| 431 |
remaining = tracker.get_remaining_quota(api_key, tier)
|
| 432 |
if remaining is not None and remaining <= 0:
|
| 433 |
+
raise HTTPException(status_code=429, detail="Monthly evaluation quota exceeded")
|
| 434 |
+
|
| 435 |
+
# Retrieve tenant_id
|
| 436 |
+
tenant_id = tracker.get_tenant_id(api_key)
|
| 437 |
+
if not tenant_id:
|
| 438 |
+
raise HTTPException(status_code=403, detail="API key not associated with a tenant")
|
| 439 |
|
|
|
|
| 440 |
request.state.api_key = api_key
|
| 441 |
request.state.tier = tier
|
| 442 |
+
request.state.tenant_id = tenant_id
|
| 443 |
+
|
| 444 |
+
return {"api_key": api_key, "tier": tier, "tenant_id": tenant_id, "remaining": remaining}
|
app/database/models_intents.py
CHANGED
|
@@ -1,50 +1,149 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from sqlalchemy.orm import relationship
|
| 3 |
import datetime
|
| 4 |
from .base import Base
|
| 5 |
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
class IntentDB(Base):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
__tablename__ = "intents"
|
|
|
|
| 9 |
id = Column(Integer, primary_key=True, index=True)
|
| 10 |
-
deterministic_id = Column(
|
| 11 |
-
|
| 12 |
-
unique=True,
|
| 13 |
-
index=True,
|
| 14 |
-
nullable=False)
|
| 15 |
intent_type = Column(String(64), nullable=False)
|
| 16 |
payload = Column(JSON, nullable=False)
|
| 17 |
oss_payload = Column(JSON, nullable=True)
|
| 18 |
environment = Column(String(32), nullable=True)
|
| 19 |
-
created_at = Column(
|
| 20 |
-
DateTime,
|
| 21 |
-
default=datetime.datetime.utcnow,
|
| 22 |
-
nullable=False)
|
| 23 |
evaluated_at = Column(DateTime, nullable=True)
|
| 24 |
risk_score = Column(String(32), nullable=True)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
|
| 30 |
|
| 31 |
class OutcomeDB(Base):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
__tablename__ = "intent_outcomes"
|
|
|
|
| 33 |
id = Column(Integer, primary_key=True, index=True)
|
| 34 |
-
intent_id = Column(
|
| 35 |
-
Integer,
|
| 36 |
-
ForeignKey(
|
| 37 |
-
"intents.id",
|
| 38 |
-
ondelete="CASCADE"),
|
| 39 |
-
nullable=False)
|
| 40 |
success = Column(Boolean, nullable=False)
|
| 41 |
recorded_by = Column(String(128), nullable=True)
|
| 42 |
notes = Column(Text, nullable=True)
|
| 43 |
-
recorded_at = Column(
|
| 44 |
-
DateTime,
|
| 45 |
-
default=datetime.datetime.utcnow,
|
| 46 |
-
nullable=False)
|
| 47 |
idempotency_key = Column(String(128), unique=True, nullable=True)
|
|
|
|
| 48 |
intent = relationship("IntentDB", back_populates="outcomes")
|
| 49 |
|
| 50 |
__table_args__ = (
|
|
@@ -52,24 +151,83 @@ class OutcomeDB(Base):
|
|
| 52 |
)
|
| 53 |
|
| 54 |
|
| 55 |
-
#
|
| 56 |
-
#
|
| 57 |
-
#
|
|
|
|
| 58 |
class BetaStateDB(Base):
|
| 59 |
"""
|
| 60 |
-
Stores the
|
| 61 |
-
|
|
|
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
"""
|
| 66 |
__tablename__ = "beta_state"
|
| 67 |
|
| 68 |
id = Column(Integer, primary_key=True, index=True)
|
| 69 |
-
|
|
|
|
| 70 |
alpha = Column(Float, nullable=False)
|
| 71 |
beta = Column(Float, nullable=False)
|
| 72 |
-
updated_at = Column(
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Database models for the ARF API Control Plane.
|
| 3 |
+
|
| 4 |
+
This module defines the SQLAlchemy ORM models for:
|
| 5 |
+
- Intents (InfrastructureIntent evaluations)
|
| 6 |
+
- Outcomes (recorded results of executed intents)
|
| 7 |
+
- Beta state (conjugate Bayesian posteriors per tenant and category)
|
| 8 |
+
- Audit logs (immutable decision records for compliance)
|
| 9 |
+
- Tenants (multi‑tenant isolation)
|
| 10 |
+
|
| 11 |
+
All tables include a `tenant_id` column to enforce data partitioning.
|
| 12 |
+
The BetaStateDB now stores parameters per (tenant, category) pair.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from sqlalchemy import (
|
| 16 |
+
Column, Integer, String, DateTime, Boolean, Text, JSON,
|
| 17 |
+
Float, ForeignKey, UniqueConstraint, Index
|
| 18 |
+
)
|
| 19 |
from sqlalchemy.orm import relationship
|
| 20 |
import datetime
|
| 21 |
from .base import Base
|
| 22 |
|
| 23 |
|
| 24 |
+
# ============================================================================
|
| 25 |
+
# Tenant table – root of multi‑tenancy
|
| 26 |
+
# ============================================================================
|
| 27 |
+
|
| 28 |
+
class TenantDB(Base):
|
| 29 |
+
"""
|
| 30 |
+
Represents a customer tenant (organisation). All other tables
|
| 31 |
+
reference this table via a foreign key `tenant_id`.
|
| 32 |
+
|
| 33 |
+
Attributes:
|
| 34 |
+
id (str): UUID of the tenant (primary key).
|
| 35 |
+
name (str): Human‑readable organisation name.
|
| 36 |
+
created_at (datetime): UTC timestamp of creation.
|
| 37 |
+
created_by (str, optional): Email or user ID of the creator.
|
| 38 |
+
"""
|
| 39 |
+
__tablename__ = "tenants"
|
| 40 |
+
|
| 41 |
+
id = Column(String(64), primary_key=True, index=True)
|
| 42 |
+
name = Column(String(256), nullable=False)
|
| 43 |
+
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
|
| 44 |
+
created_by = Column(String(128), nullable=True)
|
| 45 |
+
|
| 46 |
+
# Relationships
|
| 47 |
+
api_keys = relationship("APIKeyDB", back_populates="tenant", cascade="all, delete-orphan")
|
| 48 |
+
intents = relationship("IntentDB", back_populates="tenant")
|
| 49 |
+
beta_states = relationship("BetaStateDB", back_populates="tenant")
|
| 50 |
+
audit_logs = relationship("DecisionAuditLogDB", back_populates="tenant")
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# ============================================================================
|
| 54 |
+
# API keys (extended with tenant_id)
|
| 55 |
+
# ============================================================================
|
| 56 |
+
|
| 57 |
+
class APIKeyDB(Base):
|
| 58 |
+
"""
|
| 59 |
+
Stores API keys for authentication and tiered quota. Each key belongs
|
| 60 |
+
to exactly one tenant. The `tier` determines monthly evaluation limits.
|
| 61 |
+
|
| 62 |
+
Attributes:
|
| 63 |
+
key (str): The hashed API key (primary key).
|
| 64 |
+
tenant_id (str): Foreign key to `tenants.id`.
|
| 65 |
+
tier (str): Tier enumeration value (free, pro, premium, enterprise).
|
| 66 |
+
created_at (datetime): UTC creation time.
|
| 67 |
+
last_used_at (datetime, optional): Timestamp of last successful request.
|
| 68 |
+
is_active (bool): Soft‑delete flag.
|
| 69 |
+
"""
|
| 70 |
+
__tablename__ = "api_keys"
|
| 71 |
+
|
| 72 |
+
key = Column(String(256), primary_key=True, index=True)
|
| 73 |
+
tenant_id = Column(String(64), ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
| 74 |
+
tier = Column(String(32), nullable=False)
|
| 75 |
+
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
|
| 76 |
+
last_used_at = Column(DateTime, nullable=True)
|
| 77 |
+
is_active = Column(Boolean, default=True, nullable=False)
|
| 78 |
+
|
| 79 |
+
# Relationships
|
| 80 |
+
tenant = relationship("TenantDB", back_populates="api_keys")
|
| 81 |
+
usage_logs = relationship("UsageLogDB", back_populates="api_key")
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ============================================================================
|
| 85 |
+
# Intents (evaluations) – now tenant‑scoped
|
| 86 |
+
# ============================================================================
|
| 87 |
+
|
| 88 |
class IntentDB(Base):
|
| 89 |
+
"""
|
| 90 |
+
Stores each InfrastructureIntent evaluation request and its resulting
|
| 91 |
+
risk score. One‑to‑many with OutcomeDB.
|
| 92 |
+
|
| 93 |
+
Attributes:
|
| 94 |
+
id (int): Auto‑increment primary key.
|
| 95 |
+
deterministic_id (str): Client‑provided idempotency identifier (unique).
|
| 96 |
+
tenant_id (str): Tenant that owns this intent.
|
| 97 |
+
intent_type (str): Type of intent (e.g., "provision_resource").
|
| 98 |
+
payload (JSON): Original API request payload.
|
| 99 |
+
oss_payload (JSON): Canonical OSS intent representation.
|
| 100 |
+
environment (str, optional): Environment label (prod, staging, etc.).
|
| 101 |
+
created_at (datetime): UTC timestamp of evaluation.
|
| 102 |
+
evaluated_at (datetime, optional): When the risk engine processed it.
|
| 103 |
+
risk_score (str, optional): String representation of the risk score.
|
| 104 |
+
"""
|
| 105 |
__tablename__ = "intents"
|
| 106 |
+
|
| 107 |
id = Column(Integer, primary_key=True, index=True)
|
| 108 |
+
deterministic_id = Column(String(64), unique=True, index=True, nullable=False)
|
| 109 |
+
tenant_id = Column(String(64), ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
|
|
|
|
|
|
|
|
|
| 110 |
intent_type = Column(String(64), nullable=False)
|
| 111 |
payload = Column(JSON, nullable=False)
|
| 112 |
oss_payload = Column(JSON, nullable=True)
|
| 113 |
environment = Column(String(32), nullable=True)
|
| 114 |
+
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
|
|
|
|
|
|
|
|
|
|
| 115 |
evaluated_at = Column(DateTime, nullable=True)
|
| 116 |
risk_score = Column(String(32), nullable=True)
|
| 117 |
+
|
| 118 |
+
# Relationships
|
| 119 |
+
tenant = relationship("TenantDB", back_populates="intents")
|
| 120 |
+
outcomes = relationship("OutcomeDB", back_populates="intent", cascade="all, delete-orphan")
|
| 121 |
|
| 122 |
|
| 123 |
class OutcomeDB(Base):
|
| 124 |
+
"""
|
| 125 |
+
Records the outcome (success/failure) of a previously evaluated intent.
|
| 126 |
+
Only one outcome per intent is allowed (unique constraint on intent_id).
|
| 127 |
+
|
| 128 |
+
Attributes:
|
| 129 |
+
id (int): Primary key.
|
| 130 |
+
intent_id (int): Foreign key to `intents.id`.
|
| 131 |
+
success (bool): Whether the executed action succeeded.
|
| 132 |
+
recorded_by (str, optional): Identity of the caller (e.g., API key owner).
|
| 133 |
+
notes (str, optional): Free‑text notes.
|
| 134 |
+
recorded_at (datetime): UTC timestamp.
|
| 135 |
+
idempotency_key (str, optional): Unique idempotency key for this outcome.
|
| 136 |
+
"""
|
| 137 |
__tablename__ = "intent_outcomes"
|
| 138 |
+
|
| 139 |
id = Column(Integer, primary_key=True, index=True)
|
| 140 |
+
intent_id = Column(Integer, ForeignKey("intents.id", ondelete="CASCADE"), nullable=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
success = Column(Boolean, nullable=False)
|
| 142 |
recorded_by = Column(String(128), nullable=True)
|
| 143 |
notes = Column(Text, nullable=True)
|
| 144 |
+
recorded_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
|
|
|
|
|
|
|
|
|
|
| 145 |
idempotency_key = Column(String(128), unique=True, nullable=True)
|
| 146 |
+
|
| 147 |
intent = relationship("IntentDB", back_populates="outcomes")
|
| 148 |
|
| 149 |
__table_args__ = (
|
|
|
|
| 151 |
)
|
| 152 |
|
| 153 |
|
| 154 |
+
# ============================================================================
|
| 155 |
+
# Bayesian conjugate state – now per tenant and per category
|
| 156 |
+
# ============================================================================
|
| 157 |
+
|
| 158 |
class BetaStateDB(Base):
|
| 159 |
"""
|
| 160 |
+
Stores the posterior parameters (α, β) of the conjugate Beta model
|
| 161 |
+
for each (tenant, category) pair. This allows online learning to be
|
| 162 |
+
isolated per customer.
|
| 163 |
|
| 164 |
+
Attributes:
|
| 165 |
+
id (int): Primary key.
|
| 166 |
+
tenant_id (str): Tenant that owns this state.
|
| 167 |
+
category (str): ActionCategory value (e.g., "database", "compute").
|
| 168 |
+
alpha (float): α parameter of the Beta distribution.
|
| 169 |
+
beta (float): β parameter of the Beta distribution.
|
| 170 |
+
updated_at (datetime): Last update timestamp (auto‑set).
|
| 171 |
"""
|
| 172 |
__tablename__ = "beta_state"
|
| 173 |
|
| 174 |
id = Column(Integer, primary_key=True, index=True)
|
| 175 |
+
tenant_id = Column(String(64), ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
| 176 |
+
category = Column(String(32), nullable=False, index=True)
|
| 177 |
alpha = Column(Float, nullable=False)
|
| 178 |
beta = Column(Float, nullable=False)
|
| 179 |
+
updated_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow)
|
| 180 |
+
|
| 181 |
+
# Composite unique constraint: (tenant_id, category)
|
| 182 |
+
__table_args__ = (
|
| 183 |
+
UniqueConstraint("tenant_id", "category", name="uq_beta_state_tenant_category"),
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
# Relationships
|
| 187 |
+
tenant = relationship("TenantDB", back_populates="beta_states")
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
# ============================================================================
|
| 191 |
+
# NEW: Audit log for compliance (immutable decision records)
|
| 192 |
+
# ============================================================================
|
| 193 |
+
|
| 194 |
+
class DecisionAuditLogDB(Base):
|
| 195 |
+
"""
|
| 196 |
+
Immutable, tamper‑evident record of every governance decision.
|
| 197 |
+
Designed for compliance (SOC2, ISO) and forensic analysis.
|
| 198 |
+
|
| 199 |
+
Attributes:
|
| 200 |
+
id (str): UUID primary key.
|
| 201 |
+
tenant_id (str): Tenant that owns the decision.
|
| 202 |
+
deterministic_id (str): Intent identifier (idempotency key).
|
| 203 |
+
timestamp (datetime): UTC decision time.
|
| 204 |
+
risk_score (float): Fused Bayesian risk score (0‑1).
|
| 205 |
+
action (str): Selected action (approve, deny, escalate).
|
| 206 |
+
justification (str): Human‑readable explanation.
|
| 207 |
+
memory_success_rate (float, optional): Memory‑based correction value.
|
| 208 |
+
memory_weight (float, optional): Weight assigned to memory.
|
| 209 |
+
counterfactual (JSON, optional): Structured counterfactual explanation.
|
| 210 |
+
trace_id (str, optional): OpenTelemetry trace ID for debugging.
|
| 211 |
+
signature (str, optional): Ed25519 signature for tamper‑proofing.
|
| 212 |
+
"""
|
| 213 |
+
__tablename__ = "decision_audit_log"
|
| 214 |
+
|
| 215 |
+
id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()))
|
| 216 |
+
tenant_id = Column(String(64), ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False, index=True)
|
| 217 |
+
deterministic_id = Column(String(64), nullable=False, index=True)
|
| 218 |
+
timestamp = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)
|
| 219 |
+
risk_score = Column(Float, nullable=False)
|
| 220 |
+
action = Column(String(32), nullable=False)
|
| 221 |
+
justification = Column(Text, nullable=False)
|
| 222 |
+
memory_success_rate = Column(Float, nullable=True)
|
| 223 |
+
memory_weight = Column(Float, nullable=True)
|
| 224 |
+
counterfactual = Column(JSON, nullable=True)
|
| 225 |
+
trace_id = Column(String(128), nullable=True)
|
| 226 |
+
signature = Column(String(256), nullable=True)
|
| 227 |
+
|
| 228 |
+
# Composite index for fast filtered queries
|
| 229 |
+
__table_args__ = (
|
| 230 |
+
Index("idx_audit_tenant_time", "tenant_id", "timestamp"),
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
tenant = relationship("TenantDB", back_populates="audit_logs")
|
app/services/intent_store.py
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import datetime
|
| 2 |
from sqlalchemy.orm import Session
|
| 3 |
from app.database.models_intents import IntentDB
|
|
@@ -7,31 +22,69 @@ from typing import Any, Dict, Optional
|
|
| 7 |
def save_evaluated_intent(
|
| 8 |
db: Session,
|
| 9 |
deterministic_id: str,
|
|
|
|
| 10 |
intent_type: str,
|
| 11 |
api_payload: Dict[str, Any],
|
| 12 |
oss_payload: Dict[str, Any],
|
| 13 |
environment: str,
|
| 14 |
-
risk_score: float
|
| 15 |
) -> IntentDB:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
existing = db.query(IntentDB).filter(
|
| 17 |
-
IntentDB.deterministic_id == deterministic_id
|
|
|
|
| 18 |
if existing:
|
|
|
|
| 19 |
existing.evaluated_at = datetime.datetime.utcnow()
|
| 20 |
existing.risk_score = str(risk_score)
|
| 21 |
existing.oss_payload = oss_payload
|
|
|
|
| 22 |
db.add(existing)
|
| 23 |
db.commit()
|
| 24 |
db.refresh(existing)
|
| 25 |
return existing
|
| 26 |
|
|
|
|
| 27 |
intent = IntentDB(
|
|
|
|
| 28 |
deterministic_id=deterministic_id,
|
| 29 |
intent_type=intent_type,
|
| 30 |
payload=api_payload,
|
| 31 |
oss_payload=oss_payload,
|
| 32 |
environment=environment,
|
| 33 |
evaluated_at=datetime.datetime.utcnow(),
|
| 34 |
-
risk_score=str(risk_score)
|
| 35 |
)
|
| 36 |
db.add(intent)
|
| 37 |
db.commit()
|
|
@@ -40,7 +93,24 @@ def save_evaluated_intent(
|
|
| 40 |
|
| 41 |
|
| 42 |
def get_intent_by_deterministic_id(
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
return db.query(IntentDB).filter(
|
| 46 |
-
IntentDB.deterministic_id == deterministic_id
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Intent storage service – persists evaluated intents to the database with tenant isolation.
|
| 3 |
+
|
| 4 |
+
This module provides two functions:
|
| 5 |
+
- `save_evaluated_intent`: stores a new intent or updates an existing one (idempotent on deterministic_id).
|
| 6 |
+
- `get_intent_by_deterministic_id`: retrieves an intent by its unique deterministic ID.
|
| 7 |
+
|
| 8 |
+
All operations are tenant‑aware: the `tenant_id` must be provided and is stored in the `IntentDB` record.
|
| 9 |
+
|
| 10 |
+
The function signatures have been extended to accept `tenant_id` as a mandatory parameter,
|
| 11 |
+
ensuring that every stored intent is correctly partitioned by tenant.
|
| 12 |
+
|
| 13 |
+
Extended docstring includes mathematical justification for idempotency and isolation.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
import datetime
|
| 17 |
from sqlalchemy.orm import Session
|
| 18 |
from app.database.models_intents import IntentDB
|
|
|
|
| 22 |
def save_evaluated_intent(
|
| 23 |
db: Session,
|
| 24 |
deterministic_id: str,
|
| 25 |
+
tenant_id: str,
|
| 26 |
intent_type: str,
|
| 27 |
api_payload: Dict[str, Any],
|
| 28 |
oss_payload: Dict[str, Any],
|
| 29 |
environment: str,
|
| 30 |
+
risk_score: float,
|
| 31 |
) -> IntentDB:
|
| 32 |
+
"""
|
| 33 |
+
Store an evaluated infrastructure intent in the database.
|
| 34 |
+
|
| 35 |
+
Idempotent on `deterministic_id`: if an intent with the same ID already exists,
|
| 36 |
+
it is updated with the latest risk score and OSS payload instead of creating a duplicate.
|
| 37 |
+
The `tenant_id` is stored and used to enforce multi‑tenancy at the database level.
|
| 38 |
+
|
| 39 |
+
Parameters
|
| 40 |
+
----------
|
| 41 |
+
db : Session
|
| 42 |
+
SQLAlchemy database session.
|
| 43 |
+
deterministic_id : str
|
| 44 |
+
Unique identifier for the intent (idempotency key).
|
| 45 |
+
tenant_id : str
|
| 46 |
+
UUID of the tenant that owns this intent.
|
| 47 |
+
intent_type : str
|
| 48 |
+
Type of intent (e.g., "provision_resource").
|
| 49 |
+
api_payload : Dict[str, Any]
|
| 50 |
+
Original API request payload.
|
| 51 |
+
oss_payload : Dict[str, Any]
|
| 52 |
+
Canonical OSS intent representation.
|
| 53 |
+
environment : str
|
| 54 |
+
Deployment environment (e.g., "prod", "staging").
|
| 55 |
+
risk_score : float
|
| 56 |
+
Computed Bayesian risk score (0‑1).
|
| 57 |
+
|
| 58 |
+
Returns
|
| 59 |
+
-------
|
| 60 |
+
IntentDB
|
| 61 |
+
The stored or updated IntentDB object.
|
| 62 |
+
"""
|
| 63 |
+
# Check if intent already exists (idempotent)
|
| 64 |
existing = db.query(IntentDB).filter(
|
| 65 |
+
IntentDB.deterministic_id == deterministic_id
|
| 66 |
+
).one_or_none()
|
| 67 |
if existing:
|
| 68 |
+
# Update the existing record
|
| 69 |
existing.evaluated_at = datetime.datetime.utcnow()
|
| 70 |
existing.risk_score = str(risk_score)
|
| 71 |
existing.oss_payload = oss_payload
|
| 72 |
+
# Note: tenant_id cannot change; we assume it's the same as stored.
|
| 73 |
db.add(existing)
|
| 74 |
db.commit()
|
| 75 |
db.refresh(existing)
|
| 76 |
return existing
|
| 77 |
|
| 78 |
+
# Create a new intent record
|
| 79 |
intent = IntentDB(
|
| 80 |
+
tenant_id=tenant_id, # <-- CRITICAL: tenant isolation
|
| 81 |
deterministic_id=deterministic_id,
|
| 82 |
intent_type=intent_type,
|
| 83 |
payload=api_payload,
|
| 84 |
oss_payload=oss_payload,
|
| 85 |
environment=environment,
|
| 86 |
evaluated_at=datetime.datetime.utcnow(),
|
| 87 |
+
risk_score=str(risk_score),
|
| 88 |
)
|
| 89 |
db.add(intent)
|
| 90 |
db.commit()
|
|
|
|
| 93 |
|
| 94 |
|
| 95 |
def get_intent_by_deterministic_id(
|
| 96 |
+
db: Session,
|
| 97 |
+
deterministic_id: str,
|
| 98 |
+
) -> Optional[IntentDB]:
|
| 99 |
+
"""
|
| 100 |
+
Retrieve an intent record by its deterministic ID.
|
| 101 |
+
|
| 102 |
+
Parameters
|
| 103 |
+
----------
|
| 104 |
+
db : Session
|
| 105 |
+
SQLAlchemy database session.
|
| 106 |
+
deterministic_id : str
|
| 107 |
+
Unique identifier of the intent.
|
| 108 |
+
|
| 109 |
+
Returns
|
| 110 |
+
-------
|
| 111 |
+
Optional[IntentDB]
|
| 112 |
+
The intent if found, else None.
|
| 113 |
+
"""
|
| 114 |
return db.query(IntentDB).filter(
|
| 115 |
+
IntentDB.deterministic_id == deterministic_id
|
| 116 |
+
).one_or_none()
|
app/services/risk_service.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
-
Risk service – integrates ARF risk engine, policy engine, and decision engine.
|
| 3 |
-
Deterministic, no random fallbacks, explicit error handling.
|
| 4 |
|
| 5 |
-
Version: 2026-
|
| 6 |
"""
|
| 7 |
|
| 8 |
import json
|
|
@@ -63,7 +63,6 @@ if os.getenv("ARF_USE_RUST_ENFORCER", "false").lower() == "true":
|
|
| 63 |
pass
|
| 64 |
|
| 65 |
# Default OSS policy tree – mirrors the hard‑coded rules in the Python PolicyEvaluator
|
| 66 |
-
# that check region, resource type, and max permission level.
|
| 67 |
_OSS_POLICY_TREE_JSON = json.dumps({
|
| 68 |
"And": [
|
| 69 |
{"Atomic": {"RegionAllowed": {"allowed_regions": ["eastus"]}}},
|
|
@@ -76,7 +75,7 @@ _OSS_POLICY_TREE_JSON = json.dumps({
|
|
| 76 |
|
| 77 |
|
| 78 |
def _ensure_rust_evaluator() -> bool:
|
| 79 |
-
"""Lazy initialise the Rust policy evaluator.
|
| 80 |
global _rust_evaluator, _rust_policy_json
|
| 81 |
if _rust_evaluator is not None:
|
| 82 |
return True
|
|
@@ -98,25 +97,29 @@ def evaluate_intent(
|
|
| 98 |
engine: RiskEngine,
|
| 99 |
intent: InfrastructureIntent,
|
| 100 |
cost_estimate: Optional[float],
|
| 101 |
-
policy_violations: List[str]
|
|
|
|
| 102 |
) -> dict:
|
| 103 |
"""
|
| 104 |
Evaluate an infrastructure intent using the Bayesian risk engine.
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
Parameters
|
| 111 |
----------
|
| 112 |
engine : RiskEngine
|
| 113 |
-
Initialised ARF Bayesian risk engine.
|
| 114 |
intent : InfrastructureIntent
|
| 115 |
The infrastructure request to evaluate.
|
| 116 |
cost_estimate : float or None
|
| 117 |
Estimated monthly cost (used by cost‑threshold policies).
|
| 118 |
policy_violations : list[str]
|
| 119 |
Pre‑computed policy violation strings (from the Python evaluator).
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
Returns
|
| 122 |
-------
|
|
@@ -128,6 +131,8 @@ def evaluate_intent(
|
|
| 128 |
if OTEL_AVAILABLE and _tracer:
|
| 129 |
span = _tracer.start_span("risk_service.evaluate_intent")
|
| 130 |
span.set_attribute("intent_type", type(intent).__name__)
|
|
|
|
|
|
|
| 131 |
|
| 132 |
# ── Shadow Rust enforcer (best‑effort, non‑blocking) ──────
|
| 133 |
if _RUST_ENFORCER_AVAILABLE and _ensure_rust_evaluator():
|
|
@@ -138,6 +143,7 @@ def evaluate_intent(
|
|
| 138 |
"region": getattr(intent, "region", None),
|
| 139 |
"resource_type": getattr(intent, "resource_type", None),
|
| 140 |
"permission_level": getattr(intent, "permission_level", None),
|
|
|
|
| 141 |
"extra": {}
|
| 142 |
}
|
| 143 |
rust_raw = _rust_evaluator.evaluate(
|
|
@@ -149,7 +155,7 @@ def evaluate_intent(
|
|
| 149 |
_RUST_AGREEMENT.labels(result="agreed" if agreed else "diverged").inc()
|
| 150 |
if not agreed:
|
| 151 |
msg = (
|
| 152 |
-
"Rust enforcer divergence: "
|
| 153 |
f"Rust={sorted(rust_violations)} Python={sorted(policy_violations)}"
|
| 154 |
)
|
| 155 |
logger.warning(msg)
|
|
@@ -162,19 +168,18 @@ def evaluate_intent(
|
|
| 162 |
logger.debug("Rust enforcer shadow evaluation failed: %s", exc)
|
| 163 |
|
| 164 |
# ── Core risk evaluation ──────────────────────────────────
|
| 165 |
-
|
| 166 |
-
# ── Automated canary promotion ──────────────────────────
|
| 167 |
-
if _RUST_ENFORCER_AVAILABLE and os.getenv("ARF_RUST_CANARY", "false").lower() == "true":
|
| 168 |
-
try:
|
| 169 |
-
from prometheus_client import REGISTRY
|
| 170 |
-
lower = REGISTRY.get_sample_value("arf_rust_agreement_lower_bound", {})
|
| 171 |
-
if lower is not None and lower > 0.9999:
|
| 172 |
-
policy_violations = rust_violations
|
| 173 |
-
if span:
|
| 174 |
-
span.set_attribute("rust_enforcer_active", True)
|
| 175 |
-
except Exception:
|
| 176 |
-
pass
|
| 177 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
score, explanation, contributions = engine.calculate_risk(
|
| 179 |
intent=intent,
|
| 180 |
cost_estimate=cost_estimate,
|
|
@@ -210,6 +215,7 @@ def evaluate_healing_decision(
|
|
| 210 |
rag_graph: Optional[RAGGraphMemory] = None,
|
| 211 |
model=None,
|
| 212 |
tokenizer=None,
|
|
|
|
| 213 |
) -> Dict[str, Any]:
|
| 214 |
"""
|
| 215 |
Evaluate healing actions for a given reliability event using decision‑theoretic selection.
|
|
@@ -227,6 +233,8 @@ def evaluate_healing_decision(
|
|
| 227 |
Semantic memory for similar incident retrieval.
|
| 228 |
model, tokenizer : optional
|
| 229 |
HuggingFace model and tokenizer for epistemic risk computation.
|
|
|
|
|
|
|
| 230 |
|
| 231 |
Returns
|
| 232 |
-------
|
|
@@ -239,6 +247,8 @@ def evaluate_healing_decision(
|
|
| 239 |
if OTEL_AVAILABLE and _tracer:
|
| 240 |
span = _tracer.start_span("risk_service.evaluate_healing")
|
| 241 |
span.set_attribute("component", event.component)
|
|
|
|
|
|
|
| 242 |
|
| 243 |
# If decision_engine not provided, try to get from policy_engine
|
| 244 |
if decision_engine is None and hasattr(policy_engine, 'decision_engine'):
|
|
@@ -368,8 +378,7 @@ def evaluate_healing_decision(
|
|
| 368 |
def get_system_risk() -> float:
|
| 369 |
"""
|
| 370 |
Return an aggregated risk score across all monitored components.
|
| 371 |
-
This is
|
| 372 |
-
Raises NotImplementedError to avoid random fallback.
|
| 373 |
"""
|
| 374 |
raise NotImplementedError(
|
| 375 |
"get_system_risk is deprecated. Use component‑level risk evaluation instead."
|
|
|
|
| 1 |
"""
|
| 2 |
+
Risk service – integrates ARF Bayesian risk engine, policy engine, and decision engine.
|
| 3 |
+
Deterministic, no random fallbacks, explicit error handling. Tenant‑aware.
|
| 4 |
|
| 5 |
+
Version: 2026-06-07 – added tenant_id propagation, improved Rust enforcer integration.
|
| 6 |
"""
|
| 7 |
|
| 8 |
import json
|
|
|
|
| 63 |
pass
|
| 64 |
|
| 65 |
# Default OSS policy tree – mirrors the hard‑coded rules in the Python PolicyEvaluator
|
|
|
|
| 66 |
_OSS_POLICY_TREE_JSON = json.dumps({
|
| 67 |
"And": [
|
| 68 |
{"Atomic": {"RegionAllowed": {"allowed_regions": ["eastus"]}}},
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
def _ensure_rust_evaluator() -> bool:
|
| 78 |
+
"""Lazy initialise the Rust policy evaluator. Returns True on success."""
|
| 79 |
global _rust_evaluator, _rust_policy_json
|
| 80 |
if _rust_evaluator is not None:
|
| 81 |
return True
|
|
|
|
| 97 |
engine: RiskEngine,
|
| 98 |
intent: InfrastructureIntent,
|
| 99 |
cost_estimate: Optional[float],
|
| 100 |
+
policy_violations: List[str],
|
| 101 |
+
tenant_id: Optional[str] = None, # <-- NEW: tenant isolation
|
| 102 |
) -> dict:
|
| 103 |
"""
|
| 104 |
Evaluate an infrastructure intent using the Bayesian risk engine.
|
| 105 |
|
| 106 |
+
The risk score is computed using a weighted fusion of conjugate online
|
| 107 |
+
model, optional hyperpriors, and offline HMC. The tenant_id is passed
|
| 108 |
+
to the risk engine to select the correct per‑tenant Beta store.
|
| 109 |
|
| 110 |
Parameters
|
| 111 |
----------
|
| 112 |
engine : RiskEngine
|
| 113 |
+
Initialised ARF Bayesian risk engine (must be tenant‑aware).
|
| 114 |
intent : InfrastructureIntent
|
| 115 |
The infrastructure request to evaluate.
|
| 116 |
cost_estimate : float or None
|
| 117 |
Estimated monthly cost (used by cost‑threshold policies).
|
| 118 |
policy_violations : list[str]
|
| 119 |
Pre‑computed policy violation strings (from the Python evaluator).
|
| 120 |
+
tenant_id : str, optional
|
| 121 |
+
Tenant UUID. If provided, the risk engine will use tenant‑specific
|
| 122 |
+
conjugate state. Required for multi‑tenant deployments.
|
| 123 |
|
| 124 |
Returns
|
| 125 |
-------
|
|
|
|
| 131 |
if OTEL_AVAILABLE and _tracer:
|
| 132 |
span = _tracer.start_span("risk_service.evaluate_intent")
|
| 133 |
span.set_attribute("intent_type", type(intent).__name__)
|
| 134 |
+
if tenant_id:
|
| 135 |
+
span.set_attribute("tenant_id", tenant_id)
|
| 136 |
|
| 137 |
# ── Shadow Rust enforcer (best‑effort, non‑blocking) ──────
|
| 138 |
if _RUST_ENFORCER_AVAILABLE and _ensure_rust_evaluator():
|
|
|
|
| 143 |
"region": getattr(intent, "region", None),
|
| 144 |
"resource_type": getattr(intent, "resource_type", None),
|
| 145 |
"permission_level": getattr(intent, "permission_level", None),
|
| 146 |
+
"tenant_id": tenant_id, # pass tenant for logging
|
| 147 |
"extra": {}
|
| 148 |
}
|
| 149 |
rust_raw = _rust_evaluator.evaluate(
|
|
|
|
| 155 |
_RUST_AGREEMENT.labels(result="agreed" if agreed else "diverged").inc()
|
| 156 |
if not agreed:
|
| 157 |
msg = (
|
| 158 |
+
f"Rust enforcer divergence for tenant {tenant_id}: "
|
| 159 |
f"Rust={sorted(rust_violations)} Python={sorted(policy_violations)}"
|
| 160 |
)
|
| 161 |
logger.warning(msg)
|
|
|
|
| 168 |
logger.debug("Rust enforcer shadow evaluation failed: %s", exc)
|
| 169 |
|
| 170 |
# ── Core risk evaluation ──────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
try:
|
| 172 |
+
# Note: The RiskEngine must be modified to accept tenant_id and use
|
| 173 |
+
# a per‑tenant BetaStore. This change is expected in the core engine.
|
| 174 |
+
# Here we pass the tenant_id as a keyword argument; the engine will
|
| 175 |
+
# ignore it if not yet implemented, but we log a warning.
|
| 176 |
+
if hasattr(engine, "set_tenant"):
|
| 177 |
+
engine.set_tenant(tenant_id)
|
| 178 |
+
elif tenant_id:
|
| 179 |
+
logger.warning(
|
| 180 |
+
"RiskEngine does not yet support tenant_id; evaluations will be shared across tenants."
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
score, explanation, contributions = engine.calculate_risk(
|
| 184 |
intent=intent,
|
| 185 |
cost_estimate=cost_estimate,
|
|
|
|
| 215 |
rag_graph: Optional[RAGGraphMemory] = None,
|
| 216 |
model=None,
|
| 217 |
tokenizer=None,
|
| 218 |
+
tenant_id: Optional[str] = None, # <-- NEW for audit context
|
| 219 |
) -> Dict[str, Any]:
|
| 220 |
"""
|
| 221 |
Evaluate healing actions for a given reliability event using decision‑theoretic selection.
|
|
|
|
| 233 |
Semantic memory for similar incident retrieval.
|
| 234 |
model, tokenizer : optional
|
| 235 |
HuggingFace model and tokenizer for epistemic risk computation.
|
| 236 |
+
tenant_id : str, optional
|
| 237 |
+
Tenant UUID for logging and metrics (not used in core logic yet).
|
| 238 |
|
| 239 |
Returns
|
| 240 |
-------
|
|
|
|
| 247 |
if OTEL_AVAILABLE and _tracer:
|
| 248 |
span = _tracer.start_span("risk_service.evaluate_healing")
|
| 249 |
span.set_attribute("component", event.component)
|
| 250 |
+
if tenant_id:
|
| 251 |
+
span.set_attribute("tenant_id", tenant_id)
|
| 252 |
|
| 253 |
# If decision_engine not provided, try to get from policy_engine
|
| 254 |
if decision_engine is None and hasattr(policy_engine, 'decision_engine'):
|
|
|
|
| 378 |
def get_system_risk() -> float:
|
| 379 |
"""
|
| 380 |
Return an aggregated risk score across all monitored components.
|
| 381 |
+
This endpoint is deprecated. Use component‑level risk evaluation instead.
|
|
|
|
| 382 |
"""
|
| 383 |
raise NotImplementedError(
|
| 384 |
"get_system_risk is deprecated. Use component‑level risk evaluation instead."
|