nearlynithin
2025-11-07 14:25:27 +05:30
42 changed files with 2380 additions and 649 deletions

19
.gitignore vendored

@@ -11,7 +11,7 @@ dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
@@ -48,9 +48,11 @@ Thumbs.db
# Streamlit
.streamlit/secrets.toml
# Reports
# Reports and generated files
reports/*.json
reports/*.pdf
reports/*.csv
reports/*.html
# Logs
*.log
@@ -59,4 +61,17 @@ reports/*.pdf
*.csv
!Datasets/loan_data.csv
# Node.js & Next.js
node_modules/
.next/
out/
.vercel
*.tsbuildinfo
next-env.d.ts
# Frontend build artifacts
frontend/nordic-privacy-ai/.next/
frontend/nordic-privacy-ai/out/
frontend/nordic-privacy-ai/node_modules/
Data

1296
README.md

File diff suppressed because it is too large.


@@ -21,6 +21,7 @@ class DataProcessor:
self.categorical_features = []
self.feature_names = []
self.encoders = {}
self.target_encoder = None  # Encoder for a categorical target variable
self.scaler = StandardScaler()
self.X_train = None
@@ -75,6 +76,13 @@ class DataProcessor:
X = self.df[feature_cols].copy()
y = self.df[self.target_column].copy()
# Encode target variable if it's categorical
if y.dtype == 'object' or y.dtype.name == 'category':
self.target_encoder = LabelEncoder()
y_encoded = self.target_encoder.fit_transform(y)
y = pd.Series(y_encoded, index=y.index)
print(f"Target '{self.target_column}' encoded: {dict(enumerate(self.target_encoder.classes_))}")
# Encode categorical variables
for col in self.categorical_features:
if col in X.columns:
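A minimal sketch (not part of this diff) of how the stored target_encoder could be used downstream to map integer predictions back to the original class labels; `processor` and `y_pred` are assumed names:

def decode_predictions(processor, y_pred):
    """Map integer predictions back to original class labels when the target was encoded."""
    if processor.target_encoder is not None:
        return processor.target_encoder.inverse_transform(y_pred)
    return y_pred  # target was already numeric; nothing to decode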

1
api/__init__.py Normal file

@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure

72
api/main.py Normal file

@@ -0,0 +1,72 @@
"""
FastAPI Backend for Nordic Privacy AI
Provides endpoints for AI Governance analysis and data cleaning
"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os
from api.routers import analyze, clean
# Create FastAPI app
app = FastAPI(
title="Nordic Privacy AI API",
description="AI-powered GDPR compliance, bias detection, and risk analysis",
version="1.0.0"
)
# CORS configuration for Next.js frontend
app.add_middleware(
CORSMiddleware,
allow_origins=[
"http://localhost:3000", # Next.js dev server
"http://127.0.0.1:3000",
],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Mount reports directory for file downloads
reports_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "reports")
os.makedirs(reports_dir, exist_ok=True)
app.mount("/reports", StaticFiles(directory=reports_dir), name="reports")
# Include routers
app.include_router(analyze.router, prefix="/api", tags=["AI Governance"])
app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"])
@app.get("/")
async def root():
"""Health check endpoint"""
return {
"status": "online",
"service": "Nordic Privacy AI API",
"version": "1.0.0",
"endpoints": {
"analyze": "/api/analyze",
"clean": "/api/clean",
"docs": "/docs"
}
}
@app.get("/health")
async def health_check():
"""Detailed health check"""
try:
import torch
cuda_available = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if cuda_available else None
except Exception:  # torch not installed or CUDA query failed
cuda_available = False
gpu_name = None
return {
"status": "healthy",
"gpu_acceleration": {
"available": cuda_available,
"device": gpu_name or "CPU"
}
}
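A quick client-side sketch (assuming the dev server from start_api.py is running on http://localhost:8000) for hitting the two endpoints above:

import requests

BASE = "http://localhost:8000"
print(requests.get(f"{BASE}/").json()["status"])                   # "online"
print(requests.get(f"{BASE}/health").json()["gpu_acceleration"])   # {"available": ..., "device": ...}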

1
api/routers/__init__.py Normal file

@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure

141
api/routers/analyze.py Normal file

@@ -0,0 +1,141 @@
"""
AI Governance Analysis Router
Handles bias detection and risk analysis endpoints
"""
from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import pandas as pd
import numpy as np
import io
import os
import json
from datetime import datetime
from typing import Dict, Any
# Import AI Governance modules
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from ai_governance import AIGovernanceAnalyzer
router = APIRouter()
def convert_to_serializable(obj):
"""Convert numpy/pandas types to native Python types for JSON serialization"""
if isinstance(obj, (np.integer, np.int64, np.int32)):
return int(obj)
elif isinstance(obj, (np.floating, np.float64, np.float32)):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
elif isinstance(obj, dict):
return {key: convert_to_serializable(value) for key, value in obj.items()}
elif isinstance(obj, list):
return [convert_to_serializable(item) for item in obj]
return obj
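For illustration only (not part of this file): the helper above flattens nested numpy values into JSON-safe Python types.

import json
import numpy as np

payload = {"accuracy": np.float64(0.91), "cm": np.array([[50, 3], [2, 45]])}
print(json.dumps(convert_to_serializable(payload)))
# {"accuracy": 0.91, "cm": [[50, 3], [2, 45]]}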
@router.post("/analyze")
async def analyze_dataset(file: UploadFile = File(...)):
"""
Analyze uploaded dataset for bias and risk
- **file**: CSV file to analyze
Returns:
- Analysis results (bias metrics, risk assessment)
- Report file path for download
"""
# Validate file type
if not file.filename or not file.filename.endswith('.csv'):
raise HTTPException(status_code=400, detail="Only CSV files are supported")
try:
# Read uploaded file
contents = await file.read()
df = pd.read_csv(io.BytesIO(contents))
if df.empty:
raise HTTPException(status_code=400, detail="Uploaded file is empty")
# Initialize AI Governance Analyzer
analyzer = AIGovernanceAnalyzer()
# Auto-detect target column and protected attributes
# Target: assume the last column (common convention)
target_column = df.columns[-1]
# Protected attributes: Common sensitive columns
protected_keywords = ['gender', 'age', 'race', 'sex', 'ethnicity', 'religion', 'nationality']
protected_attributes = [col for col in df.columns
if any(keyword in col.lower() for keyword in protected_keywords)]
# If no protected attributes found, use first few categorical columns
if not protected_attributes:
categorical_cols = df.select_dtypes(include=['object', 'category']).columns
protected_attributes = [col for col in categorical_cols if col != target_column][:3]
print(f"Analyzing dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
print(f"Target column: {target_column}")
print(f"Protected attributes: {protected_attributes}")
# Run analysis
report = analyzer.analyze_dataframe(df, target_column, protected_attributes)
# Generate report filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
safe_filename = (file.filename or "dataset").replace('.csv', '')
report_filename = f"governance_report_{safe_filename}_{timestamp}.json"
report_path = os.path.join("reports", report_filename)
# Save full report to disk
full_report_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
report_path
)
analyzer.save_report(report, full_report_path)
# Prepare response with summary
response_data = {
"status": "success",
"filename": file.filename,
"dataset_info": {
"rows": len(df),
"columns": len(df.columns),
"features": list(df.columns)
},
"model_performance": {
"accuracy": report.get("model_metrics", {}).get("accuracy", 0),
"precision": report.get("model_metrics", {}).get("precision", 0),
"recall": report.get("model_metrics", {}).get("recall", 0),
"f1_score": report.get("model_metrics", {}).get("f1_score", 0)
},
"bias_metrics": {
"overall_bias_score": report.get("bias_metrics", {}).get("overall_bias_score", 0),
"disparate_impact": report.get("bias_metrics", {}).get("disparate_impact", {}),
"statistical_parity": report.get("bias_metrics", {}).get("statistical_parity_difference", {}),
"violations_detected": report.get("bias_metrics", {}).get("fairness_violations", [])
},
"risk_assessment": {
"overall_risk_score": report.get("risk_metrics", {}).get("overall_risk_score", 0),
"privacy_risks": report.get("risk_metrics", {}).get("privacy_risks", []),
"ethical_risks": report.get("risk_metrics", {}).get("ethical_risks", []),
"compliance_risks": report.get("risk_metrics", {}).get("compliance_risks", []),
"data_quality_risks": report.get("risk_metrics", {}).get("data_quality_risks", [])
},
"recommendations": report.get("recommendations", []),
"report_file": f"/{report_path}",
"timestamp": datetime.now().isoformat()
}
# Convert all numpy/pandas types to native Python types
response_data = convert_to_serializable(response_data)
return JSONResponse(content=response_data)
except pd.errors.EmptyDataError:
raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
except Exception as e:
print(f"Error during analysis: {str(e)}")
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
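A minimal client sketch for this endpoint (assuming the dev server from start_api.py on http://localhost:8000 and the bundled Datasets/loan_data.csv):

import requests

with open("Datasets/loan_data.csv", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/api/analyze",
        files={"file": ("loan_data.csv", f, "text/csv")},
    )
resp.raise_for_status()
result = resp.json()
print(result["bias_metrics"]["overall_bias_score"])
print(result["risk_assessment"]["overall_risk_score"])
print(result["report_file"])  # e.g. a /reports/governance_report_*.json path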

142
api/routers/clean.py Normal file

@@ -0,0 +1,142 @@
"""
Data Cleaning Router
Handles PII detection and anonymization endpoints
"""
from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import pandas as pd
import numpy as np
import io
import os
from datetime import datetime
from typing import Dict, Any
# Import cleaning module
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from data_cleaning import DataCleaner
router = APIRouter()
def convert_to_serializable(obj):
"""Convert numpy/pandas types to native Python types for JSON serialization"""
if isinstance(obj, (np.integer, np.int64, np.int32)):
return int(obj)
elif isinstance(obj, (np.floating, np.float64, np.float32)):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
elif isinstance(obj, dict):
return {key: convert_to_serializable(value) for key, value in obj.items()}
elif isinstance(obj, list):
return [convert_to_serializable(item) for item in obj]
return obj
@router.post("/clean")
async def clean_dataset(file: UploadFile = File(...)):
"""
Clean uploaded dataset - detect and anonymize PII
- **file**: CSV file to clean
Returns:
- Cleaned dataset statistics
- PII detections and anonymization actions
- Report file path for download
- Cleaned CSV file path for download
"""
# Validate file type
if not file.filename or not file.filename.endswith('.csv'):
raise HTTPException(status_code=400, detail="Only CSV files are supported")
try:
# Read uploaded file
contents = await file.read()
df = pd.read_csv(io.BytesIO(contents))
if df.empty:
raise HTTPException(status_code=400, detail="Uploaded file is empty")
# Initialize Data Cleaner (with GPU if available)
print(f"Cleaning dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
cleaner = DataCleaner(df, use_gpu=True)
# Run cleaning (non-interactive mode for API)
cleaned_df, audit_report = cleaner.clean(
risky_features=None, # Auto-detect
interactive=False, # No user prompts in API mode
scan_all_cells=True
)
# Generate filenames
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
safe_filename = file.filename.replace('.csv', '')
# Save cleaned CSV
cleaned_csv_filename = f"cleaned_{safe_filename}_{timestamp}.csv"
cleaned_csv_path = os.path.join("reports", cleaned_csv_filename)
full_cleaned_csv_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
cleaned_csv_path
)
cleaner.save_cleaned_data(cleaned_df, full_cleaned_csv_path)
# Save audit report
audit_report_filename = f"cleaning_audit_{safe_filename}_{timestamp}.json"
audit_report_path = os.path.join("reports", audit_report_filename)
full_audit_report_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
audit_report_path
)
cleaner.save_audit_report(audit_report, full_audit_report_path)
# Prepare response
response_data = {
"status": "success",
"filename": file.filename,
"dataset_info": {
"original_rows": int(audit_report["metadata"]["original_rows"]),
"original_columns": int(audit_report["metadata"]["original_columns"]),
"cleaned_rows": int(audit_report["metadata"]["cleaned_rows"]),
"cleaned_columns": int(audit_report["metadata"]["cleaned_columns"])
},
"gpu_acceleration": audit_report["metadata"].get("gpu_acceleration", {
"enabled": False,
"device": "CPU"
}),
"summary": {
"columns_removed": audit_report["summary"]["columns_removed"],
"columns_anonymized": audit_report["summary"]["columns_anonymized"],
"total_cells_affected": int(audit_report["summary"]["total_cells_affected"])
},
"pii_detections": {
col: {
"action": details["action"],
"entity_types": details["entity_types_found"],
"num_affected_rows": int(details.get("num_affected_rows", 0)),
"examples": details.get("examples", [])[:2] # Show 2 examples
}
for col, details in audit_report["details"].items()
},
"gdpr_compliance": audit_report["compliance"]["gdpr_articles_applied"],
"files": {
"cleaned_csv": f"/{cleaned_csv_path}",
"audit_report": f"/{audit_report_path}"
},
"timestamp": datetime.now().isoformat()
}
# Convert all numpy/pandas types to native Python types
response_data = convert_to_serializable(response_data)
return JSONResponse(content=response_data)
except pd.errors.EmptyDataError:
raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
except Exception as e:
print(f"Error during cleaning: {str(e)}")
raise HTTPException(status_code=500, detail=f"Cleaning failed: {str(e)}")
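A matching client sketch for the cleaning endpoint, including downloading the cleaned CSV from the /reports static mount set up in api/main.py (again assuming http://localhost:8000):

import requests

BASE = "http://localhost:8000"
with open("Datasets/loan_data.csv", "rb") as f:
    resp = requests.post(f"{BASE}/api/clean", files={"file": ("loan_data.csv", f, "text/csv")})
resp.raise_for_status()
result = resp.json()
print(result["summary"])  # columns removed/anonymized, total cells affected

# result["files"]["cleaned_csv"] is a path under the mounted /reports directory
cleaned = requests.get(BASE + result["files"]["cleaned_csv"])
with open("cleaned_loan_data.csv", "wb") as out:
    out.write(cleaned.content)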

1
api/utils/__init__.py Normal file

@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure

12
data_cleaning/__init__.py Normal file

@@ -0,0 +1,12 @@
"""
Data Cleaning Module
Automated PII detection and GDPR-compliant anonymization
"""
from .cleaner import DataCleaner
__version__ = '1.0.0'
__all__ = [
'DataCleaner'
]
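A standalone usage sketch mirroring how api/routers/clean.py drives DataCleaner (file paths are illustrative):

import pandas as pd
from data_cleaning import DataCleaner

df = pd.read_csv("Datasets/loan_data.csv")
cleaner = DataCleaner(df, use_gpu=True)  # same flag the API router passes; assumed to fall back to CPU without CUDA
cleaned_df, audit_report = cleaner.clean(
    risky_features=None,   # auto-detect PII columns
    interactive=False,     # no prompts
    scan_all_cells=True,
)
cleaner.save_cleaned_data(cleaned_df, "reports/cleaned_loan_data.csv")
cleaner.save_audit_report(audit_report, "reports/cleaning_audit_loan_data.json")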


@@ -14,7 +14,7 @@ export default function TryPage() {
<div className="flex flex-1 min-h-0">
<Sidebar value={tab} onChange={setTab} />
<div className="flex-1 min-h-0 flex">
<div className="flex-1 min-h-0"><CenterPanel tab={tab} /></div>
<div className="flex-1 min-h-0 min-w-0"><CenterPanel tab={tab} onAnalyze={() => setTab("bias-analysis")} /></div>
<div className="w-[360px] hidden xl:block"><ChatbotPanel /></div>
</div>
</div>


@@ -0,0 +1,620 @@
"use client";
import { TryTab } from "./Sidebar";
import { useState, useRef, useCallback, useEffect } from "react";
import { saveLatestUpload, getLatestUpload, deleteLatestUpload } from "../../lib/indexeddb";
import { analyzeDataset, cleanDataset, getReportUrl, type AnalyzeResponse, type CleanResponse } from "../../lib/api";
interface CenterPanelProps {
tab: TryTab;
onAnalyze?: () => void;
}
interface UploadedFileMeta {
name: string;
size: number;
type: string;
contentPreview: string;
}
interface TablePreviewData {
headers: string[];
rows: string[][];
origin: 'csv';
}
export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview
const [fileMeta, setFileMeta] = useState<UploadedFileMeta | null>(null);
const [uploadedFile, setUploadedFile] = useState<File | null>(null);
const [isDragging, setIsDragging] = useState(false);
const [progress, setProgress] = useState<number>(0);
const [progressLabel, setProgressLabel] = useState<string>("Processing");
const [tablePreview, setTablePreview] = useState<TablePreviewData | null>(null);
const inputRef = useRef<HTMLInputElement | null>(null);
const [loadedFromCache, setLoadedFromCache] = useState(false);
const [isProcessing, setIsProcessing] = useState(false);
const [error, setError] = useState<string | null>(null);
// Analysis results
const [analyzeResult, setAnalyzeResult] = useState<AnalyzeResponse | null>(null);
const [cleanResult, setCleanResult] = useState<CleanResponse | null>(null);
const reset = () => {
setFileMeta(null);
setUploadedFile(null);
setProgress(0);
setProgressLabel("Processing");
setTablePreview(null);
setError(null);
};
// Handle API calls
const handleAnalyze = async () => {
if (!uploadedFile) {
setError("No file uploaded");
return;
}
setIsProcessing(true);
setError(null);
setProgressLabel("Analyzing dataset...");
try {
const result = await analyzeDataset(uploadedFile);
setAnalyzeResult(result);
setProgressLabel("Analysis complete!");
onAnalyze?.(); // Navigate to bias-analysis tab
} catch (err: any) {
setError(err.message || "Analysis failed");
} finally {
setIsProcessing(false);
}
};
const handleClean = async () => {
if (!uploadedFile) {
setError("No file uploaded");
return;
}
setIsProcessing(true);
setError(null);
setProgressLabel("Cleaning dataset...");
try {
const result = await cleanDataset(uploadedFile);
setCleanResult(result);
setProgressLabel("Cleaning complete!");
} catch (err: any) {
setError(err.message || "Cleaning failed");
} finally {
setIsProcessing(false);
}
};
function tryParseCSV(text: string, maxRows = 50, maxCols = 40): TablePreviewData | null {
const lines = text.split(/\r?\n/).filter(l => l.trim().length > 0);
if (lines.length < 2) return null;
const commaDensity = lines.slice(0, 10).filter(l => l.includes(',')).length;
if (commaDensity < 2) return null;
const parseLine = (line: string) => {
const out: string[] = [];
let cur = '';
let inQuotes = false;
for (let i = 0; i < line.length; i++) {
const ch = line[i];
if (ch === '"') {
if (inQuotes && line[i + 1] === '"') { cur += '"'; i++; } else { inQuotes = !inQuotes; }
} else if (ch === ',' && !inQuotes) {
out.push(cur);
cur = '';
} else { cur += ch; }
}
out.push(cur);
return out.map(c => c.trim());
};
const raw = lines.slice(0, maxRows).map(parseLine);
if (raw.length === 0) return null;
const headers = raw[0];
const colCount = Math.min(headers.length, maxCols);
const rows = raw.slice(1).map(r => r.slice(0, colCount));
return { headers: headers.slice(0, colCount), rows, origin: 'csv' };
}
// We no longer build table preview for JSON; revert JSON to raw text view.
const processFile = useCallback(async (f: File) => {
if (!f) return;
const isCSV = /\.csv$/i.test(f.name);
setProgress(0);
setUploadedFile(f); // Save the file for API calls
// For large files, show a progress bar while reading the file stream (no preview)
if (f.size > 1024 * 1024) {
setProgressLabel("Uploading");
const metaObj: UploadedFileMeta = {
name: f.name,
size: f.size,
type: f.type || "unknown",
contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES/1024)}KB)...`,
};
setFileMeta(metaObj);
setTablePreview(null);
// Save to IndexedDB immediately so it persists without needing full read
(async () => {
try { await saveLatestUpload(f, metaObj); } catch {}
})();
// Read head slice for partial preview & possible CSV table extraction
try {
const headBlob = f.slice(0, PREVIEW_BYTES);
const headReader = new FileReader();
headReader.onload = async () => {
try {
const buf = headReader.result as ArrayBuffer;
const decoder = new TextDecoder();
const text = decoder.decode(buf);
setFileMeta(prev => prev ? { ...prev, contentPreview: text.slice(0, 4000) } : prev);
if (isCSV) {
const parsed = tryParseCSV(text);
setTablePreview(parsed);
} else {
setTablePreview(null);
}
try { await saveLatestUpload(f, { ...metaObj, contentPreview: text.slice(0, 4000) }); } catch {}
} catch { /* ignore */ }
};
headReader.readAsArrayBuffer(headBlob);
} catch { /* ignore */ }
// Use streaming read for progress without buffering entire file in memory
try {
const stream: ReadableStream<Uint8Array> | undefined = (typeof (f as any).stream === "function" ? (f as any).stream() : undefined);
if (stream && typeof stream.getReader === "function") {
const reader = stream.getReader();
let loaded = 0;
const total = f.size || 1;
for (;;) {
const { done, value } = await reader.read();
if (done) break;
loaded += value ? value.length : 0;
const pct = Math.min(100, Math.round((loaded / total) * 100));
setProgress(pct);
}
setProgress(100);
} else {
// Fallback to FileReader progress events
const reader = new FileReader();
reader.onprogress = (evt) => {
if (evt.lengthComputable) {
const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100));
setProgress(pct);
} else {
setProgress((p) => (p < 90 ? p + 5 : p));
}
};
reader.onloadend = () => setProgress(100);
reader.onerror = () => setProgress(0);
reader.readAsArrayBuffer(f);
}
} catch {
setProgress(100);
}
return;
}
const reader = new FileReader();
reader.onprogress = (evt) => {
if (evt.lengthComputable) {
const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100));
setProgress(pct);
} else {
setProgress((p) => (p < 90 ? p + 5 : p));
}
};
reader.onload = async () => {
try {
const buf = reader.result as ArrayBuffer;
const decoder = new TextDecoder();
const text = decoder.decode(buf);
const metaObj: UploadedFileMeta = {
name: f.name,
size: f.size,
type: f.type || "unknown",
contentPreview: text.slice(0, 4000),
};
setFileMeta(metaObj);
if (isCSV) {
const parsed = tryParseCSV(text);
setTablePreview(parsed);
} else {
setTablePreview(null);
}
// Save file blob and meta to browser cache (IndexedDB)
try {
await saveLatestUpload(f, metaObj);
} catch {}
setProgressLabel("Processing");
setProgress(100);
} catch (e) {
const metaObj: UploadedFileMeta = {
name: f.name,
size: f.size,
type: f.type || "unknown",
contentPreview: "Unable to decode preview.",
};
setFileMeta(metaObj);
setTablePreview(null);
try {
await saveLatestUpload(f, metaObj);
} catch {}
setProgressLabel("Processing");
setProgress(100);
}
};
reader.onerror = () => {
setProgress(0);
};
reader.readAsArrayBuffer(f);
}, []);
function handleFileChange(e: React.ChangeEvent<HTMLInputElement>) {
const f = e.target.files?.[0];
processFile(f as File);
}
const onDragOver = (e: React.DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const onDragLeave = () => setIsDragging(false);
const onDrop = (e: React.DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const f = e.dataTransfer.files?.[0];
processFile(f as File);
};
// Load last cached upload on mount (processing tab only)
useEffect(() => {
let ignore = false;
if (tab !== "processing") return;
(async () => {
try {
const { file, meta } = await getLatestUpload();
if (!ignore && meta) {
setFileMeta(meta as UploadedFileMeta);
if (file) {
setUploadedFile(file);
}
setLoadedFromCache(true);
}
} catch {}
})();
return () => {
ignore = true;
};
}, [tab]);
function renderTabContent() {
switch (tab) {
case "processing":
return (
<div className="space-y-4 max-w-[1100px] xl:max-w-[1200px] w-full mx-auto">
<h2 className="text-xl font-semibold">Upload & Process Data</h2>
<p className="text-sm text-slate-600">Upload a CSV / JSON / text file. We will later parse, detect PII, and queue analyses.</p>
<div className="flex flex-col gap-3 min-w-0">
<div
onDragOver={onDragOver}
onDragLeave={onDragLeave}
onDrop={onDrop}
className={
"rounded-lg border-2 border-dashed p-6 text-center transition-colors " +
(isDragging ? "border-brand-600 bg-brand-50" : "border-slate-300 hover:border-brand-300")
}
>
<p className="text-sm text-slate-600">Drag & drop a CSV / JSON / TXT here, or click to browse.</p>
<div className="mt-3">
<button
type="button"
onClick={() => inputRef.current?.click()}
className="inline-flex items-center rounded-md bg-brand-600 px-4 py-2 text-white text-sm font-medium shadow hover:bg-brand-500"
>
Choose file
</button>
</div>
</div>
<input
ref={inputRef}
type="file"
accept=".csv,.json,.txt"
onChange={handleFileChange}
className="hidden"
aria-hidden
/>
{progress > 0 && (
<div className="w-full">
<div className="h-2 w-full rounded-full bg-slate-200 overflow-hidden">
<div
className="h-2 bg-brand-600 transition-all"
style={{ width: `${progress}%` }}
/>
</div>
<div className="mt-1 text-xs text-slate-500">{progressLabel} {progress}%</div>
</div>
)}
{fileMeta && (
<div className="rounded-md border border-slate-200 p-4 bg-white shadow-sm">
<div className="flex items-center justify-between mb-2">
<div className="text-sm font-medium">{fileMeta.name}</div>
<div className="text-xs text-slate-500">{Math.round(fileMeta.size / 1024)} KB</div>
</div>
{loadedFromCache && (
<div className="mb-2 text-[11px] text-brand-700">Loaded from browser cache</div>
)}
<div className="mb-3 text-xs text-slate-500">{fileMeta.type || "Unknown type"}</div>
{/* Table preview when structured data detected; otherwise show text */}
{tablePreview && tablePreview.origin === 'csv' ? (
<div className="max-h-64 w-full min-w-0 overflow-x-auto overflow-y-auto rounded-md bg-slate-50">
<table className="min-w-full text-xs">
<thead className="sticky top-0 bg-slate-100">
<tr>
{tablePreview.headers.map((h, idx) => (
<th key={idx} className="text-left font-semibold px-3 py-2 border-b border-slate-200 whitespace-nowrap">{h}</th>
))}
</tr>
</thead>
<tbody>
{tablePreview.rows.map((r, i) => (
<tr key={i} className={i % 2 === 0 ? "" : "bg-slate-100/50"}>
{r.map((c, j) => (
<td key={j} className="px-3 py-1.5 border-b border-slate-100 whitespace-nowrap max-w-[24ch] overflow-hidden text-ellipsis">{c}</td>
))}
</tr>
))}
</tbody>
</table>
</div>
) : (
<pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
{fileMeta.contentPreview || "(no preview)"}
</pre>
)}
{error && (
<div className="mt-3 p-3 bg-red-50 border border-red-200 rounded-md text-sm text-red-700">
{error}
</div>
)}
{analyzeResult && (
<div className="mt-3 p-3 bg-green-50 border border-green-200 rounded-md text-sm text-green-700">
Analysis complete! View results in tabs.
<a
href={getReportUrl(analyzeResult.report_file)}
target="_blank"
rel="noopener noreferrer"
className="ml-2 underline"
>
Download Report
</a>
</div>
)}
{cleanResult && (
<div className="mt-3 p-3 bg-green-50 border border-green-200 rounded-md text-sm text-green-700">
Cleaning complete! {cleanResult.summary.total_cells_affected} cells anonymized.
<div className="mt-2 flex gap-2">
<a
href={getReportUrl(cleanResult.files.cleaned_csv)}
download
className="underline"
>
Download Cleaned CSV
</a>
<a
href={getReportUrl(cleanResult.files.audit_report)}
target="_blank"
rel="noopener noreferrer"
className="underline"
>
View Audit Report
</a>
</div>
</div>
)}
<div className="mt-3 flex justify-end gap-2">
<button
type="button"
onClick={async () => {
reset();
try { await deleteLatestUpload(); } catch {}
setLoadedFromCache(false);
setAnalyzeResult(null);
setCleanResult(null);
}}
className="text-xs rounded-md border px-3 py-1.5 hover:bg-slate-50"
>
Clear
</button>
<button
type="button"
onClick={handleClean}
disabled={isProcessing}
className="text-xs rounded-md bg-green-600 text-white px-3 py-1.5 hover:bg-green-500 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isProcessing ? "Processing..." : "Clean (PII)"}
</button>
<button
type="button"
onClick={handleAnalyze}
disabled={isProcessing}
className="text-xs rounded-md bg-brand-600 text-white px-3 py-1.5 hover:bg-brand-500 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isProcessing ? "Processing..." : "Analyze"}
</button>
</div>
</div>
)}
</div>
</div>
);
case "bias-analysis":
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold">Bias Analysis</h2>
{analyzeResult ? (
<div className="space-y-4">
<div className="grid grid-cols-2 gap-4">
<div className="p-4 bg-white rounded-lg border">
<div className="text-sm text-slate-600">Overall Bias Score</div>
<div className="text-2xl font-bold">{(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}%</div>
</div>
<div className="p-4 bg-white rounded-lg border">
<div className="text-sm text-slate-600">Violations Detected</div>
<div className="text-2xl font-bold">{analyzeResult.bias_metrics.violations_detected.length}</div>
</div>
</div>
<div className="p-4 bg-white rounded-lg border">
<h3 className="font-semibold mb-2">Model Performance</h3>
<div className="grid grid-cols-4 gap-2 text-sm">
<div>
<div className="text-slate-600">Accuracy</div>
<div className="font-medium">{(analyzeResult.model_performance.accuracy * 100).toFixed(1)}%</div>
</div>
<div>
<div className="text-slate-600">Precision</div>
<div className="font-medium">{(analyzeResult.model_performance.precision * 100).toFixed(1)}%</div>
</div>
<div>
<div className="text-slate-600">Recall</div>
<div className="font-medium">{(analyzeResult.model_performance.recall * 100).toFixed(1)}%</div>
</div>
<div>
<div className="text-slate-600">F1 Score</div>
<div className="font-medium">{(analyzeResult.model_performance.f1_score * 100).toFixed(1)}%</div>
</div>
</div>
</div>
</div>
) : (
<p className="text-sm text-slate-600">Upload and analyze a dataset to see bias metrics.</p>
)}
</div>
);
case "risk-analysis":
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold">Risk Analysis</h2>
{analyzeResult ? (
<div className="space-y-4">
<div className="p-4 bg-white rounded-lg border">
<div className="text-sm text-slate-600">Overall Risk Score</div>
<div className="text-2xl font-bold">{(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}%</div>
</div>
{cleanResult && (
<div className="p-4 bg-white rounded-lg border">
<h3 className="font-semibold mb-2">PII Detection Results</h3>
<div className="text-sm space-y-1">
<div>Cells Anonymized: <span className="font-medium">{cleanResult.summary.total_cells_affected}</span></div>
<div>Columns Removed: <span className="font-medium">{cleanResult.summary.columns_removed.length}</span></div>
<div>Columns Anonymized: <span className="font-medium">{cleanResult.summary.columns_anonymized.length}</span></div>
</div>
</div>
)}
</div>
) : (
<p className="text-sm text-slate-600">Upload and analyze a dataset to see risk assessment.</p>
)}
</div>
);
case "bias-risk-mitigation":
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold">Mitigation Suggestions</h2>
{analyzeResult && analyzeResult.recommendations.length > 0 ? (
<div className="space-y-2">
{analyzeResult.recommendations.map((rec, i) => (
<div key={i} className="p-3 bg-blue-50 border border-blue-200 rounded-md text-sm">
{rec}
</div>
))}
</div>
) : (
<p className="text-sm text-slate-600">
Recommendations will appear here after analysis.
</p>
)}
</div>
);
case "results":
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold">Results Summary</h2>
{(analyzeResult || cleanResult) ? (
<div className="space-y-4">
{analyzeResult && (
<div className="p-4 bg-white rounded-lg border">
<h3 className="font-semibold mb-2">Analysis Results</h3>
<div className="text-sm space-y-1">
<div>Dataset: {analyzeResult.filename}</div>
<div>Rows: {analyzeResult.dataset_info.rows}</div>
<div>Columns: {analyzeResult.dataset_info.columns}</div>
<div>Bias Score: {(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}%</div>
<div>Risk Score: {(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}%</div>
</div>
<a
href={getReportUrl(analyzeResult.report_file)}
target="_blank"
rel="noopener noreferrer"
className="mt-3 inline-block text-sm text-brand-600 underline"
>
Download Full Report
</a>
</div>
)}
{cleanResult && (
<div className="p-4 bg-white rounded-lg border">
<h3 className="font-semibold mb-2">Cleaning Results</h3>
<div className="text-sm space-y-1">
<div>Original: {cleanResult.dataset_info.original_rows} rows × {cleanResult.dataset_info.original_columns} cols</div>
<div>Cleaned: {cleanResult.dataset_info.cleaned_rows} rows × {cleanResult.dataset_info.cleaned_columns} cols</div>
<div>Cells Anonymized: {cleanResult.summary.total_cells_affected}</div>
<div>Columns Removed: {cleanResult.summary.columns_removed.length}</div>
<div>GDPR Compliant: {cleanResult.gdpr_compliance.length} articles applied</div>
</div>
<div className="mt-3 flex gap-2">
<a
href={getReportUrl(cleanResult.files.cleaned_csv)}
download
className="text-sm text-brand-600 underline"
>
Download Cleaned CSV
</a>
<a
href={getReportUrl(cleanResult.files.audit_report)}
target="_blank"
rel="noopener noreferrer"
className="text-sm text-brand-600 underline"
>
View Audit Report
</a>
</div>
</div>
)}
</div>
) : (
<p className="text-sm text-slate-600">
Process a dataset to see aggregated results.
</p>
)}
</div>
);
default:
return null;
}
}
return (
<div className="h-full overflow-y-auto p-6 bg-white/60">
{renderTabContent()}
</div>
);
}


@@ -4,6 +4,7 @@ import { useState, useRef, useCallback, useEffect } from "react";
interface CenterPanelProps {
tab: TryTab;
onAnalyze?: () => void;
}
interface UploadedFileMeta {
@@ -13,11 +14,19 @@ interface UploadedFileMeta {
contentPreview: string;
}
export function CenterPanel({ tab }: CenterPanelProps) {
interface TablePreviewData {
headers: string[];
rows: string[][];
origin: 'csv';
}
export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview
const [fileMeta, setFileMeta] = useState<UploadedFileMeta | null>(null);
const [isDragging, setIsDragging] = useState(false);
const [progress, setProgress] = useState<number>(0);
const [progressLabel, setProgressLabel] = useState<string>("Processing");
const [tablePreview, setTablePreview] = useState<TablePreviewData | null>(null);
const inputRef = useRef<HTMLInputElement | null>(null);
const [loadedFromCache, setLoadedFromCache] = useState(false);
@@ -25,10 +34,43 @@ export function CenterPanel({ tab }: CenterPanelProps) {
setFileMeta(null);
setProgress(0);
setProgressLabel("Processing");
setTablePreview(null);
};
function tryParseCSV(text: string, maxRows = 50, maxCols = 40): TablePreviewData | null {
const lines = text.split(/\r?\n/).filter(l => l.trim().length > 0);
if (lines.length < 2) return null;
const commaDensity = lines.slice(0, 10).filter(l => l.includes(',')).length;
if (commaDensity < 2) return null;
const parseLine = (line: string) => {
const out: string[] = [];
let cur = '';
let inQuotes = false;
for (let i = 0; i < line.length; i++) {
const ch = line[i];
if (ch === '"') {
if (inQuotes && line[i + 1] === '"') { cur += '"'; i++; } else { inQuotes = !inQuotes; }
} else if (ch === ',' && !inQuotes) {
out.push(cur);
cur = '';
} else { cur += ch; }
}
out.push(cur);
return out.map(c => c.trim());
};
const raw = lines.slice(0, maxRows).map(parseLine);
if (raw.length === 0) return null;
const headers = raw[0];
const colCount = Math.min(headers.length, maxCols);
const rows = raw.slice(1).map(r => r.slice(0, colCount));
return { headers: headers.slice(0, colCount), rows, origin: 'csv' };
}
// We no longer build table preview for JSON; revert JSON to raw text view.
const processFile = useCallback(async (f: File) => {
if (!f) return;
const isCSV = /\.csv$/i.test(f.name);
setProgress(0);
// For large files, show a progress bar while reading the file stream (no preview)
if (f.size > 1024 * 1024) {
@@ -37,13 +79,35 @@ export function CenterPanel({ tab }: CenterPanelProps) {
name: f.name,
size: f.size,
type: f.type || "unknown",
contentPreview: "File too large for preview (limit 1MB).",
contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES/1024)}KB)...`,
};
setFileMeta(metaObj);
setTablePreview(null);
// Save to IndexedDB immediately so it persists without needing full read
(async () => {
try { await saveLatestUpload(f, metaObj); } catch {}
})();
// Read head slice for partial preview & possible CSV table extraction
try {
const headBlob = f.slice(0, PREVIEW_BYTES);
const headReader = new FileReader();
headReader.onload = async () => {
try {
const buf = headReader.result as ArrayBuffer;
const decoder = new TextDecoder();
const text = decoder.decode(buf);
setFileMeta(prev => prev ? { ...prev, contentPreview: text.slice(0, 4000) } : prev);
if (isCSV) {
const parsed = tryParseCSV(text);
setTablePreview(parsed);
} else {
setTablePreview(null);
}
try { await saveLatestUpload(f, { ...metaObj, contentPreview: text.slice(0, 4000) }); } catch {}
} catch { /* ignore */ }
};
headReader.readAsArrayBuffer(headBlob);
} catch { /* ignore */ }
// Use streaming read for progress without buffering entire file in memory
try {
const stream: ReadableStream<Uint8Array> | undefined = (typeof (f as any).stream === "function" ? (f as any).stream() : undefined);
@@ -100,6 +164,12 @@ export function CenterPanel({ tab }: CenterPanelProps) {
contentPreview: text.slice(0, 4000),
};
setFileMeta(metaObj);
if (isCSV) {
const parsed = tryParseCSV(text);
setTablePreview(parsed);
} else {
setTablePreview(null);
}
// Save file blob and meta to browser cache (IndexedDB)
try {
await saveLatestUpload(f, metaObj);
@@ -114,6 +184,7 @@ export function CenterPanel({ tab }: CenterPanelProps) {
contentPreview: "Unable to decode preview.",
};
setFileMeta(metaObj);
setTablePreview(null);
try {
await saveLatestUpload(f, metaObj);
} catch {}
@@ -163,13 +234,13 @@ export function CenterPanel({ tab }: CenterPanelProps) {
}, [tab]);
function renderTabContent() {
switch (tab) {
switch (tab) {
case "processing":
return (
<div className="space-y-4">
return (
<div className="space-y-4 max-w-[1100px] xl:max-w-[1200px] w-full mx-auto">
<h2 className="text-xl font-semibold">Upload & Process Data</h2>
<p className="text-sm text-slate-600">Upload a CSV / JSON / text file. We will later parse, detect PII, and queue analyses.</p>
<div className="flex flex-col gap-3">
<div className="flex flex-col gap-3 min-w-0">
<div
onDragOver={onDragOver}
onDragLeave={onDragLeave}
@@ -219,10 +290,34 @@ export function CenterPanel({ tab }: CenterPanelProps) {
<div className="mb-2 text-[11px] text-brand-700">Loaded from browser cache</div>
)}
<div className="mb-3 text-xs text-slate-500">{fileMeta.type || "Unknown type"}</div>
<pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
{fileMeta.contentPreview || "(no preview)"}
</pre>
<div className="mt-3 flex justify-end">
{/* Table preview when structured data detected; otherwise show text */}
{tablePreview && tablePreview.origin === 'csv' ? (
<div className="max-h-64 w-full min-w-0 overflow-x-auto overflow-y-auto rounded-md bg-slate-50">
<table className="min-w-full text-xs">
<thead className="sticky top-0 bg-slate-100">
<tr>
{tablePreview.headers.map((h, idx) => (
<th key={idx} className="text-left font-semibold px-3 py-2 border-b border-slate-200 whitespace-nowrap">{h}</th>
))}
</tr>
</thead>
<tbody>
{tablePreview.rows.map((r, i) => (
<tr key={i} className={i % 2 === 0 ? "" : "bg-slate-100/50"}>
{r.map((c, j) => (
<td key={j} className="px-3 py-1.5 border-b border-slate-100 whitespace-nowrap max-w-[24ch] overflow-hidden text-ellipsis">{c}</td>
))}
</tr>
))}
</tbody>
</table>
</div>
) : (
<pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
{fileMeta.contentPreview || "(no preview)"}
</pre>
)}
<div className="mt-3 flex justify-end gap-2">
<button
type="button"
onClick={async () => {
@@ -234,6 +329,13 @@ export function CenterPanel({ tab }: CenterPanelProps) {
>
Clear
</button>
<button
type="button"
onClick={() => onAnalyze?.()}
className="text-xs rounded-md bg-brand-600 text-white px-3 py-1.5 hover:bg-brand-500"
>
Analyze
</button>
</div>
</div>
)}


@@ -1,28 +0,0 @@
# Nordic Privacy AI
AI-Powered GDPR compliance & personal data protection platform tailored for Nordic ecosystems (BankID, MitID, Suomi.fi).
## Tech Stack
- Next.js (App Router, TypeScript)
- Tailwind CSS
## Getting Started
```powershell
npm install
npm run dev
```
Visit http://localhost:3000 to view the landing page.
## Scripts
- `npm run dev` Start dev server
- `npm run build` Production build
- `npm start` Run built app
- `npm run lint` ESLint
## Next Steps
- Implement /try page workflow
- Add feature sections & agent explanations
- Integrate backend services for data upload & scanning
## License
Internal hackathon prototype


@@ -0,0 +1,113 @@
/**
* IndexedDB utilities for persisting file uploads in the browser.
* Stores the latest uploaded file and its metadata for recovery across sessions.
*/
const DB_NAME = "NordicPrivacyAI";
const DB_VERSION = 1;
const STORE_NAME = "latestUpload";
interface UploadedFileMeta {
name: string;
size: number;
type: string;
contentPreview: string;
}
interface LatestUploadData {
file: File;
meta: UploadedFileMeta;
timestamp: number;
}
/**
* Open or create the IndexedDB database
*/
function openDB(): Promise<IDBDatabase> {
return new Promise((resolve, reject) => {
const request = indexedDB.open(DB_NAME, DB_VERSION);
request.onerror = () => reject(request.error);
request.onsuccess = () => resolve(request.result);
request.onupgradeneeded = (event) => {
const db = (event.target as IDBOpenDBRequest).result;
// Create object store if it doesn't exist
if (!db.objectStoreNames.contains(STORE_NAME)) {
db.createObjectStore(STORE_NAME);
}
};
});
}
/**
* Save the latest uploaded file and its metadata to IndexedDB
*/
export async function saveLatestUpload(
file: File,
meta: UploadedFileMeta
): Promise<void> {
const db = await openDB();
return new Promise((resolve, reject) => {
const transaction = db.transaction([STORE_NAME], "readwrite");
const store = transaction.objectStore(STORE_NAME);
const data: LatestUploadData = {
file,
meta,
timestamp: Date.now(),
};
const request = store.put(data, "latest");
request.onerror = () => reject(request.error);
request.onsuccess = () => resolve();
transaction.oncomplete = () => db.close();
});
}
/**
* Retrieve the latest uploaded file and metadata from IndexedDB
*/
export async function getLatestUpload(): Promise<LatestUploadData> {
const db = await openDB();
return new Promise((resolve, reject) => {
const transaction = db.transaction([STORE_NAME], "readonly");
const store = transaction.objectStore(STORE_NAME);
const request = store.get("latest");
request.onerror = () => reject(request.error);
request.onsuccess = () => {
const result = request.result as LatestUploadData | undefined;
if (result) {
resolve(result);
} else {
reject(new Error("No cached upload found"));
}
};
transaction.oncomplete = () => db.close();
});
}
/**
* Delete the latest upload from IndexedDB
*/
export async function deleteLatestUpload(): Promise<void> {
const db = await openDB();
return new Promise((resolve, reject) => {
const transaction = db.transaction([STORE_NAME], "readwrite");
const store = transaction.objectStore(STORE_NAME);
const request = store.delete("latest");
request.onerror = () => reject(request.error);
request.onsuccess = () => resolve();
transaction.oncomplete = () => db.close();
});
}


@@ -1,5 +0,0 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.


@@ -10,3 +10,14 @@ presidio-analyzer>=2.2.0
presidio-anonymizer>=2.2.0
spacy>=3.7.0
# Download spaCy model with: python -m spacy download en_core_web_sm
# FastAPI Backend
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6
# Optional: GPU Support (uncomment if you have CUDA)
# torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121
# Chatbot (WIP - not exposed in API yet)
gpt4all>=2.0.0


@@ -1,145 +0,0 @@
"""
AI Governance Module - Bias Detection and Risk Analysis
"""
from .data_processor import DataProcessor
from .model_trainer import GeneralizedModelTrainer
from .bias_analyzer import BiasAnalyzer
from .risk_analyzer import RiskAnalyzer
from .report_generator import ReportGenerator, NumpyEncoder
import pandas as pd
import json
__version__ = '1.0.0'
__all__ = [
'DataProcessor',
'GeneralizedModelTrainer',
'BiasAnalyzer',
'RiskAnalyzer',
'ReportGenerator',
'NumpyEncoder',
'AIGovernanceAnalyzer'
]
class AIGovernanceAnalyzer:
"""
Main interface for AI Governance analysis
Example:
>>> analyzer = AIGovernanceAnalyzer()
>>> report = analyzer.analyze('data.csv', 'target', ['gender', 'age'])
>>> print(f"Bias Score: {report['summary']['overall_bias_score']:.3f}")
"""
def __init__(self):
"""Initialize the analyzer"""
self.processor = None
self.trainer = None
self.bias_analyzer = None
self.risk_analyzer = None
self.report_generator = None
def analyze(self, data_path, target_column, protected_attributes):
"""
Run complete AI governance analysis from file
Args:
data_path (str): Path to CSV file
target_column (str): Name of target column
protected_attributes (list): List of protected attribute column names
Returns:
dict: Complete analysis report
"""
df = pd.read_csv(data_path)
return self.analyze_dataframe(df, target_column, protected_attributes)
def analyze_dataframe(self, df, target_column, protected_attributes):
"""
Run complete AI governance analysis from DataFrame
Args:
df (pd.DataFrame): Input dataframe
target_column (str): Name of target column
protected_attributes (list): List of protected attribute column names
Returns:
dict: Complete analysis report
"""
# Step 1: Process data
self.processor = DataProcessor(df)
self.processor.target_column = target_column
self.processor.protected_attributes = protected_attributes
self.processor.prepare_data()
# Step 2: Train model
self.trainer = GeneralizedModelTrainer(
self.processor.X_train,
self.processor.X_test,
self.processor.y_train,
self.processor.y_test,
self.processor.feature_names
)
self.trainer.train()
self.trainer.evaluate()
# Step 3: Analyze bias
self.bias_analyzer = BiasAnalyzer(
self.processor.X_test,
self.processor.y_test,
self.trainer.y_pred,
self.processor.df,
self.processor.protected_attributes,
self.processor.target_column
)
bias_results = self.bias_analyzer.analyze()
# Step 4: Assess risks
self.risk_analyzer = RiskAnalyzer(
self.processor.df,
self.trainer.results,
bias_results,
self.processor.protected_attributes,
self.processor.target_column
)
risk_results = self.risk_analyzer.analyze()
# Step 5: Generate report
self.report_generator = ReportGenerator(
self.trainer.results,
bias_results,
risk_results,
self.processor.df
)
return self.report_generator.generate_report()
def save_report(self, report, output_path):
"""
Save report to JSON file
Args:
report (dict): Analysis report
output_path (str): Path to save JSON file
Returns:
str: Path to saved file
"""
with open(output_path, 'w') as f:
json.dump(report, f, indent=2, cls=NumpyEncoder)
return output_path
def get_summary(self, report):
"""
Get executive summary from report
Args:
report (dict): Analysis report
Returns:
dict: Summary metrics
"""
return report.get('summary', {})


@@ -1,263 +0,0 @@
"""
Report Generator Module
Generates comprehensive JSON reports
"""
import json
import numpy as np
from datetime import datetime
class NumpyEncoder(json.JSONEncoder):
"""Custom JSON encoder for numpy types"""
def default(self, obj):
if isinstance(obj, (np.integer, np.int64, np.int32)):
return int(obj)
elif isinstance(obj, (np.floating, np.float64, np.float32)):
return float(obj)
elif isinstance(obj, (np.ndarray,)):
return obj.tolist()
elif isinstance(obj, (np.bool_,)):
return bool(obj)
return super(NumpyEncoder, self).default(obj)
class ReportGenerator:
"""Generate comprehensive analysis reports"""
def __init__(self, model_results, bias_results, risk_results, df):
self.model_results = model_results
self.bias_results = bias_results
self.risk_results = risk_results
self.df = df
def generate_report(self):
"""Generate comprehensive JSON report"""
report = {
'metadata': self._generate_metadata(),
'summary': self._generate_summary(),
'model_performance': self._format_model_results(),
'bias_analysis': self._format_bias_results(),
'risk_assessment': self._format_risk_results(),
'key_findings': self._extract_key_findings(),
'recommendations': self._compile_recommendations(),
'detailed_metrics': self._compile_detailed_metrics()
}
return report
def _generate_metadata(self):
"""Generate report metadata"""
return {
'report_id': f"AIGov_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
'generated_at': datetime.now().isoformat(),
'report_version': '1.0',
'dataset_info': {
'total_records': len(self.df),
'total_features': len(self.df.columns),
'columns': list(self.df.columns)
}
}
def _generate_summary(self):
"""Generate executive summary"""
model_metrics = self.model_results.get('metrics', {})
return {
'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
'model_accuracy': model_metrics.get('accuracy', 0.0),
'fairness_violations_count': len(self.bias_results.get('fairness_violations', [])),
'passes_fairness_threshold': self.bias_results.get('fairness_assessment', {}).get('passes_fairness_threshold', False)
}
def _format_model_results(self):
"""Format model performance results"""
return {
'model_type': self.model_results.get('model_type', 'Unknown'),
'metrics': self.model_results.get('metrics', {}),
'confusion_matrix': self.model_results.get('confusion_matrix', []),
'top_features': dict(list(self.model_results.get('feature_importance', {}).items())[:10])
}
def _format_bias_results(self):
"""Format bias analysis results"""
return {
'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
'fairness_metrics': self.bias_results.get('fairness_metrics', {}),
'fairness_violations': self.bias_results.get('fairness_violations', []),
'fairness_assessment': self.bias_results.get('fairness_assessment', {}),
'demographic_bias_summary': self._summarize_demographic_bias()
}
def _format_risk_results(self):
"""Format risk assessment results"""
return {
'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
'risk_categories': self.risk_results.get('risk_categories', {}),
'privacy_risks': self._summarize_privacy_risks(),
'ethical_risks': self._summarize_ethical_risks()
}
def _summarize_demographic_bias(self):
"""Summarize demographic bias"""
demo_bias = self.bias_results.get('demographic_bias', {})
summary = {}
for attr, data in demo_bias.items():
summary[attr] = {
'max_disparity': data.get('max_disparity', 0),
'groups_analyzed': len(data.get('approval_rates', {}))
}
return summary
def _summarize_privacy_risks(self):
"""Summarize privacy risks"""
privacy = self.risk_results.get('privacy_risks', {})
return {
'pii_count': len(privacy.get('pii_detected', [])),
'anonymization_level': privacy.get('anonymization_level', 'UNKNOWN'),
'exposure_risk_count': len(privacy.get('exposure_risks', [])),
'gdpr_compliance_score': privacy.get('gdpr_compliance', {}).get('compliance_score', 0)
}
def _summarize_ethical_risks(self):
"""Summarize ethical risks"""
ethical = self.risk_results.get('ethical_risks', {})
return {
'fairness_issues_count': len(ethical.get('fairness_issues', [])),
'transparency_score': ethical.get('transparency_score', 0),
'bias_amplification_risk': ethical.get('bias_amplification_risk', 'UNKNOWN'),
'social_impact': ethical.get('social_impact_assessment', {})
}
def _extract_key_findings(self):
"""Extract key findings from analysis"""
findings = []
# Model performance findings
accuracy = self.model_results.get('metrics', {}).get('accuracy', 0)
if accuracy >= 0.8:
findings.append(f"✓ Model achieves good accuracy ({accuracy:.2%})")
else:
findings.append(f"⚠ Model accuracy is below optimal ({accuracy:.2%})")
# Bias findings
bias_score = self.bias_results.get('overall_bias_score', 0)
if bias_score < 0.3:
findings.append("✓ Low bias detected across protected attributes")
elif bias_score < 0.5:
findings.append("⚠ Moderate bias detected - monitoring recommended")
else:
findings.append("❌ High bias detected - immediate action required")
# Fairness violations
violations = self.bias_results.get('fairness_violations', [])
if violations:
high_sev = sum(1 for v in violations if v['severity'] == 'HIGH')
findings.append(f"{len(violations)} fairness violations detected ({high_sev} high severity)")
else:
findings.append("✓ No fairness violations detected")
# Privacy findings
privacy = self.risk_results.get('privacy_risks', {})
pii_count = len(privacy.get('pii_detected', []))
if pii_count > 0:
findings.append(f"{pii_count} columns contain potential PII")
else:
findings.append("✓ No obvious PII detected in dataset")
# Risk level
risk_level = self.risk_results.get('risk_level', 'UNKNOWN')
findings.append(f"Overall Risk Level: {risk_level}")
return findings
def _compile_recommendations(self):
"""Compile all recommendations"""
recommendations = []
# Get recommendations from each component
privacy_recs = self.risk_results.get('privacy_risks', {}).get('recommendations', [])
ethical_recs = self.risk_results.get('ethical_risks', {}).get('recommendations', [])
performance_recs = self.risk_results.get('model_performance_risks', {}).get('recommendations', [])
compliance_recs = self.risk_results.get('compliance_risks', {}).get('recommendations', [])
# Prioritize recommendations
all_recs = []
# High priority (from violations and high risks)
violations = self.bias_results.get('fairness_violations', [])
if violations:
all_recs.append({
'priority': 'HIGH',
'category': 'Fairness',
'recommendation': 'Address fairness violations in protected attributes'
})
if len(privacy_recs) > 0:
all_recs.append({
'priority': 'HIGH',
'category': 'Privacy',
'recommendation': privacy_recs[0]
})
# Medium priority
for rec in ethical_recs[:2]:
all_recs.append({
'priority': 'MEDIUM',
'category': 'Ethics',
'recommendation': rec
})
# Lower priority
for rec in performance_recs[:2]:
all_recs.append({
'priority': 'MEDIUM',
'category': 'Performance',
'recommendation': rec
})
for rec in compliance_recs[:2]:
all_recs.append({
'priority': 'MEDIUM',
'category': 'Compliance',
'recommendation': rec
})
# Convert to simple list with formatting
recommendations = [
f"[{r['priority']}] {r['category']}: {r['recommendation']}"
for r in all_recs[:10] # Limit to top 10
]
return recommendations
def _compile_detailed_metrics(self):
"""Compile detailed metrics for analysis"""
return {
'bias_metrics': {
'by_attribute': self.bias_results.get('fairness_metrics', {}),
'demographic_analysis': self.bias_results.get('demographic_bias', {})
},
'risk_breakdown': {
'privacy': self.risk_results.get('privacy_risks', {}),
'ethical': self.risk_results.get('ethical_risks', {}),
'compliance': self.risk_results.get('compliance_risks', {}),
'data_quality': self.risk_results.get('data_quality_risks', {})
},
'model_details': {
'classification_report': self.model_results.get('classification_report', {}),
'feature_importance': self.model_results.get('feature_importance', {})
}
}
def save_report(self, filepath):
"""Save report to JSON file"""
report = self.generate_report()
with open(filepath, 'w') as f:
json.dump(report, f, indent=2, cls=NumpyEncoder)
return filepath

21
start_api.py Normal file

@@ -0,0 +1,21 @@
"""
Start the FastAPI server
Run: python start_api.py
"""
import uvicorn
if __name__ == "__main__":
print("🚀 Starting Nordic Privacy AI API Server...")
print("📍 API will be available at: http://localhost:8000")
print("📖 Interactive docs at: http://localhost:8000/docs")
print("🔗 Frontend should run at: http://localhost:3000")
print("\nPress CTRL+C to stop\n")
uvicorn.run(
"api.main:app",
host="0.0.0.0",
port=8000,
reload=True, # Auto-reload on code changes
log_level="info"
)


@@ -10,7 +10,7 @@ import os
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from cleaning import DataCleaner, CleaningConfig
from data_cleaning import DataCleaner
def test_basic_cleaning():