Mirror of https://github.com/PlatypusPus/MushroomEmpire.git (synced 2026-02-07 22:18:59 +00:00)

Commit: merge
19 .gitignore (vendored)
@@ -11,7 +11,7 @@ dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
@@ -48,9 +48,11 @@ Thumbs.db
# Streamlit
.streamlit/secrets.toml

# Reports
# Reports and generated files
reports/*.json
reports/*.pdf
reports/*.csv
reports/*.html

# Logs
*.log
@@ -59,4 +61,17 @@ reports/*.pdf
*.csv
!Datasets/loan_data.csv

# Node.js & Next.js
node_modules/
.next/
out/
.vercel
*.tsbuildinfo
next-env.d.ts

# Frontend build artifacts
frontend/nordic-privacy-ai/.next/
frontend/nordic-privacy-ai/out/
frontend/nordic-privacy-ai/node_modules/

Data
@@ -21,6 +21,7 @@ class DataProcessor:
        self.categorical_features = []
        self.feature_names = []
        self.encoders = {}
        self.target_encoder = None  # Add target encoder
        self.scaler = StandardScaler()

        self.X_train = None
@@ -75,6 +76,13 @@ class DataProcessor:
        X = self.df[feature_cols].copy()
        y = self.df[self.target_column].copy()

        # Encode target variable if it's categorical
        if y.dtype == 'object' or y.dtype.name == 'category':
            self.target_encoder = LabelEncoder()
            y_encoded = self.target_encoder.fit_transform(y)
            y = pd.Series(y_encoded, index=y.index)
            print(f"Target '{self.target_column}' encoded: {dict(enumerate(self.target_encoder.classes_))}")

        # Encode categorical variables
        for col in self.categorical_features:
            if col in X.columns:
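For context, the target-encoding step added above follows the standard scikit-learn LabelEncoder pattern: fit on the categorical target, keep the encoder so predictions can be mapped back to labels later. A minimal standalone sketch of the same idea (the column values below are illustrative, not taken from the project's datasets):

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Hypothetical string-valued target column.
y = pd.Series(["approved", "rejected", "approved", "approved"])

encoder = LabelEncoder()
y_encoded = pd.Series(encoder.fit_transform(y), index=y.index)

# Mapping of integer codes back to the original class labels,
# mirroring the dict(enumerate(...)) print in DataProcessor.
print({code: str(label) for code, label in enumerate(encoder.classes_)})

# Keeping the fitted encoder allows decoding model outputs later.
print(encoder.inverse_transform([0, 1]))
```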
1 api/__init__.py (new file)
@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure

72 api/main.py (new file)
@@ -0,0 +1,72 @@
"""
FastAPI Backend for Nordic Privacy AI
Provides endpoints for AI Governance analysis and data cleaning
"""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os

from api.routers import analyze, clean

# Create FastAPI app
app = FastAPI(
    title="Nordic Privacy AI API",
    description="AI-powered GDPR compliance, bias detection, and risk analysis",
    version="1.0.0"
)

# CORS configuration for Next.js frontend
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",  # Next.js dev server
        "http://127.0.0.1:3000",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount reports directory for file downloads
reports_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "reports")
os.makedirs(reports_dir, exist_ok=True)
app.mount("/reports", StaticFiles(directory=reports_dir), name="reports")

# Include routers
app.include_router(analyze.router, prefix="/api", tags=["AI Governance"])
app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"])


@app.get("/")
async def root():
    """Health check endpoint"""
    return {
        "status": "online",
        "service": "Nordic Privacy AI API",
        "version": "1.0.0",
        "endpoints": {
            "analyze": "/api/analyze",
            "clean": "/api/clean",
            "docs": "/docs"
        }
    }


@app.get("/health")
async def health_check():
    """Detailed health check"""
    try:
        import torch
        cuda_available = torch.cuda.is_available()
        gpu_name = torch.cuda.get_device_name(0) if cuda_available else None
    except:
        cuda_available = False
        gpu_name = None

    return {
        "status": "healthy",
        "gpu_acceleration": {
            "available": cuda_available,
            "device": gpu_name or "CPU"
        }
    }
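A minimal client-side sketch of exercising this app (the routes come from the file above; the base URL assumes a local `uvicorn api.main:app` dev server on port 8000, which is an assumption, not something stated in this commit):

```python
import requests

BASE = "http://localhost:8000"  # assumed local dev address

# Root and health endpoints defined in api/main.py
print(requests.get(f"{BASE}/").json())        # service info and endpoint map
print(requests.get(f"{BASE}/health").json())  # GPU availability details

# Upload a CSV for governance analysis (route added by api/routers/analyze.py);
# the path is the dataset kept in the repo per the .gitignore exception.
with open("Datasets/loan_data.csv", "rb") as fh:
    resp = requests.post(f"{BASE}/api/analyze",
                         files={"file": ("loan_data.csv", fh, "text/csv")})
resp.raise_for_status()
print(resp.json()["bias_metrics"]["overall_bias_score"])
```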
1 api/routers/__init__.py (new file)
@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure

141 api/routers/analyze.py (new file)
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
AI Governance Analysis Router
|
||||
Handles bias detection and risk analysis endpoints
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, File, UploadFile, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import io
|
||||
import os
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
|
||||
# Import AI Governance modules
|
||||
import sys
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||||
from ai_governance import AIGovernanceAnalyzer
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def convert_to_serializable(obj):
|
||||
"""Convert numpy/pandas types to native Python types for JSON serialization"""
|
||||
if isinstance(obj, (np.integer, np.int64, np.int32)):
|
||||
return int(obj)
|
||||
elif isinstance(obj, (np.floating, np.float64, np.float32)):
|
||||
return float(obj)
|
||||
elif isinstance(obj, np.ndarray):
|
||||
return obj.tolist()
|
||||
elif isinstance(obj, dict):
|
||||
return {key: convert_to_serializable(value) for key, value in obj.items()}
|
||||
elif isinstance(obj, list):
|
||||
return [convert_to_serializable(item) for item in obj]
|
||||
return obj
|
||||
|
||||
@router.post("/analyze")
|
||||
async def analyze_dataset(file: UploadFile = File(...)):
|
||||
"""
|
||||
Analyze uploaded dataset for bias and risk
|
||||
|
||||
- **file**: CSV file to analyze
|
||||
|
||||
Returns:
|
||||
- Analysis results (bias metrics, risk assessment)
|
||||
- Report file path for download
|
||||
"""
|
||||
|
||||
# Validate file type
|
||||
if not file.filename.endswith('.csv'):
|
||||
raise HTTPException(status_code=400, detail="Only CSV files are supported")
|
||||
|
||||
try:
|
||||
# Read uploaded file
|
||||
contents = await file.read()
|
||||
df = pd.read_csv(io.BytesIO(contents))
|
||||
|
||||
if df.empty:
|
||||
raise HTTPException(status_code=400, detail="Uploaded file is empty")
|
||||
|
||||
# Initialize AI Governance Analyzer
|
||||
analyzer = AIGovernanceAnalyzer()
|
||||
|
||||
# Auto-detect target column and protected attributes
|
||||
# Target: Last column (common convention) or first binary/categorical column
|
||||
target_column = df.columns[-1]
|
||||
|
||||
# Protected attributes: Common sensitive columns
|
||||
protected_keywords = ['gender', 'age', 'race', 'sex', 'ethnicity', 'religion', 'nationality']
|
||||
protected_attributes = [col for col in df.columns
|
||||
if any(keyword in col.lower() for keyword in protected_keywords)]
|
||||
|
||||
# If no protected attributes found, use first few categorical columns
|
||||
if not protected_attributes:
|
||||
categorical_cols = df.select_dtypes(include=['object', 'category']).columns
|
||||
protected_attributes = [col for col in categorical_cols if col != target_column][:3]
|
||||
|
||||
print(f"Analyzing dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
|
||||
print(f"Target column: {target_column}")
|
||||
print(f"Protected attributes: {protected_attributes}")
|
||||
|
||||
# Run analysis
|
||||
report = analyzer.analyze_dataframe(df, target_column, protected_attributes)
|
||||
|
||||
# Generate report filename
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
safe_filename = (file.filename or "dataset").replace('.csv', '')
|
||||
report_filename = f"governance_report_{safe_filename}_{timestamp}.json"
|
||||
report_path = os.path.join("reports", report_filename)
|
||||
|
||||
# Save full report to disk
|
||||
full_report_path = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
|
||||
report_path
|
||||
)
|
||||
analyzer.save_report(report, full_report_path)
|
||||
|
||||
# Prepare response with summary
|
||||
response_data = {
|
||||
"status": "success",
|
||||
"filename": file.filename,
|
||||
"dataset_info": {
|
||||
"rows": len(df),
|
||||
"columns": len(df.columns),
|
||||
"features": list(df.columns)
|
||||
},
|
||||
"model_performance": {
|
||||
"accuracy": report.get("model_metrics", {}).get("accuracy", 0),
|
||||
"precision": report.get("model_metrics", {}).get("precision", 0),
|
||||
"recall": report.get("model_metrics", {}).get("recall", 0),
|
||||
"f1_score": report.get("model_metrics", {}).get("f1_score", 0)
|
||||
},
|
||||
"bias_metrics": {
|
||||
"overall_bias_score": report.get("bias_metrics", {}).get("overall_bias_score", 0),
|
||||
"disparate_impact": report.get("bias_metrics", {}).get("disparate_impact", {}),
|
||||
"statistical_parity": report.get("bias_metrics", {}).get("statistical_parity_difference", {}),
|
||||
"violations_detected": report.get("bias_metrics", {}).get("fairness_violations", [])
|
||||
},
|
||||
"risk_assessment": {
|
||||
"overall_risk_score": report.get("risk_metrics", {}).get("overall_risk_score", 0),
|
||||
"privacy_risks": report.get("risk_metrics", {}).get("privacy_risks", []),
|
||||
"ethical_risks": report.get("risk_metrics", {}).get("ethical_risks", []),
|
||||
"compliance_risks": report.get("risk_metrics", {}).get("compliance_risks", []),
|
||||
"data_quality_risks": report.get("risk_metrics", {}).get("data_quality_risks", [])
|
||||
},
|
||||
"recommendations": report.get("recommendations", []),
|
||||
"report_file": f"/{report_path}",
|
||||
"timestamp": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
# Convert all numpy/pandas types to native Python types
|
||||
response_data = convert_to_serializable(response_data)
|
||||
|
||||
return JSONResponse(content=response_data)
|
||||
|
||||
except pd.errors.EmptyDataError:
|
||||
raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
|
||||
except Exception as e:
|
||||
print(f"Error during analysis: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
|
||||
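One note on the convert_to_serializable helper defined in this router: Python's standard json encoder (and therefore JSONResponse content) rejects numpy integers and arrays, so the response dict is coerced recursively before being returned. A small self-contained illustration of why that step exists (to_native below is a simplified stand-in for the project's helper, not project code):

```python
import json
import numpy as np

metrics = {"support": np.int64(1200), "confusion_matrix": np.array([[5, 1], [2, 7]])}

# Plain json.dumps fails on numpy integers and arrays...
try:
    json.dumps(metrics)
except TypeError as exc:
    print("json.dumps failed:", exc)

# ...which is why the router coerces values to native Python types first.
def to_native(obj):
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {k: to_native(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [to_native(v) for v in obj]
    return obj

print(json.dumps(to_native(metrics)))
```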
142 api/routers/clean.py (new file)
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Data Cleaning Router
|
||||
Handles PII detection and anonymization endpoints
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, File, UploadFile, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import io
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
|
||||
# Import cleaning module
|
||||
import sys
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||||
from data_cleaning import DataCleaner
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def convert_to_serializable(obj):
|
||||
"""Convert numpy/pandas types to native Python types for JSON serialization"""
|
||||
if isinstance(obj, (np.integer, np.int64, np.int32)):
|
||||
return int(obj)
|
||||
elif isinstance(obj, (np.floating, np.float64, np.float32)):
|
||||
return float(obj)
|
||||
elif isinstance(obj, np.ndarray):
|
||||
return obj.tolist()
|
||||
elif isinstance(obj, dict):
|
||||
return {key: convert_to_serializable(value) for key, value in obj.items()}
|
||||
elif isinstance(obj, list):
|
||||
return [convert_to_serializable(item) for item in obj]
|
||||
return obj
|
||||
|
||||
|
||||
@router.post("/clean")
|
||||
async def clean_dataset(file: UploadFile = File(...)):
|
||||
"""
|
||||
Clean uploaded dataset - detect and anonymize PII
|
||||
|
||||
- **file**: CSV file to clean
|
||||
|
||||
Returns:
|
||||
- Cleaned dataset statistics
|
||||
- PII detections and anonymization actions
|
||||
- Report file path for download
|
||||
- Cleaned CSV file path for download
|
||||
"""
|
||||
|
||||
# Validate file type
|
||||
if not file.filename.endswith('.csv'):
|
||||
raise HTTPException(status_code=400, detail="Only CSV files are supported")
|
||||
|
||||
try:
|
||||
# Read uploaded file
|
||||
contents = await file.read()
|
||||
df = pd.read_csv(io.BytesIO(contents))
|
||||
|
||||
if df.empty:
|
||||
raise HTTPException(status_code=400, detail="Uploaded file is empty")
|
||||
|
||||
# Initialize Data Cleaner (with GPU if available)
|
||||
print(f"Cleaning dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
|
||||
cleaner = DataCleaner(df, use_gpu=True)
|
||||
|
||||
# Run cleaning (non-interactive mode for API)
|
||||
cleaned_df, audit_report = cleaner.clean(
|
||||
risky_features=None, # Auto-detect
|
||||
interactive=False, # No user prompts in API mode
|
||||
scan_all_cells=True
|
||||
)
|
||||
|
||||
# Generate filenames
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
safe_filename = file.filename.replace('.csv', '')
|
||||
|
||||
# Save cleaned CSV
|
||||
cleaned_csv_filename = f"cleaned_{safe_filename}_{timestamp}.csv"
|
||||
cleaned_csv_path = os.path.join("reports", cleaned_csv_filename)
|
||||
full_cleaned_csv_path = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
|
||||
cleaned_csv_path
|
||||
)
|
||||
cleaner.save_cleaned_data(cleaned_df, full_cleaned_csv_path)
|
||||
|
||||
# Save audit report
|
||||
audit_report_filename = f"cleaning_audit_{safe_filename}_{timestamp}.json"
|
||||
audit_report_path = os.path.join("reports", audit_report_filename)
|
||||
full_audit_report_path = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
|
||||
audit_report_path
|
||||
)
|
||||
cleaner.save_audit_report(audit_report, full_audit_report_path)
|
||||
|
||||
# Prepare response
|
||||
response_data = {
|
||||
"status": "success",
|
||||
"filename": file.filename,
|
||||
"dataset_info": {
|
||||
"original_rows": int(audit_report["metadata"]["original_rows"]),
|
||||
"original_columns": int(audit_report["metadata"]["original_columns"]),
|
||||
"cleaned_rows": int(audit_report["metadata"]["cleaned_rows"]),
|
||||
"cleaned_columns": int(audit_report["metadata"]["cleaned_columns"])
|
||||
},
|
||||
"gpu_acceleration": audit_report["metadata"].get("gpu_acceleration", {
|
||||
"enabled": False,
|
||||
"device": "CPU"
|
||||
}),
|
||||
"summary": {
|
||||
"columns_removed": audit_report["summary"]["columns_removed"],
|
||||
"columns_anonymized": audit_report["summary"]["columns_anonymized"],
|
||||
"total_cells_affected": int(audit_report["summary"]["total_cells_affected"])
|
||||
},
|
||||
"pii_detections": {
|
||||
col: {
|
||||
"action": details["action"],
|
||||
"entity_types": details["entity_types_found"],
|
||||
"num_affected_rows": int(details.get("num_affected_rows", 0)),
|
||||
"examples": details.get("examples", [])[:2] # Show 2 examples
|
||||
}
|
||||
for col, details in audit_report["details"].items()
|
||||
},
|
||||
"gdpr_compliance": audit_report["compliance"]["gdpr_articles_applied"],
|
||||
"files": {
|
||||
"cleaned_csv": f"/{cleaned_csv_path}",
|
||||
"audit_report": f"/{audit_report_path}"
|
||||
},
|
||||
"timestamp": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
# Convert all numpy/pandas types to native Python types
|
||||
response_data = convert_to_serializable(response_data)
|
||||
|
||||
return JSONResponse(content=response_data)
|
||||
|
||||
except pd.errors.EmptyDataError:
|
||||
raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
|
||||
except Exception as e:
|
||||
print(f"Error during cleaning: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Cleaning failed: {str(e)}")
|
||||
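A minimal client sketch for this endpoint, mirroring the analyze example earlier (the base URL is again an assumed local dev address; the response keys and the /reports static mount come from this commit):

```python
import requests

BASE = "http://localhost:8000"  # assumed local dev address

with open("Datasets/loan_data.csv", "rb") as fh:
    resp = requests.post(f"{BASE}/api/clean",
                         files={"file": ("loan_data.csv", fh, "text/csv")})
resp.raise_for_status()
result = resp.json()

print(result["summary"]["total_cells_affected"], "cells anonymized")

# The cleaned CSV and audit report are served via the /reports mount in api/main.py.
cleaned = requests.get(BASE + result["files"]["cleaned_csv"])
with open("cleaned_download.csv", "wb") as out:
    out.write(cleaned.content)
```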
1 api/utils/__init__.py (new file)
@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure

12 data_cleaning/__init__.py (new file)
@@ -0,0 +1,12 @@
"""
Data Cleaning Module
Automated PII detection and GDPR-compliant anonymization
"""

from .cleaner import DataCleaner

__version__ = '1.0.0'

__all__ = [
    'DataCleaner'
]
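For orientation, the same DataCleaner workflow that /api/clean drives can be sketched offline; the constructor and method calls below are the ones used in api/routers/clean.py, while the file paths are illustrative:

```python
import os
import pandas as pd
from data_cleaning import DataCleaner

df = pd.read_csv("Datasets/loan_data.csv")

# Same call pattern as the /api/clean route: auto-detect risky columns,
# no interactive prompts, scan every cell for PII.
cleaner = DataCleaner(df, use_gpu=True)
cleaned_df, audit_report = cleaner.clean(
    risky_features=None,
    interactive=False,
    scan_all_cells=True,
)

os.makedirs("reports", exist_ok=True)
cleaner.save_cleaned_data(cleaned_df, "reports/cleaned_loan_data.csv")
cleaner.save_audit_report(audit_report, "reports/cleaning_audit_loan_data.json")
```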
@@ -14,7 +14,7 @@ export default function TryPage() {
      <div className="flex flex-1 min-h-0">
        <Sidebar value={tab} onChange={setTab} />
        <div className="flex-1 min-h-0 flex">
          <div className="flex-1 min-h-0"><CenterPanel tab={tab} /></div>
          <div className="flex-1 min-h-0 min-w-0"><CenterPanel tab={tab} onAnalyze={() => setTab("bias-analysis")} /></div>
          <div className="w-[360px] hidden xl:block"><ChatbotPanel /></div>
        </div>
      </div>

620 frontend/components/try/CenterPanel.tsx (new file)
@@ -0,0 +1,620 @@
|
||||
"use client";
|
||||
import { TryTab } from "./Sidebar";
|
||||
import { useState, useRef, useCallback, useEffect } from "react";
|
||||
import { saveLatestUpload, getLatestUpload, deleteLatestUpload } from "../../lib/indexeddb";
|
||||
import { analyzeDataset, cleanDataset, getReportUrl, type AnalyzeResponse, type CleanResponse } from "../../lib/api";
|
||||
|
||||
interface CenterPanelProps {
|
||||
tab: TryTab;
|
||||
onAnalyze?: () => void;
|
||||
}
|
||||
|
||||
interface UploadedFileMeta {
|
||||
name: string;
|
||||
size: number;
|
||||
type: string;
|
||||
contentPreview: string;
|
||||
}
|
||||
|
||||
interface TablePreviewData {
|
||||
headers: string[];
|
||||
rows: string[][];
|
||||
origin: 'csv';
|
||||
}
|
||||
|
||||
export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
|
||||
const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview
|
||||
const [fileMeta, setFileMeta] = useState<UploadedFileMeta | null>(null);
|
||||
const [uploadedFile, setUploadedFile] = useState<File | null>(null);
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
const [progress, setProgress] = useState<number>(0);
|
||||
const [progressLabel, setProgressLabel] = useState<string>("Processing");
|
||||
const [tablePreview, setTablePreview] = useState<TablePreviewData | null>(null);
|
||||
const inputRef = useRef<HTMLInputElement | null>(null);
|
||||
const [loadedFromCache, setLoadedFromCache] = useState(false);
|
||||
const [isProcessing, setIsProcessing] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// Analysis results
|
||||
const [analyzeResult, setAnalyzeResult] = useState<AnalyzeResponse | null>(null);
|
||||
const [cleanResult, setCleanResult] = useState<CleanResponse | null>(null);
|
||||
|
||||
const reset = () => {
|
||||
setFileMeta(null);
|
||||
setUploadedFile(null);
|
||||
setProgress(0);
|
||||
setProgressLabel("Processing");
|
||||
setTablePreview(null);
|
||||
setError(null);
|
||||
};
|
||||
|
||||
// Handle API calls
|
||||
const handleAnalyze = async () => {
|
||||
if (!uploadedFile) {
|
||||
setError("No file uploaded");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsProcessing(true);
|
||||
setError(null);
|
||||
setProgressLabel("Analyzing dataset...");
|
||||
|
||||
try {
|
||||
const result = await analyzeDataset(uploadedFile);
|
||||
setAnalyzeResult(result);
|
||||
setProgressLabel("Analysis complete!");
|
||||
onAnalyze?.(); // Navigate to bias-analysis tab
|
||||
} catch (err: any) {
|
||||
setError(err.message || "Analysis failed");
|
||||
} finally {
|
||||
setIsProcessing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleClean = async () => {
|
||||
if (!uploadedFile) {
|
||||
setError("No file uploaded");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsProcessing(true);
|
||||
setError(null);
|
||||
setProgressLabel("Cleaning dataset...");
|
||||
|
||||
try {
|
||||
const result = await cleanDataset(uploadedFile);
|
||||
setCleanResult(result);
|
||||
setProgressLabel("Cleaning complete!");
|
||||
} catch (err: any) {
|
||||
setError(err.message || "Cleaning failed");
|
||||
} finally {
|
||||
setIsProcessing(false);
|
||||
}
|
||||
};

function tryParseCSV(text: string, maxRows = 50, maxCols = 40): TablePreviewData | null {
|
||||
const lines = text.split(/\r?\n/).filter(l => l.trim().length > 0);
|
||||
if (lines.length < 2) return null;
|
||||
const commaDensity = lines.slice(0, 10).filter(l => l.includes(',')).length;
|
||||
if (commaDensity < 2) return null;
|
||||
const parseLine = (line: string) => {
|
||||
const out: string[] = [];
|
||||
let cur = '';
|
||||
let inQuotes = false;
|
||||
for (let i = 0; i < line.length; i++) {
|
||||
const ch = line[i];
|
||||
if (ch === '"') {
|
||||
if (inQuotes && line[i + 1] === '"') { cur += '"'; i++; } else { inQuotes = !inQuotes; }
|
||||
} else if (ch === ',' && !inQuotes) {
|
||||
out.push(cur);
|
||||
cur = '';
|
||||
} else { cur += ch; }
|
||||
}
|
||||
out.push(cur);
|
||||
return out.map(c => c.trim());
|
||||
};
|
||||
const raw = lines.slice(0, maxRows).map(parseLine);
|
||||
if (raw.length === 0) return null;
|
||||
const headers = raw[0];
|
||||
const colCount = Math.min(headers.length, maxCols);
|
||||
const rows = raw.slice(1).map(r => r.slice(0, colCount));
|
||||
return { headers: headers.slice(0, colCount), rows, origin: 'csv' };
|
||||
}
|
||||
|
||||
// We no longer build table preview for JSON; revert JSON to raw text view.
|
||||
|
||||
const processFile = useCallback(async (f: File) => {
|
||||
if (!f) return;
|
||||
const isCSV = /\.csv$/i.test(f.name);
|
||||
setProgress(0);
|
||||
setUploadedFile(f); // Save the file for API calls
|
||||
|
||||
// For large files, show a progress bar while reading the file stream (no preview)
|
||||
if (f.size > 1024 * 1024) {
|
||||
setProgressLabel("Uploading");
|
||||
const metaObj: UploadedFileMeta = {
|
||||
name: f.name,
|
||||
size: f.size,
|
||||
type: f.type || "unknown",
|
||||
contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES/1024)}KB)...`,
|
||||
};
|
||||
setFileMeta(metaObj);
|
||||
setTablePreview(null);
|
||||
// Save to IndexedDB immediately so it persists without needing full read
|
||||
(async () => {
|
||||
try { await saveLatestUpload(f, metaObj); } catch {}
|
||||
})();
|
||||
// Read head slice for partial preview & possible CSV table extraction
|
||||
try {
|
||||
const headBlob = f.slice(0, PREVIEW_BYTES);
|
||||
const headReader = new FileReader();
|
||||
headReader.onload = async () => {
|
||||
try {
|
||||
const buf = headReader.result as ArrayBuffer;
|
||||
const decoder = new TextDecoder();
|
||||
const text = decoder.decode(buf);
|
||||
setFileMeta(prev => prev ? { ...prev, contentPreview: text.slice(0, 4000) } : prev);
|
||||
if (isCSV) {
|
||||
const parsed = tryParseCSV(text);
|
||||
setTablePreview(parsed);
|
||||
} else {
|
||||
setTablePreview(null);
|
||||
}
|
||||
try { await saveLatestUpload(f, { ...metaObj, contentPreview: text.slice(0, 4000) }); } catch {}
|
||||
} catch { /* ignore */ }
|
||||
};
|
||||
headReader.readAsArrayBuffer(headBlob);
|
||||
} catch { /* ignore */ }
|
||||
// Use streaming read for progress without buffering entire file in memory
|
||||
try {
|
||||
const stream: ReadableStream<Uint8Array> | undefined = (typeof (f as any).stream === "function" ? (f as any).stream() : undefined);
|
||||
if (stream && typeof stream.getReader === "function") {
|
||||
const reader = stream.getReader();
|
||||
let loaded = 0;
|
||||
const total = f.size || 1;
|
||||
for (;;) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
loaded += value ? value.length : 0;
|
||||
const pct = Math.min(100, Math.round((loaded / total) * 100));
|
||||
setProgress(pct);
|
||||
}
|
||||
setProgress(100);
|
||||
} else {
|
||||
// Fallback to FileReader progress events
|
||||
const reader = new FileReader();
|
||||
reader.onprogress = (evt) => {
|
||||
if (evt.lengthComputable) {
|
||||
const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100));
|
||||
setProgress(pct);
|
||||
} else {
|
||||
setProgress((p) => (p < 90 ? p + 5 : p));
|
||||
}
|
||||
};
|
||||
reader.onloadend = () => setProgress(100);
|
||||
reader.onerror = () => setProgress(0);
|
||||
reader.readAsArrayBuffer(f);
|
||||
}
|
||||
} catch {
|
||||
setProgress(100);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const reader = new FileReader();
|
||||
reader.onprogress = (evt) => {
|
||||
if (evt.lengthComputable) {
|
||||
const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100));
|
||||
setProgress(pct);
|
||||
} else {
|
||||
setProgress((p) => (p < 90 ? p + 5 : p));
|
||||
}
|
||||
};
|
||||
reader.onload = async () => {
|
||||
try {
|
||||
const buf = reader.result as ArrayBuffer;
|
||||
const decoder = new TextDecoder();
|
||||
const text = decoder.decode(buf);
|
||||
const metaObj: UploadedFileMeta = {
|
||||
name: f.name,
|
||||
size: f.size,
|
||||
type: f.type || "unknown",
|
||||
contentPreview: text.slice(0, 4000),
|
||||
};
|
||||
setFileMeta(metaObj);
|
||||
if (isCSV) {
|
||||
const parsed = tryParseCSV(text);
|
||||
setTablePreview(parsed);
|
||||
} else {
|
||||
setTablePreview(null);
|
||||
}
|
||||
// Save file blob and meta to browser cache (IndexedDB)
|
||||
try {
|
||||
await saveLatestUpload(f, metaObj);
|
||||
} catch {}
|
||||
setProgressLabel("Processing");
|
||||
setProgress(100);
|
||||
} catch (e) {
|
||||
const metaObj: UploadedFileMeta = {
|
||||
name: f.name,
|
||||
size: f.size,
|
||||
type: f.type || "unknown",
|
||||
contentPreview: "Unable to decode preview.",
|
||||
};
|
||||
setFileMeta(metaObj);
|
||||
setTablePreview(null);
|
||||
try {
|
||||
await saveLatestUpload(f, metaObj);
|
||||
} catch {}
|
||||
setProgressLabel("Processing");
|
||||
setProgress(100);
|
||||
}
|
||||
};
|
||||
reader.onerror = () => {
|
||||
setProgress(0);
|
||||
};
|
||||
reader.readAsArrayBuffer(f);
|
||||
}, []);
|
||||
|
||||
function handleFileChange(e: React.ChangeEvent<HTMLInputElement>) {
|
||||
const f = e.target.files?.[0];
|
||||
processFile(f as File);
|
||||
}
|
||||
|
||||
const onDragOver = (e: React.DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(true);
|
||||
};
|
||||
const onDragLeave = () => setIsDragging(false);
|
||||
const onDrop = (e: React.DragEvent<HTMLDivElement>) => {
|
||||
e.preventDefault();
|
||||
setIsDragging(false);
|
||||
const f = e.dataTransfer.files?.[0];
|
||||
processFile(f as File);
|
||||
};
|
||||
|
||||
// Load last cached upload on mount (processing tab only)
|
||||
useEffect(() => {
|
||||
let ignore = false;
|
||||
if (tab !== "processing") return;
|
||||
(async () => {
|
||||
try {
|
||||
const { file, meta } = await getLatestUpload();
|
||||
if (!ignore && meta) {
|
||||
setFileMeta(meta as UploadedFileMeta);
|
||||
if (file) {
|
||||
setUploadedFile(file);
|
||||
}
|
||||
setLoadedFromCache(true);
|
||||
}
|
||||
} catch {}
|
||||
})();
|
||||
return () => {
|
||||
ignore = true;
|
||||
};
|
||||
}, [tab]);

function renderTabContent() {
|
||||
switch (tab) {
|
||||
case "processing":
|
||||
return (
|
||||
<div className="space-y-4 max-w-[1100px] xl:max-w-[1200px] w-full mx-auto">
|
||||
<h2 className="text-xl font-semibold">Upload & Process Data</h2>
|
||||
<p className="text-sm text-slate-600">Upload a CSV / JSON / text file. We will later parse, detect PII, and queue analyses.</p>
|
||||
<div className="flex flex-col gap-3 min-w-0">
|
||||
<div
|
||||
onDragOver={onDragOver}
|
||||
onDragLeave={onDragLeave}
|
||||
onDrop={onDrop}
|
||||
className={
|
||||
"rounded-lg border-2 border-dashed p-6 text-center transition-colors " +
|
||||
(isDragging ? "border-brand-600 bg-brand-50" : "border-slate-300 hover:border-brand-300")
|
||||
}
|
||||
>
|
||||
<p className="text-sm text-slate-600">Drag & drop a CSV / JSON / TXT here, or click to browse.</p>
|
||||
<div className="mt-3">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => inputRef.current?.click()}
|
||||
className="inline-flex items-center rounded-md bg-brand-600 px-4 py-2 text-white text-sm font-medium shadow hover:bg-brand-500"
|
||||
>
|
||||
Choose file
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<input
|
||||
ref={inputRef}
|
||||
type="file"
|
||||
accept=".csv,.json,.txt"
|
||||
onChange={handleFileChange}
|
||||
className="hidden"
|
||||
aria-hidden
|
||||
/>
|
||||
{progress > 0 && (
|
||||
<div className="w-full">
|
||||
<div className="h-2 w-full rounded-full bg-slate-200 overflow-hidden">
|
||||
<div
|
||||
className="h-2 bg-brand-600 transition-all"
|
||||
style={{ width: `${progress}%` }}
|
||||
/>
|
||||
</div>
|
||||
<div className="mt-1 text-xs text-slate-500">{progressLabel} {progress}%</div>
|
||||
</div>
|
||||
)}
|
||||
{fileMeta && (
|
||||
<div className="rounded-md border border-slate-200 p-4 bg-white shadow-sm">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<div className="text-sm font-medium">{fileMeta.name}</div>
|
||||
<div className="text-xs text-slate-500">{Math.round(fileMeta.size / 1024)} KB</div>
|
||||
</div>
|
||||
{loadedFromCache && (
|
||||
<div className="mb-2 text-[11px] text-brand-700">Loaded from browser cache</div>
|
||||
)}
|
||||
<div className="mb-3 text-xs text-slate-500">{fileMeta.type || "Unknown type"}</div>
|
||||
{/* Table preview when structured data detected; otherwise show text */}
|
||||
{tablePreview && tablePreview.origin === 'csv' ? (
|
||||
<div className="max-h-64 w-full min-w-0 overflow-x-auto overflow-y-auto rounded-md bg-slate-50">
|
||||
<table className="min-w-full text-xs">
|
||||
<thead className="sticky top-0 bg-slate-100">
|
||||
<tr>
|
||||
{tablePreview.headers.map((h, idx) => (
|
||||
<th key={idx} className="text-left font-semibold px-3 py-2 border-b border-slate-200 whitespace-nowrap">{h}</th>
|
||||
))}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{tablePreview.rows.map((r, i) => (
|
||||
<tr key={i} className={i % 2 === 0 ? "" : "bg-slate-100/50"}>
|
||||
{r.map((c, j) => (
|
||||
<td key={j} className="px-3 py-1.5 border-b border-slate-100 whitespace-nowrap max-w-[24ch] overflow-hidden text-ellipsis">{c}</td>
|
||||
))}
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
) : (
|
||||
<pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
|
||||
{fileMeta.contentPreview || "(no preview)"}
|
||||
</pre>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="mt-3 p-3 bg-red-50 border border-red-200 rounded-md text-sm text-red-700">
|
||||
❌ {error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{analyzeResult && (
|
||||
<div className="mt-3 p-3 bg-green-50 border border-green-200 rounded-md text-sm text-green-700">
|
||||
✅ Analysis complete! View results in tabs.
|
||||
<a
|
||||
href={getReportUrl(analyzeResult.report_file)}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="ml-2 underline"
|
||||
>
|
||||
Download Report
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{cleanResult && (
|
||||
<div className="mt-3 p-3 bg-green-50 border border-green-200 rounded-md text-sm text-green-700">
|
||||
✅ Cleaning complete! {cleanResult.summary.total_cells_affected} cells anonymized.
|
||||
<div className="mt-2 flex gap-2">
|
||||
<a
|
||||
href={getReportUrl(cleanResult.files.cleaned_csv)}
|
||||
download
|
||||
className="underline"
|
||||
>
|
||||
Download Cleaned CSV
|
||||
</a>
|
||||
<a
|
||||
href={getReportUrl(cleanResult.files.audit_report)}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="underline"
|
||||
>
|
||||
View Audit Report
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="mt-3 flex justify-end gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={async () => {
|
||||
reset();
|
||||
try { await deleteLatestUpload(); } catch {}
|
||||
setLoadedFromCache(false);
|
||||
setAnalyzeResult(null);
|
||||
setCleanResult(null);
|
||||
}}
|
||||
className="text-xs rounded-md border px-3 py-1.5 hover:bg-slate-50"
|
||||
>
|
||||
Clear
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleClean}
|
||||
disabled={isProcessing}
|
||||
className="text-xs rounded-md bg-green-600 text-white px-3 py-1.5 hover:bg-green-500 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{isProcessing ? "Processing..." : "Clean (PII)"}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleAnalyze}
|
||||
disabled={isProcessing}
|
||||
className="text-xs rounded-md bg-brand-600 text-white px-3 py-1.5 hover:bg-brand-500 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{isProcessing ? "Processing..." : "Analyze"}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
case "bias-analysis":
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<h2 className="text-xl font-semibold">Bias Analysis</h2>
|
||||
{analyzeResult ? (
|
||||
<div className="space-y-4">
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div className="p-4 bg-white rounded-lg border">
|
||||
<div className="text-sm text-slate-600">Overall Bias Score</div>
|
||||
<div className="text-2xl font-bold">{(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<div className="p-4 bg-white rounded-lg border">
|
||||
<div className="text-sm text-slate-600">Violations Detected</div>
|
||||
<div className="text-2xl font-bold">{analyzeResult.bias_metrics.violations_detected.length}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="p-4 bg-white rounded-lg border">
|
||||
<h3 className="font-semibold mb-2">Model Performance</h3>
|
||||
<div className="grid grid-cols-4 gap-2 text-sm">
|
||||
<div>
|
||||
<div className="text-slate-600">Accuracy</div>
|
||||
<div className="font-medium">{(analyzeResult.model_performance.accuracy * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-slate-600">Precision</div>
|
||||
<div className="font-medium">{(analyzeResult.model_performance.precision * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-slate-600">Recall</div>
|
||||
<div className="font-medium">{(analyzeResult.model_performance.recall * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-slate-600">F1 Score</div>
|
||||
<div className="font-medium">{(analyzeResult.model_performance.f1_score * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-sm text-slate-600">Upload and analyze a dataset to see bias metrics.</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
case "risk-analysis":
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<h2 className="text-xl font-semibold">Risk Analysis</h2>
|
||||
{analyzeResult ? (
|
||||
<div className="space-y-4">
|
||||
<div className="p-4 bg-white rounded-lg border">
|
||||
<div className="text-sm text-slate-600">Overall Risk Score</div>
|
||||
<div className="text-2xl font-bold">{(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
|
||||
{cleanResult && (
|
||||
<div className="p-4 bg-white rounded-lg border">
|
||||
<h3 className="font-semibold mb-2">PII Detection Results</h3>
|
||||
<div className="text-sm space-y-1">
|
||||
<div>Cells Anonymized: <span className="font-medium">{cleanResult.summary.total_cells_affected}</span></div>
|
||||
<div>Columns Removed: <span className="font-medium">{cleanResult.summary.columns_removed.length}</span></div>
|
||||
<div>Columns Anonymized: <span className="font-medium">{cleanResult.summary.columns_anonymized.length}</span></div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-sm text-slate-600">Upload and analyze a dataset to see risk assessment.</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
case "bias-risk-mitigation":
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<h2 className="text-xl font-semibold">Mitigation Suggestions</h2>
|
||||
{analyzeResult && analyzeResult.recommendations.length > 0 ? (
|
||||
<div className="space-y-2">
|
||||
{analyzeResult.recommendations.map((rec, i) => (
|
||||
<div key={i} className="p-3 bg-blue-50 border border-blue-200 rounded-md text-sm">
|
||||
{rec}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-sm text-slate-600">
|
||||
Recommendations will appear here after analysis.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
case "results":
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<h2 className="text-xl font-semibold">Results Summary</h2>
|
||||
{(analyzeResult || cleanResult) ? (
|
||||
<div className="space-y-4">
|
||||
{analyzeResult && (
|
||||
<div className="p-4 bg-white rounded-lg border">
|
||||
<h3 className="font-semibold mb-2">Analysis Results</h3>
|
||||
<div className="text-sm space-y-1">
|
||||
<div>Dataset: {analyzeResult.filename}</div>
|
||||
<div>Rows: {analyzeResult.dataset_info.rows}</div>
|
||||
<div>Columns: {analyzeResult.dataset_info.columns}</div>
|
||||
<div>Bias Score: {(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}%</div>
|
||||
<div>Risk Score: {(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<a
|
||||
href={getReportUrl(analyzeResult.report_file)}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="mt-3 inline-block text-sm text-brand-600 underline"
|
||||
>
|
||||
Download Full Report →
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{cleanResult && (
|
||||
<div className="p-4 bg-white rounded-lg border">
|
||||
<h3 className="font-semibold mb-2">Cleaning Results</h3>
|
||||
<div className="text-sm space-y-1">
|
||||
<div>Original: {cleanResult.dataset_info.original_rows} rows × {cleanResult.dataset_info.original_columns} cols</div>
|
||||
<div>Cleaned: {cleanResult.dataset_info.cleaned_rows} rows × {cleanResult.dataset_info.cleaned_columns} cols</div>
|
||||
<div>Cells Anonymized: {cleanResult.summary.total_cells_affected}</div>
|
||||
<div>Columns Removed: {cleanResult.summary.columns_removed.length}</div>
|
||||
<div>GDPR Compliant: {cleanResult.gdpr_compliance.length} articles applied</div>
|
||||
</div>
|
||||
<div className="mt-3 flex gap-2">
|
||||
<a
|
||||
href={getReportUrl(cleanResult.files.cleaned_csv)}
|
||||
download
|
||||
className="text-sm text-brand-600 underline"
|
||||
>
|
||||
Download Cleaned CSV →
|
||||
</a>
|
||||
<a
|
||||
href={getReportUrl(cleanResult.files.audit_report)}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-sm text-brand-600 underline"
|
||||
>
|
||||
View Audit Report →
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-sm text-slate-600">
|
||||
Process a dataset to see aggregated results.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="h-full overflow-y-auto p-6 bg-white/60">
|
||||
{renderTabContent()}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import { useState, useRef, useCallback, useEffect } from "react";
|
||||
|
||||
interface CenterPanelProps {
|
||||
tab: TryTab;
|
||||
onAnalyze?: () => void;
|
||||
}
|
||||
|
||||
interface UploadedFileMeta {
|
||||
@@ -13,11 +14,19 @@ interface UploadedFileMeta {
|
||||
contentPreview: string;
|
||||
}
|
||||
|
||||
export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
interface TablePreviewData {
|
||||
headers: string[];
|
||||
rows: string[][];
|
||||
origin: 'csv';
|
||||
}
|
||||
|
||||
export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
|
||||
const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview
|
||||
const [fileMeta, setFileMeta] = useState<UploadedFileMeta | null>(null);
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
const [progress, setProgress] = useState<number>(0);
|
||||
const [progressLabel, setProgressLabel] = useState<string>("Processing");
|
||||
const [tablePreview, setTablePreview] = useState<TablePreviewData | null>(null);
|
||||
const inputRef = useRef<HTMLInputElement | null>(null);
|
||||
const [loadedFromCache, setLoadedFromCache] = useState(false);
|
||||
|
||||
@@ -25,10 +34,43 @@ export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
setFileMeta(null);
|
||||
setProgress(0);
|
||||
setProgressLabel("Processing");
|
||||
setTablePreview(null);
|
||||
};
|
||||
|
||||
function tryParseCSV(text: string, maxRows = 50, maxCols = 40): TablePreviewData | null {
|
||||
const lines = text.split(/\r?\n/).filter(l => l.trim().length > 0);
|
||||
if (lines.length < 2) return null;
|
||||
const commaDensity = lines.slice(0, 10).filter(l => l.includes(',')).length;
|
||||
if (commaDensity < 2) return null;
|
||||
const parseLine = (line: string) => {
|
||||
const out: string[] = [];
|
||||
let cur = '';
|
||||
let inQuotes = false;
|
||||
for (let i = 0; i < line.length; i++) {
|
||||
const ch = line[i];
|
||||
if (ch === '"') {
|
||||
if (inQuotes && line[i + 1] === '"') { cur += '"'; i++; } else { inQuotes = !inQuotes; }
|
||||
} else if (ch === ',' && !inQuotes) {
|
||||
out.push(cur);
|
||||
cur = '';
|
||||
} else { cur += ch; }
|
||||
}
|
||||
out.push(cur);
|
||||
return out.map(c => c.trim());
|
||||
};
|
||||
const raw = lines.slice(0, maxRows).map(parseLine);
|
||||
if (raw.length === 0) return null;
|
||||
const headers = raw[0];
|
||||
const colCount = Math.min(headers.length, maxCols);
|
||||
const rows = raw.slice(1).map(r => r.slice(0, colCount));
|
||||
return { headers: headers.slice(0, colCount), rows, origin: 'csv' };
|
||||
}
|
||||
|
||||
// We no longer build table preview for JSON; revert JSON to raw text view.
|
||||
|
||||
const processFile = useCallback(async (f: File) => {
|
||||
if (!f) return;
|
||||
const isCSV = /\.csv$/i.test(f.name);
|
||||
setProgress(0);
|
||||
// For large files, show a progress bar while reading the file stream (no preview)
|
||||
if (f.size > 1024 * 1024) {
|
||||
@@ -37,13 +79,35 @@ export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
name: f.name,
|
||||
size: f.size,
|
||||
type: f.type || "unknown",
|
||||
contentPreview: "File too large for preview (limit 1MB).",
|
||||
contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES/1024)}KB)...`,
|
||||
};
|
||||
setFileMeta(metaObj);
|
||||
setTablePreview(null);
|
||||
// Save to IndexedDB immediately so it persists without needing full read
|
||||
(async () => {
|
||||
try { await saveLatestUpload(f, metaObj); } catch {}
|
||||
})();
|
||||
// Read head slice for partial preview & possible CSV table extraction
|
||||
try {
|
||||
const headBlob = f.slice(0, PREVIEW_BYTES);
|
||||
const headReader = new FileReader();
|
||||
headReader.onload = async () => {
|
||||
try {
|
||||
const buf = headReader.result as ArrayBuffer;
|
||||
const decoder = new TextDecoder();
|
||||
const text = decoder.decode(buf);
|
||||
setFileMeta(prev => prev ? { ...prev, contentPreview: text.slice(0, 4000) } : prev);
|
||||
if (isCSV) {
|
||||
const parsed = tryParseCSV(text);
|
||||
setTablePreview(parsed);
|
||||
} else {
|
||||
setTablePreview(null);
|
||||
}
|
||||
try { await saveLatestUpload(f, { ...metaObj, contentPreview: text.slice(0, 4000) }); } catch {}
|
||||
} catch { /* ignore */ }
|
||||
};
|
||||
headReader.readAsArrayBuffer(headBlob);
|
||||
} catch { /* ignore */ }
|
||||
// Use streaming read for progress without buffering entire file in memory
|
||||
try {
|
||||
const stream: ReadableStream<Uint8Array> | undefined = (typeof (f as any).stream === "function" ? (f as any).stream() : undefined);
|
||||
@@ -100,6 +164,12 @@ export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
contentPreview: text.slice(0, 4000),
|
||||
};
|
||||
setFileMeta(metaObj);
|
||||
if (isCSV) {
|
||||
const parsed = tryParseCSV(text);
|
||||
setTablePreview(parsed);
|
||||
} else {
|
||||
setTablePreview(null);
|
||||
}
|
||||
// Save file blob and meta to browser cache (IndexedDB)
|
||||
try {
|
||||
await saveLatestUpload(f, metaObj);
|
||||
@@ -114,6 +184,7 @@ export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
contentPreview: "Unable to decode preview.",
|
||||
};
|
||||
setFileMeta(metaObj);
|
||||
setTablePreview(null);
|
||||
try {
|
||||
await saveLatestUpload(f, metaObj);
|
||||
} catch {}
|
||||
@@ -163,13 +234,13 @@ export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
}, [tab]);
|
||||
|
||||
function renderTabContent() {
|
||||
switch (tab) {
|
||||
switch (tab) {
|
||||
case "processing":
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
return (
|
||||
<div className="space-y-4 max-w-[1100px] xl:max-w-[1200px] w-full mx-auto">
|
||||
<h2 className="text-xl font-semibold">Upload & Process Data</h2>
|
||||
<p className="text-sm text-slate-600">Upload a CSV / JSON / text file. We will later parse, detect PII, and queue analyses.</p>
|
||||
<div className="flex flex-col gap-3">
|
||||
<div className="flex flex-col gap-3 min-w-0">
|
||||
<div
|
||||
onDragOver={onDragOver}
|
||||
onDragLeave={onDragLeave}
|
||||
@@ -219,10 +290,34 @@ export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
<div className="mb-2 text-[11px] text-brand-700">Loaded from browser cache</div>
|
||||
)}
|
||||
<div className="mb-3 text-xs text-slate-500">{fileMeta.type || "Unknown type"}</div>
|
||||
<pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
|
||||
{fileMeta.contentPreview || "(no preview)"}
|
||||
</pre>
|
||||
<div className="mt-3 flex justify-end">
|
||||
{/* Table preview when structured data detected; otherwise show text */}
|
||||
{tablePreview && tablePreview.origin === 'csv' ? (
|
||||
<div className="max-h-64 w-full min-w-0 overflow-x-auto overflow-y-auto rounded-md bg-slate-50">
|
||||
<table className="min-w-full text-xs">
|
||||
<thead className="sticky top-0 bg-slate-100">
|
||||
<tr>
|
||||
{tablePreview.headers.map((h, idx) => (
|
||||
<th key={idx} className="text-left font-semibold px-3 py-2 border-b border-slate-200 whitespace-nowrap">{h}</th>
|
||||
))}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{tablePreview.rows.map((r, i) => (
|
||||
<tr key={i} className={i % 2 === 0 ? "" : "bg-slate-100/50"}>
|
||||
{r.map((c, j) => (
|
||||
<td key={j} className="px-3 py-1.5 border-b border-slate-100 whitespace-nowrap max-w-[24ch] overflow-hidden text-ellipsis">{c}</td>
|
||||
))}
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
) : (
|
||||
<pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
|
||||
{fileMeta.contentPreview || "(no preview)"}
|
||||
</pre>
|
||||
)}
|
||||
<div className="mt-3 flex justify-end gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={async () => {
|
||||
@@ -234,6 +329,13 @@ export function CenterPanel({ tab }: CenterPanelProps) {
|
||||
>
|
||||
Clear
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => onAnalyze?.()}
|
||||
className="text-xs rounded-md bg-brand-600 text-white px-3 py-1.5 hover:bg-brand-500"
|
||||
>
|
||||
Analyze
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
@@ -1,28 +0,0 @@
# Nordic Privacy AI

AI-Powered GDPR compliance & personal data protection platform tailored for Nordic ecosystems (BankID, MitID, Suomi.fi).

## Tech Stack
- Next.js (App Router, TypeScript)
- Tailwind CSS

## Getting Started
```powershell
npm install
npm run dev
```
Visit http://localhost:3000 to view the landing page.

## Scripts
- `npm run dev` – Start dev server
- `npm run build` – Production build
- `npm start` – Run built app
- `npm run lint` – ESLint

## Next Steps
- Implement /try page workflow
- Add feature sections & agent explanations
- Integrate backend services for data upload & scanning

## License
Internal hackathon prototype
113 frontend/nordic-privacy-ai/lib/idb.ts (new file)
@@ -0,0 +1,113 @@
|
||||
/**
|
||||
* IndexedDB utilities for persisting file uploads in the browser.
|
||||
* Stores the latest uploaded file and its metadata for recovery across sessions.
|
||||
*/
|
||||
|
||||
const DB_NAME = "NordicPrivacyAI";
|
||||
const DB_VERSION = 1;
|
||||
const STORE_NAME = "latestUpload";
|
||||
|
||||
interface UploadedFileMeta {
|
||||
name: string;
|
||||
size: number;
|
||||
type: string;
|
||||
contentPreview: string;
|
||||
}
|
||||
|
||||
interface LatestUploadData {
|
||||
file: File;
|
||||
meta: UploadedFileMeta;
|
||||
timestamp: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open or create the IndexedDB database
|
||||
*/
|
||||
function openDB(): Promise<IDBDatabase> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const request = indexedDB.open(DB_NAME, DB_VERSION);
|
||||
|
||||
request.onerror = () => reject(request.error);
|
||||
request.onsuccess = () => resolve(request.result);
|
||||
|
||||
request.onupgradeneeded = (event) => {
|
||||
const db = (event.target as IDBOpenDBRequest).result;
|
||||
|
||||
// Create object store if it doesn't exist
|
||||
if (!db.objectStoreNames.contains(STORE_NAME)) {
|
||||
db.createObjectStore(STORE_NAME);
|
||||
}
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Save the latest uploaded file and its metadata to IndexedDB
|
||||
*/
|
||||
export async function saveLatestUpload(
|
||||
file: File,
|
||||
meta: UploadedFileMeta
|
||||
): Promise<void> {
|
||||
const db = await openDB();
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const transaction = db.transaction([STORE_NAME], "readwrite");
|
||||
const store = transaction.objectStore(STORE_NAME);
|
||||
|
||||
const data: LatestUploadData = {
|
||||
file,
|
||||
meta,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
const request = store.put(data, "latest");
|
||||
|
||||
request.onerror = () => reject(request.error);
|
||||
request.onsuccess = () => resolve();
|
||||
|
||||
transaction.oncomplete = () => db.close();
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the latest uploaded file and metadata from IndexedDB
|
||||
*/
|
||||
export async function getLatestUpload(): Promise<LatestUploadData> {
|
||||
const db = await openDB();
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const transaction = db.transaction([STORE_NAME], "readonly");
|
||||
const store = transaction.objectStore(STORE_NAME);
|
||||
const request = store.get("latest");
|
||||
|
||||
request.onerror = () => reject(request.error);
|
||||
request.onsuccess = () => {
|
||||
const result = request.result as LatestUploadData | undefined;
|
||||
if (result) {
|
||||
resolve(result);
|
||||
} else {
|
||||
reject(new Error("No cached upload found"));
|
||||
}
|
||||
};
|
||||
|
||||
transaction.oncomplete = () => db.close();
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the latest upload from IndexedDB
|
||||
*/
|
||||
export async function deleteLatestUpload(): Promise<void> {
|
||||
const db = await openDB();
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const transaction = db.transaction([STORE_NAME], "readwrite");
|
||||
const store = transaction.objectStore(STORE_NAME);
|
||||
const request = store.delete("latest");
|
||||
|
||||
request.onerror = () => reject(request.error);
|
||||
request.onsuccess = () => resolve();
|
||||
|
||||
transaction.oncomplete = () => db.close();
|
||||
});
|
||||
}
|
||||
5 frontend/nordic-privacy-ai/next-env.d.ts (vendored)
@@ -1,5 +0,0 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />

// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.
@@ -10,3 +10,14 @@ presidio-analyzer>=2.2.0
presidio-anonymizer>=2.2.0
spacy>=3.7.0
# Download spaCy model with: python -m spacy download en_core_web_sm

# FastAPI Backend
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6

# Optional: GPU Support (uncomment if you have CUDA)
# torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121

# Chatbot (WIP - not exposed in API yet)
gpt4all>=2.0.0
145 src/__init__.py
@@ -1,145 +0,0 @@
|
||||
"""
|
||||
AI Governance Module - Bias Detection and Risk Analysis
|
||||
"""
|
||||
|
||||
from .data_processor import DataProcessor
|
||||
from .model_trainer import GeneralizedModelTrainer
|
||||
from .bias_analyzer import BiasAnalyzer
|
||||
from .risk_analyzer import RiskAnalyzer
|
||||
from .report_generator import ReportGenerator, NumpyEncoder
|
||||
|
||||
import pandas as pd
|
||||
import json
|
||||
|
||||
__version__ = '1.0.0'
|
||||
|
||||
__all__ = [
|
    'DataProcessor',
    'GeneralizedModelTrainer',
    'BiasAnalyzer',
    'RiskAnalyzer',
    'ReportGenerator',
    'NumpyEncoder',
    'AIGovernanceAnalyzer'
]


class AIGovernanceAnalyzer:
    """
    Main interface for AI Governance analysis

    Example:
        >>> analyzer = AIGovernanceAnalyzer()
        >>> report = analyzer.analyze('data.csv', 'target', ['gender', 'age'])
        >>> print(f"Bias Score: {report['summary']['overall_bias_score']:.3f}")
    """

    def __init__(self):
        """Initialize the analyzer"""
        self.processor = None
        self.trainer = None
        self.bias_analyzer = None
        self.risk_analyzer = None
        self.report_generator = None

    def analyze(self, data_path, target_column, protected_attributes):
        """
        Run complete AI governance analysis from file

        Args:
            data_path (str): Path to CSV file
            target_column (str): Name of target column
            protected_attributes (list): List of protected attribute column names

        Returns:
            dict: Complete analysis report
        """
        df = pd.read_csv(data_path)
        return self.analyze_dataframe(df, target_column, protected_attributes)

    def analyze_dataframe(self, df, target_column, protected_attributes):
        """
        Run complete AI governance analysis from DataFrame

        Args:
            df (pd.DataFrame): Input dataframe
            target_column (str): Name of target column
            protected_attributes (list): List of protected attribute column names

        Returns:
            dict: Complete analysis report
        """
        # Step 1: Process data
        self.processor = DataProcessor(df)
        self.processor.target_column = target_column
        self.processor.protected_attributes = protected_attributes
        self.processor.prepare_data()

        # Step 2: Train model
        self.trainer = GeneralizedModelTrainer(
            self.processor.X_train,
            self.processor.X_test,
            self.processor.y_train,
            self.processor.y_test,
            self.processor.feature_names
        )
        self.trainer.train()
        self.trainer.evaluate()

        # Step 3: Analyze bias
        self.bias_analyzer = BiasAnalyzer(
            self.processor.X_test,
            self.processor.y_test,
            self.trainer.y_pred,
            self.processor.df,
            self.processor.protected_attributes,
            self.processor.target_column
        )
        bias_results = self.bias_analyzer.analyze()

        # Step 4: Assess risks
        self.risk_analyzer = RiskAnalyzer(
            self.processor.df,
            self.trainer.results,
            bias_results,
            self.processor.protected_attributes,
            self.processor.target_column
        )
        risk_results = self.risk_analyzer.analyze()

        # Step 5: Generate report
        self.report_generator = ReportGenerator(
            self.trainer.results,
            bias_results,
            risk_results,
            self.processor.df
        )

        return self.report_generator.generate_report()

    def save_report(self, report, output_path):
        """
        Save report to JSON file

        Args:
            report (dict): Analysis report
            output_path (str): Path to save JSON file

        Returns:
            str: Path to saved file
        """
        with open(output_path, 'w') as f:
            json.dump(report, f, indent=2, cls=NumpyEncoder)
        return output_path

    def get_summary(self, report):
        """
        Get executive summary from report

        Args:
            report (dict): Analysis report

        Returns:
            dict: Summary metrics
        """
        return report.get('summary', {})
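For orientation, a minimal usage sketch of the class above; the file name, column names, and output path are placeholders taken from the docstring example, not fixed project paths:

analyzer = AIGovernanceAnalyzer()
report = analyzer.analyze('data.csv', 'target', ['gender', 'age'])  # placeholder CSV and columns

# Executive summary is a plain dict (see get_summary above); keys follow the report's 'summary' section
summary = analyzer.get_summary(report)
print(f"Risk level: {summary.get('risk_level', 'UNKNOWN')}")
print(f"Bias score: {summary.get('overall_bias_score', 0.0):.3f}")

# Persist the full report; NumpyEncoder handles numpy scalars and arrays
analyzer.save_report(report, 'governance_report.json')  # placeholder output path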
@@ -1,263 +0,0 @@
"""
Report Generator Module
Generates comprehensive JSON reports
"""

import json
import numpy as np
from datetime import datetime


class NumpyEncoder(json.JSONEncoder):
    """Custom JSON encoder for numpy types"""
    def default(self, obj):
        if isinstance(obj, (np.integer, np.int64, np.int32)):
            return int(obj)
        elif isinstance(obj, (np.floating, np.float64, np.float32)):
            return float(obj)
        elif isinstance(obj, (np.ndarray,)):
            return obj.tolist()
        elif isinstance(obj, (np.bool_,)):
            return bool(obj)
        return super(NumpyEncoder, self).default(obj)

class ReportGenerator:
    """Generate comprehensive analysis reports"""

    def __init__(self, model_results, bias_results, risk_results, df):
        self.model_results = model_results
        self.bias_results = bias_results
        self.risk_results = risk_results
        self.df = df

    def generate_report(self):
        """Generate comprehensive JSON report"""
        report = {
            'metadata': self._generate_metadata(),
            'summary': self._generate_summary(),
            'model_performance': self._format_model_results(),
            'bias_analysis': self._format_bias_results(),
            'risk_assessment': self._format_risk_results(),
            'key_findings': self._extract_key_findings(),
            'recommendations': self._compile_recommendations(),
            'detailed_metrics': self._compile_detailed_metrics()
        }

        return report

    def _generate_metadata(self):
        """Generate report metadata"""
        return {
            'report_id': f"AIGov_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            'generated_at': datetime.now().isoformat(),
            'report_version': '1.0',
            'dataset_info': {
                'total_records': len(self.df),
                'total_features': len(self.df.columns),
                'columns': list(self.df.columns)
            }
        }

    def _generate_summary(self):
        """Generate executive summary"""
        model_metrics = self.model_results.get('metrics', {})

        return {
            'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
            'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
            'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
            'model_accuracy': model_metrics.get('accuracy', 0.0),
            'fairness_violations_count': len(self.bias_results.get('fairness_violations', [])),
            'passes_fairness_threshold': self.bias_results.get('fairness_assessment', {}).get('passes_fairness_threshold', False)
        }

    def _format_model_results(self):
        """Format model performance results"""
        return {
            'model_type': self.model_results.get('model_type', 'Unknown'),
            'metrics': self.model_results.get('metrics', {}),
            'confusion_matrix': self.model_results.get('confusion_matrix', []),
            'top_features': dict(list(self.model_results.get('feature_importance', {}).items())[:10])
        }

    def _format_bias_results(self):
        """Format bias analysis results"""
        return {
            'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
            'fairness_metrics': self.bias_results.get('fairness_metrics', {}),
            'fairness_violations': self.bias_results.get('fairness_violations', []),
            'fairness_assessment': self.bias_results.get('fairness_assessment', {}),
            'demographic_bias_summary': self._summarize_demographic_bias()
        }

    def _format_risk_results(self):
        """Format risk assessment results"""
        return {
            'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
            'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
            'risk_categories': self.risk_results.get('risk_categories', {}),
            'privacy_risks': self._summarize_privacy_risks(),
            'ethical_risks': self._summarize_ethical_risks()
        }

    def _summarize_demographic_bias(self):
        """Summarize demographic bias"""
        demo_bias = self.bias_results.get('demographic_bias', {})
        summary = {}

        for attr, data in demo_bias.items():
            summary[attr] = {
                'max_disparity': data.get('max_disparity', 0),
                'groups_analyzed': len(data.get('approval_rates', {}))
            }

        return summary

    def _summarize_privacy_risks(self):
        """Summarize privacy risks"""
        privacy = self.risk_results.get('privacy_risks', {})

        return {
            'pii_count': len(privacy.get('pii_detected', [])),
            'anonymization_level': privacy.get('anonymization_level', 'UNKNOWN'),
            'exposure_risk_count': len(privacy.get('exposure_risks', [])),
            'gdpr_compliance_score': privacy.get('gdpr_compliance', {}).get('compliance_score', 0)
        }

    def _summarize_ethical_risks(self):
        """Summarize ethical risks"""
        ethical = self.risk_results.get('ethical_risks', {})

        return {
            'fairness_issues_count': len(ethical.get('fairness_issues', [])),
            'transparency_score': ethical.get('transparency_score', 0),
            'bias_amplification_risk': ethical.get('bias_amplification_risk', 'UNKNOWN'),
            'social_impact': ethical.get('social_impact_assessment', {})
        }

    def _extract_key_findings(self):
        """Extract key findings from analysis"""
        findings = []

        # Model performance findings
        accuracy = self.model_results.get('metrics', {}).get('accuracy', 0)
        if accuracy >= 0.8:
            findings.append(f"✓ Model achieves good accuracy ({accuracy:.2%})")
        else:
            findings.append(f"⚠ Model accuracy is below optimal ({accuracy:.2%})")

        # Bias findings
        bias_score = self.bias_results.get('overall_bias_score', 0)
        if bias_score < 0.3:
            findings.append("✓ Low bias detected across protected attributes")
        elif bias_score < 0.5:
            findings.append("⚠ Moderate bias detected - monitoring recommended")
        else:
            findings.append("❌ High bias detected - immediate action required")

        # Fairness violations
        violations = self.bias_results.get('fairness_violations', [])
        if violations:
            high_sev = sum(1 for v in violations if v['severity'] == 'HIGH')
            findings.append(f"❌ {len(violations)} fairness violations detected ({high_sev} high severity)")
        else:
            findings.append("✓ No fairness violations detected")

        # Privacy findings
        privacy = self.risk_results.get('privacy_risks', {})
        pii_count = len(privacy.get('pii_detected', []))
        if pii_count > 0:
            findings.append(f"⚠ {pii_count} columns contain potential PII")
        else:
            findings.append("✓ No obvious PII detected in dataset")

        # Risk level
        risk_level = self.risk_results.get('risk_level', 'UNKNOWN')
        findings.append(f"Overall Risk Level: {risk_level}")

        return findings

    def _compile_recommendations(self):
        """Compile all recommendations"""
        recommendations = []

        # Get recommendations from each component
        privacy_recs = self.risk_results.get('privacy_risks', {}).get('recommendations', [])
        ethical_recs = self.risk_results.get('ethical_risks', {}).get('recommendations', [])
        performance_recs = self.risk_results.get('model_performance_risks', {}).get('recommendations', [])
        compliance_recs = self.risk_results.get('compliance_risks', {}).get('recommendations', [])

        # Prioritize recommendations
        all_recs = []

        # High priority (from violations and high risks)
        violations = self.bias_results.get('fairness_violations', [])
        if violations:
            all_recs.append({
                'priority': 'HIGH',
                'category': 'Fairness',
                'recommendation': 'Address fairness violations in protected attributes'
            })

        if len(privacy_recs) > 0:
            all_recs.append({
                'priority': 'HIGH',
                'category': 'Privacy',
                'recommendation': privacy_recs[0]
            })

        # Medium priority
        for rec in ethical_recs[:2]:
            all_recs.append({
                'priority': 'MEDIUM',
                'category': 'Ethics',
                'recommendation': rec
            })

        # Lower priority
        for rec in performance_recs[:2]:
            all_recs.append({
                'priority': 'MEDIUM',
                'category': 'Performance',
                'recommendation': rec
            })

        for rec in compliance_recs[:2]:
            all_recs.append({
                'priority': 'MEDIUM',
                'category': 'Compliance',
                'recommendation': rec
            })

        # Convert to simple list with formatting
        recommendations = [
            f"[{r['priority']}] {r['category']}: {r['recommendation']}"
            for r in all_recs[:10]  # Limit to top 10
        ]

        return recommendations

    def _compile_detailed_metrics(self):
        """Compile detailed metrics for analysis"""
        return {
            'bias_metrics': {
                'by_attribute': self.bias_results.get('fairness_metrics', {}),
                'demographic_analysis': self.bias_results.get('demographic_bias', {})
            },
            'risk_breakdown': {
                'privacy': self.risk_results.get('privacy_risks', {}),
                'ethical': self.risk_results.get('ethical_risks', {}),
                'compliance': self.risk_results.get('compliance_risks', {}),
                'data_quality': self.risk_results.get('data_quality_risks', {})
            },
            'model_details': {
                'classification_report': self.model_results.get('classification_report', {}),
                'feature_importance': self.model_results.get('feature_importance', {})
            }
        }

    def save_report(self, filepath):
        """Save report to JSON file"""
        report = self.generate_report()
        with open(filepath, 'w') as f:
            json.dump(report, f, indent=2, cls=NumpyEncoder)
        return filepath
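The report above is serialized with the custom NumpyEncoder; a short illustration of why the encoder is needed, assuming NumpyEncoder is importable from the module above and using a hypothetical metrics fragment:

import json
import numpy as np

# Hypothetical fragment of model metrics containing numpy types
metrics = {
    'accuracy': np.float64(0.87),
    'support': np.int64(120),
    'confusion_matrix': np.array([[50, 5], [8, 37]])
}

# Without the encoder, json.dumps fails on the int64 and ndarray values;
# NumpyEncoder converts them to native int/float/list before serialization
print(json.dumps(metrics, indent=2, cls=NumpyEncoder))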
21
start_api.py
Normal file
@@ -0,0 +1,21 @@
"""
Start the FastAPI server
Run: python start_api.py
"""

import uvicorn

if __name__ == "__main__":
    print("🚀 Starting Nordic Privacy AI API Server...")
    print("📍 API will be available at: http://localhost:8000")
    print("📖 Interactive docs at: http://localhost:8000/docs")
    print("🔗 Frontend should run at: http://localhost:3000")
    print("\nPress CTRL+C to stop\n")

    uvicorn.run(
        "api.main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,  # Auto-reload on code changes
        log_level="info"
    )
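Once the script is running, a quick reachability check against the address printed above; a minimal sketch using only the standard library (the root path and response body depend on how the API defines its endpoints):

import urllib.request

# Hit the locally running server on the default port printed above
with urllib.request.urlopen("http://localhost:8000/") as resp:
    print(resp.status)
    print(resp.read().decode())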
@@ -10,7 +10,7 @@ import os
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from cleaning import DataCleaner, CleaningConfig
from data_cleaning import DataCleaner


def test_basic_cleaning():