Mirror of https://github.com/PlatypusPus/MushroomEmpire.git (synced 2026-02-07 22:18:59 +00:00)

Commit: merge
.gitignore (vendored, 19 changed lines)

@@ -11,7 +11,7 @@ dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
@@ -48,9 +48,11 @@ Thumbs.db
 # Streamlit
 .streamlit/secrets.toml
 
-# Reports
+# Reports and generated files
 reports/*.json
 reports/*.pdf
+reports/*.csv
+reports/*.html
 
 # Logs
 *.log
@@ -59,4 +61,17 @@ reports/*.pdf
 *.csv
 !Datasets/loan_data.csv
 
+# Node.js & Next.js
+node_modules/
+.next/
+out/
+.vercel
+*.tsbuildinfo
+next-env.d.ts
+
+# Frontend build artifacts
+frontend/nordic-privacy-ai/.next/
+frontend/nordic-privacy-ai/out/
+frontend/nordic-privacy-ai/node_modules/
+
 Data
@@ -21,6 +21,7 @@ class DataProcessor:
         self.categorical_features = []
         self.feature_names = []
         self.encoders = {}
+        self.target_encoder = None  # Add target encoder
         self.scaler = StandardScaler()
 
         self.X_train = None
@@ -75,6 +76,13 @@ class DataProcessor:
         X = self.df[feature_cols].copy()
         y = self.df[self.target_column].copy()
 
+        # Encode target variable if it's categorical
+        if y.dtype == 'object' or y.dtype.name == 'category':
+            self.target_encoder = LabelEncoder()
+            y_encoded = self.target_encoder.fit_transform(y)
+            y = pd.Series(y_encoded, index=y.index)
+            print(f"Target '{self.target_column}' encoded: {dict(enumerate(self.target_encoder.classes_))}")
+
         # Encode categorical variables
         for col in self.categorical_features:
            if col in X.columns:
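As a point of reference for the added target-encoding branch, a minimal standalone sketch of what the LabelEncoder round trip does to a string target; the column values below are invented for illustration and are not the repository's data:

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Hypothetical string-valued target, standing in for self.df[self.target_column]
y = pd.Series(["approved", "rejected", "approved"], name="loan_status")

target_encoder = LabelEncoder()
y_encoded = pd.Series(target_encoder.fit_transform(y), index=y.index)

# Same mapping the diff prints: class index mapped back to the original label
print(dict(enumerate(target_encoder.classes_)))
print(y_encoded.tolist())  # [0, 1, 0]

# target_encoder.inverse_transform([...]) recovers the original labels when needed
```
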
api/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure
api/main.py (new file, 72 lines)
@@ -0,0 +1,72 @@
"""
FastAPI Backend for Nordic Privacy AI
Provides endpoints for AI Governance analysis and data cleaning
"""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os

from api.routers import analyze, clean

# Create FastAPI app
app = FastAPI(
    title="Nordic Privacy AI API",
    description="AI-powered GDPR compliance, bias detection, and risk analysis",
    version="1.0.0"
)

# CORS configuration for Next.js frontend
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",  # Next.js dev server
        "http://127.0.0.1:3000",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount reports directory for file downloads
reports_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "reports")
os.makedirs(reports_dir, exist_ok=True)
app.mount("/reports", StaticFiles(directory=reports_dir), name="reports")

# Include routers
app.include_router(analyze.router, prefix="/api", tags=["AI Governance"])
app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"])

@app.get("/")
async def root():
    """Health check endpoint"""
    return {
        "status": "online",
        "service": "Nordic Privacy AI API",
        "version": "1.0.0",
        "endpoints": {
            "analyze": "/api/analyze",
            "clean": "/api/clean",
            "docs": "/docs"
        }
    }

@app.get("/health")
async def health_check():
    """Detailed health check"""
    try:
        import torch
        cuda_available = torch.cuda.is_available()
        gpu_name = torch.cuda.get_device_name(0) if cuda_available else None
    except:
        cuda_available = False
        gpu_name = None

    return {
        "status": "healthy",
        "gpu_acceleration": {
            "available": cuda_available,
            "device": gpu_name or "CPU"
        }
    }
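To exercise this app locally, a minimal launcher sketch; the launcher filename, host, and port are assumptions and are not part of the commit:

```python
# run_api.py - hypothetical local launcher; assumes uvicorn is installed alongside fastapi
import uvicorn

if __name__ == "__main__":
    # Serves the app defined in api/main.py; GET /health probes GPU availability,
    # and /docs exposes the auto-generated OpenAPI UI.
    uvicorn.run("api.main:app", host="127.0.0.1", port=8000, reload=True)
```
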
api/routers/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure
api/routers/analyze.py (new file, 141 lines)
@@ -0,0 +1,141 @@
"""
AI Governance Analysis Router
Handles bias detection and risk analysis endpoints
"""

from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import pandas as pd
import numpy as np
import io
import os
import json
from datetime import datetime
from typing import Dict, Any

# Import AI Governance modules
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from ai_governance import AIGovernanceAnalyzer

router = APIRouter()


def convert_to_serializable(obj):
    """Convert numpy/pandas types to native Python types for JSON serialization"""
    if isinstance(obj, (np.integer, np.int64, np.int32)):
        return int(obj)
    elif isinstance(obj, (np.floating, np.float64, np.float32)):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    return obj


@router.post("/analyze")
async def analyze_dataset(file: UploadFile = File(...)):
    """
    Analyze uploaded dataset for bias and risk

    - **file**: CSV file to analyze

    Returns:
    - Analysis results (bias metrics, risk assessment)
    - Report file path for download
    """
    # Validate file type
    if not file.filename.endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are supported")

    try:
        # Read uploaded file
        contents = await file.read()
        df = pd.read_csv(io.BytesIO(contents))

        if df.empty:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Initialize AI Governance Analyzer
        analyzer = AIGovernanceAnalyzer()

        # Auto-detect target column and protected attributes
        # Target: Last column (common convention) or first binary/categorical column
        target_column = df.columns[-1]

        # Protected attributes: Common sensitive columns
        protected_keywords = ['gender', 'age', 'race', 'sex', 'ethnicity', 'religion', 'nationality']
        protected_attributes = [col for col in df.columns
                                if any(keyword in col.lower() for keyword in protected_keywords)]

        # If no protected attributes found, use first few categorical columns
        if not protected_attributes:
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            protected_attributes = [col for col in categorical_cols if col != target_column][:3]

        print(f"Analyzing dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
        print(f"Target column: {target_column}")
        print(f"Protected attributes: {protected_attributes}")

        # Run analysis
        report = analyzer.analyze_dataframe(df, target_column, protected_attributes)

        # Generate report filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_filename = (file.filename or "dataset").replace('.csv', '')
        report_filename = f"governance_report_{safe_filename}_{timestamp}.json"
        report_path = os.path.join("reports", report_filename)

        # Save full report to disk
        full_report_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            report_path
        )
        analyzer.save_report(report, full_report_path)

        # Prepare response with summary
        response_data = {
            "status": "success",
            "filename": file.filename,
            "dataset_info": {
                "rows": len(df),
                "columns": len(df.columns),
                "features": list(df.columns)
            },
            "model_performance": {
                "accuracy": report.get("model_metrics", {}).get("accuracy", 0),
                "precision": report.get("model_metrics", {}).get("precision", 0),
                "recall": report.get("model_metrics", {}).get("recall", 0),
                "f1_score": report.get("model_metrics", {}).get("f1_score", 0)
            },
            "bias_metrics": {
                "overall_bias_score": report.get("bias_metrics", {}).get("overall_bias_score", 0),
                "disparate_impact": report.get("bias_metrics", {}).get("disparate_impact", {}),
                "statistical_parity": report.get("bias_metrics", {}).get("statistical_parity_difference", {}),
                "violations_detected": report.get("bias_metrics", {}).get("fairness_violations", [])
            },
            "risk_assessment": {
                "overall_risk_score": report.get("risk_metrics", {}).get("overall_risk_score", 0),
                "privacy_risks": report.get("risk_metrics", {}).get("privacy_risks", []),
                "ethical_risks": report.get("risk_metrics", {}).get("ethical_risks", []),
                "compliance_risks": report.get("risk_metrics", {}).get("compliance_risks", []),
                "data_quality_risks": report.get("risk_metrics", {}).get("data_quality_risks", [])
            },
            "recommendations": report.get("recommendations", []),
            "report_file": f"/{report_path}",
            "timestamp": datetime.now().isoformat()
        }

        # Convert all numpy/pandas types to native Python types
        response_data = convert_to_serializable(response_data)

        return JSONResponse(content=response_data)

    except pd.errors.EmptyDataError:
        raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
    except Exception as e:
        print(f"Error during analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
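A minimal client sketch for this endpoint, assuming the API from api/main.py is running locally on port 8000 and that a CSV such as Datasets/loan_data.csv is available (both assumptions):

```python
import requests

# Upload a CSV for bias/risk analysis; the multipart field name must be "file"
with open("Datasets/loan_data.csv", "rb") as fh:
    resp = requests.post(
        "http://127.0.0.1:8000/api/analyze",
        files={"file": ("loan_data.csv", fh, "text/csv")},
    )

resp.raise_for_status()
result = resp.json()
print(result["bias_metrics"]["overall_bias_score"])
print(result["report_file"])  # e.g. "/reports/governance_report_...", served via the /reports mount
```
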
api/routers/clean.py (new file, 142 lines)
@@ -0,0 +1,142 @@
"""
Data Cleaning Router
Handles PII detection and anonymization endpoints
"""

from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import pandas as pd
import numpy as np
import io
import os
from datetime import datetime
from typing import Dict, Any

# Import cleaning module
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from data_cleaning import DataCleaner

router = APIRouter()


def convert_to_serializable(obj):
    """Convert numpy/pandas types to native Python types for JSON serialization"""
    if isinstance(obj, (np.integer, np.int64, np.int32)):
        return int(obj)
    elif isinstance(obj, (np.floating, np.float64, np.float32)):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    return obj


@router.post("/clean")
async def clean_dataset(file: UploadFile = File(...)):
    """
    Clean uploaded dataset - detect and anonymize PII

    - **file**: CSV file to clean

    Returns:
    - Cleaned dataset statistics
    - PII detections and anonymization actions
    - Report file path for download
    - Cleaned CSV file path for download
    """
    # Validate file type
    if not file.filename.endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are supported")

    try:
        # Read uploaded file
        contents = await file.read()
        df = pd.read_csv(io.BytesIO(contents))

        if df.empty:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Initialize Data Cleaner (with GPU if available)
        print(f"Cleaning dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
        cleaner = DataCleaner(df, use_gpu=True)

        # Run cleaning (non-interactive mode for API)
        cleaned_df, audit_report = cleaner.clean(
            risky_features=None,   # Auto-detect
            interactive=False,     # No user prompts in API mode
            scan_all_cells=True
        )

        # Generate filenames
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_filename = file.filename.replace('.csv', '')

        # Save cleaned CSV
        cleaned_csv_filename = f"cleaned_{safe_filename}_{timestamp}.csv"
        cleaned_csv_path = os.path.join("reports", cleaned_csv_filename)
        full_cleaned_csv_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            cleaned_csv_path
        )
        cleaner.save_cleaned_data(cleaned_df, full_cleaned_csv_path)

        # Save audit report
        audit_report_filename = f"cleaning_audit_{safe_filename}_{timestamp}.json"
        audit_report_path = os.path.join("reports", audit_report_filename)
        full_audit_report_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            audit_report_path
        )
        cleaner.save_audit_report(audit_report, full_audit_report_path)

        # Prepare response
        response_data = {
            "status": "success",
            "filename": file.filename,
            "dataset_info": {
                "original_rows": int(audit_report["metadata"]["original_rows"]),
                "original_columns": int(audit_report["metadata"]["original_columns"]),
                "cleaned_rows": int(audit_report["metadata"]["cleaned_rows"]),
                "cleaned_columns": int(audit_report["metadata"]["cleaned_columns"])
            },
            "gpu_acceleration": audit_report["metadata"].get("gpu_acceleration", {
                "enabled": False,
                "device": "CPU"
            }),
            "summary": {
                "columns_removed": audit_report["summary"]["columns_removed"],
                "columns_anonymized": audit_report["summary"]["columns_anonymized"],
                "total_cells_affected": int(audit_report["summary"]["total_cells_affected"])
            },
            "pii_detections": {
                col: {
                    "action": details["action"],
                    "entity_types": details["entity_types_found"],
                    "num_affected_rows": int(details.get("num_affected_rows", 0)),
                    "examples": details.get("examples", [])[:2]  # Show 2 examples
                }
                for col, details in audit_report["details"].items()
            },
            "gdpr_compliance": audit_report["compliance"]["gdpr_articles_applied"],
            "files": {
                "cleaned_csv": f"/{cleaned_csv_path}",
                "audit_report": f"/{audit_report_path}"
            },
            "timestamp": datetime.now().isoformat()
        }

        # Convert all numpy/pandas types to native Python types
        response_data = convert_to_serializable(response_data)

        return JSONResponse(content=response_data)

    except pd.errors.EmptyDataError:
        raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
    except Exception as e:
        print(f"Error during cleaning: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Cleaning failed: {str(e)}")
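A matching client sketch for /api/clean under the same assumptions (local server on port 8000, illustrative CSV path), including fetching the cleaned file back through the /reports static mount:

```python
import requests

BASE = "http://127.0.0.1:8000"  # assumed local dev address

with open("Datasets/loan_data.csv", "rb") as fh:
    resp = requests.post(f"{BASE}/api/clean",
                         files={"file": ("loan_data.csv", fh, "text/csv")})
resp.raise_for_status()
result = resp.json()

print(result["summary"]["total_cells_affected"], "cells anonymized")

# "cleaned_csv" is a path like "/reports/cleaned_...", served by the mount in api/main.py
cleaned = requests.get(BASE + result["files"]["cleaned_csv"])
with open("cleaned_output.csv", "wb") as out:
    out.write(cleaned.content)
```
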
api/utils/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# Empty __init__.py files for Python package structure
data_cleaning/__init__.py (new file, 12 lines)
@@ -0,0 +1,12 @@
"""
Data Cleaning Module
Automated PII detection and GDPR-compliant anonymization
"""

from .cleaner import DataCleaner

__version__ = '1.0.0'

__all__ = [
    'DataCleaner'
]
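For use outside the HTTP API, a sketch of driving DataCleaner directly, following the call pattern shown in api/routers/clean.py; the input and output paths here are assumptions:

```python
import pandas as pd
from data_cleaning import DataCleaner

df = pd.read_csv("Datasets/loan_data.csv")

# Constructed "with GPU if available", mirroring the API route
cleaner = DataCleaner(df, use_gpu=True)
cleaned_df, audit_report = cleaner.clean(
    risky_features=None,   # auto-detect PII-bearing columns
    interactive=False,     # no prompts, as in API mode
    scan_all_cells=True,
)

cleaner.save_cleaned_data(cleaned_df, "reports/cleaned_loan_data.csv")
cleaner.save_audit_report(audit_report, "reports/cleaning_audit_loan_data.json")
print(audit_report["summary"]["columns_anonymized"])
```
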
@@ -14,7 +14,7 @@ export default function TryPage() {
       <div className="flex flex-1 min-h-0">
         <Sidebar value={tab} onChange={setTab} />
         <div className="flex-1 min-h-0 flex">
-          <div className="flex-1 min-h-0"><CenterPanel tab={tab} /></div>
+          <div className="flex-1 min-h-0 min-w-0"><CenterPanel tab={tab} onAnalyze={() => setTab("bias-analysis")} /></div>
           <div className="w-[360px] hidden xl:block"><ChatbotPanel /></div>
         </div>
       </div>
frontend/components/try/CenterPanel.tsx (new file, 620 lines)
@@ -0,0 +1,620 @@
"use client";
import { TryTab } from "./Sidebar";
import { useState, useRef, useCallback, useEffect } from "react";
import { saveLatestUpload, getLatestUpload, deleteLatestUpload } from "../../lib/indexeddb";
import { analyzeDataset, cleanDataset, getReportUrl, type AnalyzeResponse, type CleanResponse } from "../../lib/api";

interface CenterPanelProps {
  tab: TryTab;
  onAnalyze?: () => void;
}

interface UploadedFileMeta {
  name: string;
  size: number;
  type: string;
  contentPreview: string;
}

interface TablePreviewData {
  headers: string[];
  rows: string[][];
  origin: 'csv';
}

export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
  const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview
  const [fileMeta, setFileMeta] = useState<UploadedFileMeta | null>(null);
  const [uploadedFile, setUploadedFile] = useState<File | null>(null);
  const [isDragging, setIsDragging] = useState(false);
  const [progress, setProgress] = useState<number>(0);
  const [progressLabel, setProgressLabel] = useState<string>("Processing");
  const [tablePreview, setTablePreview] = useState<TablePreviewData | null>(null);
  const inputRef = useRef<HTMLInputElement | null>(null);
  const [loadedFromCache, setLoadedFromCache] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Analysis results
  const [analyzeResult, setAnalyzeResult] = useState<AnalyzeResponse | null>(null);
  const [cleanResult, setCleanResult] = useState<CleanResponse | null>(null);

  const reset = () => {
    setFileMeta(null);
    setUploadedFile(null);
    setProgress(0);
    setProgressLabel("Processing");
    setTablePreview(null);
    setError(null);
  };

  // Handle API calls
  const handleAnalyze = async () => {
    if (!uploadedFile) {
      setError("No file uploaded");
      return;
    }

    setIsProcessing(true);
    setError(null);
    setProgressLabel("Analyzing dataset...");

    try {
      const result = await analyzeDataset(uploadedFile);
      setAnalyzeResult(result);
      setProgressLabel("Analysis complete!");
      onAnalyze?.(); // Navigate to bias-analysis tab
    } catch (err: any) {
      setError(err.message || "Analysis failed");
    } finally {
      setIsProcessing(false);
    }
  };

  const handleClean = async () => {
    if (!uploadedFile) {
      setError("No file uploaded");
      return;
    }

    setIsProcessing(true);
    setError(null);
    setProgressLabel("Cleaning dataset...");

    try {
      const result = await cleanDataset(uploadedFile);
      setCleanResult(result);
      setProgressLabel("Cleaning complete!");
    } catch (err: any) {
      setError(err.message || "Cleaning failed");
    } finally {
      setIsProcessing(false);
    }
  };

  function tryParseCSV(text: string, maxRows = 50, maxCols = 40): TablePreviewData | null {
    const lines = text.split(/\r?\n/).filter(l => l.trim().length > 0);
    if (lines.length < 2) return null;
    const commaDensity = lines.slice(0, 10).filter(l => l.includes(',')).length;
    if (commaDensity < 2) return null;
    const parseLine = (line: string) => {
      const out: string[] = [];
      let cur = '';
      let inQuotes = false;
      for (let i = 0; i < line.length; i++) {
        const ch = line[i];
        if (ch === '"') {
          if (inQuotes && line[i + 1] === '"') { cur += '"'; i++; } else { inQuotes = !inQuotes; }
        } else if (ch === ',' && !inQuotes) {
          out.push(cur);
          cur = '';
        } else { cur += ch; }
      }
      out.push(cur);
      return out.map(c => c.trim());
    };
    const raw = lines.slice(0, maxRows).map(parseLine);
    if (raw.length === 0) return null;
    const headers = raw[0];
    const colCount = Math.min(headers.length, maxCols);
    const rows = raw.slice(1).map(r => r.slice(0, colCount));
    return { headers: headers.slice(0, colCount), rows, origin: 'csv' };
  }

  // We no longer build table preview for JSON; revert JSON to raw text view.

  const processFile = useCallback(async (f: File) => {
    if (!f) return;
    const isCSV = /\.csv$/i.test(f.name);
    setProgress(0);
    setUploadedFile(f); // Save the file for API calls

    // For large files, show a progress bar while reading the file stream (no preview)
    if (f.size > 1024 * 1024) {
      setProgressLabel("Uploading");
      const metaObj: UploadedFileMeta = {
        name: f.name,
        size: f.size,
        type: f.type || "unknown",
        contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES/1024)}KB)...`,
      };
      setFileMeta(metaObj);
      setTablePreview(null);
      // Save to IndexedDB immediately so it persists without needing full read
      (async () => {
        try { await saveLatestUpload(f, metaObj); } catch {}
      })();
      // Read head slice for partial preview & possible CSV table extraction
      try {
        const headBlob = f.slice(0, PREVIEW_BYTES);
        const headReader = new FileReader();
        headReader.onload = async () => {
          try {
            const buf = headReader.result as ArrayBuffer;
            const decoder = new TextDecoder();
            const text = decoder.decode(buf);
            setFileMeta(prev => prev ? { ...prev, contentPreview: text.slice(0, 4000) } : prev);
            if (isCSV) {
              const parsed = tryParseCSV(text);
              setTablePreview(parsed);
            } else {
              setTablePreview(null);
            }
            try { await saveLatestUpload(f, { ...metaObj, contentPreview: text.slice(0, 4000) }); } catch {}
          } catch { /* ignore */ }
        };
        headReader.readAsArrayBuffer(headBlob);
      } catch { /* ignore */ }
      // Use streaming read for progress without buffering entire file in memory
      try {
        const stream: ReadableStream<Uint8Array> | undefined = (typeof (f as any).stream === "function" ? (f as any).stream() : undefined);
        if (stream && typeof stream.getReader === "function") {
          const reader = stream.getReader();
          let loaded = 0;
          const total = f.size || 1;
          for (;;) {
            const { done, value } = await reader.read();
            if (done) break;
            loaded += value ? value.length : 0;
            const pct = Math.min(100, Math.round((loaded / total) * 100));
            setProgress(pct);
          }
          setProgress(100);
        } else {
          // Fallback to FileReader progress events
          const reader = new FileReader();
          reader.onprogress = (evt) => {
            if (evt.lengthComputable) {
              const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100));
              setProgress(pct);
            } else {
              setProgress((p) => (p < 90 ? p + 5 : p));
            }
          };
          reader.onloadend = () => setProgress(100);
          reader.onerror = () => setProgress(0);
          reader.readAsArrayBuffer(f);
        }
      } catch {
        setProgress(100);
      }
      return;
    }
    const reader = new FileReader();
    reader.onprogress = (evt) => {
      if (evt.lengthComputable) {
        const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100));
        setProgress(pct);
      } else {
        setProgress((p) => (p < 90 ? p + 5 : p));
      }
    };
    reader.onload = async () => {
      try {
        const buf = reader.result as ArrayBuffer;
        const decoder = new TextDecoder();
        const text = decoder.decode(buf);
        const metaObj: UploadedFileMeta = {
          name: f.name,
          size: f.size,
          type: f.type || "unknown",
          contentPreview: text.slice(0, 4000),
        };
        setFileMeta(metaObj);
        if (isCSV) {
          const parsed = tryParseCSV(text);
          setTablePreview(parsed);
        } else {
          setTablePreview(null);
        }
        // Save file blob and meta to browser cache (IndexedDB)
        try {
          await saveLatestUpload(f, metaObj);
        } catch {}
        setProgressLabel("Processing");
        setProgress(100);
      } catch (e) {
        const metaObj: UploadedFileMeta = {
          name: f.name,
          size: f.size,
          type: f.type || "unknown",
          contentPreview: "Unable to decode preview.",
        };
        setFileMeta(metaObj);
        setTablePreview(null);
        try {
          await saveLatestUpload(f, metaObj);
        } catch {}
        setProgressLabel("Processing");
        setProgress(100);
      }
    };
    reader.onerror = () => {
      setProgress(0);
    };
    reader.readAsArrayBuffer(f);
  }, []);

  function handleFileChange(e: React.ChangeEvent<HTMLInputElement>) {
    const f = e.target.files?.[0];
    processFile(f as File);
  }

  const onDragOver = (e: React.DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    setIsDragging(true);
  };
  const onDragLeave = () => setIsDragging(false);
  const onDrop = (e: React.DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    setIsDragging(false);
    const f = e.dataTransfer.files?.[0];
    processFile(f as File);
  };

  // Load last cached upload on mount (processing tab only)
  useEffect(() => {
    let ignore = false;
    if (tab !== "processing") return;
    (async () => {
      try {
        const { file, meta } = await getLatestUpload();
        if (!ignore && meta) {
          setFileMeta(meta as UploadedFileMeta);
          if (file) {
            setUploadedFile(file);
          }
          setLoadedFromCache(true);
        }
      } catch {}
    })();
    return () => {
      ignore = true;
    };
  }, [tab]);

  function renderTabContent() {
    switch (tab) {
      case "processing":
        return (
          <div className="space-y-4 max-w-[1100px] xl:max-w-[1200px] w-full mx-auto">
            <h2 className="text-xl font-semibold">Upload & Process Data</h2>
            <p className="text-sm text-slate-600">Upload a CSV / JSON / text file. We will later parse, detect PII, and queue analyses.</p>
            <div className="flex flex-col gap-3 min-w-0">
              <div
                onDragOver={onDragOver}
                onDragLeave={onDragLeave}
                onDrop={onDrop}
                className={
                  "rounded-lg border-2 border-dashed p-6 text-center transition-colors " +
                  (isDragging ? "border-brand-600 bg-brand-50" : "border-slate-300 hover:border-brand-300")
                }
              >
                <p className="text-sm text-slate-600">Drag & drop a CSV / JSON / TXT here, or click to browse.</p>
                <div className="mt-3">
                  <button
                    type="button"
                    onClick={() => inputRef.current?.click()}
                    className="inline-flex items-center rounded-md bg-brand-600 px-4 py-2 text-white text-sm font-medium shadow hover:bg-brand-500"
                  >
                    Choose file
                  </button>
                </div>
              </div>
              <input
                ref={inputRef}
                type="file"
                accept=".csv,.json,.txt"
                onChange={handleFileChange}
                className="hidden"
                aria-hidden
              />
              {progress > 0 && (
                <div className="w-full">
                  <div className="h-2 w-full rounded-full bg-slate-200 overflow-hidden">
                    <div
                      className="h-2 bg-brand-600 transition-all"
                      style={{ width: `${progress}%` }}
                    />
                  </div>
                  <div className="mt-1 text-xs text-slate-500">{progressLabel} {progress}%</div>
                </div>
              )}
              {fileMeta && (
                <div className="rounded-md border border-slate-200 p-4 bg-white shadow-sm">
                  <div className="flex items-center justify-between mb-2">
                    <div className="text-sm font-medium">{fileMeta.name}</div>
                    <div className="text-xs text-slate-500">{Math.round(fileMeta.size / 1024)} KB</div>
                  </div>
                  {loadedFromCache && (
                    <div className="mb-2 text-[11px] text-brand-700">Loaded from browser cache</div>
                  )}
                  <div className="mb-3 text-xs text-slate-500">{fileMeta.type || "Unknown type"}</div>
                  {/* Table preview when structured data detected; otherwise show text */}
                  {tablePreview && tablePreview.origin === 'csv' ? (
                    <div className="max-h-64 w-full min-w-0 overflow-x-auto overflow-y-auto rounded-md bg-slate-50">
                      <table className="min-w-full text-xs">
                        <thead className="sticky top-0 bg-slate-100">
                          <tr>
                            {tablePreview.headers.map((h, idx) => (
                              <th key={idx} className="text-left font-semibold px-3 py-2 border-b border-slate-200 whitespace-nowrap">{h}</th>
                            ))}
                          </tr>
                        </thead>
                        <tbody>
                          {tablePreview.rows.map((r, i) => (
                            <tr key={i} className={i % 2 === 0 ? "" : "bg-slate-100/50"}>
                              {r.map((c, j) => (
                                <td key={j} className="px-3 py-1.5 border-b border-slate-100 whitespace-nowrap max-w-[24ch] overflow-hidden text-ellipsis">{c}</td>
                              ))}
                            </tr>
                          ))}
                        </tbody>
                      </table>
                    </div>
                  ) : (
                    <pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
                      {fileMeta.contentPreview || "(no preview)"}
                    </pre>
                  )}

                  {error && (
                    <div className="mt-3 p-3 bg-red-50 border border-red-200 rounded-md text-sm text-red-700">
                      ❌ {error}
                    </div>
                  )}

                  {analyzeResult && (
                    <div className="mt-3 p-3 bg-green-50 border border-green-200 rounded-md text-sm text-green-700">
                      ✅ Analysis complete! View results in tabs.
                      <a
                        href={getReportUrl(analyzeResult.report_file)}
                        target="_blank"
                        rel="noopener noreferrer"
                        className="ml-2 underline"
                      >
                        Download Report
                      </a>
                    </div>
                  )}

                  {cleanResult && (
                    <div className="mt-3 p-3 bg-green-50 border border-green-200 rounded-md text-sm text-green-700">
                      ✅ Cleaning complete! {cleanResult.summary.total_cells_affected} cells anonymized.
                      <div className="mt-2 flex gap-2">
                        <a
                          href={getReportUrl(cleanResult.files.cleaned_csv)}
                          download
                          className="underline"
                        >
                          Download Cleaned CSV
                        </a>
                        <a
                          href={getReportUrl(cleanResult.files.audit_report)}
                          target="_blank"
                          rel="noopener noreferrer"
                          className="underline"
                        >
                          View Audit Report
                        </a>
                      </div>
                    </div>
                  )}

                  <div className="mt-3 flex justify-end gap-2">
                    <button
                      type="button"
                      onClick={async () => {
                        reset();
                        try { await deleteLatestUpload(); } catch {}
                        setLoadedFromCache(false);
                        setAnalyzeResult(null);
                        setCleanResult(null);
                      }}
                      className="text-xs rounded-md border px-3 py-1.5 hover:bg-slate-50"
                    >
                      Clear
                    </button>
                    <button
                      type="button"
                      onClick={handleClean}
                      disabled={isProcessing}
                      className="text-xs rounded-md bg-green-600 text-white px-3 py-1.5 hover:bg-green-500 disabled:opacity-50 disabled:cursor-not-allowed"
                    >
                      {isProcessing ? "Processing..." : "Clean (PII)"}
                    </button>
                    <button
                      type="button"
                      onClick={handleAnalyze}
                      disabled={isProcessing}
                      className="text-xs rounded-md bg-brand-600 text-white px-3 py-1.5 hover:bg-brand-500 disabled:opacity-50 disabled:cursor-not-allowed"
                    >
                      {isProcessing ? "Processing..." : "Analyze"}
                    </button>
                  </div>
                </div>
              )}
            </div>
          </div>
        );
      case "bias-analysis":
        return (
          <div className="space-y-4">
            <h2 className="text-xl font-semibold">Bias Analysis</h2>
            {analyzeResult ? (
              <div className="space-y-4">
                <div className="grid grid-cols-2 gap-4">
                  <div className="p-4 bg-white rounded-lg border">
                    <div className="text-sm text-slate-600">Overall Bias Score</div>
                    <div className="text-2xl font-bold">{(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}%</div>
                  </div>
                  <div className="p-4 bg-white rounded-lg border">
                    <div className="text-sm text-slate-600">Violations Detected</div>
                    <div className="text-2xl font-bold">{analyzeResult.bias_metrics.violations_detected.length}</div>
                  </div>
                </div>

                <div className="p-4 bg-white rounded-lg border">
                  <h3 className="font-semibold mb-2">Model Performance</h3>
                  <div className="grid grid-cols-4 gap-2 text-sm">
                    <div>
                      <div className="text-slate-600">Accuracy</div>
                      <div className="font-medium">{(analyzeResult.model_performance.accuracy * 100).toFixed(1)}%</div>
                    </div>
                    <div>
                      <div className="text-slate-600">Precision</div>
                      <div className="font-medium">{(analyzeResult.model_performance.precision * 100).toFixed(1)}%</div>
                    </div>
                    <div>
                      <div className="text-slate-600">Recall</div>
                      <div className="font-medium">{(analyzeResult.model_performance.recall * 100).toFixed(1)}%</div>
                    </div>
                    <div>
                      <div className="text-slate-600">F1 Score</div>
                      <div className="font-medium">{(analyzeResult.model_performance.f1_score * 100).toFixed(1)}%</div>
                    </div>
                  </div>
                </div>
              </div>
            ) : (
              <p className="text-sm text-slate-600">Upload and analyze a dataset to see bias metrics.</p>
            )}
          </div>
        );
      case "risk-analysis":
        return (
          <div className="space-y-4">
            <h2 className="text-xl font-semibold">Risk Analysis</h2>
            {analyzeResult ? (
              <div className="space-y-4">
                <div className="p-4 bg-white rounded-lg border">
                  <div className="text-sm text-slate-600">Overall Risk Score</div>
                  <div className="text-2xl font-bold">{(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}%</div>
                </div>

                {cleanResult && (
                  <div className="p-4 bg-white rounded-lg border">
                    <h3 className="font-semibold mb-2">PII Detection Results</h3>
                    <div className="text-sm space-y-1">
                      <div>Cells Anonymized: <span className="font-medium">{cleanResult.summary.total_cells_affected}</span></div>
                      <div>Columns Removed: <span className="font-medium">{cleanResult.summary.columns_removed.length}</span></div>
                      <div>Columns Anonymized: <span className="font-medium">{cleanResult.summary.columns_anonymized.length}</span></div>
                    </div>
                  </div>
                )}
              </div>
            ) : (
              <p className="text-sm text-slate-600">Upload and analyze a dataset to see risk assessment.</p>
            )}
          </div>
        );
      case "bias-risk-mitigation":
        return (
          <div className="space-y-4">
            <h2 className="text-xl font-semibold">Mitigation Suggestions</h2>
            {analyzeResult && analyzeResult.recommendations.length > 0 ? (
              <div className="space-y-2">
                {analyzeResult.recommendations.map((rec, i) => (
                  <div key={i} className="p-3 bg-blue-50 border border-blue-200 rounded-md text-sm">
                    {rec}
                  </div>
                ))}
              </div>
            ) : (
              <p className="text-sm text-slate-600">
                Recommendations will appear here after analysis.
              </p>
            )}
          </div>
        );
      case "results":
        return (
          <div className="space-y-4">
            <h2 className="text-xl font-semibold">Results Summary</h2>
            {(analyzeResult || cleanResult) ? (
              <div className="space-y-4">
                {analyzeResult && (
                  <div className="p-4 bg-white rounded-lg border">
                    <h3 className="font-semibold mb-2">Analysis Results</h3>
                    <div className="text-sm space-y-1">
                      <div>Dataset: {analyzeResult.filename}</div>
                      <div>Rows: {analyzeResult.dataset_info.rows}</div>
                      <div>Columns: {analyzeResult.dataset_info.columns}</div>
                      <div>Bias Score: {(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}%</div>
                      <div>Risk Score: {(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}%</div>
                    </div>
                    <a
                      href={getReportUrl(analyzeResult.report_file)}
                      target="_blank"
                      rel="noopener noreferrer"
                      className="mt-3 inline-block text-sm text-brand-600 underline"
                    >
                      Download Full Report →
                    </a>
                  </div>
                )}

                {cleanResult && (
                  <div className="p-4 bg-white rounded-lg border">
                    <h3 className="font-semibold mb-2">Cleaning Results</h3>
                    <div className="text-sm space-y-1">
                      <div>Original: {cleanResult.dataset_info.original_rows} rows × {cleanResult.dataset_info.original_columns} cols</div>
                      <div>Cleaned: {cleanResult.dataset_info.cleaned_rows} rows × {cleanResult.dataset_info.cleaned_columns} cols</div>
                      <div>Cells Anonymized: {cleanResult.summary.total_cells_affected}</div>
                      <div>Columns Removed: {cleanResult.summary.columns_removed.length}</div>
                      <div>GDPR Compliant: {cleanResult.gdpr_compliance.length} articles applied</div>
                    </div>
                    <div className="mt-3 flex gap-2">
                      <a
                        href={getReportUrl(cleanResult.files.cleaned_csv)}
                        download
                        className="text-sm text-brand-600 underline"
                      >
                        Download Cleaned CSV →
                      </a>
                      <a
                        href={getReportUrl(cleanResult.files.audit_report)}
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-sm text-brand-600 underline"
                      >
                        View Audit Report →
                      </a>
                    </div>
                  </div>
                )}
              </div>
            ) : (
              <p className="text-sm text-slate-600">
                Process a dataset to see aggregated results.
              </p>
            )}
          </div>
        );
      default:
        return null;
    }
  }

  return (
    <div className="h-full overflow-y-auto p-6 bg-white/60">
      {renderTabContent()}
    </div>
  );
}
@@ -4,6 +4,7 @@ import { useState, useRef, useCallback, useEffect } from "react";
 
 interface CenterPanelProps {
   tab: TryTab;
+  onAnalyze?: () => void;
 }
 
 interface UploadedFileMeta {
@@ -13,11 +14,19 @@ interface UploadedFileMeta {
   contentPreview: string;
 }
 
-export function CenterPanel({ tab }: CenterPanelProps) {
+interface TablePreviewData {
+  headers: string[];
+  rows: string[][];
+  origin: 'csv';
+}
+
+export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
+  const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview
   const [fileMeta, setFileMeta] = useState<UploadedFileMeta | null>(null);
   const [isDragging, setIsDragging] = useState(false);
   const [progress, setProgress] = useState<number>(0);
   const [progressLabel, setProgressLabel] = useState<string>("Processing");
+  const [tablePreview, setTablePreview] = useState<TablePreviewData | null>(null);
   const inputRef = useRef<HTMLInputElement | null>(null);
   const [loadedFromCache, setLoadedFromCache] = useState(false);
 
@@ -25,10 +34,43 @@ export function CenterPanel({ tab }: CenterPanelProps) {
     setFileMeta(null);
     setProgress(0);
     setProgressLabel("Processing");
+    setTablePreview(null);
   };
 
+  function tryParseCSV(text: string, maxRows = 50, maxCols = 40): TablePreviewData | null {
+    const lines = text.split(/\r?\n/).filter(l => l.trim().length > 0);
+    if (lines.length < 2) return null;
+    const commaDensity = lines.slice(0, 10).filter(l => l.includes(',')).length;
+    if (commaDensity < 2) return null;
+    const parseLine = (line: string) => {
+      const out: string[] = [];
+      let cur = '';
+      let inQuotes = false;
+      for (let i = 0; i < line.length; i++) {
+        const ch = line[i];
+        if (ch === '"') {
+          if (inQuotes && line[i + 1] === '"') { cur += '"'; i++; } else { inQuotes = !inQuotes; }
+        } else if (ch === ',' && !inQuotes) {
+          out.push(cur);
+          cur = '';
+        } else { cur += ch; }
+      }
+      out.push(cur);
+      return out.map(c => c.trim());
+    };
+    const raw = lines.slice(0, maxRows).map(parseLine);
+    if (raw.length === 0) return null;
+    const headers = raw[0];
+    const colCount = Math.min(headers.length, maxCols);
+    const rows = raw.slice(1).map(r => r.slice(0, colCount));
+    return { headers: headers.slice(0, colCount), rows, origin: 'csv' };
+  }
+
+  // We no longer build table preview for JSON; revert JSON to raw text view.
+
   const processFile = useCallback(async (f: File) => {
     if (!f) return;
+    const isCSV = /\.csv$/i.test(f.name);
     setProgress(0);
     // For large files, show a progress bar while reading the file stream (no preview)
     if (f.size > 1024 * 1024) {
@@ -37,13 +79,35 @@ export function CenterPanel({ tab }: CenterPanelProps) {
         name: f.name,
         size: f.size,
         type: f.type || "unknown",
-        contentPreview: "File too large for preview (limit 1MB).",
+        contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES/1024)}KB)...`,
       };
       setFileMeta(metaObj);
+      setTablePreview(null);
       // Save to IndexedDB immediately so it persists without needing full read
       (async () => {
        try { await saveLatestUpload(f, metaObj); } catch {}
       })();
+      // Read head slice for partial preview & possible CSV table extraction
+      try {
+        const headBlob = f.slice(0, PREVIEW_BYTES);
+        const headReader = new FileReader();
+        headReader.onload = async () => {
+          try {
+            const buf = headReader.result as ArrayBuffer;
+            const decoder = new TextDecoder();
+            const text = decoder.decode(buf);
+            setFileMeta(prev => prev ? { ...prev, contentPreview: text.slice(0, 4000) } : prev);
+            if (isCSV) {
+              const parsed = tryParseCSV(text);
+              setTablePreview(parsed);
+            } else {
+              setTablePreview(null);
+            }
+            try { await saveLatestUpload(f, { ...metaObj, contentPreview: text.slice(0, 4000) }); } catch {}
+          } catch { /* ignore */ }
+        };
+        headReader.readAsArrayBuffer(headBlob);
+      } catch { /* ignore */ }
       // Use streaming read for progress without buffering entire file in memory
       try {
         const stream: ReadableStream<Uint8Array> | undefined = (typeof (f as any).stream === "function" ? (f as any).stream() : undefined);
@@ -100,6 +164,12 @@ export function CenterPanel({ tab }: CenterPanelProps) {
           contentPreview: text.slice(0, 4000),
         };
         setFileMeta(metaObj);
+        if (isCSV) {
+          const parsed = tryParseCSV(text);
+          setTablePreview(parsed);
+        } else {
+          setTablePreview(null);
+        }
         // Save file blob and meta to browser cache (IndexedDB)
         try {
           await saveLatestUpload(f, metaObj);
@@ -114,6 +184,7 @@ export function CenterPanel({ tab }: CenterPanelProps) {
           contentPreview: "Unable to decode preview.",
         };
         setFileMeta(metaObj);
+        setTablePreview(null);
         try {
           await saveLatestUpload(f, metaObj);
         } catch {}
@@ -166,10 +237,10 @@ export function CenterPanel({ tab }: CenterPanelProps) {
     switch (tab) {
       case "processing":
         return (
-          <div className="space-y-4">
+          <div className="space-y-4 max-w-[1100px] xl:max-w-[1200px] w-full mx-auto">
             <h2 className="text-xl font-semibold">Upload & Process Data</h2>
             <p className="text-sm text-slate-600">Upload a CSV / JSON / text file. We will later parse, detect PII, and queue analyses.</p>
-            <div className="flex flex-col gap-3">
+            <div className="flex flex-col gap-3 min-w-0">
               <div
                 onDragOver={onDragOver}
                 onDragLeave={onDragLeave}
@@ -219,10 +290,34 @@ export function CenterPanel({ tab }: CenterPanelProps) {
                    <div className="mb-2 text-[11px] text-brand-700">Loaded from browser cache</div>
                  )}
                  <div className="mb-3 text-xs text-slate-500">{fileMeta.type || "Unknown type"}</div>
+                 {/* Table preview when structured data detected; otherwise show text */}
+                 {tablePreview && tablePreview.origin === 'csv' ? (
+                   <div className="max-h-64 w-full min-w-0 overflow-x-auto overflow-y-auto rounded-md bg-slate-50">
+                     <table className="min-w-full text-xs">
+                       <thead className="sticky top-0 bg-slate-100">
+                         <tr>
+                           {tablePreview.headers.map((h, idx) => (
+                             <th key={idx} className="text-left font-semibold px-3 py-2 border-b border-slate-200 whitespace-nowrap">{h}</th>
+                           ))}
+                         </tr>
+                       </thead>
+                       <tbody>
+                         {tablePreview.rows.map((r, i) => (
+                           <tr key={i} className={i % 2 === 0 ? "" : "bg-slate-100/50"}>
+                             {r.map((c, j) => (
+                               <td key={j} className="px-3 py-1.5 border-b border-slate-100 whitespace-nowrap max-w-[24ch] overflow-hidden text-ellipsis">{c}</td>
+                             ))}
+                           </tr>
+                         ))}
+                       </tbody>
+                     </table>
+                   </div>
+                 ) : (
                  <pre className="max-h-64 overflow-auto text-xs bg-slate-50 p-3 rounded-md whitespace-pre-wrap leading-relaxed">
                    {fileMeta.contentPreview || "(no preview)"}
                  </pre>
-                 <div className="mt-3 flex justify-end">
+                 )}
+                 <div className="mt-3 flex justify-end gap-2">
                    <button
                      type="button"
                      onClick={async () => {
@@ -234,6 +329,13 @@ export function CenterPanel({ tab }: CenterPanelProps) {
                    >
                      Clear
                    </button>
+                   <button
+                     type="button"
+                     onClick={() => onAnalyze?.()}
+                     className="text-xs rounded-md bg-brand-600 text-white px-3 py-1.5 hover:bg-brand-500"
+                   >
+                     Analyze
+                   </button>
                  </div>
                </div>
              )}
@@ -1,28 +0,0 @@
-# Nordic Privacy AI
-
-AI-Powered GDPR compliance & personal data protection platform tailored for Nordic ecosystems (BankID, MitID, Suomi.fi).
-
-## Tech Stack
-- Next.js (App Router, TypeScript)
-- Tailwind CSS
-
-## Getting Started
-```powershell
-npm install
-npm run dev
-```
-Visit http://localhost:3000 to view the landing page.
-
-## Scripts
-- `npm run dev` – Start dev server
-- `npm run build` – Production build
-- `npm start` – Run built app
-- `npm run lint` – ESLint
-
-## Next Steps
-- Implement /try page workflow
-- Add feature sections & agent explanations
-- Integrate backend services for data upload & scanning
-
-## License
-Internal hackathon prototype
frontend/nordic-privacy-ai/lib/idb.ts (new file, 113 lines)
@@ -0,0 +1,113 @@
/**
 * IndexedDB utilities for persisting file uploads in the browser.
 * Stores the latest uploaded file and its metadata for recovery across sessions.
 */

const DB_NAME = "NordicPrivacyAI";
const DB_VERSION = 1;
const STORE_NAME = "latestUpload";

interface UploadedFileMeta {
  name: string;
  size: number;
  type: string;
  contentPreview: string;
}

interface LatestUploadData {
  file: File;
  meta: UploadedFileMeta;
  timestamp: number;
}

/**
 * Open or create the IndexedDB database
 */
function openDB(): Promise<IDBDatabase> {
  return new Promise((resolve, reject) => {
    const request = indexedDB.open(DB_NAME, DB_VERSION);

    request.onerror = () => reject(request.error);
    request.onsuccess = () => resolve(request.result);

    request.onupgradeneeded = (event) => {
      const db = (event.target as IDBOpenDBRequest).result;

      // Create object store if it doesn't exist
      if (!db.objectStoreNames.contains(STORE_NAME)) {
        db.createObjectStore(STORE_NAME);
      }
    };
  });
}

/**
 * Save the latest uploaded file and its metadata to IndexedDB
 */
export async function saveLatestUpload(
  file: File,
  meta: UploadedFileMeta
): Promise<void> {
  const db = await openDB();

  return new Promise((resolve, reject) => {
    const transaction = db.transaction([STORE_NAME], "readwrite");
    const store = transaction.objectStore(STORE_NAME);

    const data: LatestUploadData = {
      file,
      meta,
      timestamp: Date.now(),
    };

    const request = store.put(data, "latest");

    request.onerror = () => reject(request.error);
    request.onsuccess = () => resolve();

    transaction.oncomplete = () => db.close();
  });
}

/**
 * Retrieve the latest uploaded file and metadata from IndexedDB
 */
export async function getLatestUpload(): Promise<LatestUploadData> {
  const db = await openDB();

  return new Promise((resolve, reject) => {
    const transaction = db.transaction([STORE_NAME], "readonly");
    const store = transaction.objectStore(STORE_NAME);
    const request = store.get("latest");

    request.onerror = () => reject(request.error);
    request.onsuccess = () => {
      const result = request.result as LatestUploadData | undefined;
      if (result) {
        resolve(result);
      } else {
        reject(new Error("No cached upload found"));
      }
    };

    transaction.oncomplete = () => db.close();
  });
}

/**
 * Delete the latest upload from IndexedDB
 */
export async function deleteLatestUpload(): Promise<void> {
  const db = await openDB();

  return new Promise((resolve, reject) => {
    const transaction = db.transaction([STORE_NAME], "readwrite");
    const store = transaction.objectStore(STORE_NAME);
    const request = store.delete("latest");

    request.onerror = () => reject(request.error);
    request.onsuccess = () => resolve();

    transaction.oncomplete = () => db.close();
  });
}
5 frontend/nordic-privacy-ai/next-env.d.ts vendored
@@ -1,5 +0,0 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />

// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.
@@ -10,3 +10,14 @@ presidio-analyzer>=2.2.0
presidio-anonymizer>=2.2.0
spacy>=3.7.0
# Download spaCy model with: python -m spacy download en_core_web_sm

# FastAPI Backend
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6

# Optional: GPU Support (uncomment if you have CUDA)
# torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121

# Chatbot (WIP - not exposed in API yet)
gpt4all>=2.0.0
145 src/__init__.py
@@ -1,145 +0,0 @@
"""
AI Governance Module - Bias Detection and Risk Analysis
"""

from .data_processor import DataProcessor
from .model_trainer import GeneralizedModelTrainer
from .bias_analyzer import BiasAnalyzer
from .risk_analyzer import RiskAnalyzer
from .report_generator import ReportGenerator, NumpyEncoder

import pandas as pd
import json

__version__ = '1.0.0'

__all__ = [
    'DataProcessor',
    'GeneralizedModelTrainer',
    'BiasAnalyzer',
    'RiskAnalyzer',
    'ReportGenerator',
    'NumpyEncoder',
    'AIGovernanceAnalyzer'
]


class AIGovernanceAnalyzer:
    """
    Main interface for AI Governance analysis

    Example:
        >>> analyzer = AIGovernanceAnalyzer()
        >>> report = analyzer.analyze('data.csv', 'target', ['gender', 'age'])
        >>> print(f"Bias Score: {report['summary']['overall_bias_score']:.3f}")
    """

    def __init__(self):
        """Initialize the analyzer"""
        self.processor = None
        self.trainer = None
        self.bias_analyzer = None
        self.risk_analyzer = None
        self.report_generator = None

    def analyze(self, data_path, target_column, protected_attributes):
        """
        Run complete AI governance analysis from file

        Args:
            data_path (str): Path to CSV file
            target_column (str): Name of target column
            protected_attributes (list): List of protected attribute column names

        Returns:
            dict: Complete analysis report
        """
        df = pd.read_csv(data_path)
        return self.analyze_dataframe(df, target_column, protected_attributes)

    def analyze_dataframe(self, df, target_column, protected_attributes):
        """
        Run complete AI governance analysis from DataFrame

        Args:
            df (pd.DataFrame): Input dataframe
            target_column (str): Name of target column
            protected_attributes (list): List of protected attribute column names

        Returns:
            dict: Complete analysis report
        """
        # Step 1: Process data
        self.processor = DataProcessor(df)
        self.processor.target_column = target_column
        self.processor.protected_attributes = protected_attributes
        self.processor.prepare_data()

        # Step 2: Train model
        self.trainer = GeneralizedModelTrainer(
            self.processor.X_train,
            self.processor.X_test,
            self.processor.y_train,
            self.processor.y_test,
            self.processor.feature_names
        )
        self.trainer.train()
        self.trainer.evaluate()

        # Step 3: Analyze bias
        self.bias_analyzer = BiasAnalyzer(
            self.processor.X_test,
            self.processor.y_test,
            self.trainer.y_pred,
            self.processor.df,
            self.processor.protected_attributes,
            self.processor.target_column
        )
        bias_results = self.bias_analyzer.analyze()

        # Step 4: Assess risks
        self.risk_analyzer = RiskAnalyzer(
            self.processor.df,
            self.trainer.results,
            bias_results,
            self.processor.protected_attributes,
            self.processor.target_column
        )
        risk_results = self.risk_analyzer.analyze()

        # Step 5: Generate report
        self.report_generator = ReportGenerator(
            self.trainer.results,
            bias_results,
            risk_results,
            self.processor.df
        )

        return self.report_generator.generate_report()

    def save_report(self, report, output_path):
        """
        Save report to JSON file

        Args:
            report (dict): Analysis report
            output_path (str): Path to save JSON file

        Returns:
            str: Path to saved file
        """
        with open(output_path, 'w') as f:
            json.dump(report, f, indent=2, cls=NumpyEncoder)
        return output_path

    def get_summary(self, report):
        """
        Get executive summary from report

        Args:
            report (dict): Analysis report

        Returns:
            dict: Summary metrics
        """
        return report.get('summary', {})
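
The removed src/__init__.py above documents its own workflow in the method docstrings (process data, train, analyze bias, assess risks, generate report) and ships a doctest-style example. A minimal driver sketch based only on those docstrings; the CSV path, target column, and protected attribute names are placeholders, not real project files:

# Sketch of driving AIGovernanceAnalyzer as its docstrings describe.
# 'data.csv', 'target', and the protected attributes below are placeholders.
from src import AIGovernanceAnalyzer

analyzer = AIGovernanceAnalyzer()
report = analyzer.analyze('data.csv', 'target', ['gender', 'age'])

print(f"Bias Score: {report['summary']['overall_bias_score']:.3f}")
print(f"Risk Level: {report['summary']['risk_level']}")

# Persist the full report as JSON (NumpyEncoder handles numpy types)
analyzer.save_report(report, 'governance_report.json')
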
@@ -1,263 +0,0 @@
"""
Report Generator Module
Generates comprehensive JSON reports
"""

import json
import numpy as np
from datetime import datetime

class NumpyEncoder(json.JSONEncoder):
    """Custom JSON encoder for numpy types"""
    def default(self, obj):
        if isinstance(obj, (np.integer, np.int64, np.int32)):
            return int(obj)
        elif isinstance(obj, (np.floating, np.float64, np.float32)):
            return float(obj)
        elif isinstance(obj, (np.ndarray,)):
            return obj.tolist()
        elif isinstance(obj, (np.bool_,)):
            return bool(obj)
        return super(NumpyEncoder, self).default(obj)


class ReportGenerator:
    """Generate comprehensive analysis reports"""

    def __init__(self, model_results, bias_results, risk_results, df):
        self.model_results = model_results
        self.bias_results = bias_results
        self.risk_results = risk_results
        self.df = df

    def generate_report(self):
        """Generate comprehensive JSON report"""
        report = {
            'metadata': self._generate_metadata(),
            'summary': self._generate_summary(),
            'model_performance': self._format_model_results(),
            'bias_analysis': self._format_bias_results(),
            'risk_assessment': self._format_risk_results(),
            'key_findings': self._extract_key_findings(),
            'recommendations': self._compile_recommendations(),
            'detailed_metrics': self._compile_detailed_metrics()
        }

        return report

    def _generate_metadata(self):
        """Generate report metadata"""
        return {
            'report_id': f"AIGov_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            'generated_at': datetime.now().isoformat(),
            'report_version': '1.0',
            'dataset_info': {
                'total_records': len(self.df),
                'total_features': len(self.df.columns),
                'columns': list(self.df.columns)
            }
        }

    def _generate_summary(self):
        """Generate executive summary"""
        model_metrics = self.model_results.get('metrics', {})

        return {
            'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
            'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
            'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
            'model_accuracy': model_metrics.get('accuracy', 0.0),
            'fairness_violations_count': len(self.bias_results.get('fairness_violations', [])),
            'passes_fairness_threshold': self.bias_results.get('fairness_assessment', {}).get('passes_fairness_threshold', False)
        }

    def _format_model_results(self):
        """Format model performance results"""
        return {
            'model_type': self.model_results.get('model_type', 'Unknown'),
            'metrics': self.model_results.get('metrics', {}),
            'confusion_matrix': self.model_results.get('confusion_matrix', []),
            'top_features': dict(list(self.model_results.get('feature_importance', {}).items())[:10])
        }

    def _format_bias_results(self):
        """Format bias analysis results"""
        return {
            'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
            'fairness_metrics': self.bias_results.get('fairness_metrics', {}),
            'fairness_violations': self.bias_results.get('fairness_violations', []),
            'fairness_assessment': self.bias_results.get('fairness_assessment', {}),
            'demographic_bias_summary': self._summarize_demographic_bias()
        }

    def _format_risk_results(self):
        """Format risk assessment results"""
        return {
            'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
            'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
            'risk_categories': self.risk_results.get('risk_categories', {}),
            'privacy_risks': self._summarize_privacy_risks(),
            'ethical_risks': self._summarize_ethical_risks()
        }

    def _summarize_demographic_bias(self):
        """Summarize demographic bias"""
        demo_bias = self.bias_results.get('demographic_bias', {})
        summary = {}

        for attr, data in demo_bias.items():
            summary[attr] = {
                'max_disparity': data.get('max_disparity', 0),
                'groups_analyzed': len(data.get('approval_rates', {}))
            }

        return summary

    def _summarize_privacy_risks(self):
        """Summarize privacy risks"""
        privacy = self.risk_results.get('privacy_risks', {})

        return {
            'pii_count': len(privacy.get('pii_detected', [])),
            'anonymization_level': privacy.get('anonymization_level', 'UNKNOWN'),
            'exposure_risk_count': len(privacy.get('exposure_risks', [])),
            'gdpr_compliance_score': privacy.get('gdpr_compliance', {}).get('compliance_score', 0)
        }

    def _summarize_ethical_risks(self):
        """Summarize ethical risks"""
        ethical = self.risk_results.get('ethical_risks', {})

        return {
            'fairness_issues_count': len(ethical.get('fairness_issues', [])),
            'transparency_score': ethical.get('transparency_score', 0),
            'bias_amplification_risk': ethical.get('bias_amplification_risk', 'UNKNOWN'),
            'social_impact': ethical.get('social_impact_assessment', {})
        }

    def _extract_key_findings(self):
        """Extract key findings from analysis"""
        findings = []

        # Model performance findings
        accuracy = self.model_results.get('metrics', {}).get('accuracy', 0)
        if accuracy >= 0.8:
            findings.append(f"✓ Model achieves good accuracy ({accuracy:.2%})")
        else:
            findings.append(f"⚠ Model accuracy is below optimal ({accuracy:.2%})")

        # Bias findings
        bias_score = self.bias_results.get('overall_bias_score', 0)
        if bias_score < 0.3:
            findings.append("✓ Low bias detected across protected attributes")
        elif bias_score < 0.5:
            findings.append("⚠ Moderate bias detected - monitoring recommended")
        else:
            findings.append("❌ High bias detected - immediate action required")

        # Fairness violations
        violations = self.bias_results.get('fairness_violations', [])
        if violations:
            high_sev = sum(1 for v in violations if v['severity'] == 'HIGH')
            findings.append(f"❌ {len(violations)} fairness violations detected ({high_sev} high severity)")
        else:
            findings.append("✓ No fairness violations detected")

        # Privacy findings
        privacy = self.risk_results.get('privacy_risks', {})
        pii_count = len(privacy.get('pii_detected', []))
        if pii_count > 0:
            findings.append(f"⚠ {pii_count} columns contain potential PII")
        else:
            findings.append("✓ No obvious PII detected in dataset")

        # Risk level
        risk_level = self.risk_results.get('risk_level', 'UNKNOWN')
        findings.append(f"Overall Risk Level: {risk_level}")

        return findings

    def _compile_recommendations(self):
        """Compile all recommendations"""
        recommendations = []

        # Get recommendations from each component
        privacy_recs = self.risk_results.get('privacy_risks', {}).get('recommendations', [])
        ethical_recs = self.risk_results.get('ethical_risks', {}).get('recommendations', [])
        performance_recs = self.risk_results.get('model_performance_risks', {}).get('recommendations', [])
        compliance_recs = self.risk_results.get('compliance_risks', {}).get('recommendations', [])

        # Prioritize recommendations
        all_recs = []

        # High priority (from violations and high risks)
        violations = self.bias_results.get('fairness_violations', [])
        if violations:
            all_recs.append({
                'priority': 'HIGH',
                'category': 'Fairness',
                'recommendation': 'Address fairness violations in protected attributes'
            })

        if len(privacy_recs) > 0:
            all_recs.append({
                'priority': 'HIGH',
                'category': 'Privacy',
                'recommendation': privacy_recs[0]
            })

        # Medium priority
        for rec in ethical_recs[:2]:
            all_recs.append({
                'priority': 'MEDIUM',
                'category': 'Ethics',
                'recommendation': rec
            })

        # Lower priority
        for rec in performance_recs[:2]:
            all_recs.append({
                'priority': 'MEDIUM',
                'category': 'Performance',
                'recommendation': rec
            })

        for rec in compliance_recs[:2]:
            all_recs.append({
                'priority': 'MEDIUM',
                'category': 'Compliance',
                'recommendation': rec
            })

        # Convert to simple list with formatting
        recommendations = [
            f"[{r['priority']}] {r['category']}: {r['recommendation']}"
            for r in all_recs[:10]  # Limit to top 10
        ]

        return recommendations

    def _compile_detailed_metrics(self):
        """Compile detailed metrics for analysis"""
        return {
            'bias_metrics': {
                'by_attribute': self.bias_results.get('fairness_metrics', {}),
                'demographic_analysis': self.bias_results.get('demographic_bias', {})
            },
            'risk_breakdown': {
                'privacy': self.risk_results.get('privacy_risks', {}),
                'ethical': self.risk_results.get('ethical_risks', {}),
                'compliance': self.risk_results.get('compliance_risks', {}),
                'data_quality': self.risk_results.get('data_quality_risks', {})
            },
            'model_details': {
                'classification_report': self.model_results.get('classification_report', {}),
                'feature_importance': self.model_results.get('feature_importance', {})
            }
        }

    def save_report(self, filepath):
        """Save report to JSON file"""
        report = self.generate_report()
        with open(filepath, 'w') as f:
            json.dump(report, f, indent=2, cls=NumpyEncoder)
        return filepath
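
The NumpyEncoder defined at the top of this module is what lets json.dump write reports that still contain numpy scalars and arrays. A small standalone sketch of that pattern; the sample values are made up for illustration, and the import path refers to the module as it existed before this commit:

# Illustrative only: serializing numpy values with the NumpyEncoder shown above.
import json
import numpy as np
from src.report_generator import NumpyEncoder  # pre-commit module path

sample = {
    "overall_bias_score": np.float64(0.27),
    "fairness_violations_count": np.int64(0),
    "confusion_matrix": np.array([[50, 3], [4, 43]]),
}

print(json.dumps(sample, indent=2, cls=NumpyEncoder))
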
21 start_api.py Normal file
@@ -0,0 +1,21 @@
"""
Start the FastAPI server
Run: python start_api.py
"""

import uvicorn

if __name__ == "__main__":
    print("🚀 Starting Nordic Privacy AI API Server...")
    print("📍 API will be available at: http://localhost:8000")
    print("📖 Interactive docs at: http://localhost:8000/docs")
    print("🔗 Frontend should run at: http://localhost:3000")
    print("\nPress CTRL+C to stop\n")

    uvicorn.run(
        "api.main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,  # Auto-reload on code changes
        log_level="info"
    )
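
A quick way to confirm the server came up is to fetch the interactive docs URL that start_api.py prints. A minimal standard-library sketch, assuming the server is already running locally:

# Smoke test: request the docs page printed by start_api.py.
# Assumes the API server is already running on http://localhost:8000.
import urllib.request

with urllib.request.urlopen("http://localhost:8000/docs") as resp:
    print(resp.status)  # 200 means the server is up
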
@@ -10,7 +10,7 @@ import os
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

-from cleaning import DataCleaner, CleaningConfig
+from data_cleaning import DataCleaner


def test_basic_cleaning():