From d84df36d93a2171c984a70abc36b4dde62184782 Mon Sep 17 00:00:00 2001 From: nearlynithin Date: Sat, 8 Nov 2025 03:16:28 +0530 Subject: [PATCH] fix: support txt on input --- frontend/components/try/CenterPanel.tsx | 5092 ++++++++++++++--------- 1 file changed, 3070 insertions(+), 2022 deletions(-) diff --git a/frontend/components/try/CenterPanel.tsx b/frontend/components/try/CenterPanel.tsx index cd51226..14e787c 100644 --- a/frontend/components/try/CenterPanel.tsx +++ b/frontend/components/try/CenterPanel.tsx @@ -1,2086 +1,3134 @@ "use client"; import { TryTab } from "./Sidebar"; import { useState, useRef, useCallback, useEffect } from "react"; -import { saveLatestUpload, getLatestUpload, deleteLatestUpload } from "../../lib/indexeddb"; -import { analyzeDataset, cleanDataset, detectPII, getReportUrl, type AnalyzeResponse, type CleanResponse, type DetectPIIResponse } from "../../lib/api"; +import { + saveLatestUpload, + getLatestUpload, + deleteLatestUpload, +} from "../../lib/indexeddb"; +import { + analyzeDataset, + cleanDataset, + detectPII, + getReportUrl, + type AnalyzeResponse, + type CleanResponse, + type DetectPIIResponse, +} from "../../lib/api"; interface CenterPanelProps { - tab: TryTab; - onAnalyze?: () => void; + tab: TryTab; + onAnalyze?: () => void; } interface UploadedFileMeta { - name: string; - size: number; - type: string; - contentPreview: string; + name: string; + size: number; + type: string; + contentPreview: string; } interface TablePreviewData { - headers: string[]; - rows: string[][]; - origin: 'csv'; + headers: string[]; + rows: string[][]; + origin: "csv"; } export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) { - const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview - const [fileMeta, setFileMeta] = useState(null); - const [uploadedFile, setUploadedFile] = useState(null); - const [isDragging, setIsDragging] = useState(false); - const [progress, setProgress] = useState(0); - const [progressLabel, 
setProgressLabel] = useState("Processing"); - const [tablePreview, setTablePreview] = useState(null); - const inputRef = useRef(null); - const [loadedFromCache, setLoadedFromCache] = useState(false); - const [isProcessing, setIsProcessing] = useState(false); - const [error, setError] = useState(null); - - // Analysis results - const [analyzeResult, setAnalyzeResult] = useState(null); - const [cleanResult, setCleanResult] = useState(null); - const [piiDetectionResult, setPIIDetectionResult] = useState(null); + const PREVIEW_BYTES = 64 * 1024; // read first 64KB slice for large-file preview + const [fileMeta, setFileMeta] = useState(null); + const [uploadedFile, setUploadedFile] = useState(null); + const [isDragging, setIsDragging] = useState(false); + const [progress, setProgress] = useState(0); + const [progressLabel, setProgressLabel] = useState("Processing"); + const [tablePreview, setTablePreview] = useState( + null, + ); + const inputRef = useRef(null); + const [loadedFromCache, setLoadedFromCache] = useState(false); + const [isProcessing, setIsProcessing] = useState(false); + const [error, setError] = useState(null); - const reset = () => { - setFileMeta(null); - setUploadedFile(null); - setProgress(0); - setProgressLabel("Processing"); - setTablePreview(null); - setError(null); - setPIIDetectionResult(null); - }; + // Analysis results + const [analyzeResult, setAnalyzeResult] = useState( + null, + ); + const [cleanResult, setCleanResult] = useState(null); + const [piiDetectionResult, setPIIDetectionResult] = + useState(null); - // Handle API calls - const handleAnalyze = async () => { - if (!uploadedFile) { - setError("No file uploaded"); - return; - } - - setIsProcessing(true); - setError(null); - setProgressLabel("Analyzing dataset..."); - - try { - const result = await analyzeDataset(uploadedFile); - setAnalyzeResult(result); - setProgressLabel("Analysis complete!"); - onAnalyze?.(); // Navigate to bias-analysis tab - } catch (err: any) { - 
setError(err.message || "Analysis failed"); - } finally { - setIsProcessing(false); - } - }; + const reset = () => { + setFileMeta(null); + setUploadedFile(null); + setProgress(0); + setProgressLabel("Processing"); + setTablePreview(null); + setError(null); + setPIIDetectionResult(null); + }; - const handleDetectPII = async () => { - if (!uploadedFile) { - setError("No file uploaded"); - return; - } - - setIsProcessing(true); - setError(null); - setProgressLabel("Detecting PII..."); - - try { - const result = await detectPII(uploadedFile); - setPIIDetectionResult(result); - setProgressLabel("PII detection complete!"); - } catch (err: any) { - setError(err.message || "PII detection failed"); - } finally { - setIsProcessing(false); - } - }; + // Handle API calls + const handleAnalyze = async () => { + if (!uploadedFile) { + setError("No file uploaded"); + return; + } - const handleClean = async () => { - if (!uploadedFile) { - setError("No file uploaded"); - return; - } - - setIsProcessing(true); - setError(null); - setProgressLabel("Cleaning dataset..."); - - try { - const result = await cleanDataset(uploadedFile); - setCleanResult(result); - setProgressLabel("Cleaning complete!"); - } catch (err: any) { - setError(err.message || "Cleaning failed"); - } finally { - setIsProcessing(false); - } - }; function tryParseCSV(text: string, maxRows = 50, maxCols = 40): TablePreviewData | null { - const lines = text.split(/\r?\n/).filter(l => l.trim().length > 0); - if (lines.length < 2) return null; - const commaDensity = lines.slice(0, 10).filter(l => l.includes(',')).length; - if (commaDensity < 2) return null; - const parseLine = (line: string) => { - const out: string[] = []; - let cur = ''; - let inQuotes = false; - for (let i = 0; i < line.length; i++) { - const ch = line[i]; - if (ch === '"') { - if (inQuotes && line[i + 1] === '"') { cur += '"'; i++; } else { inQuotes = !inQuotes; } - } else if (ch === ',' && !inQuotes) { - out.push(cur); - cur = ''; - } else { cur 
+= ch; } - } - out.push(cur); - return out.map(c => c.trim()); - }; - const raw = lines.slice(0, maxRows).map(parseLine); - if (raw.length === 0) return null; - const headers = raw[0]; - const colCount = Math.min(headers.length, maxCols); - const rows = raw.slice(1).map(r => r.slice(0, colCount)); - return { headers: headers.slice(0, colCount), rows, origin: 'csv' }; - } + setIsProcessing(true); + setError(null); + setProgressLabel("Analyzing dataset..."); - // We no longer build table preview for JSON; revert JSON to raw text view. + try { + const result = await analyzeDataset(uploadedFile); + setAnalyzeResult(result); + setProgressLabel("Analysis complete!"); + onAnalyze?.(); // Navigate to bias-analysis tab + } catch (err: any) { + setError(err.message || "Analysis failed"); + } finally { + setIsProcessing(false); + } + }; - const processFile = useCallback(async (f: File) => { - if (!f) return; - const isCSV = /\.csv$/i.test(f.name); - setProgress(0); - setUploadedFile(f); // Save the file for API calls - - // For large files, show a progress bar while reading the file stream (no preview) - if (f.size > 1024 * 1024) { - setProgressLabel("Uploading"); - const metaObj: UploadedFileMeta = { - name: f.name, - size: f.size, - type: f.type || "unknown", - contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES/1024)}KB)...`, - }; - setFileMeta(metaObj); - setTablePreview(null); - // Save to IndexedDB immediately so it persists without needing full read - (async () => { - try { await saveLatestUpload(f, metaObj); } catch {} - })(); - // Read head slice for partial preview & possible CSV table extraction - try { - const headBlob = f.slice(0, PREVIEW_BYTES); - const headReader = new FileReader(); - headReader.onload = async () => { - try { - const buf = headReader.result as ArrayBuffer; - const decoder = new TextDecoder(); - const text = decoder.decode(buf); - setFileMeta(prev => prev ? 
{ ...prev, contentPreview: text.slice(0, 4000) } : prev); - if (isCSV) { - const parsed = tryParseCSV(text); - setTablePreview(parsed); - } else { - setTablePreview(null); - } - try { await saveLatestUpload(f, { ...metaObj, contentPreview: text.slice(0, 4000) }); } catch {} - } catch { /* ignore */ } - }; - headReader.readAsArrayBuffer(headBlob); - } catch { /* ignore */ } - // Use streaming read for progress without buffering entire file in memory - try { - const stream: ReadableStream | undefined = (typeof (f as any).stream === "function" ? (f as any).stream() : undefined); - if (stream && typeof stream.getReader === "function") { - const reader = stream.getReader(); - let loaded = 0; - const total = f.size || 1; - for (;;) { - const { done, value } = await reader.read(); - if (done) break; - loaded += value ? value.length : 0; - const pct = Math.min(100, Math.round((loaded / total) * 100)); - setProgress(pct); - } - setProgress(100); - } else { - // Fallback to FileReader progress events - const reader = new FileReader(); - reader.onprogress = (evt) => { - if (evt.lengthComputable) { - const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100)); - setProgress(pct); - } else { - setProgress((p) => (p < 90 ? p + 5 : p)); - } - }; - reader.onloadend = () => setProgress(100); - reader.onerror = () => setProgress(0); - reader.readAsArrayBuffer(f); - } - } catch { - setProgress(100); - } - return; - } - const reader = new FileReader(); - reader.onprogress = (evt) => { - if (evt.lengthComputable) { - const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100)); - setProgress(pct); - } else { - setProgress((p) => (p < 90 ? 
p + 5 : p)); - } - }; - reader.onload = async () => { - try { - const buf = reader.result as ArrayBuffer; - const decoder = new TextDecoder(); - const text = decoder.decode(buf); - const metaObj: UploadedFileMeta = { - name: f.name, - size: f.size, - type: f.type || "unknown", - contentPreview: text.slice(0, 4000), - }; - setFileMeta(metaObj); - if (isCSV) { - const parsed = tryParseCSV(text); - setTablePreview(parsed); - } else { - setTablePreview(null); - } - // Save file blob and meta to browser cache (IndexedDB) - try { - await saveLatestUpload(f, metaObj); - } catch {} - setProgressLabel("Processing"); - setProgress(100); - } catch (e) { - const metaObj: UploadedFileMeta = { - name: f.name, - size: f.size, - type: f.type || "unknown", - contentPreview: "Unable to decode preview.", - }; - setFileMeta(metaObj); - setTablePreview(null); - try { - await saveLatestUpload(f, metaObj); - } catch {} - setProgressLabel("Processing"); - setProgress(100); - } - }; - reader.onerror = () => { - setProgress(0); - }; - reader.readAsArrayBuffer(f); - }, []); + const handleDetectPII = async () => { + if (!uploadedFile) { + setError("No file uploaded"); + return; + } - function handleFileChange(e: React.ChangeEvent) { - const f = e.target.files?.[0]; - processFile(f as File); - } + setIsProcessing(true); + setError(null); + setProgressLabel("Detecting PII..."); - const onDragOver = (e: React.DragEvent) => { - e.preventDefault(); - setIsDragging(true); - }; - const onDragLeave = () => setIsDragging(false); - const onDrop = (e: React.DragEvent) => { - e.preventDefault(); - setIsDragging(false); - const f = e.dataTransfer.files?.[0]; - processFile(f as File); - }; + try { + const result = await detectPII(uploadedFile); + setPIIDetectionResult(result); + setProgressLabel("PII detection complete!"); + } catch (err: any) { + setError(err.message || "PII detection failed"); + } finally { + setIsProcessing(false); + } + }; - // Load last cached upload on mount (processing tab only) - 
useEffect(() => { - let ignore = false; - if (tab !== "processing") return; - (async () => { - try { - const { file, meta } = await getLatestUpload(); - if (!ignore && meta) { - setFileMeta(meta as UploadedFileMeta); - if (file) { - setUploadedFile(file); - } - setLoadedFromCache(true); - } - } catch {} - })(); - return () => { - ignore = true; - }; - }, [tab]); function renderTabContent() { - switch (tab) { - case "processing": - return ( -
-

Upload & Process Data

-

Upload a CSV / JSON / text file. We will later parse, detect PII, and queue analyses.

-
-
-

Drag & drop a CSV / JSON / TXT here, or click to browse.

-
- -
-
- - {progress > 0 && ( -
-
-
-
-
{progressLabel} {progress}%
-
- )} - {fileMeta && ( -
-
-
{fileMeta.name}
-
{Math.round(fileMeta.size / 1024)} KB
-
- {loadedFromCache && ( -
Loaded from browser cache
- )} -
{fileMeta.type || "Unknown type"}
- {/* Table preview when structured data detected; otherwise show text */} - {tablePreview && tablePreview.origin === 'csv' ? ( -
- - - - {tablePreview.headers.map((h, idx) => ( - - ))} - - - - {tablePreview.rows.map((r, i) => ( - - {r.map((c, j) => ( - - ))} - - ))} - -
{h}
{c}
-
- ) : ( -
-														{fileMeta.contentPreview || "(no preview)"}
-													
- )} - - {error && ( -
- ❌ {error} -
- )} - - {piiDetectionResult && ( -
- πŸ” PII Detection complete! Found {piiDetectionResult.summary.risky_columns_found} risky columns in {piiDetectionResult.file_type.toUpperCase()} file. -
- {piiDetectionResult.summary.high_risk_count} HIGH β€’ - {piiDetectionResult.summary.medium_risk_count} MEDIUM β€’ - {piiDetectionResult.summary.low_risk_count} LOW -
-

Review detected risks in the "Bias & Risk Mitigation" tab to choose anonymization strategies.

-
- )} - - {analyzeResult && ( -
- βœ… Analysis complete! View results in tabs. - - Download Report - -
- )} - - {cleanResult && ( -
- βœ… Cleaning complete! {cleanResult.summary.total_cells_affected} cells anonymized. - -
- )} - -
- - - -
-
- )} -
-
- ); - case "bias-analysis": - return ( -
-
-

Bias & Fairness Analysis

-

Comprehensive evaluation of algorithmic fairness across demographic groups

-
- - {analyzeResult ? ( -
- {/* Overall Bias Score Card */} -
-
-
-
Overall Bias Score
-
- {(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}% -
-
- {analyzeResult.bias_metrics.overall_bias_score < 0.3 ? ( - <> - - βœ“ Low Bias - - Excellent fairness - - ) : analyzeResult.bias_metrics.overall_bias_score < 0.5 ? ( - <> - - ⚠ Moderate Bias - - Monitor recommended - - ) : ( - <> - - βœ— High Bias - - Action required - - )} -
-
-
-
Violations
-
0 ? 'text-red-600' : 'text-green-600'}`}> - {analyzeResult.bias_metrics.violations_detected.length} -
-
-
- - {/* Interpretation */} -
-
INTERPRETATION
-

- {analyzeResult.bias_metrics.overall_bias_score < 0.3 - ? "Your model demonstrates strong fairness across demographic groups. Continue monitoring to ensure consistent performance." - : analyzeResult.bias_metrics.overall_bias_score < 0.5 - ? "Moderate bias detected. Review fairness metrics below and consider implementing mitigation strategies to reduce disparities." - : "Significant bias detected. Immediate action required to address fairness concerns before deployment. Review all violation details below."} -

-
-
+ const handleClean = async () => { + if (!uploadedFile) { + setError("No file uploaded"); + return; + } - {/* Model Performance Metrics */} -
-

- πŸ“Š - Model Performance Metrics -

-
-
-
ACCURACY
-
{(analyzeResult.model_performance.accuracy * 100).toFixed(1)}%
-
Overall correctness
-
-
-
PRECISION
-
{(analyzeResult.model_performance.precision * 100).toFixed(1)}%
-
Positive prediction accuracy
-
-
-
RECALL
-
{(analyzeResult.model_performance.recall * 100).toFixed(1)}%
-
True positive detection rate
-
-
-
F1 SCORE
-
{(analyzeResult.model_performance.f1_score * 100).toFixed(1)}%
-
Balanced metric
-
-
-
+ setIsProcessing(true); + setError(null); + setProgressLabel("Cleaning dataset..."); - {/* Fairness Metrics */} - {Object.keys(analyzeResult.bias_metrics.disparate_impact).length > 0 && ( -
-

- βš–οΈ - Fairness Metrics by Protected Attribute -

- - {Object.entries(analyzeResult.bias_metrics.disparate_impact).map(([attr, metrics]: [string, any]) => ( -
-
- - {attr.toUpperCase()} - -
- - {/* Disparate Impact */} - {metrics?.disparate_impact?.value !== undefined && ( -
-
-
-
DISPARATE IMPACT RATIO
-
{metrics.disparate_impact.value.toFixed(3)}
-
-
- {metrics.disparate_impact.fair ? 'βœ“ FAIR' : 'βœ— UNFAIR'} -
-
-
{metrics.disparate_impact.interpretation || 'Ratio of positive rates between groups'}
-
- Fair Range: {metrics.disparate_impact.threshold || 0.8} - {(1/(metrics.disparate_impact.threshold || 0.8)).toFixed(2)} - {metrics.disparate_impact.fair - ? " β€’ This ratio indicates balanced treatment across groups." - : " β€’ Ratio outside fair range suggests one group receives significantly different outcomes."} -
-
- )} - - {/* Statistical Parity */} - {metrics?.statistical_parity_difference?.value !== undefined && ( -
-
-
-
STATISTICAL PARITY DIFFERENCE
-
- {metrics.statistical_parity_difference.value.toFixed(3)} -
-
-
- {metrics.statistical_parity_difference.fair ? 'βœ“ FAIR' : 'βœ— UNFAIR'} -
-
-
{metrics.statistical_parity_difference.interpretation || 'Difference in positive rates'}
-
- Fair Threshold: Β±{metrics.statistical_parity_difference.threshold || 0.1} - {metrics.statistical_parity_difference.fair - ? " β€’ Difference within acceptable range for equal treatment." - : " β€’ Significant difference in positive outcome rates between groups."} -
-
- )} - - {/* Group Metrics */} - {metrics.group_metrics && ( -
-
GROUP PERFORMANCE
-
- {Object.entries(metrics.group_metrics).map(([group, groupMetrics]: [string, any]) => ( -
-
{group}
-
-
Positive Rate: {groupMetrics.positive_rate !== undefined ? (groupMetrics.positive_rate * 100).toFixed(1) : 'N/A'}%
-
Sample Size: {groupMetrics.sample_size ?? 'N/A'}
- {groupMetrics.tpr !== undefined &&
True Positive Rate: {(groupMetrics.tpr * 100).toFixed(1)}%
} -
-
- ))} -
-
- )} -
- ))} -
- )} + try { + const result = await cleanDataset(uploadedFile); + setCleanResult(result); + setProgressLabel("Cleaning complete!"); + } catch (err: any) { + setError(err.message || "Cleaning failed"); + } finally { + setIsProcessing(false); + } + }; + function tryParseCSV( + text: string, + maxRows = 50, + maxCols = 40, + ): TablePreviewData | null { + const lines = text.split(/\r?\n/).filter((l) => l.trim().length > 0); + if (lines.length < 2) return null; + const commaDensity = lines + .slice(0, 10) + .filter((l) => l.includes(",")).length; + if (commaDensity < 2) return null; + const parseLine = (line: string) => { + const out: string[] = []; + let cur = ""; + let inQuotes = false; + for (let i = 0; i < line.length; i++) { + const ch = line[i]; + if (ch === '"') { + if (inQuotes && line[i + 1] === '"') { + cur += '"'; + i++; + } else { + inQuotes = !inQuotes; + } + } else if (ch === "," && !inQuotes) { + out.push(cur); + cur = ""; + } else { + cur += ch; + } + } + out.push(cur); + return out.map((c) => c.trim()); + }; + const raw = lines.slice(0, maxRows).map(parseLine); + if (raw.length === 0) return null; + const headers = raw[0]; + const colCount = Math.min(headers.length, maxCols); + const rows = raw.slice(1).map((r) => r.slice(0, colCount)); + return { headers: headers.slice(0, colCount), rows, origin: "csv" }; + } - {/* Violations */} - {analyzeResult.bias_metrics.violations_detected.length > 0 && ( -
-

- ⚠️ - Fairness Violations Detected -

-
- {analyzeResult.bias_metrics.violations_detected.map((violation: any, i: number) => { - // Map bias violations to relevant GDPR articles - const gdprArticles = [ - { - article: 'Article 5(1)(a) - Lawfulness, Fairness, and Transparency', - explanation: 'Personal data must be processed fairly. Algorithmic bias violates the fairness principle.' - }, - { - article: 'Article 22 - Automated Decision-Making', - explanation: 'Individuals have the right not to be subject to decisions based solely on automated processing that produce legal or similarly significant effects, especially if discriminatory.' - }, - { - article: 'Recital 71 - Safeguards Against Discrimination', - explanation: 'Automated decision-making should not be based on special categories of data and should include safeguards to prevent discriminatory effects.' - } - ]; + // We no longer build table preview for JSON; revert JSON to raw text view. - // Add ECOA if dealing with credit/lending - const isCredit = violation.attribute && ( - violation.attribute.toLowerCase().includes('credit') || - violation.attribute.toLowerCase().includes('loan') || - violation.attribute.toLowerCase().includes('income') - ); + const processFile = useCallback(async (f: File) => { + if (!f) return; + const isCSV = /\.csv$/i.test(f.name); - return ( -
- {/* Violation Header */} -
- - {violation.severity} - -
-
- {violation.attribute}: {violation.metric} -
-
{violation.message}
-
-
+ if (!isCSV) { + try { + setProgressLabel("Uploading file..."); + setIsProcessing(true); + setError(null); - {/* Violation Details */} - {violation.details && ( -
-
πŸ“Š TECHNICAL DETAILS
-
{violation.details}
-
- )} + const formData = new FormData(); + formData.append("file", f); - {/* GDPR Articles Violated */} -
-
- βš–οΈ - GDPR ARTICLES VIOLATED -
-
- {gdprArticles.map((gdpr, idx) => ( -
-
{gdpr.article}
-
{gdpr.explanation}
-
- ))} - {isCredit && ( -
-
ECOA (Equal Credit Opportunity Act)
-
- Prohibits discrimination in credit decisions based on protected characteristics. This bias violation may constitute illegal discrimination. -
-
- )} -
-
+ const res = await fetch("http://localhost:8000/api/files", { + method: "POST", + body: formData, + }); - {/* Recommendations */} -
-
βœ“ RECOMMENDED ACTIONS
-
    -
  • - β€’ - Investigate and remediate bias in the {violation.attribute} attribute -
  • -
  • - β€’ - Implement fairness constraints during model training -
  • -
  • - β€’ - Consider rebalancing dataset or applying bias mitigation techniques -
  • -
  • - β€’ - Document fairness assessment in GDPR Article 35 DPIA (Data Protection Impact Assessment) -
  • - {violation.severity === 'HIGH' && ( -
  • - β€’ - URGENT: This high-severity violation requires immediate attention before deployment -
  • - )} -
-
-
- ); - })} -
-
- )} + if (!res.ok) { + const errText = await res.text(); + throw new Error(errText || "Upload failed"); + } - {/* Key Insights */} -
-

- πŸ’‘ - Key Insights -

-
    -
  • - β€’ - Bias Score {(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}% indicates - {analyzeResult.bias_metrics.overall_bias_score < 0.3 ? ' strong fairness with minimal disparities across groups.' - : analyzeResult.bias_metrics.overall_bias_score < 0.5 ? ' moderate disparities that should be monitored and addressed.' - : ' significant unfairness requiring immediate remediation before deployment.'} -
  • -
  • - β€’ - Model achieves {(analyzeResult.model_performance.accuracy * 100).toFixed(1)}% accuracy, - but fairness metrics reveal how performance varies across demographic groups. -
  • - {analyzeResult.bias_metrics.violations_detected.length > 0 ? ( -
  • - β€’ - {analyzeResult.bias_metrics.violations_detected.length} violation(s) detected. - Review mitigation tab for recommended actions to improve fairness. -
  • - ) : ( -
  • - β€’ - No violations detected. Model meets fairness thresholds across all protected attributes. -
  • - )} -
-
-
- ) : ( -
-
πŸ“Š
-

No analysis results yet

-

Upload a dataset and click "Analyze" to see bias and fairness metrics

-
- )} -
- ); - case "risk-analysis": - return ( -
- {analyzeResult ? ( -
- {/* Header: RISK ANALYSIS SUMMARY */} -
-
-
-
- πŸ”’ -

RISK ANALYSIS SUMMARY

-
- -
- {/* Overall Risk */} -
-
πŸ“Š Overall Risk
-
- {(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}% -
-
- {analyzeResult.risk_assessment.risk_level} -
-
+ // The backend returns a CSV file + const blob = await res.blob(); + const downloadUrl = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = downloadUrl; + a.download = "dataset.csv"; + document.body.appendChild(a); + a.click(); + a.remove(); - {/* Presidio Status */} -
-
πŸ”’ Detection Engine
-
- {analyzeResult.risk_assessment.presidio_enabled ? 'Presidio' : 'Regex'} -
-
- - {analyzeResult.risk_assessment.presidio_enabled ? 'Enhanced' : 'Standard'} -
-
+ setProgressLabel("File processed successfully"); + } catch (err: any) { + setError(err.message || "File processing failed"); + } finally { + setIsProcessing(false); + } + return; // stop further processing since we’re done + } - {/* Violations */} -
-
⚠️ Violations
-
0 - ? 'text-red-400' - : 'text-green-400' - }`}> - {analyzeResult.risk_assessment.violations?.length || 0} -
-
- {(analyzeResult.risk_assessment.violations?.filter((v: any) => v.severity === 'CRITICAL').length || 0)} Critical Issues -
-
-
-
-
+ setProgress(0); + setUploadedFile(f); // Save the file for API calls - {/* Risk Categories Grid with Enhanced Design */} -
-
- πŸ“ˆ -

Category Scores

-
- -
- {Object.entries(analyzeResult.risk_assessment.risk_categories || {}).map(([category, score]: [string, any]) => { - const riskPct = (score * 100); - const riskLevel = riskPct >= 70 ? 'CRITICAL' : riskPct >= 50 ? 'HIGH' : riskPct >= 30 ? 'MEDIUM' : 'LOW'; - const categoryConfig: Record = { - privacy: { icon: 'οΏ½', label: 'Privacy', color: 'blue' }, - ethical: { icon: '🟠', label: 'Ethical', color: 'purple' }, - compliance: { icon: 'οΏ½', label: 'Compliance', color: 'indigo' }, - security: { icon: 'οΏ½', label: 'Security', color: 'cyan' }, - operational: { icon: '🟠', label: 'Operational', color: 'orange' }, - data_quality: { icon: 'οΏ½', label: 'Data Quality', color: 'green' } - }; - - const config = categoryConfig[category] || { icon: 'πŸ“Œ', label: category, color: 'slate' }; - - // Dynamic emoji based on risk level - const riskEmoji = riskPct < 25 ? '🟒' : riskPct < 50 ? '🟑' : '🟠'; - - return ( -
-
- -
-
- {riskEmoji} - - {riskLevel} - -
- -
- {config.label} -
- -
- {riskPct.toFixed(1)}% -
- - {/* Progress Bar */} -
-
-
-
-
-
-
- ); - })} -
-
+ // For large files, show a progress bar while reading the file stream (no preview) + if (f.size > 1024 * 1024) { + setProgressLabel("Uploading"); + const metaObj: UploadedFileMeta = { + name: f.name, + size: f.size, + type: f.type || "unknown", + contentPreview: `Loading partial preview (first ${Math.round(PREVIEW_BYTES / 1024)}KB)...`, + }; + setFileMeta(metaObj); + setTablePreview(null); + // Save to IndexedDB immediately so it persists without needing full read + (async () => { + try { + await saveLatestUpload(f, metaObj); + } catch {} + })(); + // Read head slice for partial preview & possible CSV table extraction + try { + const headBlob = f.slice(0, PREVIEW_BYTES); + const headReader = new FileReader(); + headReader.onload = async () => { + try { + const buf = headReader.result as ArrayBuffer; + const decoder = new TextDecoder(); + const text = decoder.decode(buf); + setFileMeta((prev) => + prev ? { ...prev, contentPreview: text.slice(0, 4000) } : prev, + ); + if (isCSV) { + const parsed = tryParseCSV(text); + setTablePreview(parsed); + } else { + setTablePreview(null); + } + try { + await saveLatestUpload(f, { + ...metaObj, + contentPreview: text.slice(0, 4000), + }); + } catch {} + } catch { + /* ignore */ + } + }; + headReader.readAsArrayBuffer(headBlob); + } catch { + /* ignore */ + } + // Use streaming read for progress without buffering entire file in memory + try { + const stream: ReadableStream | undefined = + typeof (f as any).stream === "function" + ? (f as any).stream() + : undefined; + if (stream && typeof stream.getReader === "function") { + const reader = stream.getReader(); + let loaded = 0; + const total = f.size || 1; + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + loaded += value ? 
value.length : 0; + const pct = Math.min(100, Math.round((loaded / total) * 100)); + setProgress(pct); + } + setProgress(100); + } else { + // Fallback to FileReader progress events + const reader = new FileReader(); + reader.onprogress = (evt) => { + if (evt.lengthComputable) { + const pct = Math.min( + 100, + Math.round((evt.loaded / evt.total) * 100), + ); + setProgress(pct); + } else { + setProgress((p) => (p < 90 ? p + 5 : p)); + } + }; + reader.onloadend = () => setProgress(100); + reader.onerror = () => setProgress(0); + reader.readAsArrayBuffer(f); + } + } catch { + setProgress(100); + } + return; + } + const reader = new FileReader(); + reader.onprogress = (evt) => { + if (evt.lengthComputable) { + const pct = Math.min(100, Math.round((evt.loaded / evt.total) * 100)); + setProgress(pct); + } else { + setProgress((p) => (p < 90 ? p + 5 : p)); + } + }; + reader.onload = async () => { + try { + const buf = reader.result as ArrayBuffer; + const decoder = new TextDecoder(); + const text = decoder.decode(buf); + const metaObj: UploadedFileMeta = { + name: f.name, + size: f.size, + type: f.type || "unknown", + contentPreview: text.slice(0, 4000), + }; + setFileMeta(metaObj); + if (isCSV) { + const parsed = tryParseCSV(text); + setTablePreview(parsed); + } else { + setTablePreview(null); + } + // Save file blob and meta to browser cache (IndexedDB) + try { + await saveLatestUpload(f, metaObj); + } catch {} + setProgressLabel("Processing"); + setProgress(100); + } catch (e) { + const metaObj: UploadedFileMeta = { + name: f.name, + size: f.size, + type: f.type || "unknown", + contentPreview: "Unable to decode preview.", + }; + setFileMeta(metaObj); + setTablePreview(null); + try { + await saveLatestUpload(f, metaObj); + } catch {} + setProgressLabel("Processing"); + setProgress(100); + } + }; + reader.onerror = () => { + setProgress(0); + }; + reader.readAsArrayBuffer(f); + }, []); - {/* Risky Features Analysis - Feature-Level Risk Display */} - 
{analyzeResult.risk_assessment.privacy_risks && ( -
-
- ⚠️ -

Risky Features & Columns

- - {typeof analyzeResult.risk_assessment.privacy_risks === 'object' && !Array.isArray(analyzeResult.risk_assessment.privacy_risks) - ? (analyzeResult.risk_assessment.privacy_risks.pii_count || 0) - : (Array.isArray(analyzeResult.risk_assessment.privacy_risks) ? analyzeResult.risk_assessment.privacy_risks.length : 0)} Risky Features Found - -
+ function handleFileChange(e: React.ChangeEvent) { + const f = e.target.files?.[0]; + processFile(f as File); + } - {/* Risky Features List */} - {(typeof analyzeResult.risk_assessment.privacy_risks === 'object' && - !Array.isArray(analyzeResult.risk_assessment.privacy_risks) && - analyzeResult.risk_assessment.privacy_risks.pii_detected && - analyzeResult.risk_assessment.privacy_risks.pii_detected.length > 0) ? ( -
- {/* Privacy Risk Metrics Summary */} -
-
-
Re-Identification Risk
-
0.7 ? 'text-red-600' : - (analyzeResult.risk_assessment.privacy_risks.reidentification_risk || 0) > 0.4 ? 'text-orange-600' : - 'text-green-600' - }`}> - {analyzeResult.risk_assessment.privacy_risks.reidentification_risk - ? (analyzeResult.risk_assessment.privacy_risks.reidentification_risk * 100).toFixed(0) - : 0}% -
-
Can individuals be identified?
-
-
-
Data Minimization
-
0.7 ? 'text-green-600' : - (analyzeResult.risk_assessment.privacy_risks.data_minimization_score || 0) > 0.4 ? 'text-orange-600' : - 'text-red-600' - }`}> - {analyzeResult.risk_assessment.privacy_risks.data_minimization_score - ? (analyzeResult.risk_assessment.privacy_risks.data_minimization_score * 100).toFixed(0) - : 0}% -
-
Collecting only necessary data
-
-
-
Anonymization Level
-
- {analyzeResult.risk_assessment.privacy_risks.anonymization_level || 'NONE'} -
-
Protection applied
-
-
-
Detection Method
-
- {analyzeResult.risk_assessment.privacy_risks.detection_method || 'Auto'} -
-
Analysis engine used
-
-
+ const onDragOver = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragging(true); + }; + const onDragLeave = () => setIsDragging(false); + const onDrop = (e: React.DragEvent) => { + e.preventDefault(); + setIsDragging(false); + const f = e.dataTransfer.files?.[0]; + processFile(f as File); + }; - {/* Individual Risky Features */} -
-

- πŸ” Detailed Feature Risk Analysis -

- {analyzeResult.risk_assessment.privacy_risks.pii_detected.map((pii: any, idx: number) => { - // Map PII types to risk explanations with GDPR Article references - const riskExplanations: Record = { - 'EMAIL_ADDRESS': { - why: 'Email addresses are direct identifiers that can be used to contact and track individuals across systems, creating privacy risks.', - impact: 'HIGH RISK: Can lead to identity theft, phishing attacks, unauthorized marketing, and privacy violations under GDPR Article 6.', - gdprArticles: [ - 'Article 4(1) - Definition of Personal Data: Email is personally identifiable information', - 'Article 6 - Lawful Basis Required: Processing requires consent, contract, or legitimate interest', - 'Article 7 - Consent Conditions: Must obtain explicit, informed consent', - 'Article 17 - Right to Erasure: Users can request email deletion', - 'Article 21 - Right to Object: Users can opt out of email processing' - ], - actions: ['Encrypt email addresses', 'Hash or pseudonymize for analytics', 'Implement consent management', 'Enable right to erasure', 'Provide opt-out mechanisms'] - }, - 'EMAIL': { - why: 'Email addresses are direct identifiers that can be used to contact and track individuals across systems.', - impact: 'HIGH RISK: Can lead to identity theft, phishing attacks, unauthorized marketing, and privacy violations.', - gdprArticles: [ - 'Article 4(1) - Personal Data Definition', - 'Article 6 - Lawful Basis for Processing', - 'Article 7 - Conditions for Consent', - 'Article 17 - Right to Erasure' - ], - actions: ['Encrypt email addresses', 'Implement consent management', 'Enable deletion on request', 'Apply data minimization'] - }, - 'PHONE_NUMBER': { - why: 'Phone numbers directly identify individuals and enable real-time contact, creating opportunities for harassment and fraud.', - impact: 'HIGH RISK: Enables unwanted contact, harassment, SIM swapping attacks, location tracking, and telemarketing violations.', - gdprArticles: [ - 'Article 4(1) - 
Personal Data: Phone numbers identify natural persons', - 'Article 6 - Lawfulness of Processing: Requires lawful basis', - 'Article 32 - Security of Processing: Must implement appropriate security measures', - 'Article 21 - Right to Object to Processing', - 'ePrivacy Directive - Consent required for electronic communications' - ], - actions: ['Remove if not essential', 'Apply tokenization', 'Restrict access controls', 'Implement call verification', 'Enable number suppression'] - }, - 'PHONE': { - why: 'Phone numbers are direct personal identifiers enabling contact and tracking.', - impact: 'HIGH RISK: Harassment, fraud, and unauthorized marketing.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 6 - Lawful Processing', - 'Article 32 - Security Measures' - ], - actions: ['Tokenize phone numbers', 'Implement access controls', 'Enable opt-out'] - }, - 'PERSON': { - why: 'Personal names are primary identifiers. Combined with other quasi-identifiers (age, location), they enable complete re-identification.', - impact: 'MEDIUM-HIGH RISK: When combined with location, age, or other quasi-identifiers, creates high re-identification risk violating k-anonymity.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Names identify natural persons', - 'Article 5(1)(c) - Data Minimization: Collect only necessary data', - 'Article 5(1)(e) - Storage Limitation: Keep only as long as necessary', - 'Article 25 - Data Protection by Design and Default', - 'Article 32(1)(a) - Pseudonymization and encryption requirements' - ], - actions: ['Use pseudonyms or IDs', 'Apply k-anonymity techniques (kβ‰₯5)', 'Separate name from sensitive attributes', 'Implement access logging', 'Apply l-diversity for protection'] - }, - 'NAME': { - why: 'Names are direct personal identifiers that enable individual identification.', - impact: 'MEDIUM-HIGH RISK: Re-identification when combined with other data.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 5(1)(c) - Data 
Minimization', - 'Article 25 - Data Protection by Design' - ], - actions: ['Use pseudonyms', 'Apply k-anonymity', 'Implement access logging'] - }, - 'LOCATION': { - why: 'Location data reveals where individuals live, work, and travel, exposing personal patterns, habits, and sensitive locations (hospitals, religious sites).', - impact: 'HIGH RISK: Can expose home addresses, workplaces, medical facilities, places of worship, creating discrimination and stalking risks.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Location identifies individuals', - 'Article 9(1) - Special Categories: Location at sensitive sites reveals protected characteristics', - 'Article 32 - Security Measures: Encryption and access controls required', - 'Article 35 - Data Protection Impact Assessment: Required for location tracking', - 'Recital 30 - Online identifiers and location data' - ], - actions: ['Generalize to zip code or city level', 'Apply geographic masking', 'Remove precise coordinates', 'Implement geofencing', 'Conduct DPIA', 'Apply differential privacy'] - }, - 'ADDRESS': { - why: 'Physical addresses directly identify individuals and their home locations.', - impact: 'HIGH RISK: Enables stalking, burglary, and privacy violations.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 9 - Special Categories (if sensitive location)', - 'Article 32 - Security Measures' - ], - actions: ['Generalize to zip code', 'Apply geographic masking', 'Restrict access'] - }, - 'SSN': { - why: 'Social Security Numbers are PERMANENT unique identifiers used across critical systems (banking, taxes, healthcare, employment).', - impact: 'CRITICAL RISK: Enables complete identity theft, fraudulent credit, tax fraud, medical identity theft, and unauthorized government benefits access.', - gdprArticles: [ - 'Article 9(1) - Special Category Data: Often linked to health/financial data', - 'Article 32 - Security of Processing: Encryption, access controls, pseudonymization mandatory', - 'Article 
33 - Breach Notification: Immediate notification required', - 'Article 34 - Data Subject Notification: Notify individuals of breaches', - 'Article 35 - Data Protection Impact Assessment: DPIA required', - 'Recital 75 - High risk to rights and freedoms' - ], - actions: ['REMOVE IMMEDIATELY if possible', 'Encrypt with AES-256', 'Never display in full', 'Implement strict access controls', 'Conduct DPIA', 'Enable breach detection', 'Maintain audit logs'] - }, - 'US_SSN': { - why: 'US Social Security Numbers are permanent government identifiers linked to financial, medical, employment, and government benefits.', - impact: 'CRITICAL RISK: Highest identity theft risk. Compromise leads to decades of fraud, financial damage, and cannot be changed.', - gdprArticles: [ - 'Article 9(1) - Special Category: Links to health and financial data', - 'Article 32 - Security Measures: State-of-the-art encryption required', - 'Article 33 - Breach Notification: 72-hour notification to supervisory authority', - 'Article 34 - Communication to Data Subjects: Immediate notification', - 'Article 35 - DPIA: Mandatory impact assessment' - ], - actions: ['Encrypt end-to-end with AES-256', 'Use last 4 digits only for display', 'Implement multi-factor authentication', 'Enable breach detection', 'Create comprehensive audit trails', 'Apply tokenization', 'Conduct annual security audits'] - }, - 'CREDIT_CARD': { - why: 'Credit card numbers provide direct access to financial accounts and purchasing power, subject to PCI-DSS and GDPR.', - impact: 'CRITICAL RISK: Financial fraud, unauthorized transactions, PCI-DSS violations (fines up to $500K/month), GDPR violations (4% global revenue).', - gdprArticles: [ - 'Article 4(1) - Personal Data: Financial identifiers', - 'Article 32 - Security of Processing: PCI-DSS Level 1 compliance mandatory', - 'Article 33 - Breach Notification: Immediate reporting required', - 'Article 34 - Data Subject Notification', - 'PCI-DSS Standards: Cannot store CVV, must 
tokenize' - ], - actions: ['Tokenize immediately', 'Never store CVV/CVC', 'Use PCI-compliant vault', 'Implement fraud detection', 'Apply end-to-end encryption', 'Use 3D Secure', 'Maintain PCI-DSS certification', 'Conduct quarterly security scans'] - }, - 'CARD': { - why: 'Card numbers enable direct financial access.', - impact: 'CRITICAL RISK: Financial fraud and PCI-DSS violations.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 32 - Security Measures', - 'PCI-DSS Compliance' - ], - actions: ['Tokenize immediately', 'Use PCI-compliant vault', 'Never store CVV'] - }, - 'IP_ADDRESS': { - why: 'IP addresses are online identifiers that track user behavior, reveal location, and enable device fingerprinting across websites.', - impact: 'MEDIUM RISK: Enables tracking across websites, reveals approximate location, can be linked to individuals, violates ePrivacy Directive.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Online identifier', - 'Article 6 - Lawful Basis: Requires consent or legitimate interest', - 'ePrivacy Directive - Consent for cookies and tracking', - 'Recital 30 - Online identifiers and IP addresses', - 'Article 21 - Right to Object to profiling' - ], - actions: ['Truncate last octet for IPv4', 'Hash for analytics', 'Implement IP anonymization', 'Reduce retention period to 90 days', 'Provide opt-out for tracking', 'Apply differential privacy'] - }, - 'IP': { - why: 'IP addresses are online identifiers enabling tracking.', - impact: 'MEDIUM RISK: Cross-site tracking and location revelation.', - gdprArticles: [ - 'Article 4(1) - Online Identifier', - 'Article 6 - Lawful Basis', - 'ePrivacy Directive' - ], - actions: ['Truncate IP addresses', 'Hash for analytics', 'Reduce retention'] - }, - 'MEDICAL_LICENSE': { - why: 'Medical information is SPECIAL CATEGORY DATA under GDPR Article 9, requiring the highest level of protection due to discrimination risks.', - impact: 'CRITICAL RISK: Health data breach leads to discrimination, insurance 
denial, employment issues, severe privacy violations, and HIPAA fines.', - gdprArticles: [ - 'Article 9(1) - Special Category (Health Data): Explicit consent required', - 'Article 9(2)(h) - Health/social care exception', - 'Article 32 - Security of Processing: Encryption mandatory', - 'Article 35 - DPIA: Impact assessment required', - 'Article 25 - Data Protection by Design', - 'HIPAA Compliance (if applicable)' - ], - actions: ['Encrypt with healthcare-grade security (AES-256)', 'Implement role-based access control (RBAC)', 'Conduct Data Protection Impact Assessment', 'Apply strict retention policies', 'Ensure HIPAA compliance', 'Use de-identification techniques', 'Maintain comprehensive audit logs'] - }, - 'MEDICAL': { - why: 'Medical data is special category data requiring explicit consent.', - impact: 'CRITICAL RISK: Discrimination and severe privacy violations.', - gdprArticles: [ - 'Article 9(1) - Special Category (Health)', - 'Article 32 - Security', - 'Article 35 - DPIA Required' - ], - actions: ['Encrypt data', 'Implement RBAC', 'Conduct DPIA'] - }, - 'US_DRIVER_LICENSE': { - why: 'Driver license numbers are government-issued identifiers used for identity verification across financial, healthcare, and government systems.', - impact: 'HIGH RISK: Identity fraud, fake ID creation, unauthorized access to services, and DMV record access.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Government identifier', - 'Article 6 - Lawful Processing: Document lawful basis', - 'Article 32 - Security Measures: Encryption and access controls', - 'Article 15 - Right of Access: Individuals can request data', - 'Article 17 - Right to Erasure: Deletion on request' - ], - actions: ['Hash or encrypt license numbers', 'Limit to identity verification only', 'Never display in full', 'Implement verification logging', 'Apply pseudonymization', 'Enable deletion mechanisms'] - }, - 'LICENSE': { - why: 'License numbers are government identifiers.', - impact: 'HIGH RISK: Identity 
fraud and unauthorized access.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 6 - Lawful Processing', - 'Article 32 - Security' - ], - actions: ['Hash license numbers', 'Limit to verification', 'Never display in full'] - }, - 'US_PASSPORT': { - why: 'Passport numbers are international identity documents used for travel and high-security identification, recognized globally.', - impact: 'CRITICAL RISK: International identity fraud, unauthorized travel booking, visa fraud, and access to secure facilities.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Unique government identifier', - 'Article 32 - Security Measures: State-of-the-art encryption required', - 'Article 35 - Impact Assessment: DPIA for high-risk processing', - 'Article 5(1)(f) - Integrity and Confidentiality', - 'Cross-border data transfer regulations' - ], - actions: ['Encrypt with strong encryption (AES-256)', 'Restrict access to authorized personnel only', 'Implement tamper detection', 'Apply geographic access controls', 'Maintain detailed audit trails', 'Use tokenization', 'Implement MFA for access'] - }, - 'PASSPORT': { - why: 'Passport numbers enable international identification.', - impact: 'CRITICAL RISK: International fraud and unauthorized travel.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 32 - Security Measures', - 'Article 35 - Impact Assessment' - ], - actions: ['Encrypt passports', 'Restrict access', 'Implement tamper detection'] - }, - 'US_BANK_NUMBER': { - why: 'Bank account numbers provide DIRECT ACCESS to financial accounts and enable ACH transfers, wire transfers, and direct debits.', - impact: 'CRITICAL RISK: Unauthorized withdrawals, ACH fraud, wire transfer fraud, complete account takeover, and financial ruin.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Financial identifier', - 'Article 32 - Security Measures: Encryption and tokenization mandatory', - 'Article 33 - Breach Notification: 72-hour notification', - 'Article 34 - Data 
Subject Notification: Immediate alert to account holders', - 'PSD2 - Strong Customer Authentication required' - ], - actions: ['Tokenize immediately', 'Never display account numbers', 'Use secure payment gateways', 'Implement transaction monitoring', 'Apply multi-factor authentication', 'Use Strong Customer Authentication (SCA)', 'Enable fraud alerts', 'Encrypt at rest and in transit'] - }, - 'BANK_ACCOUNT': { - why: 'Bank account numbers enable direct financial access.', - impact: 'CRITICAL RISK: Financial fraud and account takeover.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 32 - Security Measures', - 'Article 33 - Breach Notification' - ], - actions: ['Tokenize accounts', 'Never display numbers', 'Implement MFA'] - }, - 'DOB': { - why: 'Date of birth is a quasi-identifier that combined with other data enables re-identification and age-based discrimination.', - impact: 'MEDIUM-HIGH RISK: Combined with name and zip code, enables 87% re-identification rate. Age discrimination risk.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Quasi-identifier', - 'Article 5(1)(c) - Data Minimization: Use age ranges instead', - 'Article 9 - Special Categories: Can reveal protected characteristics', - 'Article 22 - Automated Decision-Making: Age-based profiling restrictions', - 'Recital 26 - Pseudonymization reduces risks' - ], - actions: ['Use age ranges instead of exact DOB', 'Apply k-anonymity (kβ‰₯5)', 'Generalize to year or month', 'Separate from other identifiers', 'Implement access controls', 'Apply l-diversity'] - }, - 'ZIP_CODE': { - why: 'ZIP codes are geographic quasi-identifiers. Research shows 87% of US population uniquely identified by ZIP + DOB + Gender.', - impact: 'MEDIUM RISK: When combined with DOB and gender, enables 87% re-identification. 
Reveals socioeconomic status and demographics.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Quasi-identifier', - 'Article 5(1)(c) - Data Minimization', - 'Article 32(1)(a) - Pseudonymization', - 'Recital 26 - Anonymization techniques', - 'Article 25 - Data Protection by Default' - ], - actions: ['Generalize to first 3 digits', 'Use geographic aggregation', 'Apply k-anonymity', 'Combine with other anonymization techniques', 'Separate from name and DOB'] - }, - 'IBAN_CODE': { - why: 'IBAN (International Bank Account Number) provides access to bank accounts across European Economic Area.', - impact: 'CRITICAL RISK: International financial fraud, SEPA direct debit fraud, and cross-border money theft.', - gdprArticles: [ - 'Article 4(1) - Personal Data', - 'Article 32 - Security of Processing', - 'Article 33 - Breach Notification', - 'PSD2 - Strong Customer Authentication' - ], - actions: ['Tokenize IBAN', 'Implement SCA', 'Use secure payment processors', 'Enable fraud monitoring', 'Apply encryption'] - }, - 'CRYPTO': { - why: 'Cryptocurrency addresses and wallets are permanent financial identifiers that cannot be changed if compromised.', - impact: 'CRITICAL RISK: Irreversible financial theft, no fraud protection, transaction history exposure, wallet draining.', - gdprArticles: [ - 'Article 4(1) - Personal Data: Cryptocurrency addresses can identify individuals', - 'Article 5(1)(f) - Security Principle', - 'Article 32 - Security Measures: Multi-signature and cold storage', - 'Article 17 - Right to Erasure: Blockchain immutability challenges' - ], - actions: ['Use multi-signature wallets', 'Implement cold storage', 'Never display private keys', 'Use hardware security modules', 'Apply address rotation', 'Implement withdrawal limits'] - } - }; + // Load last cached upload on mount (processing tab only) + useEffect(() => { + let ignore = false; + if (tab !== "processing") return; + (async () => { + try { + const { file, meta } = await getLatestUpload(); + if 
(!ignore && meta) { + setFileMeta(meta as UploadedFileMeta); + if (file) { + setUploadedFile(file); + } + setLoadedFromCache(true); + } + } catch {} + })(); + return () => { + ignore = true; + }; + }, [tab]); + function renderTabContent() { + switch (tab) { + case "processing": + return ( +
+

Upload & Process Data

+

+ Upload a CSV / JSON / text file. We will later parse, detect PII, + and queue analyses. +

+
+
+

+ Drag & drop a CSV / JSON / TXT here, or click to browse. +

+
+ +
+
+ + {progress > 0 && ( +
+
+
+
+
+ {progressLabel} {progress}% +
+
+ )} + {fileMeta && ( +
+
+
{fileMeta.name}
+
+ {Math.round(fileMeta.size / 1024)} KB +
+
+ {loadedFromCache && ( +
+ Loaded from browser cache +
+ )} +
+ {fileMeta.type || "Unknown type"} +
+ {/* Table preview when structured data detected; otherwise show text */} + {tablePreview && tablePreview.origin === "csv" ? ( +
+ + + + {tablePreview.headers.map((h, idx) => ( + + ))} + + + + {tablePreview.rows.map((r, i) => ( + + {r.map((c, j) => ( + + ))} + + ))} + +
+ {h} +
+ {c} +
+
+ ) : ( +
+                      {fileMeta.contentPreview || "(no preview)"}
+                    
+ )} - // Fallback for unmapped PII types - const riskInfo = riskExplanations[pii.type] || riskExplanations[pii.type.toUpperCase()] || { - why: 'This data type contains personal information that could identify individuals or reveal sensitive patterns according to GDPR Article 4(1).', - impact: 'POTENTIAL RISK: May violate privacy regulations if not properly protected. Could enable tracking, profiling, or discrimination.', - gdprArticles: [ - 'Article 4(1) - Definition of Personal Data', - 'Article 5 - Principles: Lawfulness, Fairness, Transparency', - 'Article 6 - Lawful Basis Required for Processing', - 'Article 24 - Responsibility of the Controller', - 'Article 25 - Data Protection by Design and Default' - ], - actions: ['Review necessity of this data field', 'Apply appropriate anonymization techniques', 'Implement access controls and audit logging', 'Document lawful basis for processing', 'Conduct Privacy Impact Assessment'] - }; + {error && ( +
+ ❌ {error} +
+ )} - return ( -
-
- -
- {/* Feature Header */} -
-
-
- - {pii.severity === 'CRITICAL' ? 'πŸ”΄' : - pii.severity === 'HIGH' ? '🟠' : - pii.severity === 'MEDIUM' ? '🟑' : 'πŸ”΅'} - -
-
- {pii.column} -
-
- PII Type: {pii.type.replace(/_/g, ' ')} - {pii.occurrences && ( - <> - β€’ - Found in: {pii.occurrences} rows - - )} - {pii.confidence && ( - <> - β€’ - Confidence: {(pii.confidence * 100).toFixed(0)}% - - )} -
-
-
-
- - {pii.severity} RISK - -
+ {piiDetectionResult && ( +
+ 🔍 PII Detection complete! Found{" "} + {piiDetectionResult.summary.risky_columns_found} risky + columns in {piiDetectionResult.file_type.toUpperCase()}{" "} + file. +
+ + {piiDetectionResult.summary.high_risk_count} HIGH + {" "} + β€’ + + {piiDetectionResult.summary.medium_risk_count} MEDIUM + {" "} + β€’ + + {piiDetectionResult.summary.low_risk_count} LOW + +
+

+ Review detected risks in the "Bias & Risk Mitigation" + tab to choose anonymization strategies. +

+
+ )} - {/* Why is this risky? */} -
-
- ❓ -
-
WHY IS THIS FEATURE RISKY?
-

{riskInfo.why}

-
-
-
+ {analyzeResult && ( +
+ ✅ Analysis complete! View results in tabs. + + Download Report + +
+ )} - {/* Impact */} -
-
- ⚠️ -
-
POTENTIAL IMPACT IF EXPOSED
-

{riskInfo.impact}

-
-
-
+ {cleanResult && ( +
+ ✅ Cleaning complete!{" "} + {cleanResult.summary.total_cells_affected} cells + anonymized. + +
+ )} - {/* GDPR Articles Violated */} -
-
- βš–οΈ -
-
GDPR ARTICLES VIOLATED / APPLICABLE
-
- {riskInfo.gdprArticles.map((article, i) => ( -
- β€’ - {article} -
- ))} -
-
-
-
+
+ + + +
+
+ )} +
+
+ ); + case "bias-analysis": + return ( +
+
+

+ Bias & Fairness Analysis +

+

+ Comprehensive evaluation of algorithmic fairness across + demographic groups +

+
- {/* Recommended Actions */} -
-
- βœ… -
-
RECOMMENDED ACTIONS TO REDUCE RISK
-
    - {riskInfo.actions.map((action, i) => ( -
  • - {i + 1}. - {action} -
  • - ))} -
-
-
-
-
-
- ); - })} -
-
- ) : ( -
- βœ“ -
-
No PII Detected
-
Dataset appears to be free of personally identifiable information
-
-
- )} -
- )} {/* Violations Section with Enhanced Design */} - {analyzeResult.risk_assessment.violations && - analyzeResult.risk_assessment.violations.length > 0 && ( -
-
- ⚠️ -

Violations

- - {analyzeResult.risk_assessment.violations.length} Issues Found - -
+ {analyzeResult ? ( +
+ {/* Overall Bias Score Card */} +
+
+
+
+ Overall Bias Score +
+
+ {( + analyzeResult.bias_metrics.overall_bias_score * 100 + ).toFixed(1)} + % +
+
+ {analyzeResult.bias_metrics.overall_bias_score < 0.3 ? ( + <> + + βœ“ Low Bias + + + Excellent fairness + + + ) : analyzeResult.bias_metrics.overall_bias_score < + 0.5 ? ( + <> + + ⚠ Moderate Bias + + + Monitor recommended + + + ) : ( + <> + + βœ— High Bias + + + Action required + + + )} +
+
+
+
+ Violations +
+
0 ? "text-red-600" : "text-green-600"}`} + > + {analyzeResult.bias_metrics.violations_detected.length} +
+
+
-
- {analyzeResult.risk_assessment.violations.map((violation: any, idx: number) => ( -
-
- -
-
-
- - {violation.severity} - - - {violation.category} - -
-
- -
- - {violation.severity === 'CRITICAL' ? 'πŸ”΄' : - violation.severity === 'HIGH' ? '🟠' : - violation.severity === 'MEDIUM' ? '🟑' : 'πŸ”΅'} - -
-
- {violation.message} -
- {violation.details && ( -
- {violation.details} -
- )} -
-
-
-
- ))} -
-
- )} + {/* Interpretation */} +
+
+ INTERPRETATION +
+

+ {analyzeResult.bias_metrics.overall_bias_score < 0.3 + ? "Your model demonstrates strong fairness across demographic groups. Continue monitoring to ensure consistent performance." + : analyzeResult.bias_metrics.overall_bias_score < 0.5 + ? "Moderate bias detected. Review fairness metrics below and consider implementing mitigation strategies to reduce disparities." + : "Significant bias detected. Immediate action required to address fairness concerns before deployment. Review all violation details below."} +

+
+
- {/* Key Insights Section with Enhanced Design */} - {analyzeResult.risk_assessment.insights && - analyzeResult.risk_assessment.insights.length > 0 && ( -
-
-
- -
-
- πŸ’‘ -

Key Insights

-
+ {/* Model Performance Metrics */} +
+

+ 📊 + Model Performance Metrics +

+
+
+
+ ACCURACY +
+
+ {( + analyzeResult.model_performance.accuracy * 100 + ).toFixed(1)} + % +
+
+ Overall correctness +
+
+
+
+ PRECISION +
+
+ {( + analyzeResult.model_performance.precision * 100 + ).toFixed(1)} + % +
+
+ Positive prediction accuracy +
+
+
+
+ RECALL +
+
+ {(analyzeResult.model_performance.recall * 100).toFixed( + 1, + )} + % +
+
+ True positive detection rate +
+
+
+
+ F1 SCORE +
+
+ {( + analyzeResult.model_performance.f1_score * 100 + ).toFixed(1)} + % +
+
+ Balanced metric +
+
+
+
-
- {analyzeResult.risk_assessment.insights.map((insight: string, idx: number) => ( -
- β€’ - {insight} -
- ))} -
-
-
- )} + {/* Fairness Metrics */} + {Object.keys(analyzeResult.bias_metrics.disparate_impact) + .length > 0 && ( +
+

+ ⚖️ + Fairness Metrics by Protected Attribute +

- {/* Compliance Status - Enhanced with GDPR Article Details */} - {analyzeResult.risk_assessment.compliance_risks && ( -
-
- πŸ“‹ -

Regulatory Compliance Status

-
+ {Object.entries( + analyzeResult.bias_metrics.disparate_impact, + ).map(([attr, metrics]: [string, any]) => ( +
+
+ + {attr.toUpperCase()} + +
-
- {Object.entries(analyzeResult.risk_assessment.compliance_risks) - .filter(([key]) => ['gdpr', 'ccpa', 'hipaa', 'ecoa'].includes(key)) - .map(([regulation, data]: [string, any]) => { - if (!data || typeof data !== 'object') return null; - - const regulationInfo: Record = { - gdpr: { - name: 'GDPR (General Data Protection Regulation)', - description: 'EU regulation protecting personal data and privacy', - keyArticles: [ - 'Article 5 - Principles (lawfulness, fairness, transparency, purpose limitation, data minimization)', - 'Article 6 - Lawful basis for processing', - 'Article 7 - Conditions for consent', - 'Article 9 - Processing special categories of personal data', - 'Article 15-22 - Data subject rights (access, rectification, erasure, portability)', - 'Article 25 - Data protection by design and by default', - 'Article 32 - Security of processing', - 'Article 35 - Data protection impact assessment' - ] - }, - ccpa: { - name: 'CCPA (California Consumer Privacy Act)', - description: 'California law providing privacy rights to consumers', - keyArticles: [ - 'Right to Know what personal information is collected', - 'Right to Delete personal information', - 'Right to Opt-Out of sale of personal information', - 'Right to Non-Discrimination for exercising CCPA rights', - 'Notice at Collection requirements' - ] - }, - hipaa: { - name: 'HIPAA (Health Insurance Portability and Accountability Act)', - description: 'US regulation protecting health information', - keyArticles: [ - 'Privacy Rule - Protected Health Information (PHI) safeguards', - 'Security Rule - Administrative, physical, technical safeguards', - 'Breach Notification Rule - Incident reporting requirements', - 'Minimum Necessary Standard - Access limitation' - ] - }, - ecoa: { - name: 'ECOA (Equal Credit Opportunity Act)', - description: 'US law prohibiting discrimination in credit decisions', - keyArticles: [ - 'Prohibition of discrimination based on protected characteristics', - 'Adverse action notice 
requirements', - 'Record retention requirements', - 'Monitoring and reporting obligations' - ] - } - }; + {/* Disparate Impact */} + {metrics?.disparate_impact?.value !== undefined && ( +
+
+
+
+ DISPARATE IMPACT RATIO +
+
+ {metrics.disparate_impact.value.toFixed(3)} +
+
+
+ {metrics.disparate_impact.fair + ? "βœ“ FAIR" + : "βœ— UNFAIR"} +
+
+
+ {metrics.disparate_impact.interpretation || + "Ratio of positive rates between groups"} +
+
+ Fair Range:{" "} + {metrics.disparate_impact.threshold || 0.8} -{" "} + {( + 1 / (metrics.disparate_impact.threshold || 0.8) + ).toFixed(2)} + {metrics.disparate_impact.fair + ? " β€’ This ratio indicates balanced treatment across groups." + : " β€’ Ratio outside fair range suggests one group receives significantly different outcomes."} +
+
+ )} - const info = regulationInfo[regulation] || { name: regulation.toUpperCase(), description: '', keyArticles: [] }; - - return ( -
- {/* Header */} -
-
-
-
- {info.name} -
- {info.description && ( -
{info.description}
- )} -
- - {data.status === 'NOT_APPLICABLE' ? 'N/A' : data.status} - -
-
+ {/* Statistical Parity */} + {metrics?.statistical_parity_difference?.value !== + undefined && ( +
+
+
+
+ STATISTICAL PARITY DIFFERENCE +
+
+ {metrics.statistical_parity_difference.value.toFixed( + 3, + )} +
+
+
+ {metrics.statistical_parity_difference.fair + ? "βœ“ FAIR" + : "βœ— UNFAIR"} +
+
+
+ {metrics.statistical_parity_difference + .interpretation || + "Difference in positive rates"} +
+
+ Fair Threshold: Β± + {metrics.statistical_parity_difference + .threshold || 0.1} + {metrics.statistical_parity_difference.fair + ? " β€’ Difference within acceptable range for equal treatment." + : " β€’ Significant difference in positive outcome rates between groups."} +
+
+ )} - {/* Content */} -
- {data.applicable === false ? ( -
- This regulation does not appear to apply to your dataset based on detected data types. -
- ) : ( -
- {/* Score */} - {data.score !== undefined && ( -
-
Compliance Score:
-
-
0.7 ? 'bg-green-500' : - data.score > 0.4 ? 'bg-yellow-500' : - 'bg-red-500' - }`} - style={{ width: `${data.score * 100}%` }} - >
-
-
- {(data.score * 100).toFixed(0)}% -
-
- )} + {/* Group Metrics */} + {metrics.group_metrics && ( +
+
+ GROUP PERFORMANCE +
+
+ {Object.entries(metrics.group_metrics).map( + ([group, groupMetrics]: [string, any]) => ( +
+
+ {group} +
+
+
+ Positive Rate:{" "} + + {groupMetrics.positive_rate !== + undefined + ? ( + groupMetrics.positive_rate * 100 + ).toFixed(1) + : "N/A"} + % + +
+
+ Sample Size:{" "} + + {groupMetrics.sample_size ?? "N/A"} + +
+ {groupMetrics.tpr !== undefined && ( +
+ True Positive Rate:{" "} + + {(groupMetrics.tpr * 100).toFixed( + 1, + )} + % + +
+ )} +
+
+ ), + )} +
+
+ )} +
+ ))} +
+ )} - {/* Compliant Checks */} - {data.compliant_checks && data.compliant_checks.length > 0 && ( -
-
βœ“ Compliant Areas:
-
- {data.compliant_checks.map((check: string, idx: number) => ( - - {check.replace(/_/g, ' ')} - - ))} -
-
- )} + {/* Violations */} + {analyzeResult.bias_metrics.violations_detected.length > 0 && ( +
+

+ ⚠️ + Fairness Violations Detected +

+
+ {analyzeResult.bias_metrics.violations_detected.map( + (violation: any, i: number) => { + // Map bias violations to relevant GDPR articles + const gdprArticles = [ + { + article: + "Article 5(1)(a) - Lawfulness, Fairness, and Transparency", + explanation: + "Personal data must be processed fairly. Algorithmic bias violates the fairness principle.", + }, + { + article: "Article 22 - Automated Decision-Making", + explanation: + "Individuals have the right not to be subject to decisions based solely on automated processing that produce legal or similarly significant effects, especially if discriminatory.", + }, + { + article: + "Recital 71 - Safeguards Against Discrimination", + explanation: + "Automated decision-making should not be based on special categories of data and should include safeguards to prevent discriminatory effects.", + }, + ]; - {/* Non-Compliant Checks */} - {data.non_compliant_checks && data.non_compliant_checks.length > 0 && ( -
-
⚠️ Non-Compliant Areas:
-
- {data.non_compliant_checks.map((check: string, idx: number) => ( - - {check.replace(/_/g, ' ')} - - ))} -
-
- )} + // Add ECOA if dealing with credit/lending + const isCredit = + violation.attribute && + (violation.attribute + .toLowerCase() + .includes("credit") || + violation.attribute + .toLowerCase() + .includes("loan") || + violation.attribute + .toLowerCase() + .includes("income")); - {/* Key Articles/Requirements */} - {info.keyArticles.length > 0 && ( -
- - πŸ“– View Key Requirements & Articles - -
- {info.keyArticles.map((article, idx) => ( -
- β€’ - {article} -
- ))} -
-
- )} + return ( +
+ {/* Violation Header */} +
+ + {violation.severity} + +
+
+ {violation.attribute}: {violation.metric} +
+
+ {violation.message} +
+
+
- {/* Bias Score for ECOA */} - {regulation === 'ecoa' && data.bias_score !== undefined && ( -
-
Bias Score (Discrimination Risk):
-
-
-
-
-
- {(data.bias_score * 100).toFixed(1)}% -
-
-
- {data.bias_score < 0.3 ? 'Low discrimination risk' : - data.bias_score < 0.5 ? 'Moderate discrimination risk - monitor closely' : - 'High discrimination risk - immediate remediation required'} -
-
- )} -
- )} -
-
- ); - })} -
+ {/* Violation Details */} + {violation.details && ( +
+
+ 📊 TECHNICAL DETAILS +
+
+ {violation.details} +
+
+ )} - {/* Compliance Recommendations */} - {analyzeResult.risk_assessment.compliance_risks.recommendations && - analyzeResult.risk_assessment.compliance_risks.recommendations.length > 0 && ( -
-
πŸ“Œ Compliance Recommendations
-
- {analyzeResult.risk_assessment.compliance_risks.recommendations.map((rec: any, idx: number) => ( -
- - {rec.priority} - -
-
{rec.recommendation}
- {rec.rationale && ( -
{rec.rationale}
- )} -
-
- ))} -
-
- )} -
- )} -
- ) : ( -
- πŸ”’ -

No risk analysis results yet

-

Upload a dataset and click "Analyze" to see comprehensive risk assessment

-
- )} -
- ); - case "bias-risk-mitigation": - return ( -
-
-

PII Detection & Anonymization Strategy

-

Review detected risky features and choose how to anonymize them

-
- - {piiDetectionResult ? ( -
- {/* File Info Banner */} -
-
- File: - {piiDetectionResult.filename} - - {piiDetectionResult.file_type.toUpperCase()} - - - {piiDetectionResult.dataset_info.rows} rows Γ— {piiDetectionResult.dataset_info.columns} columns - -
-
+ {/* GDPR Articles Violated */} +
+
+ ⚖️ + GDPR ARTICLES VIOLATED +
+
+ {gdprArticles.map((gdpr, idx) => ( +
+
+ {gdpr.article} +
+
+ {gdpr.explanation} +
+
+ ))} + {isCredit && ( +
+
+ ECOA (Equal Credit Opportunity Act) +
+
+ Prohibits discrimination in credit + decisions based on protected + characteristics. This bias violation may + constitute illegal discrimination. +
+
+ )} +
+
- {/* Summary Card */} -
-
-
-
TOTAL COLUMNS SCANNED
-
{piiDetectionResult.summary.total_columns_scanned}
-
-
-
HIGH RISK
-
{piiDetectionResult.summary.high_risk_count}
-
Must remove
-
-
-
MEDIUM RISK
-
{piiDetectionResult.summary.medium_risk_count}
-
Hash recommended
-
-
-
LOW RISK
-
{piiDetectionResult.summary.low_risk_count}
-
Mask/generalize
-
-
-
- {piiDetectionResult.message} -
-
+ {/* Recommendations */} +
+
+ ✓ RECOMMENDED ACTIONS +
+
    +
  • + β€’ + + Investigate and remediate bias in the{" "} + {violation.attribute} attribute + +
  • +
  • + β€’ + + Implement fairness constraints during + model training + +
  • +
  • + β€’ + + Consider rebalancing dataset or applying + bias mitigation techniques + +
  • +
  • + β€’ + + Document fairness assessment in GDPR + Article 35 DPIA (Data Protection Impact + Assessment) + +
  • + {violation.severity === "HIGH" && ( +
  • + β€’ + + URGENT: This high-severity violation + requires immediate attention before + deployment + +
  • + )} +
+
+
+ ); + }, + )} +
+
+ )} - {/* Risky Features List */} -
- {piiDetectionResult.risky_features.map((feature, idx) => { - const riskColor = - feature.risk_level === 'HIGH' ? 'red' : - feature.risk_level === 'MEDIUM' ? 'orange' : - feature.risk_level === 'LOW' ? 'yellow' : 'gray'; - - const bgColor = - feature.risk_level === 'HIGH' ? 'bg-red-50 border-red-300' : - feature.risk_level === 'MEDIUM' ? 'bg-orange-50 border-orange-300' : - feature.risk_level === 'LOW' ? 'bg-yellow-50 border-yellow-300' : 'bg-gray-50 border-gray-300'; - - return ( -
- {/* Header */} -
-
-
- - {feature.risk_level} RISK - - {feature.column} -
-
- Detected: {feature.entity_type} - β€’ - Confidence: {(feature.confidence * 100).toFixed(1)}% - β€’ - Occurrences: {feature.detection_count} -
-
-
+ {/* Key Insights */} +
+

+ 💡 + Key Insights +

+
    +
  • + β€’ + + + Bias Score{" "} + {( + analyzeResult.bias_metrics.overall_bias_score * 100 + ).toFixed(1)} + % + {" "} + indicates + {analyzeResult.bias_metrics.overall_bias_score < 0.3 + ? " strong fairness with minimal disparities across groups." + : analyzeResult.bias_metrics.overall_bias_score < 0.5 + ? " moderate disparities that should be monitored and addressed." + : " significant unfairness requiring immediate remediation before deployment."} + +
  • +
  • + β€’ + + + Model achieves{" "} + {( + analyzeResult.model_performance.accuracy * 100 + ).toFixed(1)} + % accuracy + + , but fairness metrics reveal how performance varies + across demographic groups. + +
  • + {analyzeResult.bias_metrics.violations_detected.length > + 0 ? ( +
  • + β€’ + + + { + analyzeResult.bias_metrics.violations_detected + .length + }{" "} + violation(s) + {" "} + detected. Review mitigation tab for recommended + actions to improve fairness. + +
  • + ) : ( +
  • + β€’ + + No violations detected. Model meets + fairness thresholds across all protected attributes. + +
  • + )} +
+
+
+ ) : ( +
+
📊
+

No analysis results yet

+

+ Upload a dataset and click "Analyze" to see bias and fairness + metrics +

+
+ )} +
+ ); + case "risk-analysis": + return ( +
+ {analyzeResult ? ( +
+ {/* Header: RISK ANALYSIS SUMMARY */} +
+
+
+
+ 🔒

+ RISK ANALYSIS SUMMARY +

+
- {/* Explanation */} -
-
WHY IS THIS RISKY?
-

{feature.explanation}

-
- GDPR Reference: {feature.gdpr_article} -
-
+
+ {/* Overall Risk */} +
+
+ 📊 Overall Risk +
+
+ {( + analyzeResult.risk_assessment.overall_risk_score * + 100 + ).toFixed(1)} + % +
+
+ {analyzeResult.risk_assessment.risk_level} +
+
- {/* Sample Values */} - {feature.sample_values.length > 0 && ( -
-
SAMPLE VALUES
-
- {feature.sample_values.map((val, i) => ( - - {val} - - ))} -
-
- )} + {/* Presidio Status */} +
+
+ 🔒 Detection Engine +
+
+ {analyzeResult.risk_assessment.presidio_enabled + ? "Presidio" + : "Regex"} +
+
+ + {analyzeResult.risk_assessment.presidio_enabled + ? "Enhanced" + : "Standard"} +
+
- {/* Recommended Strategy */} -
-
-
-
βœ“ RECOMMENDED STRATEGY
-
{feature.recommended_strategy}
-
{feature.strategy_description}
-
-
- Reversible: {feature.reversible ? 'Yes' : 'No'} -
-
- Use Cases: {feature.use_cases.join(', ')} -
-
-
- -
-
+ {/* Violations */} +
+
+ ⚠️ Violations +
+
0 + ? "text-red-400" + : "text-green-400" + }`} + > + {analyzeResult.risk_assessment.violations?.length || + 0} +
+
+ {analyzeResult.risk_assessment.violations?.filter( + (v: any) => v.severity === "CRITICAL", + ).length || 0}{" "} + Critical Issues +
+
+
+
+
+ {/* Risk Categories Grid with Enhanced Design */} +
+
+ πŸ“ˆ +

+ Category Scores +

+
- {/* Alternative Strategies */} -
- - View Alternative Strategies - -
- {Object.entries(piiDetectionResult.available_strategies) - .filter(([strategy]) => strategy !== feature.recommended_strategy) - .map(([strategy, details]: [string, any]) => ( -
-
{strategy}
-
{details.description}
-
- - {details.risk_level} Risk - - -
-
- ))} -
-
-
- ); - })} -
+
+ {Object.entries( + analyzeResult.risk_assessment.risk_categories || {}, + ).map(([category, score]: [string, any]) => { + const riskPct = score * 100; + const riskLevel = + riskPct >= 70 + ? "CRITICAL" + : riskPct >= 50 + ? "HIGH" + : riskPct >= 30 + ? "MEDIUM" + : "LOW"; + const categoryConfig: Record< + string, + { icon: string; label: string; color: string } + > = { + privacy: { icon: "�", label: "Privacy", color: "blue" }, + ethical: { + icon: "🟠", + label: "Ethical", + color: "purple", + }, + compliance: { + icon: "�", + label: "Compliance", + color: "indigo", + }, + security: { + icon: "�", + label: "Security", + color: "cyan", + }, + operational: { + icon: "🟠", + label: "Operational", + color: "orange", + }, + data_quality: { + icon: "�", + label: "Data Quality", + color: "green", + }, + }; - {/* Apply All Button */} -
- -
-
- ) : ( -
-
πŸ”
-

No PII detection results yet

-

Upload a dataset and click "πŸ” Detect PII" to scan for risky features

-
- )} -
- ); - case "results": - return ( -
-

Results Summary

- {(analyzeResult || cleanResult) ? ( -
- {analyzeResult && ( -
-

Analysis Results

-
-
Dataset: {analyzeResult.filename}
-
Rows: {analyzeResult.dataset_info.rows}
-
Columns: {analyzeResult.dataset_info.columns}
-
Bias Score: {(analyzeResult.bias_metrics.overall_bias_score * 100).toFixed(1)}%
-
Risk Score: {(analyzeResult.risk_assessment.overall_risk_score * 100).toFixed(1)}%
-
- - Download Full Report β†’ - -
- )} - - {cleanResult && ( -
-

Cleaning Results

-
-
Original: {cleanResult.dataset_info.original_rows} rows Γ— {cleanResult.dataset_info.original_columns} cols
-
Cleaned: {cleanResult.dataset_info.cleaned_rows} rows Γ— {cleanResult.dataset_info.cleaned_columns} cols
-
Cells Anonymized: {cleanResult.summary.total_cells_affected}
-
Columns Removed: {cleanResult.summary.columns_removed.length}
-
GDPR Compliant: {cleanResult.gdpr_compliance.length} articles applied
-
- -
- )} -
- ) : ( -

- Process a dataset to see aggregated results. -

- )} -
- ); - default: - return null; - } - } + const config = categoryConfig[category] || { + icon: "πŸ“Œ", + label: category, + color: "slate", + }; - return ( -
- {renderTabContent()} -
- ); -} \ No newline at end of file + // Dynamic emoji based on risk level + const riskEmoji = + riskPct < 25 ? "🟒" : riskPct < 50 ? "🟑" : "🟠"; + + return ( +
+
+ +
+
+ {riskEmoji} + + {riskLevel} + +
+ +
+ {config.label} +
+ +
+ {riskPct.toFixed(1)}% +
+ + {/* Progress Bar */} +
+
+
+
+
+
+
+ ); + })} +
+
+ {/* Risky Features Analysis - Feature-Level Risk Display */} + {analyzeResult.risk_assessment.privacy_risks && ( +
+
+ ⚠️ +

+ Risky Features & Columns +

+ + {typeof analyzeResult.risk_assessment.privacy_risks === + "object" && + !Array.isArray( + analyzeResult.risk_assessment.privacy_risks, + ) + ? analyzeResult.risk_assessment.privacy_risks + .pii_count || 0 + : Array.isArray( + analyzeResult.risk_assessment.privacy_risks, + ) + ? analyzeResult.risk_assessment.privacy_risks.length + : 0}{" "} + Risky Features Found + +
+ + {/* Risky Features List */} + {typeof analyzeResult.risk_assessment.privacy_risks === + "object" && + !Array.isArray( + analyzeResult.risk_assessment.privacy_risks, + ) && + analyzeResult.risk_assessment.privacy_risks.pii_detected && + analyzeResult.risk_assessment.privacy_risks.pii_detected + .length > 0 ? ( +
+ {/* Privacy Risk Metrics Summary */} +
+
+
+ Re-Identification Risk +
+
0.7 + ? "text-red-600" + : (analyzeResult.risk_assessment.privacy_risks + .reidentification_risk || 0) > 0.4 + ? "text-orange-600" + : "text-green-600" + }`} + > + {analyzeResult.risk_assessment.privacy_risks + .reidentification_risk + ? ( + analyzeResult.risk_assessment.privacy_risks + .reidentification_risk * 100 + ).toFixed(0) + : 0} + % +
+
+ Can individuals be identified? +
+
+
+
+ Data Minimization +
+
0.7 + ? "text-green-600" + : (analyzeResult.risk_assessment.privacy_risks + .data_minimization_score || 0) > 0.4 + ? "text-orange-600" + : "text-red-600" + }`} + > + {analyzeResult.risk_assessment.privacy_risks + .data_minimization_score + ? ( + analyzeResult.risk_assessment.privacy_risks + .data_minimization_score * 100 + ).toFixed(0) + : 0} + % +
+
+ Collecting only necessary data +
+
+
+
+ Anonymization Level +
+
+ {analyzeResult.risk_assessment.privacy_risks + .anonymization_level || "NONE"} +
+
+ Protection applied +
+
+
+
+ Detection Method +
+
+ {analyzeResult.risk_assessment.privacy_risks + .detection_method || "Auto"} +
+
+ Analysis engine used +
+
+
+ + {/* Individual Risky Features */} +
+

+ πŸ” Detailed Feature Risk Analysis +

+ {analyzeResult.risk_assessment.privacy_risks.pii_detected.map( + (pii: any, idx: number) => { + // Map PII types to risk explanations with GDPR Article references + const riskExplanations: Record< + string, + { + why: string; + impact: string; + gdprArticles: string[]; + actions: string[]; + } + > = { + EMAIL_ADDRESS: { + why: "Email addresses are direct identifiers that can be used to contact and track individuals across systems, creating privacy risks.", + impact: + "HIGH RISK: Can lead to identity theft, phishing attacks, unauthorized marketing, and privacy violations under GDPR Article 6.", + gdprArticles: [ + "Article 4(1) - Definition of Personal Data: Email is personally identifiable information", + "Article 6 - Lawful Basis Required: Processing requires consent, contract, or legitimate interest", + "Article 7 - Consent Conditions: Must obtain explicit, informed consent", + "Article 17 - Right to Erasure: Users can request email deletion", + "Article 21 - Right to Object: Users can opt out of email processing", + ], + actions: [ + "Encrypt email addresses", + "Hash or pseudonymize for analytics", + "Implement consent management", + "Enable right to erasure", + "Provide opt-out mechanisms", + ], + }, + EMAIL: { + why: "Email addresses are direct identifiers that can be used to contact and track individuals across systems.", + impact: + "HIGH RISK: Can lead to identity theft, phishing attacks, unauthorized marketing, and privacy violations.", + gdprArticles: [ + "Article 4(1) - Personal Data Definition", + "Article 6 - Lawful Basis for Processing", + "Article 7 - Conditions for Consent", + "Article 17 - Right to Erasure", + ], + actions: [ + "Encrypt email addresses", + "Implement consent management", + "Enable deletion on request", + "Apply data minimization", + ], + }, + PHONE_NUMBER: { + why: "Phone numbers directly identify individuals and enable real-time contact, creating opportunities for harassment and fraud.", + impact: + "HIGH RISK: Enables 
unwanted contact, harassment, SIM swapping attacks, location tracking, and telemarketing violations.", + gdprArticles: [ + "Article 4(1) - Personal Data: Phone numbers identify natural persons", + "Article 6 - Lawfulness of Processing: Requires lawful basis", + "Article 32 - Security of Processing: Must implement appropriate security measures", + "Article 21 - Right to Object to Processing", + "ePrivacy Directive - Consent required for electronic communications", + ], + actions: [ + "Remove if not essential", + "Apply tokenization", + "Restrict access controls", + "Implement call verification", + "Enable number suppression", + ], + }, + PHONE: { + why: "Phone numbers are direct personal identifiers enabling contact and tracking.", + impact: + "HIGH RISK: Harassment, fraud, and unauthorized marketing.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 6 - Lawful Processing", + "Article 32 - Security Measures", + ], + actions: [ + "Tokenize phone numbers", + "Implement access controls", + "Enable opt-out", + ], + }, + PERSON: { + why: "Personal names are primary identifiers. 
Combined with other quasi-identifiers (age, location), they enable complete re-identification.", + impact: + "MEDIUM-HIGH RISK: When combined with location, age, or other quasi-identifiers, creates high re-identification risk violating k-anonymity.", + gdprArticles: [ + "Article 4(1) - Personal Data: Names identify natural persons", + "Article 5(1)(c) - Data Minimization: Collect only necessary data", + "Article 5(1)(e) - Storage Limitation: Keep only as long as necessary", + "Article 25 - Data Protection by Design and Default", + "Article 32(1)(a) - Pseudonymization and encryption requirements", + ], + actions: [ + "Use pseudonyms or IDs", + "Apply k-anonymity techniques (kβ‰₯5)", + "Separate name from sensitive attributes", + "Implement access logging", + "Apply l-diversity for protection", + ], + }, + NAME: { + why: "Names are direct personal identifiers that enable individual identification.", + impact: + "MEDIUM-HIGH RISK: Re-identification when combined with other data.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 5(1)(c) - Data Minimization", + "Article 25 - Data Protection by Design", + ], + actions: [ + "Use pseudonyms", + "Apply k-anonymity", + "Implement access logging", + ], + }, + LOCATION: { + why: "Location data reveals where individuals live, work, and travel, exposing personal patterns, habits, and sensitive locations (hospitals, religious sites).", + impact: + "HIGH RISK: Can expose home addresses, workplaces, medical facilities, places of worship, creating discrimination and stalking risks.", + gdprArticles: [ + "Article 4(1) - Personal Data: Location identifies individuals", + "Article 9(1) - Special Categories: Location at sensitive sites reveals protected characteristics", + "Article 32 - Security Measures: Encryption and access controls required", + "Article 35 - Data Protection Impact Assessment: Required for location tracking", + "Recital 30 - Online identifiers and location data", + ], + actions: [ + "Generalize to zip 
code or city level", + "Apply geographic masking", + "Remove precise coordinates", + "Implement geofencing", + "Conduct DPIA", + "Apply differential privacy", + ], + }, + ADDRESS: { + why: "Physical addresses directly identify individuals and their home locations.", + impact: + "HIGH RISK: Enables stalking, burglary, and privacy violations.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 9 - Special Categories (if sensitive location)", + "Article 32 - Security Measures", + ], + actions: [ + "Generalize to zip code", + "Apply geographic masking", + "Restrict access", + ], + }, + SSN: { + why: "Social Security Numbers are PERMANENT unique identifiers used across critical systems (banking, taxes, healthcare, employment).", + impact: + "CRITICAL RISK: Enables complete identity theft, fraudulent credit, tax fraud, medical identity theft, and unauthorized government benefits access.", + gdprArticles: [ + "Article 9(1) - Special Category Data: Often linked to health/financial data", + "Article 32 - Security of Processing: Encryption, access controls, pseudonymization mandatory", + "Article 33 - Breach Notification: Immediate notification required", + "Article 34 - Data Subject Notification: Notify individuals of breaches", + "Article 35 - Data Protection Impact Assessment: DPIA required", + "Recital 75 - High risk to rights and freedoms", + ], + actions: [ + "REMOVE IMMEDIATELY if possible", + "Encrypt with AES-256", + "Never display in full", + "Implement strict access controls", + "Conduct DPIA", + "Enable breach detection", + "Maintain audit logs", + ], + }, + US_SSN: { + why: "US Social Security Numbers are permanent government identifiers linked to financial, medical, employment, and government benefits.", + impact: + "CRITICAL RISK: Highest identity theft risk. 
Compromise leads to decades of fraud, financial damage, and cannot be changed.", + gdprArticles: [ + "Article 9(1) - Special Category: Links to health and financial data", + "Article 32 - Security Measures: State-of-the-art encryption required", + "Article 33 - Breach Notification: 72-hour notification to supervisory authority", + "Article 34 - Communication to Data Subjects: Immediate notification", + "Article 35 - DPIA: Mandatory impact assessment", + ], + actions: [ + "Encrypt end-to-end with AES-256", + "Use last 4 digits only for display", + "Implement multi-factor authentication", + "Enable breach detection", + "Create comprehensive audit trails", + "Apply tokenization", + "Conduct annual security audits", + ], + }, + CREDIT_CARD: { + why: "Credit card numbers provide direct access to financial accounts and purchasing power, subject to PCI-DSS and GDPR.", + impact: + "CRITICAL RISK: Financial fraud, unauthorized transactions, PCI-DSS violations (fines up to $500K/month), GDPR violations (4% global revenue).", + gdprArticles: [ + "Article 4(1) - Personal Data: Financial identifiers", + "Article 32 - Security of Processing: PCI-DSS Level 1 compliance mandatory", + "Article 33 - Breach Notification: Immediate reporting required", + "Article 34 - Data Subject Notification", + "PCI-DSS Standards: Cannot store CVV, must tokenize", + ], + actions: [ + "Tokenize immediately", + "Never store CVV/CVC", + "Use PCI-compliant vault", + "Implement fraud detection", + "Apply end-to-end encryption", + "Use 3D Secure", + "Maintain PCI-DSS certification", + "Conduct quarterly security scans", + ], + }, + CARD: { + why: "Card numbers enable direct financial access.", + impact: + "CRITICAL RISK: Financial fraud and PCI-DSS violations.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 32 - Security Measures", + "PCI-DSS Compliance", + ], + actions: [ + "Tokenize immediately", + "Use PCI-compliant vault", + "Never store CVV", + ], + }, + IP_ADDRESS: { + why: "IP 
addresses are online identifiers that track user behavior, reveal location, and enable device fingerprinting across websites.", + impact: + "MEDIUM RISK: Enables tracking across websites, reveals approximate location, can be linked to individuals, violates ePrivacy Directive.", + gdprArticles: [ + "Article 4(1) - Personal Data: Online identifier", + "Article 6 - Lawful Basis: Requires consent or legitimate interest", + "ePrivacy Directive - Consent for cookies and tracking", + "Recital 30 - Online identifiers and IP addresses", + "Article 21 - Right to Object to profiling", + ], + actions: [ + "Truncate last octet for IPv4", + "Hash for analytics", + "Implement IP anonymization", + "Reduce retention period to 90 days", + "Provide opt-out for tracking", + "Apply differential privacy", + ], + }, + IP: { + why: "IP addresses are online identifiers enabling tracking.", + impact: + "MEDIUM RISK: Cross-site tracking and location revelation.", + gdprArticles: [ + "Article 4(1) - Online Identifier", + "Article 6 - Lawful Basis", + "ePrivacy Directive", + ], + actions: [ + "Truncate IP addresses", + "Hash for analytics", + "Reduce retention", + ], + }, + MEDICAL_LICENSE: { + why: "Medical information is SPECIAL CATEGORY DATA under GDPR Article 9, requiring the highest level of protection due to discrimination risks.", + impact: + "CRITICAL RISK: Health data breach leads to discrimination, insurance denial, employment issues, severe privacy violations, and HIPAA fines.", + gdprArticles: [ + "Article 9(1) - Special Category (Health Data): Explicit consent required", + "Article 9(2)(h) - Health/social care exception", + "Article 32 - Security of Processing: Encryption mandatory", + "Article 35 - DPIA: Impact assessment required", + "Article 25 - Data Protection by Design", + "HIPAA Compliance (if applicable)", + ], + actions: [ + "Encrypt with healthcare-grade security (AES-256)", + "Implement role-based access control (RBAC)", + "Conduct Data Protection Impact Assessment", + 
"Apply strict retention policies", + "Ensure HIPAA compliance", + "Use de-identification techniques", + "Maintain comprehensive audit logs", + ], + }, + MEDICAL: { + why: "Medical data is special category data requiring explicit consent.", + impact: + "CRITICAL RISK: Discrimination and severe privacy violations.", + gdprArticles: [ + "Article 9(1) - Special Category (Health)", + "Article 32 - Security", + "Article 35 - DPIA Required", + ], + actions: [ + "Encrypt data", + "Implement RBAC", + "Conduct DPIA", + ], + }, + US_DRIVER_LICENSE: { + why: "Driver license numbers are government-issued identifiers used for identity verification across financial, healthcare, and government systems.", + impact: + "HIGH RISK: Identity fraud, fake ID creation, unauthorized access to services, and DMV record access.", + gdprArticles: [ + "Article 4(1) - Personal Data: Government identifier", + "Article 6 - Lawful Processing: Document lawful basis", + "Article 32 - Security Measures: Encryption and access controls", + "Article 15 - Right of Access: Individuals can request data", + "Article 17 - Right to Erasure: Deletion on request", + ], + actions: [ + "Hash or encrypt license numbers", + "Limit to identity verification only", + "Never display in full", + "Implement verification logging", + "Apply pseudonymization", + "Enable deletion mechanisms", + ], + }, + LICENSE: { + why: "License numbers are government identifiers.", + impact: + "HIGH RISK: Identity fraud and unauthorized access.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 6 - Lawful Processing", + "Article 32 - Security", + ], + actions: [ + "Hash license numbers", + "Limit to verification", + "Never display in full", + ], + }, + US_PASSPORT: { + why: "Passport numbers are international identity documents used for travel and high-security identification, recognized globally.", + impact: + "CRITICAL RISK: International identity fraud, unauthorized travel booking, visa fraud, and access to secure 
facilities.", + gdprArticles: [ + "Article 4(1) - Personal Data: Unique government identifier", + "Article 32 - Security Measures: State-of-the-art encryption required", + "Article 35 - Impact Assessment: DPIA for high-risk processing", + "Article 5(1)(f) - Integrity and Confidentiality", + "Cross-border data transfer regulations", + ], + actions: [ + "Encrypt with strong encryption (AES-256)", + "Restrict access to authorized personnel only", + "Implement tamper detection", + "Apply geographic access controls", + "Maintain detailed audit trails", + "Use tokenization", + "Implement MFA for access", + ], + }, + PASSPORT: { + why: "Passport numbers enable international identification.", + impact: + "CRITICAL RISK: International fraud and unauthorized travel.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 32 - Security Measures", + "Article 35 - Impact Assessment", + ], + actions: [ + "Encrypt passports", + "Restrict access", + "Implement tamper detection", + ], + }, + US_BANK_NUMBER: { + why: "Bank account numbers provide DIRECT ACCESS to financial accounts and enable ACH transfers, wire transfers, and direct debits.", + impact: + "CRITICAL RISK: Unauthorized withdrawals, ACH fraud, wire transfer fraud, complete account takeover, and financial ruin.", + gdprArticles: [ + "Article 4(1) - Personal Data: Financial identifier", + "Article 32 - Security Measures: Encryption and tokenization mandatory", + "Article 33 - Breach Notification: 72-hour notification", + "Article 34 - Data Subject Notification: Immediate alert to account holders", + "PSD2 - Strong Customer Authentication required", + ], + actions: [ + "Tokenize immediately", + "Never display account numbers", + "Use secure payment gateways", + "Implement transaction monitoring", + "Apply multi-factor authentication", + "Use Strong Customer Authentication (SCA)", + "Enable fraud alerts", + "Encrypt at rest and in transit", + ], + }, + BANK_ACCOUNT: { + why: "Bank account numbers enable direct 
financial access.", + impact: + "CRITICAL RISK: Financial fraud and account takeover.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 32 - Security Measures", + "Article 33 - Breach Notification", + ], + actions: [ + "Tokenize accounts", + "Never display numbers", + "Implement MFA", + ], + }, + DOB: { + why: "Date of birth is a quasi-identifier that combined with other data enables re-identification and age-based discrimination.", + impact: + "MEDIUM-HIGH RISK: Combined with name and zip code, enables 87% re-identification rate. Age discrimination risk.", + gdprArticles: [ + "Article 4(1) - Personal Data: Quasi-identifier", + "Article 5(1)(c) - Data Minimization: Use age ranges instead", + "Article 9 - Special Categories: Can reveal protected characteristics", + "Article 22 - Automated Decision-Making: Age-based profiling restrictions", + "Recital 26 - Pseudonymization reduces risks", + ], + actions: [ + "Use age ranges instead of exact DOB", + "Apply k-anonymity (kβ‰₯5)", + "Generalize to year or month", + "Separate from other identifiers", + "Implement access controls", + "Apply l-diversity", + ], + }, + ZIP_CODE: { + why: "ZIP codes are geographic quasi-identifiers. Research shows 87% of US population uniquely identified by ZIP + DOB + Gender.", + impact: + "MEDIUM RISK: When combined with DOB and gender, enables 87% re-identification. 
Reveals socioeconomic status and demographics.", + gdprArticles: [ + "Article 4(1) - Personal Data: Quasi-identifier", + "Article 5(1)(c) - Data Minimization", + "Article 32(1)(a) - Pseudonymization", + "Recital 26 - Anonymization techniques", + "Article 25 - Data Protection by Default", + ], + actions: [ + "Generalize to first 3 digits", + "Use geographic aggregation", + "Apply k-anonymity", + "Combine with other anonymization techniques", + "Separate from name and DOB", + ], + }, + IBAN_CODE: { + why: "IBAN (International Bank Account Number) provides access to bank accounts across European Economic Area.", + impact: + "CRITICAL RISK: International financial fraud, SEPA direct debit fraud, and cross-border money theft.", + gdprArticles: [ + "Article 4(1) - Personal Data", + "Article 32 - Security of Processing", + "Article 33 - Breach Notification", + "PSD2 - Strong Customer Authentication", + ], + actions: [ + "Tokenize IBAN", + "Implement SCA", + "Use secure payment processors", + "Enable fraud monitoring", + "Apply encryption", + ], + }, + CRYPTO: { + why: "Cryptocurrency addresses and wallets are permanent financial identifiers that cannot be changed if compromised.", + impact: + "CRITICAL RISK: Irreversible financial theft, no fraud protection, transaction history exposure, wallet draining.", + gdprArticles: [ + "Article 4(1) - Personal Data: Cryptocurrency addresses can identify individuals", + "Article 5(1)(f) - Security Principle", + "Article 32 - Security Measures: Multi-signature and cold storage", + "Article 17 - Right to Erasure: Blockchain immutability challenges", + ], + actions: [ + "Use multi-signature wallets", + "Implement cold storage", + "Never display private keys", + "Use hardware security modules", + "Apply address rotation", + "Implement withdrawal limits", + ], + }, + }; + + // Fallback for unmapped PII types + const riskInfo = riskExplanations[pii.type] || + riskExplanations[pii.type.toUpperCase()] || { + why: "This data type contains 
personal information that could identify individuals or reveal sensitive patterns according to GDPR Article 4(1).", + impact: + "POTENTIAL RISK: May violate privacy regulations if not properly protected. Could enable tracking, profiling, or discrimination.", + gdprArticles: [ + "Article 4(1) - Definition of Personal Data", + "Article 5 - Principles: Lawfulness, Fairness, Transparency", + "Article 6 - Lawful Basis Required for Processing", + "Article 24 - Responsibility of the Controller", + "Article 25 - Data Protection by Design and Default", + ], + actions: [ + "Review necessity of this data field", + "Apply appropriate anonymization techniques", + "Implement access controls and audit logging", + "Document lawful basis for processing", + "Conduct Privacy Impact Assessment", + ], + }; + + return ( +
+
+ +
+ {/* Feature Header */} +
+
+
+ + {pii.severity === "CRITICAL" + ? "πŸ”΄" + : pii.severity === "HIGH" + ? "🟠" + : pii.severity === "MEDIUM" + ? "🟑" + : "πŸ”΅"} + +
+
+ {pii.column} +
+
+ + PII Type: + {" "} + {pii.type.replace(/_/g, " ")} + {pii.occurrences && ( + <> + + β€’ + + + Found in: + {" "} + {pii.occurrences} rows + + )} + {pii.confidence && ( + <> + + β€’ + + + Confidence: + {" "} + {( + pii.confidence * 100 + ).toFixed(0)} + % + + )} +
+
+
+
+ + {pii.severity} RISK + +
+ + {/* Why is this risky? */} +
+
+ ❓ +
+
+ WHY IS THIS FEATURE RISKY? +
+

+ {riskInfo.why} +

+
+
+
+ + {/* Impact */} +
+
+ ⚠️ +
+
+ POTENTIAL IMPACT IF EXPOSED +
+

+ {riskInfo.impact} +

+
+
+
+ + {/* GDPR Articles Violated */} +
+
+ βš–οΈ +
+
+ GDPR ARTICLES VIOLATED / APPLICABLE +
+
+ {riskInfo.gdprArticles.map( + (article, i) => ( +
+ + β€’ + + + {article} + +
+ ), + )} +
+
+
+
+ + {/* Recommended Actions */} +
+
+ βœ… +
+
+ RECOMMENDED ACTIONS TO REDUCE RISK +
+
    + {riskInfo.actions.map( + (action, i) => ( +
  • + + {i + 1}. + + + {action} + +
  • + ), + )} +
+
+
+
+
+
+ ); + }, + )} +
+
+ ) : ( +
+ ✓ +
+
+ No PII Detected +
+
+ Dataset appears to be free of personally + identifiable information +
+
+
+ )} +
+ )}{" "} + {/* Violations Section with Enhanced Design */} + {analyzeResult.risk_assessment.violations && + analyzeResult.risk_assessment.violations.length > 0 && ( +
+
+ ⚠️ +

+ Violations +

+ + {analyzeResult.risk_assessment.violations.length}{" "} + Issues Found + +
+ +
+ {analyzeResult.risk_assessment.violations.map( + (violation: any, idx: number) => ( +
+
+ +
+
+
+ + {violation.severity} + + + {violation.category} + +
+
+ +
+ + {violation.severity === "CRITICAL" + ? "πŸ”΄" + : violation.severity === "HIGH" + ? "🟠" + : violation.severity === "MEDIUM" + ? "🟑" + : "πŸ”΅"} + +
+
+ {violation.message} +
+ {violation.details && ( +
+ {violation.details} +
+ )} +
+
+
+
+ ), + )} +
+
+ )} + {/* Key Insights Section with Enhanced Design */} + {analyzeResult.risk_assessment.insights && + analyzeResult.risk_assessment.insights.length > 0 && ( +
+
+
+ +
+
+ 💡 +

+ Key Insights +

+
+ +
+ {analyzeResult.risk_assessment.insights.map( + (insight: string, idx: number) => ( +
+ + β€’ + + + {insight} + +
+ ), + )} +
+
+
+ )} + {/* Compliance Status - Enhanced with GDPR Article Details */} + {analyzeResult.risk_assessment.compliance_risks && ( +
+
+ πŸ“‹ +

+ Regulatory Compliance Status +

+
+ +
+ {Object.entries( + analyzeResult.risk_assessment.compliance_risks, + ) + .filter(([key]) => + ["gdpr", "ccpa", "hipaa", "ecoa"].includes(key), + ) + .map(([regulation, data]: [string, any]) => { + if (!data || typeof data !== "object") return null; + + const regulationInfo: Record< + string, + { + name: string; + description: string; + keyArticles: string[]; + } + > = { + gdpr: { + name: "GDPR (General Data Protection Regulation)", + description: + "EU regulation protecting personal data and privacy", + keyArticles: [ + "Article 5 - Principles (lawfulness, fairness, transparency, purpose limitation, data minimization)", + "Article 6 - Lawful basis for processing", + "Article 7 - Conditions for consent", + "Article 9 - Processing special categories of personal data", + "Article 15-22 - Data subject rights (access, rectification, erasure, portability)", + "Article 25 - Data protection by design and by default", + "Article 32 - Security of processing", + "Article 35 - Data protection impact assessment", + ], + }, + ccpa: { + name: "CCPA (California Consumer Privacy Act)", + description: + "California law providing privacy rights to consumers", + keyArticles: [ + "Right to Know what personal information is collected", + "Right to Delete personal information", + "Right to Opt-Out of sale of personal information", + "Right to Non-Discrimination for exercising CCPA rights", + "Notice at Collection requirements", + ], + }, + hipaa: { + name: "HIPAA (Health Insurance Portability and Accountability Act)", + description: + "US regulation protecting health information", + keyArticles: [ + "Privacy Rule - Protected Health Information (PHI) safeguards", + "Security Rule - Administrative, physical, technical safeguards", + "Breach Notification Rule - Incident reporting requirements", + "Minimum Necessary Standard - Access limitation", + ], + }, + ecoa: { + name: "ECOA (Equal Credit Opportunity Act)", + description: + "US law prohibiting discrimination in credit decisions", + 
keyArticles: [ + "Prohibition of discrimination based on protected characteristics", + "Adverse action notice requirements", + "Record retention requirements", + "Monitoring and reporting obligations", + ], + }, + }; + + const info = regulationInfo[regulation] || { + name: regulation.toUpperCase(), + description: "", + keyArticles: [], + }; + + return ( +
+ {/* Header */} +
+
+
+
+ {info.name} +
+ {info.description && ( +
+ {info.description} +
+ )} +
+ + {data.status === "NOT_APPLICABLE" + ? "N/A" + : data.status} + +
+
+ + {/* Content */} +
+ {data.applicable === false ? ( +
+ This regulation does not appear to apply to + your dataset based on detected data types. +
+ ) : ( +
+ {/* Score */} + {data.score !== undefined && ( +
+
+ Compliance Score: +
+
+
0.7 + ? "bg-green-500" + : data.score > 0.4 + ? "bg-yellow-500" + : "bg-red-500" + }`} + style={{ + width: `${data.score * 100}%`, + }} + >
+
+
+ {(data.score * 100).toFixed(0)}% +
+
+ )} + + {/* Compliant Checks */} + {data.compliant_checks && + data.compliant_checks.length > 0 && ( +
+
+ ✓ Compliant Areas: +
+
+ {data.compliant_checks.map( + (check: string, idx: number) => ( + + {check.replace(/_/g, " ")} + + ), + )} +
+
+ )} + + {/* Non-Compliant Checks */} + {data.non_compliant_checks && + data.non_compliant_checks.length > 0 && ( +
+
+ ⚠️ Non-Compliant Areas: +
+
+ {data.non_compliant_checks.map( + (check: string, idx: number) => ( + + {check.replace(/_/g, " ")} + + ), + )} +
+
+ )} + + {/* Key Articles/Requirements */} + {info.keyArticles.length > 0 && ( +
+ + 📖 View Key Requirements & Articles + +
+ {info.keyArticles.map( + (article, idx) => ( +
+ + β€’ + + + {article} + +
+ ), + )} +
+
+ )} + + {/* Bias Score for ECOA */} + {regulation === "ecoa" && + data.bias_score !== undefined && ( +
+
+ Bias Score (Discrimination Risk): +
+
+
+
+
+
+ {(data.bias_score * 100).toFixed( + 1, + )} + % +
+
+
+ {data.bias_score < 0.3 + ? "Low discrimination risk" + : data.bias_score < 0.5 + ? "Moderate discrimination risk - monitor closely" + : "High discrimination risk - immediate remediation required"} +
+
+ )} +
+ )} +
+
+ ); + })} +
+ + {/* Compliance Recommendations */} + {analyzeResult.risk_assessment.compliance_risks + .recommendations && + analyzeResult.risk_assessment.compliance_risks + .recommendations.length > 0 && ( +
+
+ 📌 Compliance Recommendations +
+
+ {analyzeResult.risk_assessment.compliance_risks.recommendations.map( + (rec: any, idx: number) => ( +
+ + {rec.priority} + +
+
+ {rec.recommendation} +
+ {rec.rationale && ( +
+ {rec.rationale} +
+ )} +
+
+ ), + )} +
+
+ )} +
+ )} +
+ ) : ( +
+ 🔒 +

+ No risk analysis results yet +

+

+ Upload a dataset and click "Analyze" to see comprehensive risk + assessment +

+
+ )} +
+ ); + case "bias-risk-mitigation": + return ( +
+
+

+ PII Detection & Anonymization Strategy +

+

+ Review detected risky features and choose how to anonymize them +

+
+ + {piiDetectionResult ? ( +
+ {/* File Info Banner */} +
+
+ File: + + {piiDetectionResult.filename} + + + {piiDetectionResult.file_type.toUpperCase()} + + + {piiDetectionResult.dataset_info.rows} rows Γ—{" "} + {piiDetectionResult.dataset_info.columns} columns + +
+
+ + {/* Summary Card */} +
+
+
+
+ TOTAL COLUMNS SCANNED +
+
+ {piiDetectionResult.summary.total_columns_scanned} +
+
+
+
+ HIGH RISK +
+
+ {piiDetectionResult.summary.high_risk_count} +
+
Must remove
+
+
+
+ MEDIUM RISK +
+
+ {piiDetectionResult.summary.medium_risk_count} +
+
+ Hash recommended +
+
+
+
+ LOW RISK +
+
+ {piiDetectionResult.summary.low_risk_count} +
+
+ Mask/generalize +
+
+
+
+ {piiDetectionResult.message} +
+
+ + {/* Risky Features List */} +
+ {piiDetectionResult.risky_features.map((feature, idx) => { + const riskColor = + feature.risk_level === "HIGH" + ? "red" + : feature.risk_level === "MEDIUM" + ? "orange" + : feature.risk_level === "LOW" + ? "yellow" + : "gray"; + + const bgColor = + feature.risk_level === "HIGH" + ? "bg-red-50 border-red-300" + : feature.risk_level === "MEDIUM" + ? "bg-orange-50 border-orange-300" + : feature.risk_level === "LOW" + ? "bg-yellow-50 border-yellow-300" + : "bg-gray-50 border-gray-300"; + + return ( +
+ {/* Header */} +
+
+
+ + {feature.risk_level} RISK + + + {feature.column} + +
+
+ Detected:{" "} + {feature.entity_type} + β€’ + + Confidence: + {" "} + {(feature.confidence * 100).toFixed(1)}% + β€’ + + Occurrences: + {" "} + {feature.detection_count} +
+
+
+ + {/* Explanation */} +
+
+ WHY IS THIS RISKY? +
+

+ {feature.explanation} +

+
+ GDPR Reference:{" "} + {feature.gdpr_article} +
+
+ + {/* Sample Values */} + {feature.sample_values.length > 0 && ( +
+
+ SAMPLE VALUES +
+
+ {feature.sample_values.map((val, i) => ( + + {val} + + ))} +
+
+ )} + + {/* Recommended Strategy */} +
+
+
+
+ ✓ RECOMMENDED STRATEGY +
+
+ {feature.recommended_strategy} +
+
+ {feature.strategy_description} +
+
+
+ Reversible:{" "} + {feature.reversible ? "Yes" : "No"} +
+
+ Use Cases:{" "} + {feature.use_cases.join(", ")} +
+
+
+ +
+
+ + {/* Alternative Strategies */} +
+ + View Alternative Strategies + +
+ {Object.entries( + piiDetectionResult.available_strategies, + ) + .filter( + ([strategy]) => + strategy !== feature.recommended_strategy, + ) + .map(([strategy, details]: [string, any]) => ( +
+
+ {strategy} +
+
+ {details.description} +
+
+ + {details.risk_level} Risk + + +
+
+ ))} +
+
+
+ ); + })} +
+ + {/* Apply All Button */} +
+ +
+
+ ) : ( +
+
🔍
+

+ No PII detection results yet +

+

+ Upload a dataset and click "🔍 Detect PII" to scan for risky + features +

+
+ )} +
+ ); + case "results": + return ( +
+

Results Summary

+ {analyzeResult || cleanResult ? ( +
+ {analyzeResult && ( +
+

Analysis Results

+
+
Dataset: {analyzeResult.filename}
+
Rows: {analyzeResult.dataset_info.rows}
+
Columns: {analyzeResult.dataset_info.columns}
+
+ Bias Score:{" "} + {( + analyzeResult.bias_metrics.overall_bias_score * 100 + ).toFixed(1)} + % +
+
+ Risk Score:{" "} + {( + analyzeResult.risk_assessment.overall_risk_score * 100 + ).toFixed(1)} + % +
+
+ + Download Full Report → + +
+ )} + + {cleanResult && ( +
+

Cleaning Results

+
+
+ Original: {cleanResult.dataset_info.original_rows} rows + Γ— {cleanResult.dataset_info.original_columns} cols +
+
+ Cleaned: {cleanResult.dataset_info.cleaned_rows} rows Γ—{" "} + {cleanResult.dataset_info.cleaned_columns} cols +
+
+ Cells Anonymized:{" "} + {cleanResult.summary.total_cells_affected} +
+
+ Columns Removed:{" "} + {cleanResult.summary.columns_removed.length} +
+
+ GDPR Compliant: {cleanResult.gdpr_compliance.length}{" "} + articles applied +
+
+ +
+ )} +
+ ) : ( +

+ Process a dataset to see aggregated results. +

+ )} +
+ ); + default: + return null; + } + } + + return ( +
+ {renderTabContent()} +
+ ); +}