"""
|
|
AI Governance Analysis Router
|
|
Handles bias detection and risk analysis endpoints
|
|
"""
|
|
|
|
from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import pandas as pd
import numpy as np
import io
import os
import sys
import json
from datetime import datetime
from typing import Dict, Any

# Import AI Governance modules: put the project root on sys.path so the
# top-level `ai_governance` package imports cleanly regardless of the
# working directory.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from ai_governance import AIGovernanceAnalyzer
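
# Note: only two analyzer methods are used below:
#   report = analyzer.analyze_dataframe(df, target_column, protected_attributes)
#   analyzer.save_report(report, full_report_path)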

router = APIRouter()


def convert_to_serializable(obj):
    """Convert numpy/pandas types to native Python types for JSON serialization."""
    if isinstance(obj, (np.integer, np.int64, np.int32)):
        return int(obj)
    elif isinstance(obj, (np.floating, np.float64, np.float32)):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    return obj
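
# Illustrative sanity check (comment only): numpy scalars and arrays come
# back as plain Python values that json.dumps accepts, e.g.
#   >>> convert_to_serializable({"n": np.int64(3), "xs": np.array([0.5])})
#   {'n': 3, 'xs': [0.5]}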


@router.post("/analyze")
|
|
async def analyze_dataset(file: UploadFile = File(...)):
|
|
"""
|
|
Analyze uploaded dataset for bias and risk
|
|
|
|
- **file**: CSV file to analyze
|
|
|
|
Returns:
|
|
- Analysis results (bias metrics, risk assessment)
|
|
- Report file path for download
|
|
"""
|
|
|
|
    # Validate the file type; filename can be absent, so guard against None
    if not (file.filename or "").lower().endswith(".csv"):
        raise HTTPException(status_code=400, detail="Only CSV files are supported")

    try:
        # Read the uploaded file into a DataFrame
        contents = await file.read()
        df = pd.read_csv(io.BytesIO(contents))

        if df.empty:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Initialize the AI Governance Analyzer
        analyzer = AIGovernanceAnalyzer()

        # Auto-detect the target column and protected attributes.
        # Target: last column (a common convention for labeled datasets).
        target_column = df.columns[-1]

        # Protected attributes: columns whose names mention a sensitive trait.
        # Note this is a substring match, so e.g. "percentage" would also
        # match "age"; tighten the check if that matters for your data.
        protected_keywords = ['gender', 'age', 'race', 'sex', 'ethnicity', 'religion', 'nationality']
        protected_attributes = [col for col in df.columns
                                if any(keyword in col.lower() for keyword in protected_keywords)]
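
        # Example (illustrative): for columns ["age", "income", "gender", "approved"],
        # target_column is "approved" and protected_attributes becomes
        # ["age", "gender"].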

        # Fall back to the first few categorical columns if nothing matched
        if not protected_attributes:
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            protected_attributes = [col for col in categorical_cols if col != target_column][:3]

print(f"Analyzing dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
|
|
print(f"Target column: {target_column}")
|
|
print(f"Protected attributes: {protected_attributes}")
|
|
|
|
# Run analysis
|
|
report = analyzer.analyze_dataframe(df, target_column, protected_attributes)
|
|
|
|
        # Generate a timestamped report filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_filename = (file.filename or "dataset").replace('.csv', '')
        report_filename = f"governance_report_{safe_filename}_{timestamp}.json"
        report_path = os.path.join("reports", report_filename)
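        # e.g. report_path == "reports/governance_report_loans_20240131_093000.json"
        # for an upload named "loans.csv" (illustrative timestamp)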

        # Save the full report under <project_root>/reports/
        full_report_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            report_path
        )
        os.makedirs(os.path.dirname(full_report_path), exist_ok=True)
        analyzer.save_report(report, full_report_path)

        # Prepare the response summary from the report sections
        bias_analysis = report.get("bias_analysis", {})
        model_metrics = report.get("model_performance", {}).get("metrics", {})
        risk_assessment = report.get("risk_assessment", {})

        response_data = {
            "status": "success",
            "filename": file.filename,
            "dataset_info": {
                "rows": len(df),
                "columns": len(df.columns),
                "features": list(df.columns)
            },
            "model_performance": {
                "accuracy": model_metrics.get("accuracy", 0),
                "precision": model_metrics.get("precision", 0),
                "recall": model_metrics.get("recall", 0),
                "f1_score": model_metrics.get("f1_score", 0)
            },
            "bias_metrics": {
                "overall_bias_score": bias_analysis.get("overall_bias_score", 0),
                # Both fields below currently expose the same full
                # fairness-metrics dict from the analyzer
                "disparate_impact": bias_analysis.get("fairness_metrics", {}),
                "statistical_parity": bias_analysis.get("fairness_metrics", {}),
                "violations_detected": bias_analysis.get("fairness_violations", [])
            },
            "risk_assessment": {
                "overall_risk_score": risk_assessment.get("overall_risk_score", 0),
                "privacy_risks": risk_assessment.get("privacy_risks", []),
                "ethical_risks": risk_assessment.get("ethical_risks", []),
                "compliance_risks": risk_assessment.get("risk_categories", {}).get("compliance_risks", []),
                "data_quality_risks": risk_assessment.get("risk_categories", {}).get("data_quality_risks", [])
            },
            "recommendations": report.get("recommendations", []),
            "report_file": f"/{report_path}",
            "timestamp": datetime.now().isoformat()
        }
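
        # Illustrative trimmed response (values invented):
        #   {"status": "success",
        #    "dataset_info": {"rows": 1000, "columns": 12, "features": [...]},
        #    "model_performance": {"accuracy": 0.91, ...},
        #    "bias_metrics": {"overall_bias_score": 0.18, ...},
        #    "risk_assessment": {"overall_risk_score": 0.35, ...},
        #    "report_file": "/reports/governance_report_loans_20240131_093000.json"}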
        # Debug: log the bias metrics being sent to the frontend
        print("\n📊 Sending bias metrics to frontend:")
        print(f"   Overall Bias Score: {response_data['bias_metrics']['overall_bias_score']:.3f}")
        print(f"   Violations: {len(response_data['bias_metrics']['violations_detected'])}")
        print(f"   Fairness Metrics: {len(response_data['bias_metrics']['disparate_impact'])} attributes")

        # Convert all numpy/pandas types to native Python types
        response_data = convert_to_serializable(response_data)

        return JSONResponse(content=response_data)

    except pd.errors.EmptyDataError:
        raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
    except HTTPException:
        # Re-raise deliberate 4xx errors (e.g. the empty-file check above)
        # so they are not swallowed and re-reported as a 500 below
        raise
    except Exception as e:
        print(f"Error during analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
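

# Example client call (illustrative; assumes the app mounts this router
# under an "/api" prefix -- adjust the URL to your deployment):
#
#   import requests
#   with open("data.csv", "rb") as fh:
#       resp = requests.post(
#           "http://localhost:8000/api/analyze",
#           files={"file": ("data.csv", fh, "text/csv")},
#       )
#   print(resp.json()["bias_metrics"]["overall_bias_score"])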