Mirror of https://github.com/PlatypusPus/MushroomEmpire.git (synced 2026-02-07 22:18:59 +00:00)
Commit: Cleaned the Structure
api/routers/analyze.py | 141 (new file)
@@ -0,0 +1,141 @@
"""
AI Governance Analysis Router

Handles bias detection and risk analysis endpoints.
"""

from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import pandas as pd
import numpy as np
import io
import os
import json
from datetime import datetime
from typing import Dict, Any

# Import AI Governance modules: add the project root to sys.path so that
# ai_governance is importable from this nested router module
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from ai_governance import AIGovernanceAnalyzer

router = APIRouter()

def convert_to_serializable(obj):
    """Convert numpy/pandas types to native Python types for JSON serialization."""
    if isinstance(obj, (np.integer, np.int64, np.int32)):
        return int(obj)
    elif isinstance(obj, (np.floating, np.float64, np.float32)):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    return obj
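
# Without this helper, numpy scalars left in the response (e.g. np.int64 row
# counts from pandas) would make json.dumps() raise
# "TypeError: Object of type int64 is not JSON serializable".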


@router.post("/analyze")
async def analyze_dataset(file: UploadFile = File(...)):
    """
    Analyze an uploaded dataset for bias and risk.

    - **file**: CSV file to analyze

    Returns:
    - Analysis results (bias metrics, risk assessment)
    - Report file path for download
    """
    # Validate the file type (the filename may be None for some clients)
    if not file.filename or not file.filename.endswith('.csv'):
        raise HTTPException(status_code=400, detail="Only CSV files are supported")

    try:
        # Read the uploaded file into a DataFrame
        contents = await file.read()
        df = pd.read_csv(io.BytesIO(contents))

        if df.empty:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Initialize the AI Governance Analyzer
        analyzer = AIGovernanceAnalyzer()

        # Auto-detect the target column: assume the last column (a common convention)
        target_column = df.columns[-1]

        # Protected attributes: columns whose names contain common sensitive keywords
        protected_keywords = ['gender', 'age', 'race', 'sex', 'ethnicity', 'religion', 'nationality']
        protected_attributes = [col for col in df.columns
                                if any(keyword in col.lower() for keyword in protected_keywords)]

        # If no protected attributes are found, fall back to the first few
        # categorical columns (excluding the target)
        if not protected_attributes:
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            protected_attributes = [col for col in categorical_cols if col != target_column][:3]
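        # Caveat: the keyword match is substring-based, so unrelated columns such
        # as 'percentage' or 'language' are also flagged (both contain 'age').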

        print(f"Analyzing dataset: {file.filename} ({len(df)} rows, {len(df.columns)} columns)")
        print(f"Target column: {target_column}")
        print(f"Protected attributes: {protected_attributes}")

        # Run the analysis
        report = analyzer.analyze_dataframe(df, target_column, protected_attributes)

        # Generate a timestamped report filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_filename = (file.filename or "dataset").replace('.csv', '')
        report_filename = f"governance_report_{safe_filename}_{timestamp}.json"
        report_path = os.path.join("reports", report_filename)

        # Save the full report to disk under the project root
        full_report_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            report_path
        )
        analyzer.save_report(report, full_report_path)
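        # NOTE: assumes a 'reports' directory exists under the project root
        # (or that analyzer.save_report() creates intermediate directories).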

        # Prepare the response summary
        response_data = {
            "status": "success",
            "filename": file.filename,
            "dataset_info": {
                "rows": len(df),
                "columns": len(df.columns),
                "features": list(df.columns)
            },
            "model_performance": {
                "accuracy": report.get("model_metrics", {}).get("accuracy", 0),
                "precision": report.get("model_metrics", {}).get("precision", 0),
                "recall": report.get("model_metrics", {}).get("recall", 0),
                "f1_score": report.get("model_metrics", {}).get("f1_score", 0)
            },
            "bias_metrics": {
                "overall_bias_score": report.get("bias_metrics", {}).get("overall_bias_score", 0),
                "disparate_impact": report.get("bias_metrics", {}).get("disparate_impact", {}),
                "statistical_parity": report.get("bias_metrics", {}).get("statistical_parity_difference", {}),
                "violations_detected": report.get("bias_metrics", {}).get("fairness_violations", [])
            },
            "risk_assessment": {
                "overall_risk_score": report.get("risk_metrics", {}).get("overall_risk_score", 0),
                "privacy_risks": report.get("risk_metrics", {}).get("privacy_risks", []),
                "ethical_risks": report.get("risk_metrics", {}).get("ethical_risks", []),
                "compliance_risks": report.get("risk_metrics", {}).get("compliance_risks", []),
                "data_quality_risks": report.get("risk_metrics", {}).get("data_quality_risks", [])
            },
            "recommendations": report.get("recommendations", []),
            "report_file": f"/{report_path}",
            "timestamp": datetime.now().isoformat()
        }

        # Convert all numpy/pandas types to native Python types
        response_data = convert_to_serializable(response_data)

        return JSONResponse(content=response_data)

    except pd.errors.EmptyDataError:
        raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the empty-file 400 above) rather
        # than letting the generic handler below mask them as a 500
        raise
    except Exception as e:
        print(f"Error during analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
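
For reference, a minimal client-side sketch for exercising this endpoint, assuming the app is served locally on port 8000 with this router mounted at the root (neither is shown in this commit; "loans.csv" is a placeholder file name):

    # hypothetical usage; base URL, mount path, and file name are assumptions
    import requests

    with open("loans.csv", "rb") as f:
        resp = requests.post(
            "http://localhost:8000/analyze",
            files={"file": ("loans.csv", f, "text/csv")},
        )
    resp.raise_for_status()
    print(resp.json()["bias_metrics"]["overall_bias_score"])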