Feat: Create the basic AI Governance package to use as a guide

This commit is contained in:
2025-11-06 23:26:50 +05:30
parent 61d02d1dac
commit 9a3d073815
14 changed files with 47207 additions and 0 deletions

ai_governance/__init__.py Normal file

@@ -0,0 +1,145 @@
"""
AI Governance Module - Bias Detection and Risk Analysis
"""
from .data_processor import DataProcessor
from .model_trainer import GeneralizedModelTrainer
from .bias_analyzer import BiasAnalyzer
from .risk_analyzer import RiskAnalyzer
from .report_generator import ReportGenerator, NumpyEncoder
import pandas as pd
import json
__version__ = '1.0.0'
__all__ = [
'DataProcessor',
'GeneralizedModelTrainer',
'BiasAnalyzer',
'RiskAnalyzer',
'ReportGenerator',
'NumpyEncoder',
'AIGovernanceAnalyzer'
]
class AIGovernanceAnalyzer:
"""
Main interface for AI Governance analysis
Example:
>>> analyzer = AIGovernanceAnalyzer()
>>> report = analyzer.analyze('data.csv', 'target', ['gender', 'age'])
>>> print(f"Bias Score: {report['summary']['overall_bias_score']:.3f}")
"""
def __init__(self):
"""Initialize the analyzer"""
self.processor = None
self.trainer = None
self.bias_analyzer = None
self.risk_analyzer = None
self.report_generator = None
def analyze(self, data_path, target_column, protected_attributes):
"""
Run complete AI governance analysis from file
Args:
data_path (str): Path to CSV file
target_column (str): Name of target column
protected_attributes (list): List of protected attribute column names
Returns:
dict: Complete analysis report
"""
df = pd.read_csv(data_path)
return self.analyze_dataframe(df, target_column, protected_attributes)
def analyze_dataframe(self, df, target_column, protected_attributes):
"""
Run complete AI governance analysis from DataFrame
Args:
df (pd.DataFrame): Input dataframe
target_column (str): Name of target column
protected_attributes (list): List of protected attribute column names
Returns:
dict: Complete analysis report
"""
# Step 1: Process data
self.processor = DataProcessor(df)
self.processor.target_column = target_column
self.processor.protected_attributes = protected_attributes
self.processor.prepare_data()
# Step 2: Train model
self.trainer = GeneralizedModelTrainer(
self.processor.X_train,
self.processor.X_test,
self.processor.y_train,
self.processor.y_test,
self.processor.feature_names
)
self.trainer.train()
self.trainer.evaluate()
# Step 3: Analyze bias
self.bias_analyzer = BiasAnalyzer(
self.processor.X_test,
self.processor.y_test,
self.trainer.y_pred,
self.processor.df,
self.processor.protected_attributes,
self.processor.target_column
)
bias_results = self.bias_analyzer.analyze()
# Step 4: Assess risks
self.risk_analyzer = RiskAnalyzer(
self.processor.df,
self.trainer.results,
bias_results,
self.processor.protected_attributes,
self.processor.target_column
)
risk_results = self.risk_analyzer.analyze()
# Step 5: Generate report
self.report_generator = ReportGenerator(
self.trainer.results,
bias_results,
risk_results,
self.processor.df
)
return self.report_generator.generate_report()
def save_report(self, report, output_path):
"""
Save report to JSON file
Args:
report (dict): Analysis report
output_path (str): Path to save JSON file
Returns:
str: Path to saved file
"""
with open(output_path, 'w') as f:
json.dump(report, f, indent=2, cls=NumpyEncoder)
return output_path
def get_summary(self, report):
"""
Get executive summary from report
Args:
report (dict): Analysis report
Returns:
dict: Summary metrics
"""
return report.get('summary', {})
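
For orientation, a minimal end-to-end usage sketch of the class above; the CSV path, column names, and output file below are placeholders rather than artifacts shipped with the package:

# Hedged usage sketch: 'loan_applications.csv', its column names, and the
# output path are placeholders, not files provided by this package.
from ai_governance import AIGovernanceAnalyzer

analyzer = AIGovernanceAnalyzer()

# Run the full pipeline: preprocessing, training, bias analysis, risk analysis, reporting
report = analyzer.analyze(
    data_path='loan_applications.csv',
    target_column='approved',
    protected_attributes=['gender', 'age'],
)

# Inspect the executive summary and persist the full report as JSON
summary = analyzer.get_summary(report)
print(f"Bias score: {summary['overall_bias_score']:.3f}")
print(f"Risk level: {summary['risk_level']}")
analyzer.save_report(report, 'governance_report.json')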

ai_governance/bias_analyzer.py Normal file

@@ -0,0 +1,288 @@
"""
Bias Analyzer Module
Detects and quantifies bias in ML models
"""
import numpy as np
import pandas as pd
from collections import defaultdict
class BiasAnalyzer:
"""Analyze bias in ML model predictions"""
def __init__(self, X_test, y_test, y_pred, original_df, protected_attributes, target_column):
self.X_test = X_test
self.y_test = y_test
self.y_pred = y_pred
self.original_df = original_df
self.protected_attributes = protected_attributes
self.target_column = target_column
self.results = {}
def analyze(self):
"""Perform comprehensive bias analysis"""
# Build results step by step: the violation and assessment checks read
# the metrics computed before them from self.results
self.results = {}
self.results['demographic_bias'] = self._analyze_demographic_bias()
self.results['fairness_metrics'] = self._calculate_fairness_metrics()
self.results['fairness_violations'] = self._detect_fairness_violations()
self.results['fairness_assessment'] = self._assess_overall_fairness()
# Calculate overall bias score
self.results['overall_bias_score'] = self._calculate_overall_bias_score()
return self.results
def _analyze_demographic_bias(self):
"""Analyze bias across demographic groups"""
bias_analysis = {}
for attr in self.protected_attributes:
if attr not in self.original_df.columns:
continue
# Get unique groups
groups = self.original_df[attr].unique()
# Calculate metrics for each group
group_metrics = {}
approval_rates = {}
for group in groups:
# Get indices for this group
group_mask = self.original_df[attr] == group
group_indices = self.original_df[group_mask].index
# Get test set indices that are in this group
test_indices = self.X_test.index
common_indices = group_indices.intersection(test_indices)
if len(common_indices) == 0:
continue
# Get predictions for this group
group_pred_indices = [i for i, idx in enumerate(test_indices) if idx in common_indices]
group_preds = self.y_pred[group_pred_indices] if len(group_pred_indices) > 0 else []
group_true = self.y_test.iloc[group_pred_indices] if len(group_pred_indices) > 0 else []
if len(group_preds) == 0:
continue
# Calculate approval rate (positive prediction rate)
approval_rate = np.mean(group_preds) * 100
approval_rates[str(group)] = float(approval_rate)
# Calculate accuracy for this group
accuracy = np.mean(group_preds == group_true) if len(group_true) > 0 else 0
group_metrics[str(group)] = {
'sample_size': len(group_preds),
'approval_rate': float(approval_rate),
'accuracy': float(accuracy),
'positive_predictions': int(np.sum(group_preds)),
'negative_predictions': int(len(group_preds) - np.sum(group_preds))
}
bias_analysis[attr] = {
'group_metrics': group_metrics,
'approval_rates': approval_rates,
'max_disparity': float(max(approval_rates.values()) - min(approval_rates.values())) if approval_rates else 0
}
return bias_analysis
def _calculate_fairness_metrics(self):
"""Calculate standard fairness metrics"""
fairness_metrics = {}
for attr in self.protected_attributes:
if attr not in self.original_df.columns:
continue
groups = self.original_df[attr].unique()
if len(groups) < 2:
continue
# Get metrics for each group
group_data = {}
for group in groups:
group_mask = self.original_df[attr] == group
group_indices = self.original_df[group_mask].index
test_indices = self.X_test.index
common_indices = group_indices.intersection(test_indices)
if len(common_indices) == 0:
continue
group_pred_indices = [i for i, idx in enumerate(test_indices) if idx in common_indices]
group_preds = self.y_pred[group_pred_indices]
group_true = self.y_test.iloc[group_pred_indices]
if len(group_preds) == 0:
continue
# Calculate metrics
positive_rate = np.mean(group_preds)
# True positive rate (TPR) - Recall
true_positives = np.sum((group_preds == 1) & (group_true == 1))
actual_positives = np.sum(group_true == 1)
tpr = true_positives / actual_positives if actual_positives > 0 else 0
# False positive rate (FPR)
false_positives = np.sum((group_preds == 1) & (group_true == 0))
actual_negatives = np.sum(group_true == 0)
fpr = false_positives / actual_negatives if actual_negatives > 0 else 0
group_data[str(group)] = {
'positive_rate': float(positive_rate),
'tpr': float(tpr),
'fpr': float(fpr),
'sample_size': len(group_preds)
}
if len(group_data) < 2:
continue
# Calculate disparate impact
group_names = list(group_data.keys())
reference_group = group_names[0]
comparison_group = group_names[1]
ref_positive_rate = group_data[reference_group]['positive_rate']
comp_positive_rate = group_data[comparison_group]['positive_rate']
disparate_impact = comp_positive_rate / ref_positive_rate if ref_positive_rate > 0 else 0
# Calculate statistical parity difference
statistical_parity_diff = comp_positive_rate - ref_positive_rate
# Calculate equal opportunity difference
ref_tpr = group_data[reference_group]['tpr']
comp_tpr = group_data[comparison_group]['tpr']
equal_opportunity_diff = comp_tpr - ref_tpr
fairness_metrics[attr] = {
'disparate_impact': {
'value': float(disparate_impact),
'threshold': 0.8,
'fair': 0.8 <= disparate_impact <= 1.25,
'interpretation': 'Ratio of positive rates between groups'
},
'statistical_parity_difference': {
'value': float(statistical_parity_diff),
'threshold': 0.1,
'fair': abs(statistical_parity_diff) < 0.1,
'interpretation': 'Difference in positive rates'
},
'equal_opportunity_difference': {
'value': float(equal_opportunity_diff),
'threshold': 0.1,
'fair': abs(equal_opportunity_diff) < 0.1,
'interpretation': 'Difference in true positive rates'
},
'group_metrics': group_data
}
return fairness_metrics
def _detect_fairness_violations(self):
"""Detect specific fairness violations"""
violations = []
fairness_metrics = self.results.get('fairness_metrics', {})
for attr, metrics in fairness_metrics.items():
# Check disparate impact
di = metrics.get('disparate_impact', {})
if not di.get('fair', True):
violations.append({
'attribute': attr,
'metric': 'Disparate Impact',
'value': di['value'],
'threshold': di['threshold'],
'severity': 'HIGH' if di['value'] < 0.5 or di['value'] > 2.0 else 'MEDIUM',
'message': f"Disparate impact ratio of {di['value']:.3f} violates fairness threshold (0.8-1.25)"
})
# Check statistical parity
spd = metrics.get('statistical_parity_difference', {})
if not spd.get('fair', True):
violations.append({
'attribute': attr,
'metric': 'Statistical Parity',
'value': spd['value'],
'threshold': spd['threshold'],
'severity': 'HIGH' if abs(spd['value']) > 0.2 else 'MEDIUM',
'message': f"Statistical parity difference of {spd['value']:.3f} exceeds threshold (0.1)"
})
# Check equal opportunity
eod = metrics.get('equal_opportunity_difference', {})
if not eod.get('fair', True):
violations.append({
'attribute': attr,
'metric': 'Equal Opportunity',
'value': eod['value'],
'threshold': eod['threshold'],
'severity': 'HIGH' if abs(eod['value']) > 0.2 else 'MEDIUM',
'message': f"Equal opportunity difference of {eod['value']:.3f} exceeds threshold (0.1)"
})
return violations
def _assess_overall_fairness(self):
"""Assess overall fairness of the model"""
violations = self.results.get('fairness_violations', [])
high_severity_count = sum(1 for v in violations if v['severity'] == 'HIGH')
medium_severity_count = sum(1 for v in violations if v['severity'] == 'MEDIUM')
passes_threshold = high_severity_count == 0 and medium_severity_count <= 1
assessment = {
'passes_fairness_threshold': passes_threshold,
'high_severity_violations': high_severity_count,
'medium_severity_violations': medium_severity_count,
'total_violations': len(violations),
'recommendation': self._get_fairness_recommendation(high_severity_count, medium_severity_count)
}
return assessment
def _get_fairness_recommendation(self, high_count, medium_count):
"""Get recommendation based on violation counts"""
if high_count > 0:
return "CRITICAL: Immediate action required to address high-severity fairness violations"
elif medium_count > 2:
return "WARNING: Multiple fairness issues detected. Review and address violations"
elif medium_count > 0:
return "CAUTION: Minor fairness issues detected. Monitor and consider improvements"
else:
return "GOOD: No significant fairness violations detected"
def _calculate_overall_bias_score(self):
"""Calculate overall bias score (0-1, lower is better)"""
scores = []
# Score from fairness metrics
fairness_metrics = self.results.get('fairness_metrics', {})
for attr, metrics in fairness_metrics.items():
# Disparate impact score (deviation from 1.0)
di_value = metrics.get('disparate_impact', {}).get('value', 1.0)
di_score = abs(1.0 - di_value)
scores.append(min(di_score, 1.0))
# Statistical parity score
spd_value = abs(metrics.get('statistical_parity_difference', {}).get('value', 0))
scores.append(min(spd_value * 5, 1.0)) # Scale to 0-1
# Equal opportunity score
eod_value = abs(metrics.get('equal_opportunity_difference', {}).get('value', 0))
scores.append(min(eod_value * 5, 1.0)) # Scale to 0-1
# Average all scores
overall_score = np.mean(scores) if scores else 0.0
return float(overall_score)
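
As a cross-check, the three fairness metrics reported above can be reproduced by hand from two groups' prediction vectors. A self-contained sketch with made-up numbers, using the same definitions and thresholds as the analyzer:

import numpy as np

# Made-up predictions and ground truth for two groups
preds_a, true_a = np.array([1, 1, 0, 1, 0]), np.array([1, 0, 0, 1, 1])
preds_b, true_b = np.array([1, 0, 0, 0, 0]), np.array([1, 1, 0, 0, 0])

def positive_rate(preds):
    return preds.mean()

def true_positive_rate(preds, true):
    actual_positives = (true == 1).sum()
    return ((preds == 1) & (true == 1)).sum() / actual_positives if actual_positives else 0.0

# Group A is the reference group, matching the analyzer's convention
disparate_impact = positive_rate(preds_b) / positive_rate(preds_a)         # fair if 0.8 <= x <= 1.25
statistical_parity_diff = positive_rate(preds_b) - positive_rate(preds_a)  # fair if |x| < 0.1
equal_opportunity_diff = true_positive_rate(preds_b, true_b) - true_positive_rate(preds_a, true_a)  # fair if |x| < 0.1

print(disparate_impact, statistical_parity_diff, equal_opportunity_diff)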

ai_governance/data_processor.py Normal file

@@ -0,0 +1,126 @@
"""
Data Processor Module
Handles data loading, preprocessing, and feature detection
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import re
class DataProcessor:
"""Process and prepare data for analysis"""
def __init__(self, df):
self.df = df.copy()
self.original_df = df.copy()
self.target_column = None
self.protected_attributes = []
self.numerical_features = []
self.categorical_features = []
self.feature_names = []
self.encoders = {}
self.scaler = StandardScaler()
self.X_train = None
self.X_test = None
self.y_train = None
self.y_test = None
# Auto-detect column types
self._detect_column_types()
def _detect_column_types(self):
"""Automatically detect numerical and categorical columns"""
for col in self.df.columns:
if self.df[col].dtype in ['int64', 'float64']:
# Check if it's actually categorical (few unique values)
if self.df[col].nunique() < 10 and self.df[col].nunique() / len(self.df) < 0.05:
self.categorical_features.append(col)
else:
self.numerical_features.append(col)
else:
self.categorical_features.append(col)
def _detect_pii_columns(self):
"""Detect potential PII columns"""
pii_keywords = [
'name', 'email', 'phone', 'address', 'ssn', 'social',
'passport', 'license', 'id', 'zip', 'postal'
]
pii_columns = []
for col in self.df.columns:
col_lower = col.lower()
if any(keyword in col_lower for keyword in pii_keywords):
pii_columns.append(col)
return pii_columns
def prepare_data(self, test_size=0.2, random_state=42):
"""Prepare data for model training"""
# Handle missing values
self.df = self.df.dropna()
# Separate features and target
if self.target_column is None:
# Auto-detect target (last column or column with 'target', 'label', 'status')
target_candidates = [col for col in self.df.columns
if any(keyword in col.lower() for keyword in ['target', 'label', 'status', 'class'])]
self.target_column = target_candidates[0] if target_candidates else self.df.columns[-1]
# Prepare features
feature_cols = [col for col in self.df.columns if col != self.target_column]
X = self.df[feature_cols].copy()
y = self.df[self.target_column].copy()
# Encode categorical variables
for col in self.categorical_features:
if col in X.columns:
le = LabelEncoder()
X[col] = le.fit_transform(X[col].astype(str))
self.encoders[col] = le
# Store feature names
self.feature_names = X.columns.tolist()
# Split data
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
X, y, test_size=test_size, random_state=random_state, stratify=y if y.nunique() < 10 else None
)
# Scale numerical features
numerical_cols = [col for col in self.numerical_features if col in self.X_train.columns]
if numerical_cols:
self.X_train[numerical_cols] = self.scaler.fit_transform(self.X_train[numerical_cols])
self.X_test[numerical_cols] = self.scaler.transform(self.X_test[numerical_cols])
return self.X_train, self.X_test, self.y_train, self.y_test
def get_data_summary(self):
"""Get summary statistics of the dataset"""
summary = {
'total_records': len(self.df),
'total_features': len(self.df.columns),
'numerical_features': len(self.numerical_features),
'categorical_features': len(self.categorical_features),
'missing_values': self.df.isnull().sum().to_dict(),
'target_column': self.target_column,
'protected_attributes': self.protected_attributes,
'pii_columns': self._detect_pii_columns(),
'target_distribution': self.df[self.target_column].value_counts().to_dict() if self.target_column else {}
}
return summary
def get_protected_attribute_stats(self):
"""Get statistics for protected attributes"""
stats = {}
for attr in self.protected_attributes:
if attr in self.df.columns:
stats[attr] = {
'unique_values': self.df[attr].nunique(),
'value_counts': self.df[attr].value_counts().to_dict(),
'missing_count': self.df[attr].isnull().sum()
}
return stats
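
The processor can also be driven on its own. A small sketch on a synthetic DataFrame; the column names and distributions are invented purely for illustration:

import numpy as np
import pandas as pd
from ai_governance import DataProcessor

# Synthetic records, invented for illustration only
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'income': rng.normal(50_000, 15_000, 200),
    'gender': rng.choice(['F', 'M'], 200),
    'approved': rng.integers(0, 2, 200),
})

processor = DataProcessor(df)
processor.target_column = 'approved'
processor.protected_attributes = ['gender']

# Encodes categoricals, scales numericals, and returns a stratified train/test split
X_train, X_test, y_train, y_test = processor.prepare_data(test_size=0.25)
print(processor.get_data_summary()['target_distribution'])
print(processor.get_protected_attribute_stats())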

ai_governance/model_trainer.py Normal file

@@ -0,0 +1,150 @@
"""
Generalized Model Trainer Module
Trains ML models for binary/multi-class classification
"""
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
accuracy_score, precision_score, recall_score, f1_score,
confusion_matrix, classification_report, roc_auc_score
)
import warnings
warnings.filterwarnings('ignore')
class GeneralizedModelTrainer:
"""Train and evaluate machine learning models"""
def __init__(self, X_train, X_test, y_train, y_test, feature_names):
self.X_train = X_train
self.X_test = X_test
self.y_train = y_train
self.y_test = y_test
self.feature_names = feature_names
self.model = None
self.y_pred = None
self.y_pred_proba = None
self.results = {}
def train(self, model_type='random_forest'):
"""Train the model"""
if model_type == 'random_forest':
self.model = RandomForestClassifier(
n_estimators=100,
max_depth=10,
min_samples_split=5,
min_samples_leaf=2,
random_state=42,
n_jobs=-1
)
elif model_type == 'gradient_boosting':
self.model = GradientBoostingClassifier(
n_estimators=100,
max_depth=5,
learning_rate=0.1,
random_state=42
)
elif model_type == 'logistic_regression':
self.model = LogisticRegression(
max_iter=1000,
random_state=42,
n_jobs=-1
)
else:
    raise ValueError(f"Unsupported model_type: {model_type}")
# Train the model
self.model.fit(self.X_train, self.y_train)
# Make predictions
self.y_pred = self.model.predict(self.X_test)
# Get prediction probabilities
if hasattr(self.model, 'predict_proba'):
self.y_pred_proba = self.model.predict_proba(self.X_test)
return self.model
def evaluate(self):
"""Evaluate model performance"""
# Calculate metrics
accuracy = accuracy_score(self.y_test, self.y_pred)
# Handle binary and multi-class cases
average = 'binary' if len(np.unique(self.y_test)) == 2 else 'weighted'
precision = precision_score(self.y_test, self.y_pred, average=average, zero_division=0)
recall = recall_score(self.y_test, self.y_pred, average=average, zero_division=0)
f1 = f1_score(self.y_test, self.y_pred, average=average, zero_division=0)
# Confusion matrix
cm = confusion_matrix(self.y_test, self.y_pred)
# Classification report
report = classification_report(self.y_test, self.y_pred, output_dict=True, zero_division=0)
# ROC AUC (for binary classification)
roc_auc = None
if len(np.unique(self.y_test)) == 2 and self.y_pred_proba is not None:
try:
roc_auc = roc_auc_score(self.y_test, self.y_pred_proba[:, 1])
except Exception:
roc_auc = None
# Feature importance
feature_importance = {}
if hasattr(self.model, 'feature_importances_'):
importances = self.model.feature_importances_
feature_importance = dict(zip(self.feature_names, importances))
# Sort by importance
feature_importance = dict(sorted(feature_importance.items(), key=lambda x: x[1], reverse=True))
# Store results
self.results = {
'model_type': type(self.model).__name__,
'metrics': {
'accuracy': float(accuracy),
'precision': float(precision),
'recall': float(recall),
'f1': float(f1),
'roc_auc': float(roc_auc) if roc_auc is not None else None
},
'confusion_matrix': cm.tolist(),
'classification_report': report,
'feature_importance': feature_importance,
'predictions': {
'y_true': self.y_test.tolist() if hasattr(self.y_test, 'tolist') else list(self.y_test),
'y_pred': self.y_pred.tolist() if hasattr(self.y_pred, 'tolist') else list(self.y_pred)
}
}
return self.results
def get_model_complexity(self):
"""Assess model complexity for risk analysis"""
complexity = {
'interpretability': 'medium',
'complexity_score': 0.5
}
if isinstance(self.model, LogisticRegression):
complexity['interpretability'] = 'high'
complexity['complexity_score'] = 0.2
elif isinstance(self.model, (RandomForestClassifier, GradientBoostingClassifier)):
complexity['interpretability'] = 'medium'
complexity['complexity_score'] = 0.6
return complexity
def predict(self, X):
"""Make predictions on new data"""
if self.model is None:
raise ValueError("Model not trained yet")
return self.model.predict(X)
def predict_proba(self, X):
"""Get prediction probabilities"""
if self.model is None:
raise ValueError("Model not trained yet")
if hasattr(self.model, 'predict_proba'):
return self.model.predict_proba(X)
return None
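
The trainer only needs pre-split features and labels, so it can be exercised in isolation. A sketch on synthetic data; the feature names and data-generating process are invented for illustration:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from ai_governance import GeneralizedModelTrainer

# Synthetic binary-classification data, invented for illustration only
rng = np.random.default_rng(42)
X = pd.DataFrame(rng.normal(size=(300, 3)), columns=['f1', 'f2', 'f3'])
y = (X['f1'] + rng.normal(scale=0.5, size=300) > 0).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

trainer = GeneralizedModelTrainer(X_train, X_test, y_train, y_test, feature_names=list(X.columns))
trainer.train(model_type='logistic_regression')   # or 'random_forest' / 'gradient_boosting'
results = trainer.evaluate()

print(results['metrics'])             # accuracy, precision, recall, f1, roc_auc
print(results['feature_importance'])  # empty for logistic regression (no feature_importances_)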

ai_governance/report_generator.py Normal file

@@ -0,0 +1,263 @@
"""
Report Generator Module
Generates comprehensive JSON reports
"""
import json
import numpy as np
from datetime import datetime
class NumpyEncoder(json.JSONEncoder):
"""Custom JSON encoder for numpy types"""
def default(self, obj):
if isinstance(obj, (np.integer, np.int64, np.int32)):
return int(obj)
elif isinstance(obj, (np.floating, np.float64, np.float32)):
return float(obj)
elif isinstance(obj, (np.ndarray,)):
return obj.tolist()
elif isinstance(obj, (np.bool_,)):
return bool(obj)
return super(NumpyEncoder, self).default(obj)
class ReportGenerator:
"""Generate comprehensive analysis reports"""
def __init__(self, model_results, bias_results, risk_results, df):
self.model_results = model_results
self.bias_results = bias_results
self.risk_results = risk_results
self.df = df
def generate_report(self):
"""Generate comprehensive JSON report"""
report = {
'metadata': self._generate_metadata(),
'summary': self._generate_summary(),
'model_performance': self._format_model_results(),
'bias_analysis': self._format_bias_results(),
'risk_assessment': self._format_risk_results(),
'key_findings': self._extract_key_findings(),
'recommendations': self._compile_recommendations(),
'detailed_metrics': self._compile_detailed_metrics()
}
return report
def _generate_metadata(self):
"""Generate report metadata"""
return {
'report_id': f"AIGov_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
'generated_at': datetime.now().isoformat(),
'report_version': '1.0',
'dataset_info': {
'total_records': len(self.df),
'total_features': len(self.df.columns),
'columns': list(self.df.columns)
}
}
def _generate_summary(self):
"""Generate executive summary"""
model_metrics = self.model_results.get('metrics', {})
return {
'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
'model_accuracy': model_metrics.get('accuracy', 0.0),
'fairness_violations_count': len(self.bias_results.get('fairness_violations', [])),
'passes_fairness_threshold': self.bias_results.get('fairness_assessment', {}).get('passes_fairness_threshold', False)
}
def _format_model_results(self):
"""Format model performance results"""
return {
'model_type': self.model_results.get('model_type', 'Unknown'),
'metrics': self.model_results.get('metrics', {}),
'confusion_matrix': self.model_results.get('confusion_matrix', []),
'top_features': dict(list(self.model_results.get('feature_importance', {}).items())[:10])
}
def _format_bias_results(self):
"""Format bias analysis results"""
return {
'overall_bias_score': self.bias_results.get('overall_bias_score', 0.0),
'fairness_metrics': self.bias_results.get('fairness_metrics', {}),
'fairness_violations': self.bias_results.get('fairness_violations', []),
'fairness_assessment': self.bias_results.get('fairness_assessment', {}),
'demographic_bias_summary': self._summarize_demographic_bias()
}
def _format_risk_results(self):
"""Format risk assessment results"""
return {
'overall_risk_score': self.risk_results.get('overall_risk_score', 0.0),
'risk_level': self.risk_results.get('risk_level', 'UNKNOWN'),
'risk_categories': self.risk_results.get('risk_categories', {}),
'privacy_risks': self._summarize_privacy_risks(),
'ethical_risks': self._summarize_ethical_risks()
}
def _summarize_demographic_bias(self):
"""Summarize demographic bias"""
demo_bias = self.bias_results.get('demographic_bias', {})
summary = {}
for attr, data in demo_bias.items():
summary[attr] = {
'max_disparity': data.get('max_disparity', 0),
'groups_analyzed': len(data.get('approval_rates', {}))
}
return summary
def _summarize_privacy_risks(self):
"""Summarize privacy risks"""
privacy = self.risk_results.get('privacy_risks', {})
return {
'pii_count': len(privacy.get('pii_detected', [])),
'anonymization_level': privacy.get('anonymization_level', 'UNKNOWN'),
'exposure_risk_count': len(privacy.get('exposure_risks', [])),
'gdpr_compliance_score': privacy.get('gdpr_compliance', {}).get('compliance_score', 0)
}
def _summarize_ethical_risks(self):
"""Summarize ethical risks"""
ethical = self.risk_results.get('ethical_risks', {})
return {
'fairness_issues_count': len(ethical.get('fairness_issues', [])),
'transparency_score': ethical.get('transparency_score', 0),
'bias_amplification_risk': ethical.get('bias_amplification_risk', 'UNKNOWN'),
'social_impact': ethical.get('social_impact_assessment', {})
}
def _extract_key_findings(self):
"""Extract key findings from analysis"""
findings = []
# Model performance findings
accuracy = self.model_results.get('metrics', {}).get('accuracy', 0)
if accuracy >= 0.8:
findings.append(f"✓ Model achieves good accuracy ({accuracy:.2%})")
else:
findings.append(f"⚠ Model accuracy is below optimal ({accuracy:.2%})")
# Bias findings
bias_score = self.bias_results.get('overall_bias_score', 0)
if bias_score < 0.3:
findings.append("✓ Low bias detected across protected attributes")
elif bias_score < 0.5:
findings.append("⚠ Moderate bias detected - monitoring recommended")
else:
findings.append("❌ High bias detected - immediate action required")
# Fairness violations
violations = self.bias_results.get('fairness_violations', [])
if violations:
high_sev = sum(1 for v in violations if v['severity'] == 'HIGH')
findings.append(f"{len(violations)} fairness violations detected ({high_sev} high severity)")
else:
findings.append("✓ No fairness violations detected")
# Privacy findings
privacy = self.risk_results.get('privacy_risks', {})
pii_count = len(privacy.get('pii_detected', []))
if pii_count > 0:
findings.append(f"{pii_count} columns contain potential PII")
else:
findings.append("✓ No obvious PII detected in dataset")
# Risk level
risk_level = self.risk_results.get('risk_level', 'UNKNOWN')
findings.append(f"Overall Risk Level: {risk_level}")
return findings
def _compile_recommendations(self):
"""Compile all recommendations"""
recommendations = []
# Get recommendations from each component
privacy_recs = self.risk_results.get('privacy_risks', {}).get('recommendations', [])
ethical_recs = self.risk_results.get('ethical_risks', {}).get('recommendations', [])
performance_recs = self.risk_results.get('model_performance_risks', {}).get('recommendations', [])
compliance_recs = self.risk_results.get('compliance_risks', {}).get('recommendations', [])
# Prioritize recommendations
all_recs = []
# High priority (from violations and high risks)
violations = self.bias_results.get('fairness_violations', [])
if violations:
all_recs.append({
'priority': 'HIGH',
'category': 'Fairness',
'recommendation': 'Address fairness violations in protected attributes'
})
if len(privacy_recs) > 0:
all_recs.append({
'priority': 'HIGH',
'category': 'Privacy',
'recommendation': privacy_recs[0]
})
# Medium priority
for rec in ethical_recs[:2]:
all_recs.append({
'priority': 'MEDIUM',
'category': 'Ethics',
'recommendation': rec
})
# Lower priority
for rec in performance_recs[:2]:
all_recs.append({
'priority': 'MEDIUM',
'category': 'Performance',
'recommendation': rec
})
for rec in compliance_recs[:2]:
all_recs.append({
'priority': 'MEDIUM',
'category': 'Compliance',
'recommendation': rec
})
# Convert to simple list with formatting
recommendations = [
f"[{r['priority']}] {r['category']}: {r['recommendation']}"
for r in all_recs[:10] # Limit to top 10
]
return recommendations
def _compile_detailed_metrics(self):
"""Compile detailed metrics for analysis"""
return {
'bias_metrics': {
'by_attribute': self.bias_results.get('fairness_metrics', {}),
'demographic_analysis': self.bias_results.get('demographic_bias', {})
},
'risk_breakdown': {
'privacy': self.risk_results.get('privacy_risks', {}),
'ethical': self.risk_results.get('ethical_risks', {}),
'compliance': self.risk_results.get('compliance_risks', {}),
'data_quality': self.risk_results.get('data_quality_risks', {})
},
'model_details': {
'classification_report': self.model_results.get('classification_report', {}),
'feature_importance': self.model_results.get('feature_importance', {})
}
}
def save_report(self, filepath):
"""Save report to JSON file"""
report = self.generate_report()
with open(filepath, 'w') as f:
json.dump(report, f, indent=2, cls=NumpyEncoder)
return filepath
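
NumpyEncoder is exported separately so callers can serialize report fragments that still contain numpy scalars or arrays. A quick sketch with made-up values:

import json
import numpy as np
from ai_governance import NumpyEncoder

# Mixed plain-Python / numpy payload, as the analyzers typically produce
payload = {
    'accuracy': np.float64(0.87),
    'positive_predictions': np.int64(42),
    'confusion_matrix': np.array([[50, 5], [8, 37]]),
    'passes_threshold': np.bool_(True),
}

# Plain json.dumps would raise TypeError on the numpy types; the custom encoder converts them
print(json.dumps(payload, indent=2, cls=NumpyEncoder))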

ai_governance/risk_analyzer.py Normal file

@@ -0,0 +1,445 @@
"""
Risk Analyzer Module
Assesses privacy and ethical risks in AI models
"""
import pandas as pd
import numpy as np
import re
from datetime import datetime
class RiskAnalyzer:
"""Analyze privacy and ethical risks"""
def __init__(self, df, model_results, bias_results, protected_attributes, target_column):
self.df = df
self.model_results = model_results
self.bias_results = bias_results
self.protected_attributes = protected_attributes
self.target_column = target_column
self.results = {}
def analyze(self):
"""Perform comprehensive risk analysis"""
# Build results step by step: compliance analysis reads the privacy risks
# computed before it from self.results
self.results = {}
self.results['privacy_risks'] = self._analyze_privacy_risks()
self.results['ethical_risks'] = self._analyze_ethical_risks()
self.results['model_performance_risks'] = self._analyze_model_performance_risks()
self.results['compliance_risks'] = self._analyze_compliance_risks()
self.results['data_quality_risks'] = self._analyze_data_quality_risks()
# Aggregate risk categories
self.results['risk_categories'] = self._aggregate_risk_categories()
# Calculate overall risk score
self.results['overall_risk_score'] = self._calculate_overall_risk_score()
# Determine risk level
self.results['risk_level'] = self._determine_risk_level()
return self.results
def _analyze_privacy_risks(self):
"""Analyze privacy-related risks"""
privacy_risks = {
'pii_detected': [],
'sensitive_attributes': self.protected_attributes,
'data_minimization_score': 0.0,
'anonymization_level': 'NONE',
'exposure_risks': [],
'gdpr_compliance': {},
'recommendations': []
}
# Detect PII columns
pii_patterns = {
'email': r'^.*email.*$',
'phone': r'^.*(phone|mobile|tel).*$',
'address': r'^.*(address|street|city|zip|postal).*$',
'name': r'^.*(name|firstname|lastname).*$',
'ssn': r'^.*(ssn|social.*security).*$',
'id': r'^.*(id|identifier|passport|license).*$',
'dob': r'^.*(dob|birth|birthday).*$',
'age': r'^.*age.*$',
'gender': r'^.*gender.*$'
}
for col in self.df.columns:
col_lower = col.lower()
for pii_type, pattern in pii_patterns.items():
if re.match(pattern, col_lower):
privacy_risks['pii_detected'].append({
'column': col,
'type': pii_type,
'severity': 'HIGH' if pii_type in ['ssn', 'email', 'phone'] else 'MEDIUM'
})
break
# Check data minimization
total_cols = len(self.df.columns)
essential_cols = len([col for col in self.df.columns if col != self.target_column])
privacy_risks['data_minimization_score'] = 1.0 - (essential_cols / total_cols) if total_cols > 0 else 0.0
# Assess anonymization level
if len(privacy_risks['pii_detected']) > 5:
privacy_risks['anonymization_level'] = 'NONE'
privacy_risks['exposure_risks'].append("High number of PII columns detected without anonymization")
elif len(privacy_risks['pii_detected']) > 0:
privacy_risks['anonymization_level'] = 'PARTIAL'
privacy_risks['exposure_risks'].append("Some PII columns detected - consider anonymization")
else:
privacy_risks['anonymization_level'] = 'FULL'
# GDPR compliance checks
privacy_risks['gdpr_compliance'] = {
'has_consent_mechanism': False, # Cannot determine from data alone
'data_portability': True, # CSV format allows export
'right_to_erasure': False, # Cannot determine from data alone
'data_protection_by_design': len(privacy_risks['pii_detected']) == 0,
'compliance_score': 0.25
}
# Recommendations
if len(privacy_risks['pii_detected']) > 0:
privacy_risks['recommendations'].append("Implement data anonymization techniques (hashing, tokenization)")
privacy_risks['recommendations'].append("Remove unnecessary PII columns")
privacy_risks['recommendations'].append("Implement access controls for sensitive data")
privacy_risks['recommendations'].append("Implement data encryption at rest and in transit")
privacy_risks['recommendations'].append("Establish data retention and deletion policies")
privacy_risks['recommendations'].append("Conduct regular privacy impact assessments")
return privacy_risks
def _analyze_ethical_risks(self):
"""Analyze ethical risks"""
ethical_risks = {
'fairness_issues': [],
'transparency_score': 0.0,
'transparency_notes': '',
'accountability_measures': [],
'social_impact_assessment': {},
'bias_amplification_risk': 'UNKNOWN',
'recommendations': []
}
# Fairness issues from bias analysis
violations = self.bias_results.get('fairness_violations', [])
for violation in violations:
ethical_risks['fairness_issues'].append(
f"{violation['attribute']}: {violation['message']} (Severity: {violation['severity']})"
)
# Transparency score based on model complexity
model_type = self.model_results.get('model_type', 'Unknown')
if model_type in ['LogisticRegression', 'DecisionTreeClassifier']:
ethical_risks['transparency_score'] = 0.9
ethical_risks['transparency_notes'] = "Model is highly interpretable"
elif model_type in ['RandomForestClassifier', 'GradientBoostingClassifier']:
ethical_risks['transparency_score'] = 0.6
ethical_risks['transparency_notes'] = "Model has moderate interpretability - feature importance available"
else:
ethical_risks['transparency_score'] = 0.3
ethical_risks['transparency_notes'] = "Model has low interpretability - consider using SHAP/LIME"
# Accountability measures
ethical_risks['accountability_measures'] = [
"Model versioning and tracking",
"Prediction logging for audit trail",
"Regular bias monitoring",
"Human review for high-stakes decisions"
]
# Social impact assessment
ethical_risks['social_impact_assessment'] = {
'affected_groups': self.protected_attributes,
'potential_harms': [
"Unfair denial of opportunities for protected groups",
"Reinforcement of historical biases",
"Lack of recourse for affected individuals"
],
'mitigation_strategies': [
"Regular fairness audits",
"Diverse dataset collection",
"Stakeholder engagement",
"Appeal and review mechanisms"
]
}
# Bias amplification risk
overall_bias = self.bias_results.get('overall_bias_score', 0)
if overall_bias > 0.5:
ethical_risks['bias_amplification_risk'] = 'HIGH'
elif overall_bias > 0.3:
ethical_risks['bias_amplification_risk'] = 'MEDIUM'
else:
ethical_risks['bias_amplification_risk'] = 'LOW'
# Recommendations
ethical_risks['recommendations'] = [
"Implement regular fairness audits and monitoring",
"Use explainable AI techniques (SHAP, LIME) for transparency",
"Establish ethics review board for model deployment",
"Create feedback mechanisms for affected individuals",
"Document decision-making processes and limitations",
"Provide clear communication about model capabilities and limitations"
]
return ethical_risks
def _analyze_model_performance_risks(self):
"""Analyze risks related to model performance"""
risks = {
'performance_gaps': [],
'overfitting_risk': 'UNKNOWN',
'underfitting_risk': 'UNKNOWN',
'reliability_score': 0.0,
'recommendations': []
}
metrics = self.model_results.get('metrics', {})
accuracy = metrics.get('accuracy', 0)
precision = metrics.get('precision', 0)
recall = metrics.get('recall', 0)
# Check for performance issues
if accuracy < 0.7:
risks['performance_gaps'].append("Low overall accuracy - model may not be reliable")
risks['underfitting_risk'] = 'HIGH'
if precision < 0.6:
risks['performance_gaps'].append("Low precision - high false positive rate")
if recall < 0.6:
risks['performance_gaps'].append("Low recall - missing many positive cases")
# Calculate reliability score
risks['reliability_score'] = (accuracy + precision + recall) / 3
# Recommendations
if accuracy < 0.7:
risks['recommendations'].append("Consider more complex models or feature engineering")
risks['recommendations'].append("Collect more training data")
if precision < 0.6 or recall < 0.6:
risks['recommendations'].append("Adjust classification threshold")
risks['recommendations'].append("Address class imbalance")
risks['recommendations'].append("Implement continuous monitoring of model performance")
risks['recommendations'].append("Set up alerts for performance degradation")
return risks
def _analyze_compliance_risks(self):
"""Analyze regulatory compliance risks"""
risks = {
'regulatory_frameworks': [],
'compliance_gaps': [],
'audit_readiness': 'LOW',
'documentation_completeness': 0.0,
'recommendations': []
}
# Identify applicable frameworks
risks['regulatory_frameworks'] = [
'GDPR (General Data Protection Regulation)',
'CCPA (California Consumer Privacy Act)',
'AI Act (EU)',
'Fair Credit Reporting Act (if applicable)'
]
# Identify compliance gaps
privacy_risks = self.results.get('privacy_risks', {})
if len(privacy_risks.get('pii_detected', [])) > 0:
risks['compliance_gaps'].append("Unprotected PII may violate GDPR/CCPA requirements")
if len(self.bias_results.get('fairness_violations', [])) > 0:
risks['compliance_gaps'].append("Fairness violations may violate anti-discrimination laws")
if not privacy_risks.get('gdpr_compliance', {}).get('data_protection_by_design', False):
risks['compliance_gaps'].append("Lack of privacy by design principles")
# Assess audit readiness
if len(risks['compliance_gaps']) == 0:
risks['audit_readiness'] = 'HIGH'
elif len(risks['compliance_gaps']) <= 2:
risks['audit_readiness'] = 'MEDIUM'
else:
risks['audit_readiness'] = 'LOW'
# Documentation completeness (placeholder - would need more info)
risks['documentation_completeness'] = 0.4
# Recommendations
risks['recommendations'] = [
"Conduct comprehensive privacy impact assessment",
"Document data lineage and processing activities",
"Implement data subject rights (access, deletion, portability)",
"Establish regular compliance audits",
"Create model cards documenting intended use and limitations",
"Implement model monitoring and incident response procedures"
]
return risks
def _analyze_data_quality_risks(self):
"""Analyze data quality risks"""
risks = {
'missing_data': {},
'data_imbalance': {},
'outlier_risk': 'UNKNOWN',
'data_quality_score': 0.0,
'recommendations': []
}
# Missing data analysis
missing_counts = self.df.isnull().sum()
missing_pct = (missing_counts / len(self.df)) * 100
for col in self.df.columns:
if missing_pct[col] > 5:
risks['missing_data'][col] = {
'count': int(missing_counts[col]),
'percentage': float(missing_pct[col])
}
# Class imbalance
if self.target_column in self.df.columns:
target_dist = self.df[self.target_column].value_counts()
imbalance_ratio = target_dist.max() / target_dist.min() if len(target_dist) > 1 else 1.0
risks['data_imbalance'] = {
'ratio': float(imbalance_ratio),
'distribution': target_dist.to_dict(),
'severe': imbalance_ratio > 5
}
# Calculate data quality score
missing_score = 1.0 - (len(risks['missing_data']) / len(self.df.columns))
imbalance_score = 1.0 / (1.0 + np.log1p(risks['data_imbalance'].get('ratio', 1) - 1))
risks['data_quality_score'] = (missing_score + imbalance_score) / 2
# Recommendations
if len(risks['missing_data']) > 0:
risks['recommendations'].append("Address missing data through imputation or removal")
if risks['data_imbalance'].get('severe', False):
risks['recommendations'].append("Use resampling techniques (SMOTE) to address class imbalance")
risks['recommendations'].append("Consider adjusting class weights in model training")
risks['recommendations'].append("Implement data validation pipelines")
risks['recommendations'].append("Monitor data drift over time")
return risks
def _aggregate_risk_categories(self):
"""Aggregate risks into categories with scores"""
categories = {}
# Privacy risks
privacy = self.results.get('privacy_risks', {})
privacy_score = self._calculate_privacy_risk_score(privacy)
categories['privacy_risks'] = {
'score': privacy_score,
'level': self._score_to_level(privacy_score),
'issues': [
f"{len(privacy['pii_detected'])} PII columns detected",
f"Anonymization level: {privacy['anonymization_level']}"
],
'recommendations': privacy['recommendations'][:3]
}
# Ethical risks
ethical = self.results.get('ethical_risks', {})
ethical_score = self._calculate_ethical_risk_score(ethical)
categories['ethical_risks'] = {
'score': ethical_score,
'level': self._score_to_level(ethical_score),
'issues': ethical['fairness_issues'][:3],
'recommendations': ethical['recommendations'][:3]
}
# Model performance risks
performance = self.results.get('model_performance_risks', {})
performance_score = 1.0 - performance.get('reliability_score', 0.5)
categories['model_performance_risks'] = {
'score': performance_score,
'level': self._score_to_level(performance_score),
'issues': performance['performance_gaps'],
'recommendations': performance['recommendations'][:3]
}
# Compliance risks
compliance = self.results.get('compliance_risks', {})
compliance_score = len(compliance['compliance_gaps']) / 10.0
categories['compliance_risks'] = {
'score': min(compliance_score, 1.0),
'level': self._score_to_level(min(compliance_score, 1.0)),
'issues': compliance['compliance_gaps'],
'recommendations': compliance['recommendations'][:3]
}
# Data quality risks
data_quality = self.results.get('data_quality_risks', {})
data_quality_score = 1.0 - data_quality.get('data_quality_score', 0.5)
categories['data_quality_risks'] = {
'score': data_quality_score,
'level': self._score_to_level(data_quality_score),
'issues': [
f"{len(data_quality['missing_data'])} columns with missing data",
f"Class imbalance ratio: {data_quality['data_imbalance'].get('ratio', 1):.2f}"
],
'recommendations': data_quality['recommendations'][:3]
}
return categories
def _calculate_privacy_risk_score(self, privacy_risks):
"""Calculate privacy risk score (0-1, higher is worse)"""
pii_count = len(privacy_risks.get('pii_detected', []))
pii_score = min(pii_count / 10, 1.0)
anon_level = privacy_risks.get('anonymization_level', 'NONE')
anon_score = {'FULL': 0.0, 'PARTIAL': 0.5, 'NONE': 1.0}.get(anon_level, 0.5)
gdpr_score = 1.0 - privacy_risks.get('gdpr_compliance', {}).get('compliance_score', 0)
return (pii_score * 0.4 + anon_score * 0.3 + gdpr_score * 0.3)
def _calculate_ethical_risk_score(self, ethical_risks):
"""Calculate ethical risk score (0-1, higher is worse)"""
fairness_score = len(ethical_risks.get('fairness_issues', [])) / 10
transparency_score = 1.0 - ethical_risks.get('transparency_score', 0.5)
bias_amp = ethical_risks.get('bias_amplification_risk', 'MEDIUM')
bias_score = {'LOW': 0.2, 'MEDIUM': 0.5, 'HIGH': 0.9}.get(bias_amp, 0.5)
return (fairness_score * 0.4 + transparency_score * 0.3 + bias_score * 0.3)
def _calculate_overall_risk_score(self):
"""Calculate overall risk score"""
category_scores = []
for category, details in self.results.get('risk_categories', {}).items():
category_scores.append(details['score'])
overall = np.mean(category_scores) if category_scores else 0.5
return float(min(overall, 1.0))
def _determine_risk_level(self):
"""Determine overall risk level"""
score = self.results.get('overall_risk_score', 0.5)
return self._score_to_level(score)
def _score_to_level(self, score):
"""Convert score to risk level"""
if score >= 0.7:
return 'HIGH'
elif score >= 0.4:
return 'MEDIUM'
else:
return 'LOW'
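
For reference, the aggregation above reduces to weighted per-category scores mapped through fixed thresholds. A standalone sketch with made-up inputs, mirroring the privacy weighting (0.4/0.3/0.3) and the HIGH/MEDIUM/LOW cut-offs:

import numpy as np

def score_to_level(score):
    # Same cut-offs as RiskAnalyzer._score_to_level
    if score >= 0.7:
        return 'HIGH'
    elif score >= 0.4:
        return 'MEDIUM'
    return 'LOW'

# Made-up inputs: 3 PII columns, partial anonymization, 25% GDPR compliance score
pii_score = min(3 / 10, 1.0)
anon_score = {'FULL': 0.0, 'PARTIAL': 0.5, 'NONE': 1.0}['PARTIAL']
gdpr_score = 1.0 - 0.25
privacy_score = pii_score * 0.4 + anon_score * 0.3 + gdpr_score * 0.3   # 0.495

# Overall score is the mean of the per-category scores (the other four are made up here)
category_scores = [privacy_score, 0.6, 0.3, 0.2, 0.4]
overall = float(np.mean(category_scores))

print(privacy_score, score_to_level(privacy_score))   # 0.495 MEDIUM
print(overall, score_to_level(overall))               # ~0.399 LOW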