Mirror of https://github.com/PlatypusPus/MushroomEmpire.git (synced 2026-02-07)
.gitignore (vendored, 2 lines changed)
@@ -74,4 +74,4 @@ frontend/nordic-privacy-ai/.next/
 frontend/nordic-privacy-ai/out/
 frontend/nordic-privacy-ai/node_modules/
 
-Data
+Datamain.py
GDPRArticles.pdf (new binary file, not shown)
FastAPI application entrypoint:

@@ -8,7 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 import os
 
-from api.routers import analyze, clean, discovery
+from api.routers import analyze, clean, discovery, detect_pii
 
 # Create FastAPI app
 app = FastAPI(
@@ -37,6 +37,7 @@ app.mount("/reports", StaticFiles(directory=reports_dir), name="reports")
 # Include routers
 app.include_router(analyze.router, prefix="/api", tags=["AI Governance"])
 app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"])
+app.include_router(detect_pii.router, prefix="/api", tags=["PII Detection"])
 app.include_router(discovery.router, prefix="/api", tags=["Discover sources"])
 
 @app.get("/")
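With the router included under prefix "/api" and the route declared as @router.post("/detect-pii") in the new file below, the endpoint resolves to POST /api/detect-pii. A small sketch (not part of this PR) that lists the registered routes to confirm the wiring; the module path "main" for the app object is an assumption:

# Sketch only, assuming the FastAPI app above is importable as main.app (hypothetical path).
from main import app

for route in app.routes:
    methods = getattr(route, "methods", None)  # mounted static dirs (e.g. /reports) have no methods
    print(methods, route.path)
# The output should include: {'POST'} /api/detect-pii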
api/routers/detect_pii.py (new file, 224 lines)
@@ -0,0 +1,224 @@
"""
PII Detection Router
Detects risky features WITHOUT anonymizing them
Returns risk classification for user review
"""

from fastapi import APIRouter, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import pandas as pd
import numpy as np
import io
import os
import sys
from typing import Dict, Any, List

# Import cleaning module
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from data_cleaning.cleaner import DataCleaner
from data_cleaning.config import (
    ENTITY_STRATEGY_MAP,
    STRATEGIES,
    GDPR_COMPLIANCE,
    COLUMN_CONTEXT_FILTERS,
    EXCLUSION_PATTERNS,
    get_strategy_for_entity,
    get_risk_level
)

router = APIRouter()


def convert_to_serializable(obj):
    """Convert numpy/pandas types to native Python types for JSON serialization"""
    if isinstance(obj, (np.integer, np.int64, np.int32)):
        return int(obj)
    elif isinstance(obj, (np.floating, np.float64, np.float32)):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    return obj


@router.post("/detect-pii")
async def detect_pii(file: UploadFile = File(...)):
    """
    Detect PII in uploaded file WITHOUT anonymizing

    - **file**: CSV, JSON, or TXT file to analyze for PII

    Returns:
    - List of risky features with severity and recommended strategies
    - Detection confidence scores
    - GDPR article references
    - Example values for review
    """

    try:
        # Read uploaded file
        contents = await file.read()
        file_extension = os.path.splitext(file.filename)[1].lower()

        # Determine file type and parse accordingly
        if file_extension == '.csv':
            df = pd.read_csv(io.BytesIO(contents))
            file_type = 'csv'
        elif file_extension == '.json':
            df = pd.read_json(io.BytesIO(contents))
            file_type = 'json'
        elif file_extension in ['.txt', '.text']:
            # For plain text, create a single-column dataframe
            text_content = contents.decode('utf-8', errors='ignore')
            # Split into lines for better granularity
            lines = [line.strip() for line in text_content.split('\n') if line.strip()]
            df = pd.DataFrame({'text_content': lines})
            file_type = 'text'
        else:
            # Try to auto-detect format
            try:
                # Try CSV first
                df = pd.read_csv(io.BytesIO(contents))
                file_type = 'csv'
            except:
                try:
                    # Try JSON
                    df = pd.read_json(io.BytesIO(contents))
                    file_type = 'json'
                except:
                    # Fall back to plain text
                    text_content = contents.decode('utf-8', errors='ignore')
                    lines = [line.strip() for line in text_content.split('\n') if line.strip()]
                    df = pd.DataFrame({'text_content': lines})
                    file_type = 'text'

        if df.empty:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        print(f"Detecting PII in: {file.filename} ({file_type} format, {len(df)} rows, {len(df.columns)} columns)")

        # Initialize Data Cleaner (with GPU if available)
        cleaner = DataCleaner(df, use_gpu=True)

        # Detect PII without cleaning
        pii_detections = cleaner._detect_pii(
            df=df,
            risky_columns=None,  # Scan all columns
            scan_all_cells=True
        )

        # Classify by risk level
        risk_classification = cleaner._classify_risk(pii_detections)

        # Build response with detailed feature information
        risky_features = []

        for risk_level in ['HIGH', 'MEDIUM', 'LOW', 'UNKNOWN']:
            detections = risk_classification[risk_level]

            for column, entities in detections.items():
                for entity_info in entities:
                    entity_type = entity_info['entity_type']
                    strategy = entity_info['strategy']

                    # Get example values from the column (first 5 non-null; only 3 are returned below)
                    sample_values = df[column].dropna().head(5).astype(str).tolist()

                    # Get GDPR article
                    gdpr_article = GDPR_COMPLIANCE.get(entity_type, 'Not classified')

                    # Get strategy details
                    strategy_details = STRATEGIES.get(strategy, {})

                    risky_features.append({
                        'column': column,
                        'entity_type': entity_type,
                        'risk_level': risk_level,
                        'confidence': float(entity_info['confidence']),
                        'detection_count': int(entity_info['count']),
                        'recommended_strategy': strategy,
                        'strategy_description': strategy_details.get('description', ''),
                        'reversible': strategy_details.get('reversible', False),
                        'use_cases': strategy_details.get('use_cases', []),
                        'gdpr_article': gdpr_article,
                        'sample_values': sample_values[:3],  # Show 3 examples
                        'explanation': _generate_risk_explanation(entity_type, risk_level, strategy)
                    })

        # Sort by risk level (HIGH -> MEDIUM -> LOW)
        risk_order = {'HIGH': 0, 'MEDIUM': 1, 'LOW': 2, 'UNKNOWN': 3}
        risky_features.sort(key=lambda x: (risk_order[x['risk_level']], x['column']))

        # Prepare summary statistics
        summary = {
            'total_columns_scanned': len(df.columns),
            'risky_columns_found': len(set(f['column'] for f in risky_features)),
            'high_risk_count': sum(1 for f in risky_features if f['risk_level'] == 'HIGH'),
            'medium_risk_count': sum(1 for f in risky_features if f['risk_level'] == 'MEDIUM'),
            'low_risk_count': sum(1 for f in risky_features if f['risk_level'] == 'LOW'),
            'unique_entity_types': len(set(f['entity_type'] for f in risky_features))
        }

        response_data = {
            'status': 'success',
            'filename': file.filename,
            'file_type': file_type,
            'dataset_info': {
                'rows': len(df),
                'columns': len(df.columns),
                'column_names': df.columns.tolist()
            },
            'summary': summary,
            'risky_features': risky_features,
            'available_strategies': STRATEGIES,
            'message': f"Found {summary['risky_columns_found']} columns with PII ({summary['high_risk_count']} HIGH risk, {summary['medium_risk_count']} MEDIUM risk, {summary['low_risk_count']} LOW risk)"
        }

        # Convert all numpy/pandas types to native Python types
        response_data = convert_to_serializable(response_data)

        return JSONResponse(content=response_data)

    except pd.errors.EmptyDataError:
        raise HTTPException(status_code=400, detail="File is empty or invalid CSV format")
    except ImportError as e:
        raise HTTPException(status_code=500, detail="Presidio not installed. Please install: pip install presidio-analyzer presidio-anonymizer")
    except Exception as e:
        print(f"Error during PII detection: {str(e)}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"PII detection failed: {str(e)}")


def _generate_risk_explanation(entity_type: str, risk_level: str, strategy: str) -> str:
    """Generate human-readable explanation for why a feature is risky"""

    explanations = {
        'CREDIT_CARD': "Credit card numbers are highly sensitive financial identifiers protected under GDPR Art. 4(1) and PCI-DSS regulations. Unauthorized disclosure can lead to fraud and identity theft.",
        'US_SSN': "Social Security Numbers are government-issued identifiers that can be used for identity theft. They are strictly protected under US federal law and GDPR Art. 4(1).",
        'EMAIL_ADDRESS': "Email addresses are personal identifiers under GDPR Art. 4(1) that can be used to re-identify individuals and track behavior across services.",
        'PHONE_NUMBER': "Phone numbers are direct personal identifiers under GDPR Art. 4(1) that enable contact and can be used to track individuals.",
        'PERSON': "Personal names are explicit identifiers under GDPR Art. 4(1) that directly identify individuals and must be protected in datasets.",
        'LOCATION': "Location data reveals personal information about individuals' movements and residence, protected under GDPR Art. 4(1) as personal data.",
        'IP_ADDRESS': "IP addresses are online identifiers under GDPR Art. 4(1) that can be used to track individuals across the internet.",
        'DATE_TIME': "Temporal data can be used to re-identify individuals when combined with other data points, especially for rare events.",
        'MEDICAL_LICENSE': "Medical information is special category data under GDPR Art. 9(1) requiring heightened protection due to health privacy concerns.",
        'NRP': "Nationality, religious, or political views are special category data under GDPR Art. 9(1) that can lead to discrimination.",
        'US_BANK_NUMBER': "Bank account numbers are financial identifiers that enable unauthorized access to accounts and are protected under GDPR Art. 4(1).",
        'CRYPTO': "Cryptocurrency addresses are financial identifiers that can reveal transaction history and wealth, requiring protection.",
        'FI_PERSONAL_ID': "Finnish personal identity numbers (HETU) are highly sensitive national identifiers under GDPR Art. 4(1) + Recital 26, granting access to government services.",
        'SE_PERSONAL_ID': "Swedish Personnummer are national identifiers protected under GDPR Art. 4(1) + Recital 26, used across all government and private services.",
        'NO_PERSONAL_ID': "Norwegian Fødselsnummer are national ID numbers under GDPR Art. 4(1) + Recital 26, used for all official identification.",
        'DK_PERSONAL_ID': "Danish CPR numbers are national identifiers protected under GDPR Art. 4(1) + Recital 26, critical for government services.",
        'FI_BUSINESS_ID': "Finnish business IDs (Y-tunnus) are organizational identifiers with lower risk than personal IDs, but still require protection for business privacy.",
    }

    base_explanation = explanations.get(entity_type,
        f"{entity_type} detected as {risk_level} risk personal data under GDPR regulations requiring appropriate protection measures.")

    strategy_note = f" Recommended action: {strategy} - this {'permanently removes' if strategy == 'REMOVE' else 'anonymizes'} the data to ensure compliance."

    return base_explanation + strategy_note
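A minimal client sketch for the new endpoint (not part of the PR): it assumes the API runs locally at http://localhost:8000 and that a customers.csv file exists; the response keys match the router above.

# Hedged usage sketch for POST /api/detect-pii; base URL and filename are assumptions.
import requests

with open("customers.csv", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/api/detect-pii",
        files={"file": ("customers.csv", f, "text/csv")},
    )
resp.raise_for_status()
report = resp.json()

print(report["message"])
for feature in report["risky_features"]:
    print(feature["risk_level"], feature["column"], feature["entity_type"],
          "->", feature["recommended_strategy"])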
data_cleaning/cleaner.py:

@@ -8,6 +8,7 @@ import pandas as pd
 import numpy as np
 import hashlib
 import json
+import re
 from datetime import datetime
 from typing import Dict, List, Tuple, Optional, Any
 from collections import defaultdict
@@ -375,10 +376,14 @@ class DataCleaner:
     ) -> Dict[str, List[Dict]]:
         """
         Detect PII at column and cell level (GPU-accelerated when available)
+        With intelligent filtering for false positives
 
         Returns:
             Dictionary mapping column names to list of detected entities
         """
+        import re
+        from data_cleaning.config import COLUMN_CONTEXT_FILTERS, EXCLUSION_PATTERNS
+
         pii_detections = defaultdict(list)
 
         # Determine which columns to scan
@@ -417,26 +422,69 @@ class DataCleaner:
             )
 
             if results:
-                # Aggregate by entity type
-                entity_summary = defaultdict(lambda: {'count': 0, 'scores': []})
+                # Aggregate by entity type with filtering
+                entity_summary = defaultdict(lambda: {'count': 0, 'scores': [], 'filtered': 0})
+                filtered_reasons = []
+
                 for result in results:
-                    entity_summary[result.entity_type]['count'] += 1
-                    entity_summary[result.entity_type]['scores'].append(result.score)
+                    entity_type = result.entity_type
+                    # Extract detected text from original string using start/end positions
+                    detected_text = combined_text[result.start:result.end]
 
-                # Store detection results
+                    # ✅ FILTER 1: Column Context Filtering
+                    # Skip if entity type should be ignored based on column name
+                    context_filtered = False
+                    for pattern, ignored_entities in COLUMN_CONTEXT_FILTERS.items():
+                        if re.search(pattern, column.lower()) and entity_type in ignored_entities:
+                            context_filtered = True
+                            entity_summary[entity_type]['filtered'] += 1
+                            if f"column context ({pattern})" not in filtered_reasons:
+                                filtered_reasons.append(f"column context ({pattern})")
+                            break
+
+                    if context_filtered:
+                        continue
+
+                    # ✅ FILTER 2: Value Pattern Exclusions
+                    # Skip if detected value matches exclusion patterns
+                    pattern_filtered = False
+                    if entity_type in EXCLUSION_PATTERNS:
+                        for exclusion_pattern in EXCLUSION_PATTERNS[entity_type]:
+                            if re.match(exclusion_pattern, detected_text, re.IGNORECASE):
+                                pattern_filtered = True
+                                entity_summary[entity_type]['filtered'] += 1
+                                if f"value pattern ({exclusion_pattern[:20]}...)" not in filtered_reasons:
+                                    filtered_reasons.append(f"value pattern")
+                                break
+
+                    if pattern_filtered:
+                        continue
+
+                    # ✅ Not filtered - count as valid detection
+                    entity_summary[entity_type]['count'] += 1
+                    entity_summary[entity_type]['scores'].append(result.score)
+
+                # Store detection results (only non-filtered)
+                detected_types = []
                 for entity_type, info in entity_summary.items():
-                    avg_confidence = np.mean(info['scores'])
-                    pii_detections[column].append({
-                        'entity_type': entity_type,
-                        'count': info['count'],
-                        'avg_confidence': avg_confidence,
-                        'max_confidence': max(info['scores']),
-                        'min_confidence': min(info['scores'])
-                    })
+                    if info['count'] > 0:  # Only include if we have valid (non-filtered) detections
+                        avg_confidence = np.mean(info['scores'])
+                        pii_detections[column].append({
+                            'entity_type': entity_type,
+                            'count': info['count'],
+                            'avg_confidence': avg_confidence,
+                            'max_confidence': max(info['scores']),
+                            'min_confidence': min(info['scores'])
+                        })
+                        detected_types.append(entity_type)
 
-                detected_types = [d['entity_type'] for d in pii_detections[column]]
-                print(f"✓ Found: {', '.join(detected_types)}")
+                if detected_types:
+                    print(f"✓ Found: {', '.join(detected_types)}")
+                elif any(info['filtered'] > 0 for info in entity_summary.values()):
+                    total_filtered = sum(info['filtered'] for info in entity_summary.values())
+                    print(f"(filtered {total_filtered} false positives: {', '.join(filtered_reasons[:2])})")
+                else:
+                    print("(no PII)")
             else:
                 print("(no PII)")
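Read in isolation, the two filters above behave as in this self-contained sketch (toy patterns standing in for the full COLUMN_CONTEXT_FILTERS / EXCLUSION_PATTERNS config), which shows why a value like "l1" in a credit_rating column no longer counts as a US_DRIVER_LICENSE hit:

# Standalone illustration of the two-stage filtering; the patterns here are
# simplified stand-ins for the project's COLUMN_CONTEXT_FILTERS / EXCLUSION_PATTERNS.
import re

COLUMN_CONTEXT_FILTERS = {r'.*rating.*': ['US_DRIVER_LICENSE', 'US_PASSPORT']}
EXCLUSION_PATTERNS = {'US_DRIVER_LICENSE': [r'^[a-zA-Z]\d{1,2}$']}

def keep_detection(column: str, entity_type: str, detected_text: str) -> bool:
    # Filter 1: entity types that the column name tells us to ignore
    for pattern, ignored in COLUMN_CONTEXT_FILTERS.items():
        if re.search(pattern, column.lower()) and entity_type in ignored:
            return False
    # Filter 2: values whose shape marks them as false positives
    for exclusion in EXCLUSION_PATTERNS.get(entity_type, []):
        if re.match(exclusion, detected_text, re.IGNORECASE):
            return False
    return True

print(keep_detection("credit_rating", "US_DRIVER_LICENSE", "l1"))    # False (filter 1)
print(keep_detection("notes", "US_DRIVER_LICENSE", "B12"))           # False (filter 2)
print(keep_detection("notes", "US_DRIVER_LICENSE", "D12345678"))     # True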
data_cleaning/config.py:

@@ -126,9 +126,63 @@ GDPR_COMPLIANCE = {
 # Presidio Analyzer Settings
 PRESIDIO_CONFIG = {
     'language': 'en',
-    'score_threshold': 0.5,  # Minimum confidence to report
+    'score_threshold': 0.6,  # Minimum confidence to report (raised from 0.5 to reduce false positives)
     'entities': None,  # None = detect all, or specify list like ['EMAIL_ADDRESS', 'PHONE_NUMBER']
-    'allow_list': [],  # Terms to ignore (e.g., company names that look like PII)
+    'allow_list': ['l1', 'l2', 'L1', 'L2', 'NA', 'N/A', 'null', 'none'],  # Common non-PII values
 }
 
+# Column Context Filters - Ignore specific entity types based on column name patterns
+# This prevents false positives when column names provide context
+COLUMN_CONTEXT_FILTERS = {
+    # Column name pattern (regex) -> List of entity types to IGNORE in that column
+    r'.*credit.*': ['US_DRIVER_LICENSE', 'US_PASSPORT', 'PERSON'],
+    r'.*rating.*': ['US_DRIVER_LICENSE', 'US_PASSPORT'],
+    r'.*level.*': ['US_DRIVER_LICENSE', 'US_PASSPORT'],
+    r'.*score.*': ['US_DRIVER_LICENSE', 'US_PASSPORT', 'PERSON'],
+    r'.*category.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*status.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*type.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*grade.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*class.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*rank.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*tier.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*segment.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*group.*': ['US_DRIVER_LICENSE', 'PERSON'],
+    r'.*code.*': ['PERSON'],  # Codes are rarely names
+    r'.*id$': ['PERSON'],  # IDs ending in 'id' are rarely names
+    r'.*_id$': ['PERSON'],  # Same for underscore_id
+}
+
+# Value Pattern Exclusions - Ignore values matching these patterns for specific entity types
+# This catches false positives based on the actual detected value format
+EXCLUSION_PATTERNS = {
+    'US_DRIVER_LICENSE': [
+        r'^[a-zA-Z]\d{1,2}$',  # Single letter + 1-2 digits (e.g., l1, l2, A1, B12)
+        r'^[a-zA-Z]{1,2}$',  # 1-2 letters only (e.g., A, AB)
+        r'^level\s*\d+$',  # "level 1", "level 2", etc.
+        r'^tier\s*\d+$',  # "tier 1", "tier 2", etc.
+        r'^grade\s*[a-zA-Z]$',  # "grade A", "grade B", etc.
+    ],
+    'US_PASSPORT': [
+        r'^[a-zA-Z]\d{1,2}$',  # Single letter + 1-2 digits
+        r'^[a-zA-Z]{1,2}$',  # 1-2 letters only
+    ],
+    'PERSON': [
+        r'^(admin|user|guest|system|default|test|demo)$',  # Generic usernames
+        r'^[a-zA-Z]\d*$',  # Single letter with optional numbers (A, A1, B2)
+        r'^(yes|no|true|false|y|n|t|f)$',  # Boolean values
+        r'^(male|female|m|f|other)$',  # Gender categories
+        r'^(low|medium|high|good|bad|excellent|poor)$',  # Rating values
+    ],
+    'EMAIL_ADDRESS': [
+        r'^(test|demo|example|sample)@',  # Test emails
+        r'@(test|demo|example|sample)\.',  # Test domains
+    ],
+    'PHONE_NUMBER': [
+        r'^(000|111|222|333|444|555|666|777|888|999)[-\s]',  # Fake phone patterns
+        r'^1{6,}$',  # All 1s
+        r'^0{6,}$',  # All 0s
+    ],
+}
+
 # Custom Recognizers (domain-specific patterns)
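For reference, a hedged sketch of how PRESIDIO_CONFIG feeds a Presidio call; it assumes presidio-analyzer is installed and that AnalyzerEngine.analyze accepts language, entities, score_threshold and, in recent releases, allow_list. This is background on the library, not something added by the diff:

# Sketch only: wiring PRESIDIO_CONFIG into presidio-analyzer (allow_list exists
# in recent presidio-analyzer releases; verify against the installed version).
from presidio_analyzer import AnalyzerEngine
from data_cleaning.config import PRESIDIO_CONFIG

analyzer = AnalyzerEngine()
results = analyzer.analyze(
    text="Contact Jane Doe at jane.doe@example.com, rating l2",
    language=PRESIDIO_CONFIG['language'],
    entities=PRESIDIO_CONFIG['entities'],            # None = run all recognizers
    score_threshold=PRESIDIO_CONFIG['score_threshold'],
    allow_list=PRESIDIO_CONFIG['allow_list'],        # terms like 'l2' are skipped outright
)
for r in results:
    print(r.entity_type, round(r.score, 2), r.start, r.end)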
Frontend, CenterPanel component:

@@ -2,7 +2,7 @@
 import { TryTab } from "./Sidebar";
 import { useState, useRef, useCallback, useEffect } from "react";
 import { saveLatestUpload, getLatestUpload, deleteLatestUpload } from "../../lib/indexeddb";
-import { analyzeDataset, cleanDataset, getReportUrl, type AnalyzeResponse, type CleanResponse } from "../../lib/api";
+import { analyzeDataset, cleanDataset, detectPII, getReportUrl, type AnalyzeResponse, type CleanResponse, type DetectPIIResponse } from "../../lib/api";
 
 interface CenterPanelProps {
   tab: TryTab;
@@ -38,6 +38,7 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
   // Analysis results
   const [analyzeResult, setAnalyzeResult] = useState<AnalyzeResponse | null>(null);
   const [cleanResult, setCleanResult] = useState<CleanResponse | null>(null);
+  const [piiDetectionResult, setPIIDetectionResult] = useState<DetectPIIResponse | null>(null);
 
   const reset = () => {
     setFileMeta(null);
@@ -46,6 +47,7 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
     setProgressLabel("Processing");
     setTablePreview(null);
     setError(null);
+    setPIIDetectionResult(null);
   };
 
   // Handle API calls
@@ -71,6 +73,27 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
     }
   };
 
+  const handleDetectPII = async () => {
+    if (!uploadedFile) {
+      setError("No file uploaded");
+      return;
+    }
+
+    setIsProcessing(true);
+    setError(null);
+    setProgressLabel("Detecting PII...");
+
+    try {
+      const result = await detectPII(uploadedFile);
+      setPIIDetectionResult(result);
+      setProgressLabel("PII detection complete!");
+    } catch (err: any) {
+      setError(err.message || "PII detection failed");
+    } finally {
+      setIsProcessing(false);
+    }
+  };
+
   const handleClean = async () => {
     if (!uploadedFile) {
       setError("No file uploaded");
@@ -380,6 +403,18 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
           </div>
         )}
 
+        {piiDetectionResult && (
+          <div className="mt-3 p-3 bg-blue-50 border border-blue-200 rounded-md text-sm text-blue-700">
+            🔍 PII Detection complete! Found {piiDetectionResult.summary.risky_columns_found} risky columns in {piiDetectionResult.file_type.toUpperCase()} file.
+            <div className="mt-1 text-xs">
+              <span className="font-semibold text-red-700">{piiDetectionResult.summary.high_risk_count} HIGH</span> •
+              <span className="font-semibold text-orange-600 ml-1">{piiDetectionResult.summary.medium_risk_count} MEDIUM</span> •
+              <span className="font-semibold text-yellow-600 ml-1">{piiDetectionResult.summary.low_risk_count} LOW</span>
+            </div>
+            <p className="mt-2 text-xs">Review detected risks in the "Bias & Risk Mitigation" tab to choose anonymization strategies.</p>
+          </div>
+        )}
+
         {analyzeResult && (
           <div className="mt-3 p-3 bg-green-50 border border-green-200 rounded-md text-sm text-green-700">
             ✅ Analysis complete! View results in tabs.
@@ -426,6 +461,7 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
                 setLoadedFromCache(false);
                 setAnalyzeResult(null);
                 setCleanResult(null);
+                setPIIDetectionResult(null);
               }}
               className="text-xs rounded-md border px-3 py-1.5 hover:bg-slate-50"
             >
@@ -433,11 +469,11 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
             </button>
             <button
               type="button"
-              onClick={handleClean}
+              onClick={handleDetectPII}
               disabled={isProcessing}
-              className="text-xs rounded-md bg-green-600 text-white px-3 py-1.5 hover:bg-green-500 disabled:opacity-50 disabled:cursor-not-allowed"
+              className="text-xs rounded-md bg-blue-600 text-white px-3 py-1.5 hover:bg-blue-500 disabled:opacity-50 disabled:cursor-not-allowed"
             >
-              {isProcessing ? "Processing..." : "Clean (PII)"}
+              {isProcessing ? "Processing..." : "🔍 Detect PII"}
             </button>
             <button
               type="button"
@@ -445,7 +481,7 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
               disabled={isProcessing}
               className="text-xs rounded-md bg-brand-600 text-white px-3 py-1.5 hover:bg-brand-500 disabled:opacity-50 disabled:cursor-not-allowed"
             >
-              {isProcessing ? "Processing..." : "Analyze"}
+              {isProcessing ? "Processing..." : "⚡ Analyze"}
             </button>
           </div>
         </div>
@@ -1100,20 +1136,190 @@ export function CenterPanel({ tab, onAnalyze }: CenterPanelProps) {
       );
     case "bias-risk-mitigation":
       return (
-        <div className="space-y-4">
-          <h2 className="text-xl font-semibold">Mitigation Suggestions</h2>
-          {analyzeResult && analyzeResult.recommendations.length > 0 ? (
-            <div className="space-y-2">
-              {analyzeResult.recommendations.map((rec, i) => (
-                <div key={i} className="p-3 bg-blue-50 border border-blue-200 rounded-md text-sm">
-                  {rec}
-                </div>
-              ))}
-            </div>
-          ) : (
-            <p className="text-sm text-slate-600">
-              Recommendations will appear here after analysis.
-            </p>
-          )}
-        </div>
+        <div className="space-y-6">
+          <div>
+            <h2 className="text-2xl font-bold mb-2">PII Detection & Anonymization Strategy</h2>
+            <p className="text-sm text-slate-600">Review detected risky features and choose how to anonymize them</p>
+          </div>
+
+          {piiDetectionResult ? (
+            <div className="space-y-6">
+              {/* File Info Banner */}
+              <div className="p-3 bg-slate-100 border border-slate-300 rounded-lg text-sm">
+                <div className="flex items-center gap-3">
+                  <span className="font-semibold text-slate-700">File:</span>
+                  <code className="px-2 py-1 bg-white rounded border border-slate-200">{piiDetectionResult.filename}</code>
+                  <span className="px-2 py-0.5 bg-blue-100 text-blue-800 text-xs font-semibold rounded">
+                    {piiDetectionResult.file_type.toUpperCase()}
+                  </span>
+                  <span className="text-slate-600">
+                    {piiDetectionResult.dataset_info.rows} rows × {piiDetectionResult.dataset_info.columns} columns
+                  </span>
+                </div>
+              </div>
+
+              {/* Summary Card */}
+              <div className="p-6 bg-gradient-to-br from-blue-50 to-indigo-50 rounded-xl border-2 border-blue-200">
+                <div className="grid grid-cols-1 md:grid-cols-4 gap-4">
+                  <div>
+                    <div className="text-xs font-semibold text-blue-700 mb-1">TOTAL COLUMNS SCANNED</div>
+                    <div className="text-3xl font-bold text-blue-900">{piiDetectionResult.summary.total_columns_scanned}</div>
+                  </div>
+                  <div>
+                    <div className="text-xs font-semibold text-red-700 mb-1">HIGH RISK</div>
+                    <div className="text-3xl font-bold text-red-900">{piiDetectionResult.summary.high_risk_count}</div>
+                    <div className="text-xs text-slate-600">Must remove</div>
+                  </div>
+                  <div>
+                    <div className="text-xs font-semibold text-orange-700 mb-1">MEDIUM RISK</div>
+                    <div className="text-3xl font-bold text-orange-900">{piiDetectionResult.summary.medium_risk_count}</div>
+                    <div className="text-xs text-slate-600">Hash recommended</div>
+                  </div>
+                  <div>
+                    <div className="text-xs font-semibold text-yellow-700 mb-1">LOW RISK</div>
+                    <div className="text-3xl font-bold text-yellow-900">{piiDetectionResult.summary.low_risk_count}</div>
+                    <div className="text-xs text-slate-600">Mask/generalize</div>
+                  </div>
+                </div>
+                <div className="mt-4 p-3 bg-white/70 rounded-lg text-sm text-slate-700">
+                  {piiDetectionResult.message}
+                </div>
+              </div>
+
+              {/* Risky Features List */}
+              <div className="space-y-3">
+                {piiDetectionResult.risky_features.map((feature, idx) => {
+                  const riskColor =
+                    feature.risk_level === 'HIGH' ? 'red' :
+                    feature.risk_level === 'MEDIUM' ? 'orange' :
+                    feature.risk_level === 'LOW' ? 'yellow' : 'gray';
+
+                  const bgColor =
+                    feature.risk_level === 'HIGH' ? 'bg-red-50 border-red-300' :
+                    feature.risk_level === 'MEDIUM' ? 'bg-orange-50 border-orange-300' :
+                    feature.risk_level === 'LOW' ? 'bg-yellow-50 border-yellow-300' : 'bg-gray-50 border-gray-300';
+
+                  return (
+                    <div key={idx} className={`p-5 rounded-xl border-2 ${bgColor}`}>
+                      {/* Header */}
+                      <div className="flex items-start justify-between mb-3">
+                        <div className="flex-1">
+                          <div className="flex items-center gap-3 mb-2">
+                            <span className={`px-3 py-1 bg-${riskColor}-600 text-white text-xs font-bold rounded-full`}>
+                              {feature.risk_level} RISK
+                            </span>
+                            <span className="font-mono font-bold text-lg text-slate-800">{feature.column}</span>
+                          </div>
+                          <div className="text-sm text-slate-700">
+                            <span className="font-semibold">Detected:</span> {feature.entity_type}
+                            <span className="mx-2">•</span>
+                            <span className="font-semibold">Confidence:</span> {(feature.confidence * 100).toFixed(1)}%
+                            <span className="mx-2">•</span>
+                            <span className="font-semibold">Occurrences:</span> {feature.detection_count}
+                          </div>
+                        </div>
+                      </div>
+
+                      {/* Explanation */}
+                      <div className="p-4 bg-white rounded-lg mb-4">
+                        <div className="text-xs font-semibold text-slate-600 mb-2">WHY IS THIS RISKY?</div>
+                        <p className="text-sm text-slate-700 leading-relaxed">{feature.explanation}</p>
+                        <div className="mt-3 text-xs text-slate-600">
+                          <strong>GDPR Reference:</strong> {feature.gdpr_article}
+                        </div>
+                      </div>
+
+                      {/* Sample Values */}
+                      {feature.sample_values.length > 0 && (
+                        <div className="p-4 bg-white rounded-lg mb-4">
+                          <div className="text-xs font-semibold text-slate-600 mb-2">SAMPLE VALUES</div>
+                          <div className="flex gap-2 flex-wrap">
+                            {feature.sample_values.map((val, i) => (
+                              <code key={i} className="px-2 py-1 bg-slate-100 rounded text-xs text-slate-800 border border-slate-200">
+                                {val}
+                              </code>
+                            ))}
+                          </div>
+                        </div>
+                      )}
+
+                      {/* Recommended Strategy */}
+                      <div className="p-4 bg-white rounded-lg border-2 border-green-300">
+                        <div className="flex items-start gap-3">
+                          <div className="flex-1">
+                            <div className="text-xs font-semibold text-green-700 mb-1">✓ RECOMMENDED STRATEGY</div>
+                            <div className="font-bold text-lg text-slate-900">{feature.recommended_strategy}</div>
+                            <div className="text-sm text-slate-700 mt-1">{feature.strategy_description}</div>
+                            <div className="mt-2 flex gap-4 text-xs text-slate-600">
+                              <div>
+                                <strong>Reversible:</strong> {feature.reversible ? 'Yes' : 'No'}
+                              </div>
+                              <div>
+                                <strong>Use Cases:</strong> {feature.use_cases.join(', ')}
+                              </div>
+                            </div>
+                          </div>
+                          <button
+                            className="px-4 py-2 bg-green-600 text-white text-sm font-semibold rounded-lg hover:bg-green-500"
+                            onClick={() => alert(`Apply ${feature.recommended_strategy} to ${feature.column}`)}
+                          >
+                            Apply
+                          </button>
+                        </div>
+                      </div>
+
+                      {/* Alternative Strategies */}
+                      <details className="mt-3">
+                        <summary className="text-xs font-semibold text-slate-600 cursor-pointer hover:text-slate-800">
+                          View Alternative Strategies
+                        </summary>
+                        <div className="mt-2 grid grid-cols-1 md:grid-cols-2 gap-2">
+                          {Object.entries(piiDetectionResult.available_strategies)
+                            .filter(([strategy]) => strategy !== feature.recommended_strategy)
+                            .map(([strategy, details]: [string, any]) => (
+                              <div key={strategy} className="p-3 bg-white rounded border border-slate-200 hover:border-slate-400">
+                                <div className="font-semibold text-sm text-slate-800">{strategy}</div>
+                                <div className="text-xs text-slate-600 mt-1">{details.description}</div>
+                                <div className="mt-2 flex items-center justify-between">
+                                  <span className={`px-2 py-0.5 text-xs rounded ${
+                                    details.risk_level === 'HIGH' ? 'bg-red-100 text-red-800' :
+                                    details.risk_level === 'MEDIUM' ? 'bg-orange-100 text-orange-800' :
+                                    'bg-yellow-100 text-yellow-800'
+                                  }`}>
+                                    {details.risk_level} Risk
+                                  </span>
+                                  <button
+                                    className="px-2 py-1 bg-blue-600 text-white text-xs rounded hover:bg-blue-500"
+                                    onClick={() => alert(`Apply ${strategy} to ${feature.column}`)}
+                                  >
+                                    Use This
+                                  </button>
+                                </div>
+                              </div>
+                            ))}
+                        </div>
+                      </details>
+                    </div>
+                  );
+                })}
+              </div>
+
+              {/* Apply All Button */}
+              <div className="sticky bottom-0 p-4 bg-gradient-to-t from-white via-white to-transparent">
+                <button
+                  className="w-full py-3 bg-green-600 text-white font-bold rounded-lg hover:bg-green-500 shadow-lg"
+                  onClick={() => alert('Apply all recommended strategies and clean dataset')}
+                >
+                  ✓ Apply All Recommended Strategies & Clean Dataset
+                </button>
+              </div>
+            </div>
+          ) : (
+            <div className="text-center py-12">
+              <div className="text-6xl mb-4">🔍</div>
+              <p className="text-slate-600 mb-2">No PII detection results yet</p>
+              <p className="text-sm text-slate-500">Upload a dataset and click "🔍 Detect PII" to scan for risky features</p>
+            </div>
+          )}
+        </div>
       );
Frontend API client (lib/api):

@@ -74,6 +74,46 @@ export interface CleanResponse {
   timestamp: string;
 }
 
+export interface DetectPIIResponse {
+  status: string;
+  filename: string;
+  file_type: 'csv' | 'json' | 'text';
+  dataset_info: {
+    rows: number;
+    columns: number;
+    column_names: string[];
+  };
+  summary: {
+    total_columns_scanned: number;
+    risky_columns_found: number;
+    high_risk_count: number;
+    medium_risk_count: number;
+    low_risk_count: number;
+    unique_entity_types: number;
+  };
+  risky_features: Array<{
+    column: string;
+    entity_type: string;
+    risk_level: 'HIGH' | 'MEDIUM' | 'LOW' | 'UNKNOWN';
+    confidence: number;
+    detection_count: number;
+    recommended_strategy: string;
+    strategy_description: string;
+    reversible: boolean;
+    use_cases: string[];
+    gdpr_article: string;
+    sample_values: string[];
+    explanation: string;
+  }>;
+  available_strategies: Record<string, {
+    description: string;
+    risk_level: string;
+    reversible: boolean;
+    use_cases: string[];
+  }>;
+  message: string;
+}
+
 /**
  * Analyze dataset for bias and risk
  */
@@ -114,6 +154,26 @@ export async function cleanDataset(file: File): Promise<CleanResponse> {
   return response.json();
 }
 
+/**
+ * Detect PII (without anonymizing) for user review
+ */
+export async function detectPII(file: File): Promise<DetectPIIResponse> {
+  const formData = new FormData();
+  formData.append('file', file);
+
+  const response = await fetch(`${API_BASE_URL}/api/detect-pii`, {
+    method: 'POST',
+    body: formData,
+  });
+
+  if (!response.ok) {
+    const error = await response.json();
+    throw new Error(error.detail || 'PII detection failed');
+  }
+
+  return response.json();
+}
+
 /**
  * Download report file
  */
requirements.txt:

@@ -20,7 +20,7 @@ python-multipart>=0.0.6
 # torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121
 
 # Chatbot (WIP - not exposed in API yet)
-gpt4all>=2.0.0annotated-doc==0.0.3
+gpt4all>=2.0.0
 annotated-types==0.7.0
 anyio==4.11.0
 blis==1.3.0