feat: Add JSON explainability reports with Nordic PII detection

- Convert reports from text to structured JSON format
- Add simple and detailed explainability report types
- Implement automatic Nordic-specific entity detection (FI, SE, NO, DK)
- Include Nordic regulatory compliance information (Finnish DPA, Swedish IMY, etc.)
- Add custom JSON serialization for numpy types
- Update test suite with Nordic PII test cases
- Enhance cleaning_config with Nordic entities (FI_PERSONAL_ID, SE_PERSONAL_ID, NO_PERSONAL_ID, DK_PERSONAL_ID, FI_KELA_ID, FI_BUSINESS_ID)
This commit is contained in:
2025-11-07 09:56:13 +05:30
parent 927b919518
commit 59d46b659f
3 changed files with 715 additions and 20 deletions

View File

@@ -55,6 +55,13 @@ ENTITY_STRATEGY_MAP = {
'SG_NRIC_FIN': 'REMOVE',
'IN_PAN': 'REMOVE',
# Nordic National IDs - HIGH RISK (CRITICAL)
'FI_PERSONAL_ID': 'REMOVE', # Finnish Henkilötunnus (HETU)
'SE_PERSONAL_ID': 'REMOVE', # Swedish Personnummer
'NO_PERSONAL_ID': 'REMOVE', # Norwegian Fødselsnummer
'DK_PERSONAL_ID': 'REMOVE', # Danish CPR-nummer
'FI_KELA_ID': 'REMOVE', # Finnish social security (Kela)
# Health Information - HIGH RISK (GDPR Art. 9)
'MEDICAL_LICENSE': 'REMOVE',
@@ -67,6 +74,9 @@ ENTITY_STRATEGY_MAP = {
'PERSON': 'HASH', # Names
'IP_ADDRESS': 'HASH',
# Nordic Business Identifiers - MEDIUM RISK
'FI_BUSINESS_ID': 'HASH', # Finnish Y-tunnus (less sensitive than personal IDs)
# Geographic Information - LOW RISK
'LOCATION': 'MASK',
'US_ZIP_CODE': 'GENERALIZE',
@@ -103,6 +113,14 @@ GDPR_COMPLIANCE = {
'MEDICAL_LICENSE': 'Art. 9(1) - Special category data (health)',
'NRP': 'Art. 9(1) - Special category data (political/religious views)',
'DATE_TIME': 'Art. 4(1) - Personal data (temporal information)',
# Nordic National IDs
'FI_PERSONAL_ID': 'Art. 4(1) - Personal data identifier + Recital 26',
'SE_PERSONAL_ID': 'Art. 4(1) - Personal data identifier + Recital 26',
'NO_PERSONAL_ID': 'Art. 4(1) - Personal data identifier + Recital 26',
'DK_PERSONAL_ID': 'Art. 4(1) - Personal data identifier + Recital 26',
'FI_KELA_ID': 'Art. 9(1) - Special category (health/social security)',
'FI_BUSINESS_ID': 'Art. 4(1) - Organizational identifier (lower risk)',
}
# Presidio Analyzer Settings