Core Issues
Privacy and Safety
Protect user privacy and build safer AI systems with technical safeguards.
Privacy in AI
AI systems process vast amounts of personal data, creating privacy risks:
Key Privacy Concerns
- Training data: Personal data used without consent
- Inference attacks: Extracting private info from model outputs
- Data minimization: Collecting more data than needed
- Re-identification: Combining data to identify individuals
Privacy-Preserving Techniques
- Data anonymization: Remove identifying information
- Differential privacy: Add statistical noise to protect individuals
- Federated learning: Train without centralizing data
- Synthetic data: Generate fake but realistic training data
AI Safety
AI safety covers techniques for preventing AI systems from causing harm:
Alignment
Ensuring AI systems do what we actually want, not just what we literally specified.
Robustness
Performing safely under distribution shift, adversarial attacks, and edge cases.
Content Safety
Preventing generation of harmful, illegal, or inappropriate content.
Example
python
import hashlib
import re
from typing import Optional
# Data anonymization utilities
class DataAnonymizer:
    """Tools for protecting personal information in AI pipelines."""

    @staticmethod
    def hash_identifier(value: str, salt: str = "secret_salt") -> str:
        """One-way hash for IDs — can't reverse, but consistent.

        Returns the first 16 hex chars of SHA-256(salt + value).
        NOTE(review): the hard-coded default salt gives little protection
        against dictionary attacks — pass a deployment-specific salt.
        """
        return hashlib.sha256(f"{salt}{value}".encode()).hexdigest()[:16]

    @staticmethod
    def mask_email(email: str) -> str:
        """Mask email: j***@example.com"""
        parts = email.split("@")
        if len(parts) != 2:
            # Not a well-formed address: mask everything.
            return "***@***.***"
        local = parts[0]
        masked = local[0] + "***" if len(local) > 1 else "***"
        return f"{masked}@{parts[1]}"

    @staticmethod
    def redact_pii(text: str) -> str:
        """Remove common PII (emails, US phones, SSNs, card numbers) from text.

        Fixed: the original patterns had lost every backslash escape
        (e.g. '[w.-]' where a word-character class was intended), so no
        PII was ever matched, and the unescaped '+' in the phone pattern
        raised re.error at runtime.
        """
        # Email
        text = re.sub(r'[\w.-]+@[\w.-]+\.\w+', '[EMAIL]', text)
        # Phone (US): optional +1 prefix, optional parens around area code
        text = re.sub(r'(\+1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', '[PHONE]', text)
        # SSN
        text = re.sub(r'\d{3}-\d{2}-\d{4}', '[SSN]', text)
        # Credit card (basic 16-digit groups; no Luhn check)
        text = re.sub(r'\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}', '[CARD]', text)
        return text

    @staticmethod
    def k_anonymize(records: list[dict], quasi_identifiers: list[str], k: int = 5) -> list[dict]:
        """
        Check if dataset satisfies k-anonymity.
        Each combination of quasi-identifiers appears at least k times.

        Reports the result via print and returns the records unchanged
        (this is a diagnostic, not a transformation).
        """
        from collections import Counter
        combos = Counter(
            tuple(r.get(qi) for qi in quasi_identifiers)
            for r in records
        )
        violations = [combo for combo, count in combos.items() if count < k]
        if violations:
            print(f"k-anonymity violation: {len(violations)} unique combinations found")
        else:
            print(f"Dataset satisfies {k}-anonymity")
        return records
# Content safety check
class ContentSafetyFilter:
    """Basic content safety filtering for AI outputs."""

    # Regexes for content that should never appear in output.
    # Fixed: the original patterns had lost their backslash escapes
    # ('s*' / 'S+' / 'd{3}' as literals), so neither credentials nor
    # SSNs were ever detected.
    HARMFUL_PATTERNS = [
        r'(password|secret|api.key)\s*[:=]\s*\S+',  # credentials ('.' tolerates api_key / api-key)
        r'\d{3}-\d{2}-\d{4}',  # SSN pattern
    ]

    @classmethod
    def is_safe(cls, text: str) -> tuple[bool, list[str]]:
        """Check if text contains harmful patterns.

        Returns (is_safe, issues): is_safe is True iff no pattern
        matched (case-insensitive); issues names each matching pattern.
        """
        issues = []
        for pattern in cls.HARMFUL_PATTERNS:
            if re.search(pattern, text, re.IGNORECASE):
                issues.append(f"Pattern match: {pattern}")
        return len(issues) == 0, issues
# Usage examples (removed stray "Try it yourself" text that had been
# fused onto the final line — it was page scaffolding, not code)
anon = DataAnonymizer()
print(anon.mask_email("john.doe@company.com"))
print(anon.redact_pii("Call me at 555-123-4567 or email user@example.com"))
print(anon.hash_identifier("user_12345"))

safe, issues = ContentSafetyFilter.is_safe("My SSN is 123-45-6789")
print(f"Safe: {safe}, Issues: {issues}")