monorepo/cloud/maplepress-backend/pkg/logger/sanitizer.go

231 lines
6.5 KiB
Go

package logger
import (
"crypto/sha256"
"encoding/hex"
"regexp"
"strings"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
// redactorEmailRegex is the email-shape pattern shared by every
// SensitiveFieldRedactor. It is compiled once at package initialisation so
// that constructing a redactor (which callers may do once per log field) does
// not pay for a regexp compilation each time. A compiled *regexp.Regexp is
// safe for concurrent use, so sharing one instance is fine.
var redactorEmailRegex = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// SensitiveFieldRedactor provides methods to redact sensitive data before
// logging. This addresses CWE-532 (Insertion of Sensitive Information into
// Log File).
type SensitiveFieldRedactor struct {
	// emailRegex is the pattern RedactEmail uses to decide whether its input
	// looks like an email address before masking it.
	emailRegex *regexp.Regexp
}

// NewSensitiveFieldRedactor creates a new redactor for sensitive data.
//
// The returned redactor references the package-level precompiled email
// pattern, so this constructor is cheap enough to call per log statement.
func NewSensitiveFieldRedactor() *SensitiveFieldRedactor {
	return &SensitiveFieldRedactor{emailRegex: redactorEmailRegex}
}
// RedactEmail masks the local part of an email address for logging while
// leaving the domain readable.
//
// Results:
//   - ""                      -> "[empty]"
//   - rejected by emailRegex  -> "[invalid-email]"
//   - not exactly one "@"     -> "[invalid-email]"
//   - local part of 1-2 bytes -> "**@<domain>"
//   - otherwise               -> first two bytes of the local part, then "***@<domain>"
//
// Example: "john.doe@example.com" -> "jo***@example.com"
func (r *SensitiveFieldRedactor) RedactEmail(email string) string {
	const invalid = "[invalid-email]"

	switch {
	case email == "":
		return "[empty]"
	case !r.emailRegex.MatchString(email):
		return invalid
	case strings.Count(email, "@") != 1:
		// Defensive: only a single separator gives an unambiguous split.
		return invalid
	}

	at := strings.IndexByte(email, '@')
	local, domain := email[:at], email[at+1:]

	// Very short local parts are hidden completely; longer ones keep two
	// leading bytes as a hint.
	masked := "**"
	if len(local) > 2 {
		masked = local[:2] + "***"
	}
	return masked + "@" + domain
}
// HashForLogging derives a short, stable identifier from value so that log
// entries about the same value can be correlated without writing the value
// itself to the log.
//
// The result is the first 8 bytes of the SHA-256 digest of value, rendered as
// 16 lowercase hexadecimal characters. An empty value yields "[empty]".
func (r *SensitiveFieldRedactor) HashForLogging(value string) string {
	// Number of leading digest bytes kept (two hex characters per byte).
	const prefixBytes = 8

	if len(value) == 0 {
		return "[empty]"
	}

	hasher := sha256.New()
	hasher.Write([]byte(value)) // hash.Hash documents that Write never returns an error
	digest := hasher.Sum(nil)

	return hex.EncodeToString(digest[:prefixBytes])
}
// RedactTenantSlug redacts a tenant slug for logging, keeping only its first
// two characters as a hint.
//
// Results:
//   - ""                     -> "[empty]"
//   - three characters or fewer -> "***"
//   - otherwise              -> first two characters followed by "***"
//
// Example: "my-company" -> "my***"
//
// Lengths are measured in Unicode code points rather than bytes so that a
// slug containing multi-byte characters is never cut in the middle of a
// character (which would write invalid UTF-8 to the log).
func (r *SensitiveFieldRedactor) RedactTenantSlug(slug string) string {
	if slug == "" {
		return "[empty]"
	}

	// Slugs are short, so converting to runes is cheap and keeps the slicing
	// below aligned to character boundaries.
	runes := []rune(slug)
	if len(runes) <= 3 {
		return "***"
	}
	return string(runes[:2]) + "***"
}
// RedactAPIKey masks an API key for logging.
//
// Keys that start with "live_sk_" or "test_sk_" keep that prefix; every key
// keeps its last four bytes only when it is long enough for the tail not to
// overlap the prefix (more than 12 bytes for prefixed keys, more than 4 bytes
// otherwise). Everything in between is replaced by "***". An empty key yields
// "[empty]".
//
// Example: "live_sk_abc123def456ghi789" -> "live_sk_***i789"
func (r *SensitiveFieldRedactor) RedactAPIKey(apiKey string) string {
	const (
		mask    = "***"
		tailLen = 4
	)

	if apiKey == "" {
		return "[empty]"
	}

	// Work out which (if any) known prefix is kept, and how long the key must
	// be before the trailing bytes are shown as well.
	keptPrefix, threshold := "", tailLen
	for _, known := range []string{"live_sk_", "test_sk_"} {
		if strings.HasPrefix(apiKey, known) {
			keptPrefix, threshold = known, len(known)+tailLen
			break
		}
	}

	if len(apiKey) > threshold {
		return keptPrefix + mask + apiKey[len(apiKey)-tailLen:]
	}
	return keptPrefix + mask
}
// RedactJWTToken redacts a JWT token for logging.
//
// Results:
//   - ""                      -> "[empty]"
//   - shorter than 32 bytes   -> "***"
//   - otherwise               -> first 8 bytes + "..." + last 8 bytes
//
// The first and last 8 bytes are only shown when they make up at most half of
// the token. With a lower threshold the two visible windows cover most or all
// of a short input (a 16-byte token would be logged in full), which defeats
// the redaction.
func (r *SensitiveFieldRedactor) RedactJWTToken(token string) string {
	const (
		visible = 8           // bytes shown at each end
		minLen  = 4 * visible // shortest token for which half stays hidden
	)

	if token == "" {
		return "[empty]"
	}
	if len(token) < minLen {
		return "***"
	}
	return token[:visible] + "..." + token[len(token)-visible:]
}
// RedactIPAddress partially redacts an IP address for logging.
//
// Results:
//   - ""    -> "[empty]"
//   - IPv4  -> first two octets kept: "192.168.1.100" -> "192.168.*.*"
//   - IPv6  -> first four groups kept, rest replaced:
//     "2001:0db8:85a3:0000:0000:8a2e:0370:7334" -> "2001:0db8:85a3:0000:****"
//   - anything else -> "***"
//
// A compressed IPv6 address (one containing "::") is expanded to its eight
// groups before truncation, e.g. "2001:db8::1" -> "2001:db8:0:0:****".
// Without the expansion the first four colon-separated pieces of a compressed
// address can be the entire address, so it would be logged unredacted.
// For the same reason a colon-separated value is only treated as IPv6 when it
// has more than four groups; otherwise nothing would be hidden.
func (r *SensitiveFieldRedactor) RedactIPAddress(ip string) string {
	if ip == "" {
		return "[empty]"
	}

	// IPv4: exactly four dot-separated parts.
	if strings.Contains(ip, ".") {
		if octets := strings.Split(ip, "."); len(octets) == 4 {
			return octets[0] + "." + octets[1] + ".*.*"
		}
	}

	// IPv6
	if strings.Contains(ip, ":") {
		groups := strings.Split(ip, ":")
		if i := strings.Index(ip, "::"); i >= 0 {
			groups = expandCompressedIPv6(ip[:i], ip[i+2:])
		}
		if len(groups) > 4 {
			return strings.Join(groups[:4], ":") + ":****"
		}
	}

	return "***"
}

// expandCompressedIPv6 rebuilds the eight groups of an IPv6 address from the
// text before (head) and after (tail) its "::", filling the gap with "0"
// groups. It returns nil when head and tail leave no room for the gap.
func expandCompressedIPv6(head, tail string) []string {
	var groups, rest []string
	if head != "" {
		groups = strings.Split(head, ":")
	}
	if tail != "" {
		rest = strings.Split(tail, ":")
	}

	zeros := 8 - len(groups) - len(rest)
	if zeros < 1 {
		return nil
	}
	for ; zeros > 0; zeros-- {
		groups = append(groups, "0")
	}
	return append(groups, rest...)
}
// Zap field helpers: convenience constructors that redact or hash a value
// and wrap the result in a zap string field.

// SafeEmail returns a zap string field named key whose value is email after
// masking by SensitiveFieldRedactor.RedactEmail.
func SafeEmail(key string, email string) zapcore.Field {
	return zap.String(key, NewSensitiveFieldRedactor().RedactEmail(email))
}
// EmailHash returns a zap string field named "email_hash" holding the
// HashForLogging digest of email, so entries can be correlated by address
// without logging the address itself.
func EmailHash(email string) zapcore.Field {
	digest := NewSensitiveFieldRedactor().HashForLogging(email)
	return zap.String("email_hash", digest)
}
// HashString returns the HashForLogging digest of value as a plain string
// (not a zap field), for callers that build their own log fields.
func HashString(value string) string {
	return NewSensitiveFieldRedactor().HashForLogging(value)
}
// SafeTenantSlug returns a zap string field named key whose value is slug
// after masking by SensitiveFieldRedactor.RedactTenantSlug.
func SafeTenantSlug(key string, slug string) zapcore.Field {
	return zap.String(key, NewSensitiveFieldRedactor().RedactTenantSlug(slug))
}
// TenantSlugHash returns a zap string field named "tenant_slug_hash" holding
// the HashForLogging digest of slug, so entries can be correlated by tenant
// without logging the slug itself.
func TenantSlugHash(slug string) zapcore.Field {
	digest := NewSensitiveFieldRedactor().HashForLogging(slug)
	return zap.String("tenant_slug_hash", digest)
}
// SafeAPIKey returns a zap string field named key whose value is apiKey
// after masking by SensitiveFieldRedactor.RedactAPIKey.
func SafeAPIKey(key string, apiKey string) zapcore.Field {
	return zap.String(key, NewSensitiveFieldRedactor().RedactAPIKey(apiKey))
}
// SafeJWTToken returns a zap string field named key whose value is token
// after masking by SensitiveFieldRedactor.RedactJWTToken.
func SafeJWTToken(key string, token string) zapcore.Field {
	return zap.String(key, NewSensitiveFieldRedactor().RedactJWTToken(token))
}
// SafeIPAddress returns a zap string field named key whose value is ip after
// masking by SensitiveFieldRedactor.RedactIPAddress.
func SafeIPAddress(key string, ip string) zapcore.Field {
	return zap.String(key, NewSensitiveFieldRedactor().RedactIPAddress(ip))
}
// UserIdentifier returns the set of zap fields used to identify a user in
// logs, in this order:
//   - "user_id":        userID, written as given
//   - "email_hash":     HashForLogging digest of email
//   - "email_redacted": email masked by RedactEmail
func UserIdentifier(userID string, email string) []zapcore.Field {
	r := NewSensitiveFieldRedactor()
	hashed := r.HashForLogging(email)
	masked := r.RedactEmail(email)

	fields := make([]zapcore.Field, 0, 3)
	fields = append(fields,
		zap.String("user_id", userID),
		zap.String("email_hash", hashed),
		zap.String("email_redacted", masked),
	)
	return fields
}
// TenantIdentifier returns the set of zap fields used to identify a tenant
// in logs, in this order:
//   - "tenant_id":            tenantID, written as given
//   - "tenant_slug_hash":     HashForLogging digest of slug
//   - "tenant_slug_redacted": slug masked by RedactTenantSlug
func TenantIdentifier(tenantID string, slug string) []zapcore.Field {
	r := NewSensitiveFieldRedactor()
	hashed := r.HashForLogging(slug)
	masked := r.RedactTenantSlug(slug)

	fields := make([]zapcore.Field, 0, 3)
	fields = append(fields,
		zap.String("tenant_id", tenantID),
		zap.String("tenant_slug_hash", hashed),
		zap.String("tenant_slug_redacted", masked),
	)
	return fields
}
// Log field names for identifying, redacted and hashed values.
const (
	// User-related fields.
	FieldUserID        = "user_id"
	FieldEmailHash     = "email_hash"
	FieldEmailRedacted = "email_redacted"

	// Tenant-related fields.
	FieldTenantID           = "tenant_id"
	FieldTenantSlugHash     = "tenant_slug_hash"
	FieldTenantSlugRedacted = "tenant_slug_redacted"

	// Credential and network fields.
	FieldAPIKeyRedacted    = "api_key_redacted"
	FieldJWTTokenRedacted  = "jwt_token_redacted"
	FieldIPAddressRedacted = "ip_address_redacted"
)