monorepo/cloud/maplepress-backend/pkg/validation/email.go

275 lines
7.5 KiB
Go

// File Path: monorepo/cloud/maplepress-backend/pkg/validation/email.go
package validation
import (
"fmt"
"strings"
)
// EmailValidator validates and normalizes email addresses.
// It wraps a general-purpose Validator and adds email-specific
// normalization and heuristic security checks on top of it.
// CWE-20: Improper Input Validation - ensures email addresses are properly validated.
type EmailValidator struct {
// validator supplies the baseline email check (its ValidateEmail method).
validator *Validator
}
// NewEmailValidator returns an EmailValidator backed by a Validator obtained
// from NewValidator.
func NewEmailValidator() *EmailValidator {
	ev := new(EmailValidator)
	ev.validator = NewValidator()
	return ev
}
// ValidateAndNormalize canonicalizes an email address and then validates the
// canonical form.
//
// The address is normalized first so that every check runs on exactly the
// string that is handed back to the caller. Validating the raw input and
// canonicalizing afterwards is the ordering flaw described by CWE-180: the
// value that was validated and the value that gets stored could differ.
//
// fieldName labels any validation error. On success the normalized address is
// returned with a nil error; on failure the returned string is empty and the
// error comes from the base validator or from ValidateSecurityConstraints.
func (ev *EmailValidator) ValidateAndNormalize(email, fieldName string) (string, error) {
	// Step 1: canonicalize (strip NUL bytes, trim whitespace, lowercase).
	normalized := ev.Normalize(email)

	// Step 2: baseline validation of the canonical form.
	if err := ev.validator.ValidateEmail(normalized, fieldName); err != nil {
		return "", err
	}

	// Step 3: additional heuristic security checks.
	if err := ev.ValidateSecurityConstraints(normalized, fieldName); err != nil {
		return "", err
	}

	return normalized, nil
}
// Normalize returns the canonical form of an email address for consistent
// storage and comparison: NUL bytes removed, surrounding whitespace trimmed,
// and all letters lowercased.
//
// The result is idempotent: Normalize(Normalize(x)) == Normalize(x).
//
// CWE-180: callers should validate the value returned here, not the raw input.
func (ev *EmailValidator) Normalize(email string) string {
	// Strip NUL bytes first. A NUL is not whitespace, so trimming before
	// removing it would leave whitespace that sat next to a leading or
	// trailing NUL (e.g. "a@b.com \x00" would become "a@b.com ").
	email = strings.ReplaceAll(email, "\x00", "")

	// Trim surrounding whitespace.
	email = strings.TrimSpace(email)

	// Convert to lowercase (email local parts are case-sensitive per RFC 5321,
	// but most providers treat them as case-insensitive for better UX).
	email = strings.ToLower(email)

	// Gmail-specific normalization (optional - disabled by default).
	// It removes dots and plus-aliases from Gmail addresses.
	// Uncomment to prevent abuse via Gmail aliases:
	// email = ev.normalizeGmail(email)

	return email
}
// ValidateSecurityConstraints runs heuristic checks on an email address that
// go beyond basic syntax validation. It returns an error prefixed with
// fieldName for the first check that fails, or nil when all checks pass.
//
// Checks, in order:
//  1. more than 10 of the characters + . _ - % (possible obfuscation)
//  2. disposable-provider heuristic (advisory only, never rejects)
//  3. well-known misspellings of popular mail domains
//  4. IP-address domains
func (ev *EmailValidator) ValidateSecurityConstraints(email, fieldName string) error {
	const maxSpecialChars = 10

	// 1. Count the characters commonly used to obfuscate an address.
	specials := 0
	for _, r := range email {
		switch r {
		case '+', '.', '_', '-', '%':
			specials++
		}
	}
	if specials > maxSpecialChars {
		return fmt.Errorf("%s: contains too many special characters", fieldName)
	}

	// 2. Disposable-provider detection.
	if ev.isLikelyDisposable(email) {
		// Intentionally a no-op: this is a warning-level signal. A production
		// deployment may choose to reject such addresses or flag them for
		// review; for now they are allowed.
	}

	// 3. Misspelled popular domains: suggest the corrected address.
	suggestion := ev.detectCommonDomainTypo(email)
	if suggestion != "" {
		return fmt.Errorf("%s: possible typo detected, did you mean %s?", fieldName, suggestion)
	}

	// 4. Domains given as a raw IP address.
	if ev.hasIPAddress(email) {
		return fmt.Errorf("%s: IP-based email addresses are not allowed", fieldName)
	}

	return nil
}
// isLikelyDisposable reports whether the address appears to belong to a
// disposable (throwaway) mailbox provider.
//
// The domain is matched case-insensitively, first against a set of substrings
// typical of such providers and then against a short list of exact domains.
// Input that does not contain exactly one "@" yields false.
//
// This is a basic heuristic - in production, prefer a maintained list such as
// https://github.com/disposable/disposable-email-domains or an API service.
func (ev *EmailValidator) isLikelyDisposable(email string) bool {
	// Exactly one "@" is required to isolate the domain.
	if strings.Count(email, "@") != 1 {
		return false
	}
	host := strings.ToLower(email[strings.Index(email, "@")+1:])

	// Substrings that commonly appear in disposable-provider domains.
	markers := [...]string{
		"temp",
		"disposable",
		"throwaway",
		"guerrilla",
		"mailinator",
		"10minute",
		"trashmail",
		"yopmail",
		"fakeinbox",
	}
	for _, marker := range markers {
		if strings.Contains(host, marker) {
			return true
		}
	}

	// Exact matches against known disposable domains (small sample - expand
	// as needed).
	switch host {
	case "mailinator.com",
		"guerrillamail.com",
		"10minutemail.com",
		"tempmailaddress.com",
		"yopmail.com",
		"fakeinbox.com",
		"trashmail.com",
		"throwaway.email":
		return true
	}
	return false
}
// detectCommonDomainTypo looks for a well-known misspelling of a popular mail
// domain. When one is found it returns the address with the domain corrected
// (the local part is left exactly as given); otherwise it returns "".
//
// The domain is compared case-insensitively. Input that does not contain
// exactly one "@" yields "".
func (ev *EmailValidator) detectCommonDomainTypo(email string) string {
	if strings.Count(email, "@") != 1 {
		return ""
	}
	at := strings.Index(email, "@")
	user := email[:at]
	host := strings.ToLower(email[at+1:])

	// Misspelled domain -> intended domain.
	var fixed string
	switch host {
	case "gmial.com", "gmai.com", "gmil.com":
		fixed = "gmail.com"
	case "yahooo.com", "yaho.com":
		fixed = "yahoo.com"
	case "hotmial.com", "hotmal.com":
		fixed = "hotmail.com"
	case "outlok.com", "outloo.com":
		fixed = "outlook.com"
	case "iclodu.com", "iclod.com":
		fixed = "icloud.com"
	case "protonmai.com", "protonmal.com":
		fixed = "protonmail.com"
	default:
		return ""
	}
	return user + "@" + fixed
}
// hasIPAddress reports whether the domain part of the address is an IP
// address rather than a host name.
//
// The domain is taken to be everything after the LAST "@": a quoted local
// part may itself contain "@" (for example "a@b"@[10.0.0.1]), whereas the
// domain never can, so splitting on the last separator prevents such
// addresses from slipping past this check. Input without any "@" yields false.
//
// Two forms are detected:
//   - a bracketed address literal such as [192.168.1.1]
//   - an unbracketed domain made up solely of digits and dots with at least
//     three dots (a simple heuristic, not a full IP parser)
func (ev *EmailValidator) hasIPAddress(email string) bool {
	at := strings.LastIndex(email, "@")
	if at < 0 {
		return false
	}
	domain := email[at+1:]

	// Bracketed address literal: [192.168.1.1]
	if strings.HasPrefix(domain, "[") && strings.HasSuffix(domain, "]") {
		return true
	}

	// Unbracketed form (less common but possible): needs at least three dots...
	if strings.Count(domain, ".") < 3 {
		return false
	}
	// ...and nothing other than digits and dots.
	for _, ch := range domain {
		if ch != '.' && (ch < '0' || ch > '9') {
			return false
		}
	}
	return true
}
// normalizeGmail collapses Gmail address variants onto a single form.
// Gmail ignores dots in the local part and treats everything from "+" onward
// as an alias, so both are removed here.
// Example: john.doe+test@gmail.com -> johndoe@gmail.com
//
// Only gmail.com and googlemail.com (matched case-insensitively) are
// rewritten, and the returned domain is lowercased. Any other address, or
// input without exactly one "@", is returned unchanged.
func (ev *EmailValidator) normalizeGmail(email string) string {
	if strings.Count(email, "@") != 1 {
		return email
	}
	at := strings.Index(email, "@")
	host := strings.ToLower(email[at+1:])

	switch host {
	case "gmail.com", "googlemail.com":
		// Gmail-hosted mailbox: continue with normalization below.
	default:
		return email
	}

	// Drop the dots first, then cut the plus-alias.
	user := strings.ReplaceAll(email[:at], ".", "")
	if plus := strings.IndexByte(user, '+'); plus >= 0 {
		user = user[:plus]
	}
	return user + "@" + host
}
// ValidateEmailList runs ValidateAndNormalize over every address in emails.
// Each element is reported under the field name "fieldName[i]", where i is
// its index in the input.
//
// It stops at the first invalid address and returns (nil, err). When every
// address is valid it returns the normalized addresses in input order.
func (ev *EmailValidator) ValidateEmailList(emails []string, fieldName string) ([]string, error) {
	result := make([]string, len(emails))
	for idx := range emails {
		label := fmt.Sprintf("%s[%d]", fieldName, idx)
		clean, err := ev.ValidateAndNormalize(emails[idx], label)
		if err != nil {
			return nil, err
		}
		result[idx] = clean
	}
	return result, nil
}
// IsValidEmailDomain reports whether the domain part of an email address has
// a plausible structure. The address must contain exactly one "@", and the
// domain after it must:
//   - consist of at least two dot-separated labels,
//   - contain no empty label (so ".com", "example..com" and a trailing dot
//     are all rejected), and
//   - end in a top-level label of at least 2 characters.
//
// This is a lightweight structural check only - for production, consider DNS
// MX record validation.
func (ev *EmailValidator) IsValidEmailDomain(email string) bool {
	parts := strings.Split(email, "@")
	if len(parts) != 2 {
		return false
	}

	labels := strings.Split(strings.ToLower(parts[1]), ".")
	// A single label means the domain has no dot at all.
	if len(labels) < 2 {
		return false
	}

	// An empty label comes from a leading, trailing or doubled dot; none of
	// these can appear in a valid host name.
	for _, label := range labels {
		if label == "" {
			return false
		}
	}

	// TLD must be at least 2 characters.
	return len(labels[len(labels)-1]) >= 2
}