package ipcleanup

import (
	"context"
	"fmt"
	"time"

	"github.com/gocql/gocql"
	"go.uber.org/zap"

	domainpage "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/page"
	domainsite "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/site"
	domaintenant "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/tenant"
	domainuser "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/user"
	"codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/pkg/security/ipcrypt"
)

const (
	// ipRetentionDays is the number of days an IP address may be retained
	// before CleanupExpiredIPs clears it.
	ipRetentionDays = 90

	// tenantBatchSize is the limit passed to tenantRepo.ListByStatus for each
	// tenant status. Tenants beyond this limit are not visited in a single run.
	tenantBatchSize = 1000

	// siteBatchSize is the page size passed to siteRepo.ListByTenant.
	siteBatchSize = 100
)

// CleanupService handles cleanup of expired IP addresses for GDPR compliance.
// CWE-359: IP addresses must be deleted after 90 days (Option 2: Clear both IP and timestamp).
type CleanupService struct {
	userRepo    domainuser.Repository
	tenantRepo  domaintenant.Repository
	siteRepo    domainsite.Repository
	pageRepo    domainpage.Repository
	ipEncryptor *ipcrypt.IPEncryptor
	logger      *zap.Logger
}

// ProvideCleanupService creates a new CleanupService. The supplied logger is
// scoped with the name "ip-cleanup-service".
func ProvideCleanupService(
	userRepo domainuser.Repository,
	tenantRepo domaintenant.Repository,
	siteRepo domainsite.Repository,
	pageRepo domainpage.Repository,
	ipEncryptor *ipcrypt.IPEncryptor,
	logger *zap.Logger,
) *CleanupService {
	return &CleanupService{
		userRepo:    userRepo,
		tenantRepo:  tenantRepo,
		siteRepo:    siteRepo,
		pageRepo:    pageRepo,
		ipEncryptor: ipEncryptor,
		logger:      logger.Named("ip-cleanup-service"),
	}
}

// cleanupFailures counts failed best-effort steps and remembers the first
// error, so that a cleanup pass can keep going after a failure and still
// report it to the caller afterwards.
type cleanupFailures struct {
	count int
	first error
}

// record notes one failed step.
func (f *cleanupFailures) record(err error) {
	if f.first == nil {
		f.first = err
	}
	f.count++
}

// err returns nil when nothing failed; otherwise it returns an error that
// states how many steps failed during op and wraps the first failure.
func (f *cleanupFailures) err(op string) error {
	if f.count == 0 {
		return nil
	}
	return fmt.Errorf("%s: %d operation(s) failed, first error: %w", op, f.count, f.first)
}

// tenantStatusesToScan returns the tenant statuses visited by every cleanup
// pass. A fresh slice is returned on each call so callers cannot affect each
// other.
func tenantStatusesToScan() []domaintenant.Status {
	return []domaintenant.Status{
		domaintenant.StatusActive,
		domaintenant.StatusInactive,
		domaintenant.StatusSuspended,
	}
}

// ipTimestampExpired reports whether ts is set (non-zero) and strictly before
// expirationDate. A zero timestamp means there is nothing to clear.
func ipTimestampExpired(ts, expirationDate time.Time) bool {
	return !ts.IsZero() && ts.Before(expirationDate)
}

// warnIfTenantListTruncated logs a warning when a tenant listing returned as
// many rows as the requested limit, because further tenants with that status
// may exist and would not be processed in this run.
func (s *CleanupService) warnIfTenantListTruncated(status domaintenant.Status, count int) {
	if count < tenantBatchSize {
		return
	}
	s.logger.Warn("tenant listing reached batch limit; additional tenants may not have been processed",
		zap.String("status", string(status)),
		zap.Int("limit", tenantBatchSize))
}

// CleanupExpiredIPs removes IP addresses older than 90 days for GDPR compliance.
// Option 2: Clears BOTH IP address AND timestamp (complete removal).
// This method should be called by a scheduled job.
//
// Users, tenants, sites and pages are processed in that order. A failure in
// one entity type does not prevent the others from running. If any entity type
// reported a failure, the returned error states how many did and wraps the
// first one; otherwise nil is returned.
func (s *CleanupService) CleanupExpiredIPs(ctx context.Context) error {
	s.logger.Info("starting IP address cleanup for GDPR compliance (Option 2: Clear both IP and timestamp)")

	// Anything stamped before this instant is considered expired.
	expirationDate := time.Now().AddDate(0, 0, -ipRetentionDays)

	s.logger.Info("cleaning up IP addresses older than 90 days",
		zap.Time("expiration_date", expirationDate),
		zap.Int("retention_days", ipRetentionDays))

	var totalCleaned int
	var errs []error

	// Clean up each entity type.
	usersCleaned, err := s.cleanupUserIPs(ctx, expirationDate)
	if err != nil {
		s.logger.Error("failed to cleanup user IPs", zap.Error(err))
		errs = append(errs, err)
	}
	totalCleaned += usersCleaned

	tenantsCleaned, err := s.cleanupTenantIPs(ctx, expirationDate)
	if err != nil {
		s.logger.Error("failed to cleanup tenant IPs", zap.Error(err))
		errs = append(errs, err)
	}
	totalCleaned += tenantsCleaned

	sitesCleaned, err := s.cleanupSiteIPs(ctx, expirationDate)
	if err != nil {
		s.logger.Error("failed to cleanup site IPs", zap.Error(err))
		errs = append(errs, err)
	}
	totalCleaned += sitesCleaned

	pagesCleaned, err := s.cleanupPageIPs(ctx, expirationDate)
	if err != nil {
		s.logger.Error("failed to cleanup page IPs", zap.Error(err))
		errs = append(errs, err)
	}
	totalCleaned += pagesCleaned

	if len(errs) > 0 {
		s.logger.Warn("IP cleanup completed with errors",
			zap.Int("total_cleaned", totalCleaned),
			zap.Int("error_count", len(errs)))
		// Every error was logged above; the first one is wrapped so callers
		// can still inspect it with errors.Is / errors.As.
		return fmt.Errorf("ip cleanup completed with %d error(s), first: %w", len(errs), errs[0])
	}

	s.logger.Info("IP cleanup completed successfully",
		zap.Int("total_records_cleaned", totalCleaned),
		zap.Int("users", usersCleaned),
		zap.Int("tenants", tenantsCleaned),
		zap.Int("sites", sitesCleaned),
		zap.Int("pages", pagesCleaned))

	return nil
}

// cleanupUserIPs cleans up expired IP addresses from User entities.
//
// This is currently a placeholder: it logs the date range it would cover and
// returns (0, nil) without touching any user record.
func (s *CleanupService) cleanupUserIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up user IP addresses")

	// Note: This implementation uses ListByDate to query users in batches.
	// For large datasets, consider implementing a background job that processes smaller chunks.

	// Calculate date range: from beginning of time to 90 days ago.
	startDate := "1970-01-01"
	endDate := expirationDate.Format("2006-01-02")

	totalCleaned := 0

	// Note: Users are tenant-scoped, so we would need to iterate through tenants.
	// For now, we'll log a warning about this limitation.
	s.logger.Warn("user IP cleanup requires tenant iteration - this is a simplified implementation",
		zap.String("start_date", startDate),
		zap.String("end_date", endDate))

	// TODO: Implement tenant iteration
	// Example approach:
	// 1. Get list of all tenants
	// 2. For each tenant, query users by date
	// 3. Process each user

	s.logger.Info("user IP cleanup skipped (requires tenant iteration support)",
		zap.Int("cleaned", totalCleaned))

	return totalCleaned, nil
}

// cleanupTenantIPs cleans up expired IP addresses from Tenant entities.
//
// For every status from tenantStatusesToScan it lists up to tenantBatchSize
// tenants and clears the created/modified IP address together with its
// timestamp when that timestamp is before expirationDate. It returns the
// number of tenants updated. Listing and update failures are logged and do
// not stop the pass, but they are reported through the returned error. The
// pass stops early if ctx is done.
func (s *CleanupService) cleanupTenantIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up tenant IP addresses")

	var failures cleanupFailures
	totalCleaned := 0

	for _, status := range tenantStatusesToScan() {
		if err := ctx.Err(); err != nil {
			return totalCleaned, fmt.Errorf("tenant IP cleanup interrupted: %w", err)
		}

		tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
		if err != nil {
			s.logger.Error("failed to list tenants by status",
				zap.String("status", string(status)),
				zap.Error(err))
			failures.record(err)
			continue
		}
		s.warnIfTenantListTruncated(status, len(tenants))

		s.logger.Debug("processing tenants for IP cleanup",
			zap.String("status", string(status)),
			zap.Int("count", len(tenants)))

		for _, tenant := range tenants {
			if err := ctx.Err(); err != nil {
				return totalCleaned, fmt.Errorf("tenant IP cleanup interrupted: %w", err)
			}

			needsUpdate := false

			// Check if created IP timestamp is expired.
			if ipTimestampExpired(tenant.CreatedFromIPTimestamp, expirationDate) {
				tenant.CreatedFromIPAddress = ""
				tenant.CreatedFromIPTimestamp = time.Time{} // Zero value
				needsUpdate = true
			}

			// Check if modified IP timestamp is expired.
			if ipTimestampExpired(tenant.ModifiedFromIPTimestamp, expirationDate) {
				tenant.ModifiedFromIPAddress = ""
				tenant.ModifiedFromIPTimestamp = time.Time{} // Zero value
				needsUpdate = true
			}

			if !needsUpdate {
				continue
			}

			if err := s.tenantRepo.Update(ctx, tenant); err != nil {
				s.logger.Error("failed to update tenant IP fields",
					zap.String("tenant_id", tenant.ID),
					zap.Error(err))
				failures.record(err)
				continue
			}

			totalCleaned++
			s.logger.Debug("cleared expired IP from tenant",
				zap.String("tenant_id", tenant.ID))
		}
	}

	s.logger.Info("tenant IP cleanup completed",
		zap.Int("cleaned", totalCleaned))

	return totalCleaned, failures.err("tenant IP cleanup")
}

// cleanupSiteIPs cleans up expired IP addresses from Site entities.
//
// Sites are reached through their tenant: for every tenant returned by
// tenantRepo.ListByStatus, the tenant's sites are paged through with
// siteRepo.ListByTenant and expired created/modified IP fields are cleared.
// It returns the number of sites updated. Failures are logged and skipped so
// the rest of the data is still processed, but they are reported through the
// returned error. The pass stops early if ctx is done.
func (s *CleanupService) cleanupSiteIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up site IP addresses")

	var failures cleanupFailures
	totalCleaned := 0

	// First, get all tenants so we can iterate through their sites.
	for _, status := range tenantStatusesToScan() {
		if err := ctx.Err(); err != nil {
			return totalCleaned, fmt.Errorf("site IP cleanup interrupted: %w", err)
		}

		tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
		if err != nil {
			s.logger.Error("failed to list tenants for site cleanup",
				zap.String("status", string(status)),
				zap.Error(err))
			failures.record(err)
			continue
		}
		s.warnIfTenantListTruncated(status, len(tenants))

		// For each tenant, list their sites and clean up expired IPs.
		for _, tenant := range tenants {
			if err := ctx.Err(); err != nil {
				return totalCleaned, fmt.Errorf("site IP cleanup interrupted: %w", err)
			}

			tenantUUID, err := gocql.ParseUUID(tenant.ID)
			if err != nil {
				s.logger.Error("failed to parse tenant UUID",
					zap.String("tenant_id", tenant.ID),
					zap.Error(err))
				failures.record(err)
				continue
			}

			// List sites for this tenant (using pagination).
			var pageState []byte
			for {
				sites, nextPageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, pageState)
				if err != nil {
					s.logger.Error("failed to list sites for tenant",
						zap.String("tenant_id", tenant.ID),
						zap.Error(err))
					failures.record(err)
					// Without a valid page there is no way to continue
					// paging this tenant; move on to the next one.
					break
				}

				// Process each site.
				for _, site := range sites {
					needsUpdate := false

					// Check if created IP timestamp is expired.
					if ipTimestampExpired(site.CreatedFromIPTimestamp, expirationDate) {
						site.CreatedFromIPAddress = ""
						site.CreatedFromIPTimestamp = time.Time{} // Zero value
						needsUpdate = true
					}

					// Check if modified IP timestamp is expired.
					if ipTimestampExpired(site.ModifiedFromIPTimestamp, expirationDate) {
						site.ModifiedFromIPAddress = ""
						site.ModifiedFromIPTimestamp = time.Time{} // Zero value
						needsUpdate = true
					}

					if !needsUpdate {
						continue
					}

					if err := s.siteRepo.Update(ctx, site); err != nil {
						s.logger.Error("failed to update site IP fields",
							zap.String("site_id", site.ID.String()),
							zap.Error(err))
						failures.record(err)
						continue
					}

					totalCleaned++
					s.logger.Debug("cleared expired IP from site",
						zap.String("site_id", site.ID.String()))
				}

				// Check if there are more pages.
				if len(nextPageState) == 0 {
					break
				}
				pageState = nextPageState
			}
		}
	}

	s.logger.Info("site IP cleanup completed",
		zap.Int("cleaned", totalCleaned))

	return totalCleaned, failures.err("site IP cleanup")
}

// cleanupPageIPs cleans up expired IP addresses from Page entities.
//
// Pages are partitioned by site_id, so we need to:
//  1. Get all tenants
//  2. For each tenant, get all sites
//  3. For each site, get all pages
//
// This is the most expensive operation due to Cassandra's data model.
//
// It returns the number of pages updated. Failures are logged and skipped so
// the rest of the data is still processed, but they are reported through the
// returned error. The pass stops early if ctx is done.
func (s *CleanupService) cleanupPageIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up page IP addresses")

	var failures cleanupFailures
	totalCleaned := 0

	for _, status := range tenantStatusesToScan() {
		if err := ctx.Err(); err != nil {
			return totalCleaned, fmt.Errorf("page IP cleanup interrupted: %w", err)
		}

		tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
		if err != nil {
			s.logger.Error("failed to list tenants for page cleanup",
				zap.String("status", string(status)),
				zap.Error(err))
			failures.record(err)
			continue
		}
		s.warnIfTenantListTruncated(status, len(tenants))

		// For each tenant, list their sites.
		for _, tenant := range tenants {
			if err := ctx.Err(); err != nil {
				return totalCleaned, fmt.Errorf("page IP cleanup interrupted: %w", err)
			}

			tenantUUID, err := gocql.ParseUUID(tenant.ID)
			if err != nil {
				s.logger.Error("failed to parse tenant UUID for pages",
					zap.String("tenant_id", tenant.ID),
					zap.Error(err))
				failures.record(err)
				continue
			}

			// List sites for this tenant.
			var sitePageState []byte
			for {
				sites, nextSitePageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, sitePageState)
				if err != nil {
					s.logger.Error("failed to list sites for page cleanup",
						zap.String("tenant_id", tenant.ID),
						zap.Error(err))
					failures.record(err)
					// Without a valid page there is no way to continue
					// paging this tenant; move on to the next one.
					break
				}

				// For each site, get all pages.
				for _, site := range sites {
					pages, err := s.pageRepo.GetBySiteID(ctx, site.ID)
					if err != nil {
						s.logger.Error("failed to get pages for site",
							zap.String("site_id", site.ID.String()),
							zap.Error(err))
						failures.record(err)
						continue
					}

					// Process each page.
					for _, page := range pages {
						needsUpdate := false

						// Check if created IP timestamp is expired.
						if ipTimestampExpired(page.CreatedFromIPTimestamp, expirationDate) {
							page.CreatedFromIPAddress = ""
							page.CreatedFromIPTimestamp = time.Time{} // Zero value
							needsUpdate = true
						}

						// Check if modified IP timestamp is expired.
						if ipTimestampExpired(page.ModifiedFromIPTimestamp, expirationDate) {
							page.ModifiedFromIPAddress = ""
							page.ModifiedFromIPTimestamp = time.Time{} // Zero value
							needsUpdate = true
						}

						if !needsUpdate {
							continue
						}

						if err := s.pageRepo.Update(ctx, page); err != nil {
							s.logger.Error("failed to update page IP fields",
								zap.String("page_id", page.PageID),
								zap.String("site_id", page.SiteID.String()),
								zap.Error(err))
							failures.record(err)
							continue
						}

						totalCleaned++
						s.logger.Debug("cleared expired IP from page",
							zap.String("page_id", page.PageID),
							zap.String("site_id", page.SiteID.String()))
					}
				}

				// Check if there are more site pages.
				if len(nextSitePageState) == 0 {
					break
				}
				sitePageState = nextSitePageState
			}
		}
	}

	s.logger.Info("page IP cleanup completed",
		zap.Int("cleaned", totalCleaned))

	return totalCleaned, failures.err("page IP cleanup")
}

// ShouldCleanupIP checks if an IP address timestamp has expired. The decision
// is delegated to the configured IP encryptor's IsExpired method.
func (s *CleanupService) ShouldCleanupIP(timestamp time.Time) bool {
	return s.ipEncryptor.IsExpired(timestamp)
}