408 lines
12 KiB
Go
408 lines
12 KiB
Go
package ipcleanup
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/gocql/gocql"
|
|
"go.uber.org/zap"
|
|
|
|
domainpage "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/page"
|
|
domainsite "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/site"
|
|
domaintenant "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/tenant"
|
|
domainuser "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/user"
|
|
"codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/pkg/security/ipcrypt"
|
|
)
|
|
|
|
// CleanupService handles cleanup of expired IP addresses for GDPR compliance
|
|
// CWE-359: IP addresses must be deleted after 90 days (Option 2: Clear both IP and timestamp)
|
|
type CleanupService struct {
|
|
userRepo domainuser.Repository
|
|
tenantRepo domaintenant.Repository
|
|
siteRepo domainsite.Repository
|
|
pageRepo domainpage.Repository
|
|
ipEncryptor *ipcrypt.IPEncryptor
|
|
logger *zap.Logger
|
|
}
|
|
|
|
// ProvideCleanupService creates a new CleanupService
|
|
func ProvideCleanupService(
|
|
userRepo domainuser.Repository,
|
|
tenantRepo domaintenant.Repository,
|
|
siteRepo domainsite.Repository,
|
|
pageRepo domainpage.Repository,
|
|
ipEncryptor *ipcrypt.IPEncryptor,
|
|
logger *zap.Logger,
|
|
) *CleanupService {
|
|
return &CleanupService{
|
|
userRepo: userRepo,
|
|
tenantRepo: tenantRepo,
|
|
siteRepo: siteRepo,
|
|
pageRepo: pageRepo,
|
|
ipEncryptor: ipEncryptor,
|
|
logger: logger.Named("ip-cleanup-service"),
|
|
}
|
|
}
|
|
|
|
// CleanupExpiredIPs removes IP addresses older than 90 days for GDPR compliance
|
|
// Option 2: Clears BOTH IP address AND timestamp (complete removal)
|
|
// This method should be called by a scheduled job
|
|
func (s *CleanupService) CleanupExpiredIPs(ctx context.Context) error {
|
|
s.logger.Info("starting IP address cleanup for GDPR compliance (Option 2: Clear both IP and timestamp)")
|
|
|
|
// Calculate the date 90 days ago
|
|
now := time.Now()
|
|
expirationDate := now.AddDate(0, 0, -90)
|
|
|
|
s.logger.Info("cleaning up IP addresses older than 90 days",
|
|
zap.Time("expiration_date", expirationDate),
|
|
zap.Int("retention_days", 90))
|
|
|
|
var totalCleaned int
|
|
var errors []error
|
|
|
|
// Clean up each entity type
|
|
usersCleaned, err := s.cleanupUserIPs(ctx, expirationDate)
|
|
if err != nil {
|
|
s.logger.Error("failed to cleanup user IPs", zap.Error(err))
|
|
errors = append(errors, err)
|
|
}
|
|
totalCleaned += usersCleaned
|
|
|
|
tenantsCleaned, err := s.cleanupTenantIPs(ctx, expirationDate)
|
|
if err != nil {
|
|
s.logger.Error("failed to cleanup tenant IPs", zap.Error(err))
|
|
errors = append(errors, err)
|
|
}
|
|
totalCleaned += tenantsCleaned
|
|
|
|
sitesCleaned, err := s.cleanupSiteIPs(ctx, expirationDate)
|
|
if err != nil {
|
|
s.logger.Error("failed to cleanup site IPs", zap.Error(err))
|
|
errors = append(errors, err)
|
|
}
|
|
totalCleaned += sitesCleaned
|
|
|
|
pagesCleaned, err := s.cleanupPageIPs(ctx, expirationDate)
|
|
if err != nil {
|
|
s.logger.Error("failed to cleanup page IPs", zap.Error(err))
|
|
errors = append(errors, err)
|
|
}
|
|
totalCleaned += pagesCleaned
|
|
|
|
if len(errors) > 0 {
|
|
s.logger.Warn("IP cleanup completed with errors",
|
|
zap.Int("total_cleaned", totalCleaned),
|
|
zap.Int("error_count", len(errors)))
|
|
return errors[0] // Return first error
|
|
}
|
|
|
|
s.logger.Info("IP cleanup completed successfully",
|
|
zap.Int("total_records_cleaned", totalCleaned),
|
|
zap.Int("users", usersCleaned),
|
|
zap.Int("tenants", tenantsCleaned),
|
|
zap.Int("sites", sitesCleaned),
|
|
zap.Int("pages", pagesCleaned))
|
|
|
|
return nil
|
|
}
|
|
|
|
// cleanupUserIPs cleans up expired IP addresses from User entities
|
|
func (s *CleanupService) cleanupUserIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
|
s.logger.Info("cleaning up user IP addresses")
|
|
|
|
// Note: This implementation uses ListByDate to query users in batches
|
|
// For large datasets, consider implementing a background job that processes smaller chunks
|
|
|
|
// Calculate date range: from beginning of time to 90 days ago
|
|
startDate := "1970-01-01"
|
|
endDate := expirationDate.Format("2006-01-02")
|
|
|
|
totalCleaned := 0
|
|
|
|
// Note: Users are tenant-scoped, so we would need to iterate through tenants
|
|
// For now, we'll log a warning about this limitation
|
|
s.logger.Warn("user IP cleanup requires tenant iteration - this is a simplified implementation",
|
|
zap.String("start_date", startDate),
|
|
zap.String("end_date", endDate))
|
|
|
|
// TODO: Implement tenant iteration
|
|
// Example approach:
|
|
// 1. Get list of all tenants
|
|
// 2. For each tenant, query users by date
|
|
// 3. Process each user
|
|
|
|
s.logger.Info("user IP cleanup skipped (requires tenant iteration support)",
|
|
zap.Int("cleaned", totalCleaned))
|
|
|
|
return totalCleaned, nil
|
|
}
|
|
|
|
// cleanupTenantIPs cleans up expired IP addresses from Tenant entities
|
|
func (s *CleanupService) cleanupTenantIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
|
s.logger.Info("cleaning up tenant IP addresses")
|
|
|
|
// List all active tenants (we'll check all statuses to be thorough)
|
|
statuses := []domaintenant.Status{
|
|
domaintenant.StatusActive,
|
|
domaintenant.StatusInactive,
|
|
domaintenant.StatusSuspended,
|
|
}
|
|
|
|
totalCleaned := 0
|
|
batchSize := 1000 // Process up to 1000 tenants per status
|
|
|
|
for _, status := range statuses {
|
|
tenants, err := s.tenantRepo.ListByStatus(ctx, status, batchSize)
|
|
if err != nil {
|
|
s.logger.Error("failed to list tenants by status",
|
|
zap.String("status", string(status)),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
|
|
s.logger.Debug("processing tenants for IP cleanup",
|
|
zap.String("status", string(status)),
|
|
zap.Int("count", len(tenants)))
|
|
|
|
for _, tenant := range tenants {
|
|
needsUpdate := false
|
|
|
|
// Check if created IP timestamp is expired
|
|
if !tenant.CreatedFromIPTimestamp.IsZero() && tenant.CreatedFromIPTimestamp.Before(expirationDate) {
|
|
tenant.CreatedFromIPAddress = ""
|
|
tenant.CreatedFromIPTimestamp = time.Time{} // Zero value
|
|
needsUpdate = true
|
|
}
|
|
|
|
// Check if modified IP timestamp is expired
|
|
if !tenant.ModifiedFromIPTimestamp.IsZero() && tenant.ModifiedFromIPTimestamp.Before(expirationDate) {
|
|
tenant.ModifiedFromIPAddress = ""
|
|
tenant.ModifiedFromIPTimestamp = time.Time{} // Zero value
|
|
needsUpdate = true
|
|
}
|
|
|
|
if needsUpdate {
|
|
if err := s.tenantRepo.Update(ctx, tenant); err != nil {
|
|
s.logger.Error("failed to update tenant IP fields",
|
|
zap.String("tenant_id", tenant.ID),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
totalCleaned++
|
|
s.logger.Debug("cleared expired IP from tenant",
|
|
zap.String("tenant_id", tenant.ID))
|
|
}
|
|
}
|
|
}
|
|
|
|
s.logger.Info("tenant IP cleanup completed",
|
|
zap.Int("cleaned", totalCleaned))
|
|
|
|
return totalCleaned, nil
|
|
}
|
|
|
|
// cleanupSiteIPs cleans up expired IP addresses from Site entities
|
|
func (s *CleanupService) cleanupSiteIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
|
s.logger.Info("cleaning up site IP addresses")
|
|
|
|
// First, get all tenants so we can iterate through their sites
|
|
statuses := []domaintenant.Status{
|
|
domaintenant.StatusActive,
|
|
domaintenant.StatusInactive,
|
|
domaintenant.StatusSuspended,
|
|
}
|
|
|
|
totalCleaned := 0
|
|
tenantBatchSize := 1000
|
|
siteBatchSize := 100
|
|
|
|
for _, status := range statuses {
|
|
tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
|
|
if err != nil {
|
|
s.logger.Error("failed to list tenants for site cleanup",
|
|
zap.String("status", string(status)),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
|
|
// For each tenant, list their sites and clean up expired IPs
|
|
for _, tenant := range tenants {
|
|
tenantUUID, err := gocql.ParseUUID(tenant.ID)
|
|
if err != nil {
|
|
s.logger.Error("failed to parse tenant UUID",
|
|
zap.String("tenant_id", tenant.ID),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
|
|
// List sites for this tenant (using pagination)
|
|
var pageState []byte
|
|
for {
|
|
sites, nextPageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, pageState)
|
|
if err != nil {
|
|
s.logger.Error("failed to list sites for tenant",
|
|
zap.String("tenant_id", tenant.ID),
|
|
zap.Error(err))
|
|
break
|
|
}
|
|
|
|
// Process each site
|
|
for _, site := range sites {
|
|
needsUpdate := false
|
|
|
|
// Check if created IP timestamp is expired
|
|
if !site.CreatedFromIPTimestamp.IsZero() && site.CreatedFromIPTimestamp.Before(expirationDate) {
|
|
site.CreatedFromIPAddress = ""
|
|
site.CreatedFromIPTimestamp = time.Time{} // Zero value
|
|
needsUpdate = true
|
|
}
|
|
|
|
// Check if modified IP timestamp is expired
|
|
if !site.ModifiedFromIPTimestamp.IsZero() && site.ModifiedFromIPTimestamp.Before(expirationDate) {
|
|
site.ModifiedFromIPAddress = ""
|
|
site.ModifiedFromIPTimestamp = time.Time{} // Zero value
|
|
needsUpdate = true
|
|
}
|
|
|
|
if needsUpdate {
|
|
if err := s.siteRepo.Update(ctx, site); err != nil {
|
|
s.logger.Error("failed to update site IP fields",
|
|
zap.String("site_id", site.ID.String()),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
totalCleaned++
|
|
s.logger.Debug("cleared expired IP from site",
|
|
zap.String("site_id", site.ID.String()))
|
|
}
|
|
}
|
|
|
|
// Check if there are more pages
|
|
if len(nextPageState) == 0 {
|
|
break
|
|
}
|
|
pageState = nextPageState
|
|
}
|
|
}
|
|
}
|
|
|
|
s.logger.Info("site IP cleanup completed",
|
|
zap.Int("cleaned", totalCleaned))
|
|
|
|
return totalCleaned, nil
|
|
}
|
|
|
|
// cleanupPageIPs cleans up expired IP addresses from Page entities
|
|
func (s *CleanupService) cleanupPageIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
|
s.logger.Info("cleaning up page IP addresses")
|
|
|
|
// Pages are partitioned by site_id, so we need to:
|
|
// 1. Get all tenants
|
|
// 2. For each tenant, get all sites
|
|
// 3. For each site, get all pages
|
|
// This is the most expensive operation due to Cassandra's data model
|
|
|
|
statuses := []domaintenant.Status{
|
|
domaintenant.StatusActive,
|
|
domaintenant.StatusInactive,
|
|
domaintenant.StatusSuspended,
|
|
}
|
|
|
|
totalCleaned := 0
|
|
tenantBatchSize := 1000
|
|
siteBatchSize := 100
|
|
|
|
for _, status := range statuses {
|
|
tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
|
|
if err != nil {
|
|
s.logger.Error("failed to list tenants for page cleanup",
|
|
zap.String("status", string(status)),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
|
|
// For each tenant, list their sites
|
|
for _, tenant := range tenants {
|
|
tenantUUID, err := gocql.ParseUUID(tenant.ID)
|
|
if err != nil {
|
|
s.logger.Error("failed to parse tenant UUID for pages",
|
|
zap.String("tenant_id", tenant.ID),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
|
|
// List sites for this tenant
|
|
var sitePageState []byte
|
|
for {
|
|
sites, nextSitePageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, sitePageState)
|
|
if err != nil {
|
|
s.logger.Error("failed to list sites for page cleanup",
|
|
zap.String("tenant_id", tenant.ID),
|
|
zap.Error(err))
|
|
break
|
|
}
|
|
|
|
// For each site, get all pages
|
|
for _, site := range sites {
|
|
pages, err := s.pageRepo.GetBySiteID(ctx, site.ID)
|
|
if err != nil {
|
|
s.logger.Error("failed to get pages for site",
|
|
zap.String("site_id", site.ID.String()),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
|
|
// Process each page
|
|
for _, page := range pages {
|
|
needsUpdate := false
|
|
|
|
// Check if created IP timestamp is expired
|
|
if !page.CreatedFromIPTimestamp.IsZero() && page.CreatedFromIPTimestamp.Before(expirationDate) {
|
|
page.CreatedFromIPAddress = ""
|
|
page.CreatedFromIPTimestamp = time.Time{} // Zero value
|
|
needsUpdate = true
|
|
}
|
|
|
|
// Check if modified IP timestamp is expired
|
|
if !page.ModifiedFromIPTimestamp.IsZero() && page.ModifiedFromIPTimestamp.Before(expirationDate) {
|
|
page.ModifiedFromIPAddress = ""
|
|
page.ModifiedFromIPTimestamp = time.Time{} // Zero value
|
|
needsUpdate = true
|
|
}
|
|
|
|
if needsUpdate {
|
|
if err := s.pageRepo.Update(ctx, page); err != nil {
|
|
s.logger.Error("failed to update page IP fields",
|
|
zap.String("page_id", page.PageID),
|
|
zap.String("site_id", page.SiteID.String()),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
totalCleaned++
|
|
s.logger.Debug("cleared expired IP from page",
|
|
zap.String("page_id", page.PageID),
|
|
zap.String("site_id", page.SiteID.String()))
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if there are more site pages
|
|
if len(nextSitePageState) == 0 {
|
|
break
|
|
}
|
|
sitePageState = nextSitePageState
|
|
}
|
|
}
|
|
}
|
|
|
|
s.logger.Info("page IP cleanup completed",
|
|
zap.Int("cleaned", totalCleaned))
|
|
|
|
return totalCleaned, nil
|
|
}
|
|
|
|
// ShouldCleanupIP checks if an IP address timestamp has expired
|
|
func (s *CleanupService) ShouldCleanupIP(timestamp time.Time) bool {
|
|
return s.ipEncryptor.IsExpired(timestamp)
|
|
}
|