// monorepo/cloud/maplefile-backend/internal/repo/filemetadata/anonymize_old_ips.go package filemetadata import ( "context" "time" "github.com/gocql/gocql" "go.uber.org/zap" ) // AnonymizeOldIPs anonymizes IP addresses in file tables older than the cutoff date func (impl *fileMetadataRepositoryImpl) AnonymizeOldIPs(ctx context.Context, cutoffDate time.Time) (int, error) { totalAnonymized := 0 // Anonymize files_by_id table (primary table) count, err := impl.anonymizeFilesById(ctx, cutoffDate) if err != nil { impl.Logger.Error("Failed to anonymize files_by_id", zap.Error(err), zap.Time("cutoff_date", cutoffDate)) return totalAnonymized, err } totalAnonymized += count impl.Logger.Info("IP anonymization completed for file tables", zap.Int("total_anonymized", totalAnonymized), zap.Time("cutoff_date", cutoffDate)) return totalAnonymized, nil } // anonymizeFilesById processes the files_by_id table func (impl *fileMetadataRepositoryImpl) anonymizeFilesById(ctx context.Context, cutoffDate time.Time) (int, error) { count := 0 // Query all files (efficient primary key scan, no ALLOW FILTERING) query := `SELECT id, created_at, ip_anonymized_at FROM maplefile.files_by_id` iter := impl.Session.Query(query).WithContext(ctx).Iter() var id gocql.UUID var createdAt time.Time var ipAnonymizedAt *time.Time for iter.Scan(&id, &createdAt, &ipAnonymizedAt) { // Filter in application code: older than cutoff AND not yet anonymized if createdAt.Before(cutoffDate) && ipAnonymizedAt == nil { // Update the record to anonymize IPs updateQuery := ` UPDATE maplefile.files_by_id SET created_from_ip_address = '', modified_from_ip_address = '', ip_anonymized_at = ? WHERE id = ? ` if err := impl.Session.Query(updateQuery, time.Now(), id).WithContext(ctx).Exec(); err != nil { impl.Logger.Error("Failed to anonymize file record", zap.String("file_id", id.String()), zap.Error(err)) continue } count++ } } if err := iter.Close(); err != nil { impl.Logger.Error("Error during files_by_id iteration", zap.Error(err)) return count, err } impl.Logger.Debug("Anonymized files_by_id table", zap.Int("count", count), zap.Time("cutoff_date", cutoffDate)) return count, nil }