Initial commit: Open sourcing all of the Maple Open Technologies code.
This commit is contained in:
commit
755d54a99d
2010 changed files with 448675 additions and 0 deletions
408
cloud/maplepress-backend/internal/service/ipcleanup/cleanup.go
Normal file
408
cloud/maplepress-backend/internal/service/ipcleanup/cleanup.go
Normal file
|
|
@ -0,0 +1,408 @@
|
|||
package ipcleanup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/gocql/gocql"
|
||||
"go.uber.org/zap"
|
||||
|
||||
domainpage "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/page"
|
||||
domainsite "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/site"
|
||||
domaintenant "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/tenant"
|
||||
domainuser "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/user"
|
||||
"codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/pkg/security/ipcrypt"
|
||||
)
|
||||
|
||||
// CleanupService handles cleanup of expired IP addresses for GDPR compliance
|
||||
// CWE-359: IP addresses must be deleted after 90 days (Option 2: Clear both IP and timestamp)
|
||||
type CleanupService struct {
|
||||
userRepo domainuser.Repository
|
||||
tenantRepo domaintenant.Repository
|
||||
siteRepo domainsite.Repository
|
||||
pageRepo domainpage.Repository
|
||||
ipEncryptor *ipcrypt.IPEncryptor
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
// ProvideCleanupService creates a new CleanupService
|
||||
func ProvideCleanupService(
|
||||
userRepo domainuser.Repository,
|
||||
tenantRepo domaintenant.Repository,
|
||||
siteRepo domainsite.Repository,
|
||||
pageRepo domainpage.Repository,
|
||||
ipEncryptor *ipcrypt.IPEncryptor,
|
||||
logger *zap.Logger,
|
||||
) *CleanupService {
|
||||
return &CleanupService{
|
||||
userRepo: userRepo,
|
||||
tenantRepo: tenantRepo,
|
||||
siteRepo: siteRepo,
|
||||
pageRepo: pageRepo,
|
||||
ipEncryptor: ipEncryptor,
|
||||
logger: logger.Named("ip-cleanup-service"),
|
||||
}
|
||||
}
|
||||
|
||||
// CleanupExpiredIPs removes IP addresses older than 90 days for GDPR compliance
|
||||
// Option 2: Clears BOTH IP address AND timestamp (complete removal)
|
||||
// This method should be called by a scheduled job
|
||||
func (s *CleanupService) CleanupExpiredIPs(ctx context.Context) error {
|
||||
s.logger.Info("starting IP address cleanup for GDPR compliance (Option 2: Clear both IP and timestamp)")
|
||||
|
||||
// Calculate the date 90 days ago
|
||||
now := time.Now()
|
||||
expirationDate := now.AddDate(0, 0, -90)
|
||||
|
||||
s.logger.Info("cleaning up IP addresses older than 90 days",
|
||||
zap.Time("expiration_date", expirationDate),
|
||||
zap.Int("retention_days", 90))
|
||||
|
||||
var totalCleaned int
|
||||
var errors []error
|
||||
|
||||
// Clean up each entity type
|
||||
usersCleaned, err := s.cleanupUserIPs(ctx, expirationDate)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to cleanup user IPs", zap.Error(err))
|
||||
errors = append(errors, err)
|
||||
}
|
||||
totalCleaned += usersCleaned
|
||||
|
||||
tenantsCleaned, err := s.cleanupTenantIPs(ctx, expirationDate)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to cleanup tenant IPs", zap.Error(err))
|
||||
errors = append(errors, err)
|
||||
}
|
||||
totalCleaned += tenantsCleaned
|
||||
|
||||
sitesCleaned, err := s.cleanupSiteIPs(ctx, expirationDate)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to cleanup site IPs", zap.Error(err))
|
||||
errors = append(errors, err)
|
||||
}
|
||||
totalCleaned += sitesCleaned
|
||||
|
||||
pagesCleaned, err := s.cleanupPageIPs(ctx, expirationDate)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to cleanup page IPs", zap.Error(err))
|
||||
errors = append(errors, err)
|
||||
}
|
||||
totalCleaned += pagesCleaned
|
||||
|
||||
if len(errors) > 0 {
|
||||
s.logger.Warn("IP cleanup completed with errors",
|
||||
zap.Int("total_cleaned", totalCleaned),
|
||||
zap.Int("error_count", len(errors)))
|
||||
return errors[0] // Return first error
|
||||
}
|
||||
|
||||
s.logger.Info("IP cleanup completed successfully",
|
||||
zap.Int("total_records_cleaned", totalCleaned),
|
||||
zap.Int("users", usersCleaned),
|
||||
zap.Int("tenants", tenantsCleaned),
|
||||
zap.Int("sites", sitesCleaned),
|
||||
zap.Int("pages", pagesCleaned))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupUserIPs cleans up expired IP addresses from User entities
|
||||
func (s *CleanupService) cleanupUserIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
||||
s.logger.Info("cleaning up user IP addresses")
|
||||
|
||||
// Note: This implementation uses ListByDate to query users in batches
|
||||
// For large datasets, consider implementing a background job that processes smaller chunks
|
||||
|
||||
// Calculate date range: from beginning of time to 90 days ago
|
||||
startDate := "1970-01-01"
|
||||
endDate := expirationDate.Format("2006-01-02")
|
||||
|
||||
totalCleaned := 0
|
||||
|
||||
// Note: Users are tenant-scoped, so we would need to iterate through tenants
|
||||
// For now, we'll log a warning about this limitation
|
||||
s.logger.Warn("user IP cleanup requires tenant iteration - this is a simplified implementation",
|
||||
zap.String("start_date", startDate),
|
||||
zap.String("end_date", endDate))
|
||||
|
||||
// TODO: Implement tenant iteration
|
||||
// Example approach:
|
||||
// 1. Get list of all tenants
|
||||
// 2. For each tenant, query users by date
|
||||
// 3. Process each user
|
||||
|
||||
s.logger.Info("user IP cleanup skipped (requires tenant iteration support)",
|
||||
zap.Int("cleaned", totalCleaned))
|
||||
|
||||
return totalCleaned, nil
|
||||
}
|
||||
|
||||
// cleanupTenantIPs cleans up expired IP addresses from Tenant entities
|
||||
func (s *CleanupService) cleanupTenantIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
||||
s.logger.Info("cleaning up tenant IP addresses")
|
||||
|
||||
// List all active tenants (we'll check all statuses to be thorough)
|
||||
statuses := []domaintenant.Status{
|
||||
domaintenant.StatusActive,
|
||||
domaintenant.StatusInactive,
|
||||
domaintenant.StatusSuspended,
|
||||
}
|
||||
|
||||
totalCleaned := 0
|
||||
batchSize := 1000 // Process up to 1000 tenants per status
|
||||
|
||||
for _, status := range statuses {
|
||||
tenants, err := s.tenantRepo.ListByStatus(ctx, status, batchSize)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to list tenants by status",
|
||||
zap.String("status", string(status)),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
s.logger.Debug("processing tenants for IP cleanup",
|
||||
zap.String("status", string(status)),
|
||||
zap.Int("count", len(tenants)))
|
||||
|
||||
for _, tenant := range tenants {
|
||||
needsUpdate := false
|
||||
|
||||
// Check if created IP timestamp is expired
|
||||
if !tenant.CreatedFromIPTimestamp.IsZero() && tenant.CreatedFromIPTimestamp.Before(expirationDate) {
|
||||
tenant.CreatedFromIPAddress = ""
|
||||
tenant.CreatedFromIPTimestamp = time.Time{} // Zero value
|
||||
needsUpdate = true
|
||||
}
|
||||
|
||||
// Check if modified IP timestamp is expired
|
||||
if !tenant.ModifiedFromIPTimestamp.IsZero() && tenant.ModifiedFromIPTimestamp.Before(expirationDate) {
|
||||
tenant.ModifiedFromIPAddress = ""
|
||||
tenant.ModifiedFromIPTimestamp = time.Time{} // Zero value
|
||||
needsUpdate = true
|
||||
}
|
||||
|
||||
if needsUpdate {
|
||||
if err := s.tenantRepo.Update(ctx, tenant); err != nil {
|
||||
s.logger.Error("failed to update tenant IP fields",
|
||||
zap.String("tenant_id", tenant.ID),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
totalCleaned++
|
||||
s.logger.Debug("cleared expired IP from tenant",
|
||||
zap.String("tenant_id", tenant.ID))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.logger.Info("tenant IP cleanup completed",
|
||||
zap.Int("cleaned", totalCleaned))
|
||||
|
||||
return totalCleaned, nil
|
||||
}
|
||||
|
||||
// cleanupSiteIPs cleans up expired IP addresses from Site entities
|
||||
func (s *CleanupService) cleanupSiteIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
||||
s.logger.Info("cleaning up site IP addresses")
|
||||
|
||||
// First, get all tenants so we can iterate through their sites
|
||||
statuses := []domaintenant.Status{
|
||||
domaintenant.StatusActive,
|
||||
domaintenant.StatusInactive,
|
||||
domaintenant.StatusSuspended,
|
||||
}
|
||||
|
||||
totalCleaned := 0
|
||||
tenantBatchSize := 1000
|
||||
siteBatchSize := 100
|
||||
|
||||
for _, status := range statuses {
|
||||
tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to list tenants for site cleanup",
|
||||
zap.String("status", string(status)),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
// For each tenant, list their sites and clean up expired IPs
|
||||
for _, tenant := range tenants {
|
||||
tenantUUID, err := gocql.ParseUUID(tenant.ID)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to parse tenant UUID",
|
||||
zap.String("tenant_id", tenant.ID),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
// List sites for this tenant (using pagination)
|
||||
var pageState []byte
|
||||
for {
|
||||
sites, nextPageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, pageState)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to list sites for tenant",
|
||||
zap.String("tenant_id", tenant.ID),
|
||||
zap.Error(err))
|
||||
break
|
||||
}
|
||||
|
||||
// Process each site
|
||||
for _, site := range sites {
|
||||
needsUpdate := false
|
||||
|
||||
// Check if created IP timestamp is expired
|
||||
if !site.CreatedFromIPTimestamp.IsZero() && site.CreatedFromIPTimestamp.Before(expirationDate) {
|
||||
site.CreatedFromIPAddress = ""
|
||||
site.CreatedFromIPTimestamp = time.Time{} // Zero value
|
||||
needsUpdate = true
|
||||
}
|
||||
|
||||
// Check if modified IP timestamp is expired
|
||||
if !site.ModifiedFromIPTimestamp.IsZero() && site.ModifiedFromIPTimestamp.Before(expirationDate) {
|
||||
site.ModifiedFromIPAddress = ""
|
||||
site.ModifiedFromIPTimestamp = time.Time{} // Zero value
|
||||
needsUpdate = true
|
||||
}
|
||||
|
||||
if needsUpdate {
|
||||
if err := s.siteRepo.Update(ctx, site); err != nil {
|
||||
s.logger.Error("failed to update site IP fields",
|
||||
zap.String("site_id", site.ID.String()),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
totalCleaned++
|
||||
s.logger.Debug("cleared expired IP from site",
|
||||
zap.String("site_id", site.ID.String()))
|
||||
}
|
||||
}
|
||||
|
||||
// Check if there are more pages
|
||||
if len(nextPageState) == 0 {
|
||||
break
|
||||
}
|
||||
pageState = nextPageState
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.logger.Info("site IP cleanup completed",
|
||||
zap.Int("cleaned", totalCleaned))
|
||||
|
||||
return totalCleaned, nil
|
||||
}
|
||||
|
||||
// cleanupPageIPs cleans up expired IP addresses from Page entities
|
||||
func (s *CleanupService) cleanupPageIPs(ctx context.Context, expirationDate time.Time) (int, error) {
|
||||
s.logger.Info("cleaning up page IP addresses")
|
||||
|
||||
// Pages are partitioned by site_id, so we need to:
|
||||
// 1. Get all tenants
|
||||
// 2. For each tenant, get all sites
|
||||
// 3. For each site, get all pages
|
||||
// This is the most expensive operation due to Cassandra's data model
|
||||
|
||||
statuses := []domaintenant.Status{
|
||||
domaintenant.StatusActive,
|
||||
domaintenant.StatusInactive,
|
||||
domaintenant.StatusSuspended,
|
||||
}
|
||||
|
||||
totalCleaned := 0
|
||||
tenantBatchSize := 1000
|
||||
siteBatchSize := 100
|
||||
|
||||
for _, status := range statuses {
|
||||
tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to list tenants for page cleanup",
|
||||
zap.String("status", string(status)),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
// For each tenant, list their sites
|
||||
for _, tenant := range tenants {
|
||||
tenantUUID, err := gocql.ParseUUID(tenant.ID)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to parse tenant UUID for pages",
|
||||
zap.String("tenant_id", tenant.ID),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
// List sites for this tenant
|
||||
var sitePageState []byte
|
||||
for {
|
||||
sites, nextSitePageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, sitePageState)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to list sites for page cleanup",
|
||||
zap.String("tenant_id", tenant.ID),
|
||||
zap.Error(err))
|
||||
break
|
||||
}
|
||||
|
||||
// For each site, get all pages
|
||||
for _, site := range sites {
|
||||
pages, err := s.pageRepo.GetBySiteID(ctx, site.ID)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to get pages for site",
|
||||
zap.String("site_id", site.ID.String()),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
// Process each page
|
||||
for _, page := range pages {
|
||||
needsUpdate := false
|
||||
|
||||
// Check if created IP timestamp is expired
|
||||
if !page.CreatedFromIPTimestamp.IsZero() && page.CreatedFromIPTimestamp.Before(expirationDate) {
|
||||
page.CreatedFromIPAddress = ""
|
||||
page.CreatedFromIPTimestamp = time.Time{} // Zero value
|
||||
needsUpdate = true
|
||||
}
|
||||
|
||||
// Check if modified IP timestamp is expired
|
||||
if !page.ModifiedFromIPTimestamp.IsZero() && page.ModifiedFromIPTimestamp.Before(expirationDate) {
|
||||
page.ModifiedFromIPAddress = ""
|
||||
page.ModifiedFromIPTimestamp = time.Time{} // Zero value
|
||||
needsUpdate = true
|
||||
}
|
||||
|
||||
if needsUpdate {
|
||||
if err := s.pageRepo.Update(ctx, page); err != nil {
|
||||
s.logger.Error("failed to update page IP fields",
|
||||
zap.String("page_id", page.PageID),
|
||||
zap.String("site_id", page.SiteID.String()),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
totalCleaned++
|
||||
s.logger.Debug("cleared expired IP from page",
|
||||
zap.String("page_id", page.PageID),
|
||||
zap.String("site_id", page.SiteID.String()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if there are more site pages
|
||||
if len(nextSitePageState) == 0 {
|
||||
break
|
||||
}
|
||||
sitePageState = nextSitePageState
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.logger.Info("page IP cleanup completed",
|
||||
zap.Int("cleaned", totalCleaned))
|
||||
|
||||
return totalCleaned, nil
|
||||
}
|
||||
|
||||
// ShouldCleanupIP checks if an IP address timestamp has expired
|
||||
func (s *CleanupService) ShouldCleanupIP(timestamp time.Time) bool {
|
||||
return s.ipEncryptor.IsExpired(timestamp)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue