Initial commit: Open sourcing all of the Maple Open Technologies code.

This commit is contained in:
Bartlomiej Mika 2025-12-02 14:33:08 -05:00
commit 755d54a99d
2010 changed files with 448675 additions and 0 deletions

View file

@ -0,0 +1,408 @@
package ipcleanup
import (
"context"
"time"
"github.com/gocql/gocql"
"go.uber.org/zap"
domainpage "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/page"
domainsite "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/site"
domaintenant "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/tenant"
domainuser "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/user"
"codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/pkg/security/ipcrypt"
)
// CleanupService handles cleanup of expired IP addresses for GDPR compliance
// CWE-359: IP addresses must be deleted after 90 days (Option 2: Clear both IP and timestamp)
//
// "Option 2" means that when a stored IP is past retention, both the IP address
// field and its accompanying timestamp field are reset to their zero values.
// Build instances with ProvideCleanupService.
type CleanupService struct {
// userRepo is the user repository. cleanupUserIPs is currently a stub and
// does not call it yet.
userRepo domainuser.Repository
// tenantRepo lists tenants by status and persists tenants whose IP fields
// were cleared; it also drives the tenant iteration for sites and pages.
tenantRepo domaintenant.Repository
// siteRepo lists the sites of a tenant (paginated) and persists cleared sites.
siteRepo domainsite.Repository
// pageRepo fetches the pages of a site and persists cleared pages.
pageRepo domainpage.Repository
// ipEncryptor backs ShouldCleanupIP through its IsExpired method.
ipEncryptor *ipcrypt.IPEncryptor
// logger is the structured logger; ProvideCleanupService names it
// "ip-cleanup-service".
logger *zap.Logger
}
// ProvideCleanupService wires the given repositories, IP encryptor and logger
// into a new CleanupService.
//
// The supplied logger is not stored directly: a child logger named
// "ip-cleanup-service" is derived from it so that every log entry emitted by
// the service carries that name.
func ProvideCleanupService(
	userRepo domainuser.Repository,
	tenantRepo domaintenant.Repository,
	siteRepo domainsite.Repository,
	pageRepo domainpage.Repository,
	ipEncryptor *ipcrypt.IPEncryptor,
	logger *zap.Logger,
) *CleanupService {
	svc := new(CleanupService)

	// Persistence dependencies, one per entity type that carries IP metadata.
	svc.userRepo = userRepo
	svc.tenantRepo = tenantRepo
	svc.siteRepo = siteRepo
	svc.pageRepo = pageRepo

	// Supporting collaborators.
	svc.ipEncryptor = ipEncryptor
	svc.logger = logger.Named("ip-cleanup-service")

	return svc
}
// CleanupExpiredIPs removes IP address metadata older than 90 days from users,
// tenants, sites and pages, in that order, for GDPR compliance.
//
// Option 2 semantics apply: both the IP address and its timestamp are cleared.
// A failing pass does not abort the run; its error is logged and remembered,
// its reported count is still added to the total, and the remaining passes
// still execute. When at least one pass failed, the first recorded error is
// returned; otherwise nil.
//
// This method is intended to be invoked by a scheduled job.
func (s *CleanupService) CleanupExpiredIPs(ctx context.Context) error {
	const retentionDays = 90

	s.logger.Info("starting IP address cleanup for GDPR compliance (Option 2: Clear both IP and timestamp)")

	// Anything stamped before this instant is past the retention window.
	cutoff := time.Now().AddDate(0, 0, -retentionDays)
	s.logger.Info("cleaning up IP addresses older than 90 days",
		zap.Time("expiration_date", cutoff),
		zap.Int("retention_days", retentionDays))

	// Per-entity counters, filled in by the passes below and reported in the
	// final summary log entry.
	var users, tenants, sites, pages int

	passes := []struct {
		failureMsg string
		run        func(context.Context, time.Time) (int, error)
		cleaned    *int
	}{
		{"failed to cleanup user IPs", s.cleanupUserIPs, &users},
		{"failed to cleanup tenant IPs", s.cleanupTenantIPs, &tenants},
		{"failed to cleanup site IPs", s.cleanupSiteIPs, &sites},
		{"failed to cleanup page IPs", s.cleanupPageIPs, &pages},
	}

	var (
		grandTotal int
		failures   []error
	)
	for _, pass := range passes {
		n, err := pass.run(ctx, cutoff)
		if err != nil {
			s.logger.Error(pass.failureMsg, zap.Error(err))
			failures = append(failures, err)
		}
		// The count is kept even when the pass reported an error.
		*pass.cleaned = n
		grandTotal += n
	}

	if len(failures) == 0 {
		s.logger.Info("IP cleanup completed successfully",
			zap.Int("total_records_cleaned", grandTotal),
			zap.Int("users", users),
			zap.Int("tenants", tenants),
			zap.Int("sites", sites),
			zap.Int("pages", pages))
		return nil
	}

	s.logger.Warn("IP cleanup completed with errors",
		zap.Int("total_cleaned", grandTotal),
		zap.Int("error_count", len(failures)))
	return failures[0] // Only the first error is surfaced to the caller.
}
// cleanupUserIPs is the placeholder for clearing expired IP metadata from User
// entities.
//
// It currently performs no cleanup: it only logs the date window that a real
// implementation would query and then reports zero cleaned records with a nil
// error. ctx is accepted for signature parity with the other cleanup passes
// and is not used.
func (s *CleanupService) cleanupUserIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up user IP addresses")

	// Intended query window: from the Unix epoch day up to the expiration day.
	// For large datasets, consider a background job that works through smaller
	// chunks instead of a single sweep.
	const windowStart = "1970-01-01"
	windowEnd := expirationDate.Format("2006-01-02")

	// Nothing is processed yet, so the reported count is always zero.
	const cleaned = 0

	// Users are tenant-scoped, so a real implementation has to walk the
	// tenants first. Until then this limitation is surfaced as a warning.
	s.logger.Warn("user IP cleanup requires tenant iteration - this is a simplified implementation",
		zap.String("start_date", windowStart),
		zap.String("end_date", windowEnd))

	// TODO: Implement tenant iteration
	// Example approach:
	// 1. Get list of all tenants
	// 2. For each tenant, query users by date
	// 3. Process each user

	s.logger.Info("user IP cleanup skipped (requires tenant iteration support)",
		zap.Int("cleaned", cleaned))

	return cleaned, nil
}
// cleanupTenantIPs clears expired IP metadata from Tenant entities.
//
// Tenants are listed per status (active, inactive, suspended). For each tenant
// whose created or modified IP timestamp is set and lies before expirationDate,
// the matching IP address and timestamp are both reset to their zero values and
// the tenant is persisted through tenantRepo.Update.
//
// Processing is best-effort: a failed listing or update is logged and the loop
// moves on to the next item. The first such failure is returned together with
// the number of tenants that were successfully cleaned, so the caller can
// report the run as incomplete rather than as a success. If ctx is cancelled,
// the loop stops and ctx.Err() is returned with the count reached so far.
//
// NOTE(review): at most batchSize tenants are requested per status; presumably
// tenants beyond that limit are not visited in a run — confirm against the
// ListByStatus implementation.
func (s *CleanupService) cleanupTenantIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up tenant IP addresses")

	// All known statuses are checked so that no tenant is overlooked because
	// of its lifecycle state.
	statuses := []domaintenant.Status{
		domaintenant.StatusActive,
		domaintenant.StatusInactive,
		domaintenant.StatusSuspended,
	}

	totalCleaned := 0
	batchSize := 1000 // Process up to 1000 tenants per status

	// firstErr remembers the first failure so it can be returned once the
	// best-effort sweep has finished.
	var firstErr error
	recordErr := func(err error) {
		if firstErr == nil {
			firstErr = err
		}
	}

	// expired reports whether an IP timestamp is set and past retention.
	expired := func(ts time.Time) bool {
		return !ts.IsZero() && ts.Before(expirationDate)
	}

	for _, status := range statuses {
		tenants, err := s.tenantRepo.ListByStatus(ctx, status, batchSize)
		if err != nil {
			s.logger.Error("failed to list tenants by status",
				zap.String("status", string(status)),
				zap.Error(err))
			recordErr(err)
			continue
		}

		s.logger.Debug("processing tenants for IP cleanup",
			zap.String("status", string(status)),
			zap.Int("count", len(tenants)))

		for _, tenant := range tenants {
			// Stop promptly instead of issuing further writes that would
			// fail anyway once the context is done.
			if err := ctx.Err(); err != nil {
				return totalCleaned, err
			}

			needsUpdate := false

			if expired(tenant.CreatedFromIPTimestamp) {
				tenant.CreatedFromIPAddress = ""
				tenant.CreatedFromIPTimestamp = time.Time{} // Zero value
				needsUpdate = true
			}

			if expired(tenant.ModifiedFromIPTimestamp) {
				tenant.ModifiedFromIPAddress = ""
				tenant.ModifiedFromIPTimestamp = time.Time{} // Zero value
				needsUpdate = true
			}

			if !needsUpdate {
				continue
			}

			if err := s.tenantRepo.Update(ctx, tenant); err != nil {
				s.logger.Error("failed to update tenant IP fields",
					zap.String("tenant_id", tenant.ID),
					zap.Error(err))
				recordErr(err)
				continue
			}

			totalCleaned++
			s.logger.Debug("cleared expired IP from tenant",
				zap.String("tenant_id", tenant.ID))
		}
	}

	s.logger.Info("tenant IP cleanup completed",
		zap.Int("cleaned", totalCleaned))

	return totalCleaned, firstErr
}
// cleanupSiteIPs clears expired IP metadata from Site entities.
//
// Sites are reached through their tenants: tenants are listed per status, and
// each tenant's sites are read page by page via siteRepo.ListByTenant. For each
// site whose created or modified IP timestamp is set and lies before
// expirationDate, the matching IP address and timestamp are both reset to their
// zero values and the site is persisted through siteRepo.Update.
//
// Processing is best-effort: a failure while listing tenants, parsing a tenant
// ID, listing sites or updating a site is logged and the sweep continues with
// the next item (a failed site listing abandons the remaining pages of that
// tenant only). The first such failure is returned together with the number of
// sites that were successfully cleaned, so the caller can report the run as
// incomplete rather than as a success. If ctx is cancelled, the sweep stops and
// ctx.Err() is returned with the count reached so far.
//
// NOTE(review): at most tenantBatchSize tenants are requested per status;
// presumably sites of tenants beyond that limit are not visited in a run —
// confirm against the ListByStatus implementation.
func (s *CleanupService) cleanupSiteIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up site IP addresses")

	// Tenants of every status are walked so that all of their sites are seen.
	statuses := []domaintenant.Status{
		domaintenant.StatusActive,
		domaintenant.StatusInactive,
		domaintenant.StatusSuspended,
	}

	totalCleaned := 0
	tenantBatchSize := 1000
	siteBatchSize := 100

	// firstErr remembers the first failure so it can be returned once the
	// best-effort sweep has finished.
	var firstErr error
	recordErr := func(err error) {
		if firstErr == nil {
			firstErr = err
		}
	}

	// expired reports whether an IP timestamp is set and past retention.
	expired := func(ts time.Time) bool {
		return !ts.IsZero() && ts.Before(expirationDate)
	}

	for _, status := range statuses {
		tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
		if err != nil {
			s.logger.Error("failed to list tenants for site cleanup",
				zap.String("status", string(status)),
				zap.Error(err))
			recordErr(err)
			continue
		}

		for _, tenant := range tenants {
			// Stop promptly instead of issuing further queries that would
			// fail anyway once the context is done.
			if err := ctx.Err(); err != nil {
				return totalCleaned, err
			}

			tenantUUID, err := gocql.ParseUUID(tenant.ID)
			if err != nil {
				s.logger.Error("failed to parse tenant UUID",
					zap.String("tenant_id", tenant.ID),
					zap.Error(err))
				recordErr(err)
				continue
			}

			// Walk the tenant's sites one page at a time; a nil page state
			// requests the first page.
			var pageState []byte
			for {
				sites, nextPageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, pageState)
				if err != nil {
					s.logger.Error("failed to list sites for tenant",
						zap.String("tenant_id", tenant.ID),
						zap.Error(err))
					recordErr(err)
					break
				}

				for _, site := range sites {
					needsUpdate := false

					if expired(site.CreatedFromIPTimestamp) {
						site.CreatedFromIPAddress = ""
						site.CreatedFromIPTimestamp = time.Time{} // Zero value
						needsUpdate = true
					}

					if expired(site.ModifiedFromIPTimestamp) {
						site.ModifiedFromIPAddress = ""
						site.ModifiedFromIPTimestamp = time.Time{} // Zero value
						needsUpdate = true
					}

					if !needsUpdate {
						continue
					}

					if err := s.siteRepo.Update(ctx, site); err != nil {
						s.logger.Error("failed to update site IP fields",
							zap.String("site_id", site.ID.String()),
							zap.Error(err))
						recordErr(err)
						continue
					}

					totalCleaned++
					s.logger.Debug("cleared expired IP from site",
						zap.String("site_id", site.ID.String()))
				}

				// An empty page state marks the last page.
				if len(nextPageState) == 0 {
					break
				}
				if err := ctx.Err(); err != nil {
					return totalCleaned, err
				}
				pageState = nextPageState
			}
		}
	}

	s.logger.Info("site IP cleanup completed",
		zap.Int("cleaned", totalCleaned))

	return totalCleaned, firstErr
}
// cleanupPageIPs clears expired IP metadata from Page entities.
//
// Pages are partitioned by site_id, so they are reached in three steps:
//  1. list tenants per status,
//  2. list each tenant's sites page by page via siteRepo.ListByTenant,
//  3. fetch each site's pages via pageRepo.GetBySiteID.
//
// This makes it the most expensive of the cleanup passes. For each page whose
// created or modified IP timestamp is set and lies before expirationDate, the
// matching IP address and timestamp are both reset to their zero values and the
// page is persisted through pageRepo.Update.
//
// Processing is best-effort: a failure at any level is logged and the sweep
// continues with the next item (a failed site listing abandons the remaining
// site pages of that tenant only). The first such failure is returned together
// with the number of pages that were successfully cleaned, so the caller can
// report the run as incomplete rather than as a success. If ctx is cancelled,
// the sweep stops and ctx.Err() is returned with the count reached so far.
//
// NOTE(review): at most tenantBatchSize tenants are requested per status;
// presumably pages of tenants beyond that limit are not visited in a run —
// confirm against the ListByStatus implementation.
func (s *CleanupService) cleanupPageIPs(ctx context.Context, expirationDate time.Time) (int, error) {
	s.logger.Info("cleaning up page IP addresses")

	// Tenants of every status are walked so that all of their pages are seen.
	statuses := []domaintenant.Status{
		domaintenant.StatusActive,
		domaintenant.StatusInactive,
		domaintenant.StatusSuspended,
	}

	totalCleaned := 0
	tenantBatchSize := 1000
	siteBatchSize := 100

	// firstErr remembers the first failure so it can be returned once the
	// best-effort sweep has finished.
	var firstErr error
	recordErr := func(err error) {
		if firstErr == nil {
			firstErr = err
		}
	}

	// expired reports whether an IP timestamp is set and past retention.
	expired := func(ts time.Time) bool {
		return !ts.IsZero() && ts.Before(expirationDate)
	}

	for _, status := range statuses {
		tenants, err := s.tenantRepo.ListByStatus(ctx, status, tenantBatchSize)
		if err != nil {
			s.logger.Error("failed to list tenants for page cleanup",
				zap.String("status", string(status)),
				zap.Error(err))
			recordErr(err)
			continue
		}

		for _, tenant := range tenants {
			// Stop promptly instead of issuing further queries that would
			// fail anyway once the context is done.
			if err := ctx.Err(); err != nil {
				return totalCleaned, err
			}

			tenantUUID, err := gocql.ParseUUID(tenant.ID)
			if err != nil {
				s.logger.Error("failed to parse tenant UUID for pages",
					zap.String("tenant_id", tenant.ID),
					zap.Error(err))
				recordErr(err)
				continue
			}

			// Walk the tenant's sites one page of results at a time; a nil
			// page state requests the first page.
			var sitePageState []byte
			for {
				sites, nextSitePageState, err := s.siteRepo.ListByTenant(ctx, tenantUUID, siteBatchSize, sitePageState)
				if err != nil {
					s.logger.Error("failed to list sites for page cleanup",
						zap.String("tenant_id", tenant.ID),
						zap.Error(err))
					recordErr(err)
					break
				}

				for _, site := range sites {
					if err := ctx.Err(); err != nil {
						return totalCleaned, err
					}

					pages, err := s.pageRepo.GetBySiteID(ctx, site.ID)
					if err != nil {
						s.logger.Error("failed to get pages for site",
							zap.String("site_id", site.ID.String()),
							zap.Error(err))
						recordErr(err)
						continue
					}

					for _, page := range pages {
						needsUpdate := false

						if expired(page.CreatedFromIPTimestamp) {
							page.CreatedFromIPAddress = ""
							page.CreatedFromIPTimestamp = time.Time{} // Zero value
							needsUpdate = true
						}

						if expired(page.ModifiedFromIPTimestamp) {
							page.ModifiedFromIPAddress = ""
							page.ModifiedFromIPTimestamp = time.Time{} // Zero value
							needsUpdate = true
						}

						if !needsUpdate {
							continue
						}

						if err := s.pageRepo.Update(ctx, page); err != nil {
							s.logger.Error("failed to update page IP fields",
								zap.String("page_id", page.PageID),
								zap.String("site_id", page.SiteID.String()),
								zap.Error(err))
							recordErr(err)
							continue
						}

						totalCleaned++
						s.logger.Debug("cleared expired IP from page",
							zap.String("page_id", page.PageID),
							zap.String("site_id", page.SiteID.String()))
					}
				}

				// An empty page state marks the last page of sites.
				if len(nextSitePageState) == 0 {
					break
				}
				sitePageState = nextSitePageState
			}
		}
	}

	s.logger.Info("page IP cleanup completed",
		zap.Int("cleaned", totalCleaned))

	return totalCleaned, firstErr
}
// ShouldCleanupIP reports whether the IP metadata recorded at timestamp has
// expired. The decision is delegated entirely to ipEncryptor.IsExpired.
//
// Note that CleanupExpiredIPs does not call this method; it computes its own
// 90-day cutoff.
func (s *CleanupService) ShouldCleanupIP(timestamp time.Time) bool {
	expired := s.ipEncryptor.IsExpired(timestamp)
	return expired
}