// monorepo/cloud/maplepress-backend/internal/usecase/page/sync.go (205 lines, 6.2 KiB, Go)
package page
import (
"context"
"fmt"
"time"
"github.com/gocql/gocql"
"go.uber.org/zap"
domainpage "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/page"
domainsite "codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/internal/domain/site"
"codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/pkg/search"
"codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/pkg/security/ipcrypt"
)
// SyncPagesUseCase handles page synchronization from WordPress
// SyncPagesUseCase handles page synchronization from WordPress: it persists
// incoming pages via the page repository and mirrors publishable pages into
// the per-site Meilisearch index.
type SyncPagesUseCase struct {
	pageRepo     domainpage.Repository // persistence for page entities (upserts)
	siteRepo     domainsite.Repository // used to load and validate the target site
	searchClient *search.Client        // Meilisearch client for index management and document ingestion
	ipEncryptor  *ipcrypt.IPEncryptor  // encrypts client IPs before storage (CWE-359 / GDPR)
	logger       *zap.Logger           // structured logger for sync progress and failures
}
// ProvideSyncPagesUseCase creates a new SyncPagesUseCase
// ProvideSyncPagesUseCase wires a SyncPagesUseCase from its dependencies.
// All collaborators are required; nil checks are left to the DI container.
func ProvideSyncPagesUseCase(
	pages domainpage.Repository,
	sites domainsite.Repository,
	searcher *search.Client,
	encryptor *ipcrypt.IPEncryptor,
	log *zap.Logger,
) *SyncPagesUseCase {
	uc := &SyncPagesUseCase{
		pageRepo:     pages,
		siteRepo:     sites,
		searchClient: searcher,
		ipEncryptor:  encryptor,
		logger:       log,
	}
	return uc
}
// SyncPageInput represents a single page to sync
type SyncPageInput struct {
PageID string `json:"page_id"`
Title string `json:"title"`
Content string `json:"content"`
Excerpt string `json:"excerpt"`
URL string `json:"url"`
Status string `json:"status"` // publish, draft, trash
PostType string `json:"post_type"` // page, post
Author string `json:"author"`
PublishedAt time.Time `json:"published_at"`
ModifiedAt time.Time `json:"modified_at"`
IPAddress string `json:"-"` // Plain IP address (will be encrypted before storage), never exposed in JSON
}
// SyncPagesInput is the input for syncing pages
type SyncPagesInput struct {
Pages []SyncPageInput `json:"pages"`
}
// SyncPagesOutput is the output after syncing pages
type SyncPagesOutput struct {
SyncedCount int `json:"synced_count"`
IndexedCount int `json:"indexed_count"`
FailedPages []string `json:"failed_pages,omitempty"`
Message string `json:"message"`
}
// SyncPages syncs a batch of pages for a site
func (uc *SyncPagesUseCase) SyncPages(ctx context.Context, tenantID, siteID gocql.UUID, input *SyncPagesInput) (*SyncPagesOutput, error) {
uc.logger.Info("syncing pages",
zap.String("tenant_id", tenantID.String()),
zap.String("site_id", siteID.String()),
zap.Int("page_count", len(input.Pages)))
// Get site to validate and check quotas
site, err := uc.siteRepo.GetByID(ctx, tenantID, siteID)
if err != nil {
uc.logger.Error("failed to get site", zap.Error(err))
return nil, domainsite.ErrSiteNotFound
}
// Verify site is verified (skip for test mode)
if site.RequiresVerification() && !site.IsVerified {
uc.logger.Warn("site not verified", zap.String("site_id", siteID.String()))
return nil, domainsite.ErrSiteNotVerified
}
// No quota limits - usage-based billing (anti-abuse via rate limiting only)
// Ensure search index exists
indexExists, err := uc.searchClient.IndexExists(siteID.String())
if err != nil {
uc.logger.Error("failed to check index existence", zap.Error(err))
return nil, fmt.Errorf("failed to check search index: %w", err)
}
if !indexExists {
uc.logger.Info("creating search index", zap.String("site_id", siteID.String()))
if err := uc.searchClient.CreateIndex(siteID.String()); err != nil {
uc.logger.Error("failed to create index", zap.Error(err))
return nil, fmt.Errorf("failed to create search index: %w", err)
}
}
// Process each page
syncedCount := 0
indexedCount := 0
var failedPages []string
var documentsToIndex []search.PageDocument
for _, pageInput := range input.Pages {
// Encrypt IP address (CWE-359: GDPR compliance)
encryptedIP, err := uc.ipEncryptor.Encrypt(pageInput.IPAddress)
if err != nil {
uc.logger.Error("failed to encrypt IP address",
zap.String("page_id", pageInput.PageID),
zap.Error(err))
failedPages = append(failedPages, pageInput.PageID)
continue
}
// Create page entity
page := domainpage.NewPage(
siteID,
site.TenantID,
pageInput.PageID,
pageInput.Title,
pageInput.Content,
pageInput.Excerpt,
pageInput.URL,
pageInput.Status,
pageInput.PostType,
pageInput.Author,
pageInput.PublishedAt,
pageInput.ModifiedAt,
encryptedIP,
)
// Upsert page to database
if err := uc.pageRepo.Upsert(ctx, page); err != nil {
uc.logger.Error("failed to upsert page",
zap.String("page_id", pageInput.PageID),
zap.Error(err))
failedPages = append(failedPages, pageInput.PageID)
continue
}
syncedCount++
// Only index published pages
if page.ShouldIndex() {
page.MarkIndexed()
// Prepare document for Meilisearch
doc := search.PageDocument{
ID: page.PageID,
SiteID: page.SiteID.String(),
TenantID: page.TenantID.String(),
Title: page.Title,
Content: page.Content,
Excerpt: page.Excerpt,
URL: page.URL,
Status: page.Status,
PostType: page.PostType,
Author: page.Author,
PublishedAt: page.PublishedAt.Unix(),
ModifiedAt: page.ModifiedAt.Unix(),
}
documentsToIndex = append(documentsToIndex, doc)
}
}
// Index documents in Meilisearch if any
if len(documentsToIndex) > 0 {
_, err := uc.searchClient.AddDocuments(siteID.String(), documentsToIndex)
if err != nil {
uc.logger.Error("failed to index documents", zap.Error(err))
return nil, fmt.Errorf("failed to index documents: %w", err)
}
indexedCount = len(documentsToIndex)
// Note: Usage tracking is handled by the service layer via UpdateSiteUsageUseCase
}
uc.logger.Info("pages synced successfully",
zap.String("site_id", siteID.String()),
zap.Int("synced", syncedCount),
zap.Int("indexed", indexedCount),
zap.Int("failed", len(failedPages)))
message := fmt.Sprintf("Successfully synced %d pages, indexed %d pages", syncedCount, indexedCount)
if len(failedPages) > 0 {
message += fmt.Sprintf(", failed %d pages", len(failedPages))
}
return &SyncPagesOutput{
SyncedCount: syncedCount,
IndexedCount: indexedCount,
FailedPages: failedPages,
Message: message,
}, nil
}