Initial commit: Open sourcing all of the Maple Open Technologies code.

This commit is contained in:
Bartlomiej Mika 2025-12-02 14:33:08 -05:00
commit 755d54a99d
2010 changed files with 448675 additions and 0 deletions

View file

@ -0,0 +1,402 @@
# Scheduler with Leader Election
The scheduler has been integrated with leader election to ensure that **scheduled tasks only run on the leader instance**.
## Overview
When multiple instances of the backend are running (e.g., behind a load balancer), you don't want scheduled tasks running on every instance. This would cause:
- ❌ Duplicate task executions
- ❌ Database conflicts
- ❌ Wasted resources
- ❌ Race conditions
With leader election integration:
- ✅ Tasks only execute on the **leader instance**
- ✅ Automatic failover if leader crashes
- ✅ No duplicate executions
- ✅ Safe for multi-instance deployments
## How It Works
```
┌─────────────────────────────────────────────────────────┐
│ Load Balancer │
└─────────────────┬───────────────┬──────────────────────┘
│ │
┌─────────▼────┐ ┌──────▼──────┐ ┌──────────────┐
│ Instance 1 │ │ Instance 2 │ │ Instance 3 │
│ (LEADER) 👑 │ │ (Follower) │ │ (Follower) │
│ │ │ │ │ │
│ Scheduler ✅ │ │ Scheduler ⏸️ │ │ Scheduler ⏸️ │
│ Runs tasks │ │ Skips tasks │ │ Skips tasks │
└──────────────┘ └─────────────┘ └──────────────┘
```
### Execution Flow
1. **All instances** have the scheduler running with registered tasks
2. **All instances** have cron triggers firing at scheduled times
3. **Only the leader** actually executes the task logic
4. **Followers** skip execution (logged at DEBUG level)
Example logs:
**Leader Instance:**
```
2025-01-12T10:00:00.000Z INFO 👑 Leader executing scheduled task task=CleanupOldRecords instance_id=instance-1
2025-01-12T10:00:05.123Z INFO ✅ Task completed successfully task=CleanupOldRecords
```
**Follower Instances:**
```
2025-01-12T10:00:00.000Z DEBUG Skipping task execution - not the leader task=CleanupOldRecords instance_id=instance-2
```
## Usage
### 1. Create a Scheduled Task
```go
package tasks
import (
"context"
"go.uber.org/zap"
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/internal/interface/scheduler"
)
type CleanupTask struct {
logger *zap.Logger
// ... other dependencies
}
func NewCleanupTask(logger *zap.Logger) scheduler.Task {
return &CleanupTask{
logger: logger.Named("CleanupTask"),
}
}
func (t *CleanupTask) Name() string {
return "CleanupOldRecords"
}
func (t *CleanupTask) Schedule() string {
// Cron format: every day at 2 AM
return "0 2 * * *"
}
func (t *CleanupTask) Execute(ctx context.Context) error {
t.logger.Info("Starting cleanup of old records")
// Your task logic here
// This will ONLY run on the leader instance
t.logger.Info("Cleanup completed")
return nil
}
```
### 2. Register Tasks with the Scheduler
The scheduler itself is already wired through Google Wire (`ProvideScheduler`). Register your tasks during application startup, then start the scheduler:
```go
// In app/app.go or wherever you initialize your app
func (app *Application) Start() error {
// ... existing startup code ...
// Register scheduled tasks
if app.scheduler != nil {
// Create and register tasks
cleanupTask := tasks.NewCleanupTask(app.logger)
if err := app.scheduler.RegisterTask(cleanupTask); err != nil {
return fmt.Errorf("failed to register cleanup task: %w", err)
}
metricsTask := tasks.NewMetricsAggregationTask(app.logger)
if err := app.scheduler.RegisterTask(metricsTask); err != nil {
return fmt.Errorf("failed to register metrics task: %w", err)
}
// Start the scheduler
if err := app.scheduler.Start(); err != nil {
return fmt.Errorf("failed to start scheduler: %w", err)
}
}
// ... rest of startup code ...
}
```
### 3. Graceful Shutdown
```go
func (app *Application) Stop() error {
// ... other shutdown code ...
if app.scheduler != nil {
if err := app.scheduler.Stop(); err != nil {
app.logger.Error("Failed to stop scheduler", zap.Error(err))
}
}
// ... rest of shutdown code ...
}
```
## Cron Schedule Format
The scheduler uses the standard five-field cron format (minute, hour, day of month, month, day of week; there is no seconds field):
```
┌───────────── minute (0 - 59)
│ ┌───────────── hour (0 - 23)
│ │ ┌───────────── day of month (1 - 31)
│ │ │ ┌───────────── month (1 - 12)
│ │ │ │ ┌───────────── day of week (0 - 6) (Sunday to Saturday)
│ │ │ │ │
│ │ │ │ │
* * * * *
```
### Common Examples
```go
"* * * * *" // Every minute
"0 * * * *" // Every hour (on the hour)
"0 0 * * *" // Every day at midnight
"0 2 * * *" // Every day at 2:00 AM
"0 */6 * * *" // Every 6 hours
"0 0 * * 0" // Every Sunday at midnight
"0 0 1 * *" // First day of every month at midnight
"0 9 * * 1-5" // Weekdays at 9:00 AM
"*/5 * * * *" // Every 5 minutes
"0 0,12 * * *" // Twice a day (midnight and noon)
```
## Example Tasks
### Daily Cleanup Task
```go
type DailyCleanupTask struct {
logger *zap.Logger
repo *Repository
}
func (t *DailyCleanupTask) Name() string {
return "DailyCleanup"
}
func (t *DailyCleanupTask) Schedule() string {
return "0 3 * * *" // 3 AM every day
}
func (t *DailyCleanupTask) Execute(ctx context.Context) error {
t.logger.Info("Running daily cleanup")
// Delete old records
cutoffDate := time.Now().AddDate(0, 0, -30) // 30 days ago
if err := t.repo.DeleteOlderThan(ctx, cutoffDate); err != nil {
return fmt.Errorf("cleanup failed: %w", err)
}
return nil
}
```
### Hourly Metrics Task
```go
type MetricsAggregationTask struct {
logger *zap.Logger
metrics *MetricsService
}
func (t *MetricsAggregationTask) Name() string {
return "HourlyMetrics"
}
func (t *MetricsAggregationTask) Schedule() string {
return "0 * * * *" // Every hour
}
func (t *MetricsAggregationTask) Execute(ctx context.Context) error {
t.logger.Info("Aggregating hourly metrics")
if err := t.metrics.AggregateAndSend(ctx); err != nil {
return fmt.Errorf("metrics aggregation failed: %w", err)
}
return nil
}
```
### Cache Warming Task
```go
type CacheWarmingTask struct {
logger *zap.Logger
cache *CacheService
}
func (t *CacheWarmingTask) Name() string {
return "CacheWarming"
}
func (t *CacheWarmingTask) Schedule() string {
return "*/30 * * * *" // Every 30 minutes
}
func (t *CacheWarmingTask) Execute(ctx context.Context) error {
t.logger.Info("Warming application cache")
if err := t.cache.WarmFrequentlyAccessedData(ctx); err != nil {
return fmt.Errorf("cache warming failed: %w", err)
}
return nil
}
```
## Testing
### Local Testing with Multiple Instances
```bash
# Terminal 1 (will become leader)
LEADER_ELECTION_INSTANCE_ID=instance-1 ./maplefile-backend
# Terminal 2 (follower)
LEADER_ELECTION_INSTANCE_ID=instance-2 ./maplefile-backend
# Terminal 3 (follower)
LEADER_ELECTION_INSTANCE_ID=instance-3 ./maplefile-backend
```
Watch the logs:
- **Only instance-1** (leader) will execute tasks
- **instance-2 and instance-3** will skip task execution
Kill instance-1 and watch:
- Either instance-2 or instance-3 becomes the new leader
- The new leader starts executing tasks
- The remaining follower continues to skip
### Testing Task Execution
Create a test task that runs every minute:
```go
type TestTask struct {
logger *zap.Logger
}
func (t *TestTask) Name() string {
return "TestTask"
}
func (t *TestTask) Schedule() string {
return "* * * * *" // Every minute
}
func (t *TestTask) Execute(ctx context.Context) error {
t.logger.Info("TEST TASK EXECUTED - I am the leader!")
return nil
}
```
This makes it easy to see which instance is executing tasks.
## Configuration
### Enable/Disable Leader Election
Leader election for the scheduler is controlled by the `LEADER_ELECTION_ENABLED` environment variable:
```bash
# With leader election (default)
LEADER_ELECTION_ENABLED=true
# Without leader election (all instances run tasks - NOT RECOMMENDED for production)
LEADER_ELECTION_ENABLED=false
```
### Behavior Matrix
| Leader Election | Instances | Task Execution |
|----------------|-----------|----------------|
| Enabled | Single | Tasks run on that instance ✅ |
| Enabled | Multiple | Tasks run ONLY on leader ✅ |
| Disabled | Single | Tasks run on that instance ✅ |
| Disabled | Multiple | Tasks run on ALL instances ⚠️ |
## Best Practices
1. **Always enable leader election in production** when running multiple instances
2. **Keep tasks idempotent** - if a task is accidentally executed twice, it shouldn't cause problems
3. **Handle task failures gracefully** - the scheduler will log errors but continue running
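4. **Keep tasks short** - the cron library runs each trigger in its own goroutine, so a task that outlasts its interval will overlap with its next run, and `Stop()` blocks until running tasks return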
5. **Use context** - respect context cancellation for graceful shutdown
6. **Log appropriately** - use structured logging to track task execution
7. **Test failover** - verify new leader takes over task execution
## Monitoring
### Check Scheduler Status
You can check which instance is executing tasks by looking at the logs:
```bash
# Leader logs
grep "Leader executing" logs/app.log
# Follower logs (DEBUG level)
grep "Skipping task execution" logs/app.log
```
### Health Check
You could add a health check endpoint to expose scheduler status:
```go
func (h *HealthHandler) SchedulerHealth(w http.ResponseWriter, r *http.Request) {
tasks := h.scheduler.GetRegisteredTasks()
response := map[string]interface{}{
"registered_tasks": tasks,
"leader_election_enabled": h.config.LeaderElection.Enabled,
"is_leader": h.leaderElection.IsLeader(),
"will_execute_tasks": !h.config.LeaderElection.Enabled || h.leaderElection.IsLeader(),
}
json.NewEncoder(w).Encode(response)
}
```
## Troubleshooting
### Tasks not running on any instance
1. Check leader election is working: `grep "Became the leader" logs/app.log`
2. Check tasks are registered: Look for "Registering scheduled task" in logs
3. Check scheduler started: Look for "Scheduler started successfully"
### Tasks running on multiple instances
1. Check `LEADER_ELECTION_ENABLED=true` in all instances
2. Check all instances connect to the same Redis
3. Check network connectivity between instances and Redis
### Tasks not running after leader failure
1. Check `LEADER_ELECTION_LOCK_TTL` - should be < 30s for fast failover
2. Check `LEADER_ELECTION_RETRY_INTERVAL` - followers should retry frequently
3. Check new leader logs for "Became the leader"
4. Verify new leader executes tasks after election
## Related Documentation
- [Leader Election Package](../../../pkg/leaderelection/README.md)
- [Leader Election Examples](../../../pkg/leaderelection/EXAMPLE.md)

View file

@ -0,0 +1,179 @@
package scheduler
import (
"context"
"sync"
"github.com/robfig/cron/v3"
"go.uber.org/zap"
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/config"
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/leaderelection"
)
// Task is the contract a unit of scheduled work must satisfy in order to be
// registered with the Scheduler.
type Task interface {
	// Execute performs one run of the task. The scheduler passes in its own
	// lifecycle context, and logs any error that is returned.
	Execute(ctx context.Context) error

	// Name identifies the task in log output.
	Name() string

	// Schedule returns the cron expression that determines when the task is
	// triggered.
	Schedule() string
}
// Scheduler owns the cron engine and the list of registered tasks.
//
// When leader election is enabled in the configuration and a leader-election
// instance has been supplied, a triggered task only runs if this instance is
// currently the leader. Otherwise every trigger runs the task.
type Scheduler struct {
	// config is consulted for the LeaderElection.Enabled flag.
	config *config.Config
	// logger is the logger used for all scheduler output.
	logger *zap.Logger
	// cron is the underlying cron engine that fires the task triggers.
	cron *cron.Cron
	// tasks records every successfully registered task, in registration order.
	tasks []Task
	// mu guards tasks.
	mu sync.RWMutex
	// ctx is handed to every task execution; cancel ends it when Stop is called.
	ctx context.Context
	cancel context.CancelFunc
	// leaderElection reports leadership and the instance ID (can be nil if disabled).
	leaderElection leaderelection.LeaderElection
}
// ProvideScheduler is the Wire provider for Scheduler.
//
// It returns a scheduler with an empty task list, a fresh cron engine, a
// logger named "Scheduler", and a cancellable lifecycle context derived from
// context.Background. leaderElection is stored as given and may be nil.
func ProvideScheduler(
	cfg *config.Config,
	logger *zap.Logger,
	leaderElection leaderelection.LeaderElection,
) *Scheduler {
	s := &Scheduler{
		config:         cfg,
		logger:         logger.Named("Scheduler"),
		cron:           cron.New(),
		tasks:          []Task{},
		leaderElection: leaderElection,
	}
	// The lifecycle context is passed to every task run and cancelled by Stop.
	s.ctx, s.cancel = context.WithCancel(context.Background())
	return s
}
// RegisterTask adds task to the cron engine under the expression returned by
// task.Schedule and records it in the scheduler's task list.
//
// If the cron engine rejects the schedule, the error is logged and returned
// unchanged, and the task is not recorded.
func (s *Scheduler) RegisterTask(task Task) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.logger.Info("Registering scheduled task",
		zap.String("task", task.Name()),
		zap.String("schedule", task.Schedule()))

	// Every cron trigger is routed through executeTask, which applies the
	// leader check and the logging around the actual run.
	job := func() { s.executeTask(task) }

	if _, err := s.cron.AddFunc(task.Schedule(), job); err != nil {
		s.logger.Error("Failed to register task",
			zap.String("task", task.Name()),
			zap.Error(err))
		return err
	}

	s.tasks = append(s.tasks, task)
	s.logger.Info("✅ Task registered successfully",
		zap.String("task", task.Name()))
	return nil
}
// executeTask runs one triggered invocation of task with logging, error
// handling and panic recovery.
//
// When leader election is enabled in the configuration and a leader-election
// instance is present, the run is skipped (logged at DEBUG) unless this
// instance is currently the leader. In every other case the task runs.
//
// The task receives the scheduler's lifecycle context, which Stop cancels. An
// error returned by the task is logged and otherwise ignored, so later
// triggers still fire.
//
// A panic raised while running the task is recovered and logged. The cron
// engine is created without a recovery wrapper, so an unrecovered panic in a
// job goroutine would terminate the whole process.
func (s *Scheduler) executeTask(task Task) {
	// name is declared before the deferred recover so the handler can report
	// it even if the panic happens while it is still being resolved.
	var name string
	defer func() {
		if r := recover(); r != nil {
			s.logger.Error("Task panicked",
				zap.String("task", name),
				zap.Any("panic", r),
				zap.Stack("stack"))
		}
	}()
	name = task.Name()

	if s.config.LeaderElection.Enabled && s.leaderElection != nil {
		// Only the current leader is allowed to run the task.
		if !s.leaderElection.IsLeader() {
			s.logger.Debug("Skipping task execution - not the leader",
				zap.String("task", name),
				zap.String("instance_id", s.leaderElection.GetInstanceID()))
			return
		}
		s.logger.Info("👑 Leader executing scheduled task",
			zap.String("task", name),
			zap.String("instance_id", s.leaderElection.GetInstanceID()))
	} else {
		// Leader election is disabled or unavailable: run unconditionally.
		s.logger.Info("Executing scheduled task",
			zap.String("task", name))
	}

	// All runs share the scheduler's lifecycle context, so cancelling it in
	// Stop signals every in-flight task at once.
	if err := task.Execute(s.ctx); err != nil {
		s.logger.Error("Task execution failed",
			zap.String("task", name),
			zap.Error(err))
		return
	}

	s.logger.Info("✅ Task completed successfully",
		zap.String("task", name))
}
// Start logs the scheduler's leader-election mode and the number of
// registered tasks, then starts the cron engine. It always returns nil.
func (s *Scheduler) Start() error {
	// Take a snapshot of the task count; the lock is not held while logging
	// or while starting the cron engine.
	s.mu.RLock()
	registered := len(s.tasks)
	s.mu.RUnlock()

	if !s.config.LeaderElection.Enabled || s.leaderElection == nil {
		s.logger.Info("🕐 Starting scheduler without leader election",
			zap.Int("registered_tasks", registered),
			zap.Bool("leader_election_enabled", false))
		s.logger.Warn("⚠️ Leader election is disabled - tasks will run on ALL instances")
	} else {
		s.logger.Info("🕐 Starting scheduler with leader election",
			zap.Int("registered_tasks", registered),
			zap.Bool("leader_election_enabled", true),
			zap.String("instance_id", s.leaderElection.GetInstanceID()))
		s.logger.Info(" Tasks will ONLY execute on the leader instance")
	}

	if registered == 0 {
		s.logger.Warn("No tasks registered, scheduler will run but do nothing")
	}

	s.cron.Start()
	s.logger.Info("✅ Scheduler started successfully")
	return nil
}
// Stop shuts the scheduler down: it cancels the lifecycle context handed to
// tasks, stops the cron engine, and blocks until the context returned by the
// cron engine's Stop is done. It always returns nil.
func (s *Scheduler) Stop() error {
	s.logger.Info("Stopping scheduler...")

	// Signal in-flight tasks first so they can wind down while we wait.
	s.cancel()

	// Block until the cron engine reports that it has finished stopping.
	<-s.cron.Stop().Done()

	s.logger.Info("✅ Scheduler stopped successfully")
	return nil
}
// GetRegisteredTasks returns the names of all registered tasks in
// registration order. The returned slice is newly allocated and never nil.
func (s *Scheduler) GetRegisteredTasks() []string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	names := make([]string, 0, len(s.tasks))
	for _, registered := range s.tasks {
		names = append(names, registered.Name())
	}
	return names
}

View file

@ -0,0 +1,65 @@
package tasks
import (
"context"
"time"
"go.uber.org/zap"
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/config"
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/internal/service/ipanonymization"
)
// IPAnonymizationTask implements scheduler.Task for IP address anonymization.
// Its schedule and on/off switch are read from the Security section of the
// configuration, and the actual work is delegated to the anonymization service.
type IPAnonymizationTask struct {
	// service performs the anonymization when the task executes.
	service ipanonymization.AnonymizeOldIPsService
	// config supplies Security.IPAnonymizationSchedule and
	// Security.IPAnonymizationEnabled.
	config *config.Config
	// logger is the task's named logger.
	logger *zap.Logger
}
// ProvideIPAnonymizationTask is the Wire provider for IPAnonymizationTask.
// The task keeps the given service and configuration and logs through a
// child logger named "IPAnonymizationTask".
func ProvideIPAnonymizationTask(
	service ipanonymization.AnonymizeOldIPsService,
	cfg *config.Config,
	logger *zap.Logger,
) *IPAnonymizationTask {
	task := new(IPAnonymizationTask)
	task.service = service
	task.config = cfg
	task.logger = logger.Named("IPAnonymizationTask")
	return task
}
// Name returns the fixed, human-readable name of this task.
func (t *IPAnonymizationTask) Name() string {
	const taskName = "IP Anonymization"
	return taskName
}
// Schedule returns the cron expression for this task, taken from the
// Security.IPAnonymizationSchedule configuration value.
func (t *IPAnonymizationTask) Schedule() string {
	spec := t.config.Security.IPAnonymizationSchedule
	return spec
}
// Execute runs one IP anonymization pass through the underlying service.
//
// If Security.IPAnonymizationEnabled is false the call is a no-op that logs
// at DEBUG and returns nil. Otherwise the service is invoked with ctx, the
// elapsed time is logged with the outcome, and any service error is returned
// unchanged.
func (t *IPAnonymizationTask) Execute(ctx context.Context) error {
	if enabled := t.config.Security.IPAnonymizationEnabled; !enabled {
		t.logger.Debug("IP anonymization is disabled")
		return nil
	}

	began := time.Now()
	t.logger.Info("Starting IP anonymization task")

	// Delegate the actual work, then measure how long it took so both the
	// success and failure logs carry the duration.
	err := t.service.Execute(ctx)
	elapsed := zap.Duration("duration", time.Since(began))

	if err != nil {
		t.logger.Error("IP anonymization task failed",
			zap.Error(err),
			elapsed)
		return err
	}

	t.logger.Info("IP anonymization task completed successfully", elapsed)
	return nil
}