monorepo/cloud/maplefile-backend/pkg/leaderelection/EXAMPLE.md

10 KiB

Leader Election Integration Example

Quick Integration into MapleFile Backend

Step 1: Add to Wire Providers (app/wire.go)

// In app/wire.go, add to wire.Build():

wire.Build(
    // ... existing providers ...

    // Leader Election
    leaderelection.ProvideLeaderElection,

    // ... rest of providers ...
)

Step 2: Update Application Struct (app/app.go)

import (
    "codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/leaderelection"
)

// Application bundles the long-lived components that Start drives.
type Application struct {
    // config supplies the App.Version, App.Environment and
    // LeaderElection.Enabled settings read by Start.
    config          *config.Config
    // httpServer is started in a goroutine by Start and shut down on exit.
    httpServer      *http.WireServer
    // logger receives all lifecycle and leader election log entries.
    logger          *zap.Logger
    // migrator runs the database migrations (Up) during Start.
    migrator        *cassandradb.Migrator
    // leaderElection is started and stopped by Start when
    // config.LeaderElection.Enabled is true.
    leaderElection  leaderelection.LeaderElection  // ADD THIS
}

// ProvideApplication assembles an Application from its dependencies.
// Every argument is stored as-is on the returned value; nothing is started
// here.
func ProvideApplication(
    cfg *config.Config,
    httpServer *http.WireServer,
    logger *zap.Logger,
    migrator *cassandradb.Migrator,
    leaderElection leaderelection.LeaderElection, // ADD THIS
) *Application {
    app := new(Application)

    app.config = cfg
    app.httpServer = httpServer
    app.logger = logger
    app.migrator = migrator
    app.leaderElection = leaderElection // ADD THIS

    return app
}

Step 3: Start Leader Election in Application (app/app.go)

// Start runs the application until the HTTP server fails or a shutdown
// signal (SIGINT/SIGTERM) arrives.
//
// Sequence:
//  1. If leader election is enabled, register the callbacks and start the
//     election in a background goroutine.
//  2. Run database migrations: on the leader only when election is enabled,
//     unconditionally otherwise.
//  3. Start the HTTP server and block until it errors or a signal arrives.
//  4. Stop leader election, then shut the HTTP server down (30s timeout).
//
// Leader election is also stopped on the early-return error paths (failed
// migration, failed HTTP server), so a failing instance does not keep the
// election running (and, presumably, holding leadership) until the process
// exits.
//
// Returns a wrapped error when migrations, the HTTP server, or the shutdown
// fail, and nil after a clean shutdown.
func (app *Application) Start() error {
    app.logger.Info("🚀 MapleFile Backend Starting (Wire DI)",
        zap.String("version", app.config.App.Version),
        zap.String("environment", app.config.App.Environment),
        zap.String("di_framework", "Google Wire"))

    electionEnabled := app.config.LeaderElection.Enabled

    // Start leader election if enabled
    if electionEnabled {
        app.logger.Info("Starting leader election")

        // Register callbacks before the election starts.
        app.setupLeaderCallbacks()

        // Start election in background
        go func() {
            if err := app.leaderElection.Start(context.Background()); err != nil {
                app.logger.Error("Leader election failed", zap.Error(err))
            }
        }()

        // NOTE(review): this fixed delay is best-effort. If the first election
        // round takes longer than 500ms, every instance may report FOLLOWER
        // here and skip the migrations below.
        time.Sleep(500 * time.Millisecond)

        if app.leaderElection.IsLeader() {
            app.logger.Info("👑 This instance is the LEADER",
                zap.String("instance_id", app.leaderElection.GetInstanceID()))
        } else {
            app.logger.Info("👥 This instance is a FOLLOWER",
                zap.String("instance_id", app.leaderElection.GetInstanceID()))
        }
    }

    // Run database migrations (only the leader should do this when leader
    // election is enabled).
    switch {
    case !electionEnabled:
        // If leader election disabled, always run migrations
        app.logger.Info("Running database migrations...")
        if err := app.runMigrations(); err != nil {
            return err
        }
    case app.leaderElection.IsLeader():
        app.logger.Info("Running database migrations as leader...")
        if err := app.runMigrations(); err != nil {
            app.stopLeaderElection()
            return err
        }
    default:
        app.logger.Info("Skipping migrations - not the leader")
    }

    // Start HTTP server in goroutine. The channel is buffered so the
    // goroutine can always deliver its error and exit, even after Start has
    // stopped listening.
    errChan := make(chan error, 1)
    go func() {
        if err := app.httpServer.Start(); err != nil {
            errChan <- err
        }
    }()

    // Wait for interrupt signal or server error
    quit := make(chan os.Signal, 1)
    signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
    defer signal.Stop(quit)

    select {
    case err := <-errChan:
        app.logger.Error("HTTP server failed", zap.Error(err))
        app.stopLeaderElection()
        return fmt.Errorf("server startup failed: %w", err)
    case sig := <-quit:
        app.logger.Info("Received shutdown signal", zap.String("signal", sig.String()))
    }

    app.logger.Info("👋 MapleFile Backend Shutting Down")

    // Stop leader election before draining HTTP traffic.
    app.stopLeaderElection()

    // Graceful shutdown with timeout
    ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    defer cancel()

    if err := app.httpServer.Shutdown(ctx); err != nil {
        app.logger.Error("Server shutdown error", zap.Error(err))
        return fmt.Errorf("server shutdown failed: %w", err)
    }

    app.logger.Info("✅ MapleFile Backend Stopped Successfully")
    return nil
}

// runMigrations applies the database migrations and logs the outcome.
// It returns a wrapped error when the migrator fails.
func (app *Application) runMigrations() error {
    if err := app.migrator.Up(); err != nil {
        app.logger.Error("Failed to run database migrations", zap.Error(err))
        return fmt.Errorf("migration failed: %w", err)
    }
    app.logger.Info("✅ Database migrations completed successfully")
    return nil
}

// stopLeaderElection stops leader election when it is enabled. A failure is
// logged rather than returned so it never masks the caller's own error.
func (app *Application) stopLeaderElection() {
    if !app.config.LeaderElection.Enabled {
        return
    }
    if err := app.leaderElection.Stop(); err != nil {
        app.logger.Error("Failed to stop leader election", zap.Error(err))
    }
}

// setupLeaderCallbacks registers the handlers that run when this instance
// gains or loses leadership. Each handler currently only logs the transition;
// the marked spots are where leader-only work should be started and stopped.
func (app *Application) setupLeaderCallbacks() {
    onElected := func() {
        instanceID := zap.String("instance_id", app.leaderElection.GetInstanceID())
        app.logger.Info("🎉 BECAME LEADER - Starting leader-only tasks", instanceID)

        // Start leader-only background tasks here
        // For example:
        // - Scheduled cleanup jobs
        // - Metrics aggregation
        // - Cache warming
        // - Periodic health checks
    }

    onDemoted := func() {
        instanceID := zap.String("instance_id", app.leaderElection.GetInstanceID())
        app.logger.Warn("😢 LOST LEADERSHIP - Stopping leader-only tasks", instanceID)

        // Stop leader-only tasks here
    }

    app.leaderElection.OnBecomeLeader(onElected)
    app.leaderElection.OnLoseLeadership(onDemoted)
}

Step 4: Environment Variables (.env)

Add to your .env file:

# Leader Election Configuration
LEADER_ELECTION_ENABLED=true
LEADER_ELECTION_LOCK_TTL=10s
LEADER_ELECTION_HEARTBEAT_INTERVAL=3s
LEADER_ELECTION_RETRY_INTERVAL=2s
# Leave empty for auto-generation
LEADER_ELECTION_INSTANCE_ID=
# Leave empty for auto-detection
LEADER_ELECTION_HOSTNAME=

Step 5: Update .env.sample

# Leader Election
LEADER_ELECTION_ENABLED=true
LEADER_ELECTION_LOCK_TTL=10s
LEADER_ELECTION_HEARTBEAT_INTERVAL=3s
LEADER_ELECTION_RETRY_INTERVAL=2s
LEADER_ELECTION_INSTANCE_ID=
LEADER_ELECTION_HOSTNAME=

Step 6: Test Multiple Instances

Terminal 1

LEADER_ELECTION_INSTANCE_ID=instance-1 ./maplefile-backend
# Output: 👑 This instance is the LEADER

Terminal 2

LEADER_ELECTION_INSTANCE_ID=instance-2 ./maplefile-backend
# Output: 👥 This instance is a FOLLOWER

Terminal 3

LEADER_ELECTION_INSTANCE_ID=instance-3 ./maplefile-backend
# Output: 👥 This instance is a FOLLOWER

Test Failover

Stop Terminal 1 (kill the leader):

# Watch Terminal 2 or 3 logs
# One will show: 🎉 BECAME LEADER

Optional: Add Health Check Endpoint

Add to your HTTP handlers to expose leader election status:

// In internal/interface/http/server.go

// leaderElectionHealthHandler reports this instance's leader election status
// as JSON.
//
// Responses:
//   - 501 Not Implemented when the server has no leader election component.
//   - 500 Internal Server Error when the current leader info cannot be fetched.
//   - 200 OK with "is_leader", "instance_id" and "leader_info" otherwise.
func (s *Server) leaderElectionHealthHandler(w http.ResponseWriter, r *http.Request) {
    if s.leaderElection == nil {
        http.Error(w, "Leader election not enabled", http.StatusNotImplemented)
        return
    }

    info, err := s.leaderElection.GetLeaderInfo()
    if err != nil {
        s.logger.Error("Failed to get leader info", zap.Error(err))
        http.Error(w, "Failed to get leader info", http.StatusInternalServerError)
        return
    }

    response := map[string]any{
        "is_leader":   s.leaderElection.IsLeader(),
        "instance_id": s.leaderElection.GetInstanceID(),
        "leader_info": info,
    }

    w.Header().Set("Content-Type", "application/json")
    if err := json.NewEncoder(w).Encode(response); err != nil {
        // The status line (and possibly part of the body) has already been
        // written, so the failure can only be logged, not reported to the
        // client.
        s.logger.Error("Failed to encode leader status response", zap.Error(err))
    }
}

// Register in registerRoutes():
s.mux.HandleFunc("GET /api/v1/leader-status", s.leaderElectionHealthHandler)

Test the endpoint:

curl http://localhost:8000/api/v1/leader-status

# Response:
{
  "is_leader": true,
  "instance_id": "instance-1",
  "leader_info": {
    "instance_id": "instance-1",
    "hostname": "macbook-pro.local",
    "started_at": "2025-01-12T10:30:00Z",
    "last_heartbeat": "2025-01-12T10:35:23Z"
  }
}

Production Deployment

Docker Compose

When deploying with docker-compose, ensure each instance has a unique ID:

version: '3.8'
services:
  backend-1:
    image: maplefile-backend:latest
    environment:
      - LEADER_ELECTION_ENABLED=true
      - LEADER_ELECTION_INSTANCE_ID=backend-1
    # ... other config

  backend-2:
    image: maplefile-backend:latest
    environment:
      - LEADER_ELECTION_ENABLED=true
      - LEADER_ELECTION_INSTANCE_ID=backend-2
    # ... other config

  backend-3:
    image: maplefile-backend:latest
    environment:
      - LEADER_ELECTION_ENABLED=true
      - LEADER_ELECTION_INSTANCE_ID=backend-3
    # ... other config

Kubernetes

For Kubernetes, inject the pod name as the instance ID via the Downward API, so every replica gets a unique ID without manual configuration:

apiVersion: apps/v1
kind: Deployment
metadata:
  name: maplefile-backend
spec:
  replicas: 3
  # apps/v1 Deployments require a selector that matches the pod template labels.
  selector:
    matchLabels:
      app: maplefile-backend
  template:
    metadata:
      labels:
        app: maplefile-backend
    spec:
      containers:
      - name: backend
        image: maplefile-backend:latest
        env:
        - name: LEADER_ELECTION_ENABLED
          value: "true"
        # Downward API: each pod receives its own name as the instance ID.
        - name: LEADER_ELECTION_INSTANCE_ID
          valueFrom:
            fieldRef:
              fieldPath: metadata.name

Monitoring

Check logs for leader election events:

# Grep for leader election events
docker logs maplefile-backend 2>&1 | grep -iE "leader|election|heartbeat"

# Example output:
# 2025-01-12T10:30:00.000Z INFO Starting leader election instance_id=instance-1
# 2025-01-12T10:30:00.123Z INFO 🎉 Became the leader! instance_id=instance-1
# 2025-01-12T10:30:03.456Z DEBUG Heartbeat sent instance_id=instance-1

Troubleshooting

Leader keeps changing

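Leadership flaps when the lock's TTL expires before the current leader renews it. Increase LEADER_ELECTION_LOCK_TTL so that it stays several times longer than LEADER_ELECTION_HEARTBEAT_INTERVAL: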

LEADER_ELECTION_LOCK_TTL=30s

No leader elected

Check Redis connectivity:

redis-cli
> GET maplefile:leader:lock

Multiple leaders

This shouldn't happen, but if it does:

  1. Check system clock sync across instances
  2. Check Redis is working properly
  3. Check network connectivity

Next Steps

  1. Implement leader-only background jobs
  2. Add metrics for leader election events
  3. Create alerting for frequent leadership changes
  4. Add dashboards to monitor leader status