10 KiB
10 KiB
Leader Election Integration Example
Quick Integration into MapleFile Backend
Step 1: Add to Wire Providers (app/wire.go)
// In app/wire.go, add to wire.Build():
wire.Build(
// ... existing providers ...
// Leader Election
// Provider for the leader election dependency. Presumably it supplies the
// leaderelection.LeaderElection value that ProvideApplication takes as a
// parameter — confirm against pkg/leaderelection.
leaderelection.ProvideLeaderElection,
// ... rest of providers ...
)
Step 2: Update Application Struct (app/app.go)
import (
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/leaderelection"
)
// Application bundles the dependencies the backend process needs to run:
// configuration, the HTTP server, a logger, the database migrator and the
// leader election handle.
type Application struct {
config *config.Config
// NOTE(review): `http` here is a project package exposing WireServer, not
// net/http — confirm the import alias in app/app.go.
httpServer *http.WireServer
logger *zap.Logger
migrator *cassandradb.Migrator
// leaderElection is the leader election handle injected through
// ProvideApplication.
leaderElection leaderelection.LeaderElection // ADD THIS
}
// ProvideApplication assembles an *Application from its injected
// dependencies. It performs no work beyond storing the arguments.
func ProvideApplication(
	cfg *config.Config,
	httpServer *http.WireServer,
	logger *zap.Logger,
	migrator *cassandradb.Migrator,
	leaderElection leaderelection.LeaderElection, // ADD THIS
) *Application {
	app := new(Application)
	app.config = cfg
	app.httpServer = httpServer
	app.logger = logger
	app.migrator = migrator
	app.leaderElection = leaderElection // ADD THIS
	return app
}
Step 3: Start Leader Election in Application (app/app.go)
// Start runs the application until the HTTP server fails or a SIGINT/SIGTERM
// arrives, then shuts everything down.
//
// Sequence:
//  1. If leader election is enabled, register callbacks and start the election
//     in a background goroutine.
//  2. Run database migrations: always when leader election is disabled,
//     otherwise only when this instance currently reports itself as leader.
//  3. Start the HTTP server and block until it errors or a signal is received.
//  4. Stop leader election, then shut the HTTP server down with a 30s timeout.
//
// It returns a wrapped error when migrations fail, when the HTTP server fails,
// or when the graceful shutdown fails; nil otherwise. Leader election is
// stopped on every return path once it has been started.
func (app *Application) Start() error {
	app.logger.Info("🚀 MapleFile Backend Starting (Wire DI)",
		zap.String("version", app.config.App.Version),
		zap.String("environment", app.config.App.Environment),
		zap.String("di_framework", "Google Wire"))

	electionEnabled := app.config.LeaderElection.Enabled

	// stopElection stops leader election at most once. It is a no-op unless
	// the election was actually started below.
	stopElection := func() {}

	// Start leader election if enabled
	if electionEnabled {
		app.logger.Info("Starting leader election")

		// Register callbacks before the election starts so no transition is missed.
		app.setupLeaderCallbacks()

		// Start election in background
		go func() {
			if err := app.leaderElection.Start(context.Background()); err != nil {
				app.logger.Error("Leader election failed", zap.Error(err))
			}
		}()

		// Only this goroutine calls stopElection, so a plain flag is enough
		// to make it idempotent.
		stopped := false
		stopElection = func() {
			if stopped {
				return
			}
			stopped = true
			if err := app.leaderElection.Stop(); err != nil {
				app.logger.Error("Failed to stop leader election", zap.Error(err))
			}
		}
		// Without this, a failed migration or a failed HTTP server would
		// return while the election goroutine keeps running.
		defer stopElection()

		// Give it a moment to complete first election.
		// NOTE(review): this is a heuristic. If the first election round takes
		// longer than 500ms, this instance reports FOLLOWER and skips
		// migrations below even if it wins shortly afterwards.
		time.Sleep(500 * time.Millisecond)

		if app.leaderElection.IsLeader() {
			app.logger.Info("👑 This instance is the LEADER",
				zap.String("instance_id", app.leaderElection.GetInstanceID()))
		} else {
			app.logger.Info("👥 This instance is a FOLLOWER",
				zap.String("instance_id", app.leaderElection.GetInstanceID()))
		}
	}

	// Run database migrations (only leader should do this; with leader
	// election disabled every instance runs them).
	switch {
	case !electionEnabled:
		if err := app.runMigrations("Running database migrations..."); err != nil {
			return err
		}
	case app.leaderElection.IsLeader():
		if err := app.runMigrations("Running database migrations as leader..."); err != nil {
			return err
		}
	default:
		app.logger.Info("Skipping migrations - not the leader")
	}

	// Start HTTP server in goroutine. The channel is buffered so the goroutine
	// can always deliver its error and exit, even if nobody is receiving.
	errChan := make(chan error, 1)
	go func() {
		if err := app.httpServer.Start(); err != nil {
			errChan <- err
		}
	}()

	// Wait for interrupt signal or server error
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(quit)

	select {
	case err := <-errChan:
		app.logger.Error("HTTP server failed", zap.Error(err))
		return fmt.Errorf("server startup failed: %w", err)
	case sig := <-quit:
		app.logger.Info("Received shutdown signal", zap.String("signal", sig.String()))
	}

	app.logger.Info("👋 MapleFile Backend Shutting Down")

	// Stop leader election before draining HTTP traffic, as the original
	// ordering did; the deferred call then becomes a no-op.
	stopElection()

	// Graceful shutdown with timeout
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	if err := app.httpServer.Shutdown(ctx); err != nil {
		app.logger.Error("Server shutdown error", zap.Error(err))
		return fmt.Errorf("server shutdown failed: %w", err)
	}

	app.logger.Info("✅ MapleFile Backend Stopped Successfully")
	return nil
}

// runMigrations logs startMsg, applies the database migrations and logs the
// outcome. It returns a wrapped error when the migrator fails.
func (app *Application) runMigrations(startMsg string) error {
	app.logger.Info(startMsg)
	if err := app.migrator.Up(); err != nil {
		app.logger.Error("Failed to run database migrations", zap.Error(err))
		return fmt.Errorf("migration failed: %w", err)
	}
	app.logger.Info("✅ Database migrations completed successfully")
	return nil
}
// setupLeaderCallbacks registers the handlers invoked on leadership
// transitions: one for gaining leadership and one for losing it. Both
// currently only log the event together with this instance's ID.
func (app *Application) setupLeaderCallbacks() {
	onPromoted := func() {
		id := app.leaderElection.GetInstanceID()
		app.logger.Info("🎉 BECAME LEADER - Starting leader-only tasks",
			zap.String("instance_id", id))

		// Start leader-only background tasks here
		// For example:
		// - Scheduled cleanup jobs
		// - Metrics aggregation
		// - Cache warming
		// - Periodic health checks
	}

	onDemoted := func() {
		id := app.leaderElection.GetInstanceID()
		app.logger.Warn("😢 LOST LEADERSHIP - Stopping leader-only tasks",
			zap.String("instance_id", id))

		// Stop leader-only tasks here
	}

	app.leaderElection.OnBecomeLeader(onPromoted)
	app.leaderElection.OnLoseLeadership(onDemoted)
}
Step 4: Environment Variables (.env)
Add to your .env file:
# Leader Election Configuration
LEADER_ELECTION_ENABLED=true
LEADER_ELECTION_LOCK_TTL=10s
LEADER_ELECTION_HEARTBEAT_INTERVAL=3s
LEADER_ELECTION_RETRY_INTERVAL=2s
# Leave empty for auto-generation
LEADER_ELECTION_INSTANCE_ID=
# Leave empty for auto-detection
LEADER_ELECTION_HOSTNAME=
Step 5: Update .env.sample
# Leader Election
LEADER_ELECTION_ENABLED=true
LEADER_ELECTION_LOCK_TTL=10s
LEADER_ELECTION_HEARTBEAT_INTERVAL=3s
LEADER_ELECTION_RETRY_INTERVAL=2s
LEADER_ELECTION_INSTANCE_ID=
LEADER_ELECTION_HOSTNAME=
Step 6: Test Multiple Instances
Terminal 1
LEADER_ELECTION_INSTANCE_ID=instance-1 ./maplefile-backend
# Output: 👑 This instance is the LEADER
Terminal 2
LEADER_ELECTION_INSTANCE_ID=instance-2 ./maplefile-backend
# Output: 👥 This instance is a FOLLOWER
Terminal 3
LEADER_ELECTION_INSTANCE_ID=instance-3 ./maplefile-backend
# Output: 👥 This instance is a FOLLOWER
Test Failover
Stop Terminal 1 (kill the leader):
# Watch Terminal 2 or 3 logs
# One will show: 🎉 BECAME LEADER
Optional: Add Health Check Endpoint
Add to your HTTP handlers to expose leader election status:
// In internal/interface/http/server.go
// leaderElectionHealthHandler reports this instance's leader election status
// as JSON: whether it is the leader, its instance ID, and the current leader
// info.
//
// Responses:
//   - 501 Not Implemented when no leader election is configured on the server.
//   - 500 Internal Server Error when the leader info cannot be retrieved.
//   - 200 OK with a JSON body otherwise.
func (s *Server) leaderElectionHealthHandler(w http.ResponseWriter, r *http.Request) {
	if s.leaderElection == nil {
		http.Error(w, "Leader election not enabled", http.StatusNotImplemented)
		return
	}

	info, err := s.leaderElection.GetLeaderInfo()
	if err != nil {
		s.logger.Error("Failed to get leader info", zap.Error(err))
		http.Error(w, "Failed to get leader info", http.StatusInternalServerError)
		return
	}

	response := map[string]any{
		"is_leader":   s.leaderElection.IsLeader(),
		"instance_id": s.leaderElection.GetInstanceID(),
		"leader_info": info,
	}

	w.Header().Set("Content-Type", "application/json")
	// The status line may already be on the wire once encoding starts, so a
	// failure here can only be logged, not turned into an error response.
	if err := json.NewEncoder(w).Encode(response); err != nil {
		s.logger.Error("Failed to encode leader status response", zap.Error(err))
	}
}
// Register in registerRoutes():
// NOTE(review): the method-qualified "GET /path" pattern needs Go 1.22+ if
// s.mux is a net/http ServeMux — confirm the mux type and Go version.
s.mux.HandleFunc("GET /api/v1/leader-status", s.leaderElectionHealthHandler)
Test the endpoint:
curl http://localhost:8000/api/v1/leader-status
# Response:
{
"is_leader": true,
"instance_id": "instance-1",
"leader_info": {
"instance_id": "instance-1",
"hostname": "macbook-pro.local",
"started_at": "2025-01-12T10:30:00Z",
"last_heartbeat": "2025-01-12T10:35:23Z"
}
}
Production Deployment
Docker Compose
When deploying with docker-compose, ensure each instance has a unique ID:
version: '3.8'
services:
backend-1:
image: maplefile-backend:latest
environment:
- LEADER_ELECTION_ENABLED=true
- LEADER_ELECTION_INSTANCE_ID=backend-1
# ... other config
backend-2:
image: maplefile-backend:latest
environment:
- LEADER_ELECTION_ENABLED=true
- LEADER_ELECTION_INSTANCE_ID=backend-2
# ... other config
backend-3:
image: maplefile-backend:latest
environment:
- LEADER_ELECTION_ENABLED=true
- LEADER_ELECTION_INSTANCE_ID=backend-3
# ... other config
Kubernetes
For Kubernetes, set the instance ID from the pod name via the Downward API so that every replica gets a unique ID:
apiVersion: apps/v1
kind: Deployment
metadata:
name: maplefile-backend
spec:
replicas: 3
template:
spec:
containers:
- name: backend
image: maplefile-backend:latest
env:
- name: LEADER_ELECTION_ENABLED
value: "true"
- name: LEADER_ELECTION_INSTANCE_ID
valueFrom:
fieldRef:
fieldPath: metadata.name
Monitoring
Check logs for leader election events:
# Grep for leader election events.
# `docker logs` replays the container's stderr on its own stderr, so merge it
# into stdout before piping. Match case-insensitively so "LEADER", "leader",
# "election" and heartbeat lines are all shown.
docker logs maplefile-backend 2>&1 | grep -iE "leader|election|heartbeat"
# Example output:
# 2025-01-12T10:30:00.000Z INFO Starting leader election instance_id=instance-1
# 2025-01-12T10:30:00.123Z INFO 🎉 Became the leader! instance_id=instance-1
# 2025-01-12T10:30:03.456Z DEBUG Heartbeat sent instance_id=instance-1
Troubleshooting
Leader keeps changing
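Increase LEADER_ELECTION_LOCK_TTL so the lock outlives several heartbeat intervals (a single delayed heartbeat should not let it expire):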
LEADER_ELECTION_LOCK_TTL=30s
No leader elected
Check Redis connectivity:
redis-cli
> GET maplefile:leader:lock
Multiple leaders
This shouldn't happen, but if it does:
- Check system clock sync across instances
- Check Redis is working properly
- Check network connectivity
Next Steps
- Implement leader-only background jobs
- Add metrics for leader election events
- Create alerting for frequent leadership changes
- Add dashboards to monitor leader status