Initial commit: Open sourcing all of the Maple Open Technologies code.

This commit is contained in:
Bartlomiej Mika 2025-12-02 14:33:08 -05:00
commit 755d54a99d
2010 changed files with 448675 additions and 0 deletions

View file

@ -0,0 +1,136 @@
// Package leaderelection provides distributed leader election for multiple application instances.
// It ensures only one instance acts as the leader at any given time, with automatic failover.
package leaderelection
import (
"context"
"time"
)
// LeaderElection provides distributed leader election across multiple application instances.
// It uses Redis to coordinate which instance is the current leader, with automatic failover
// if the leader crashes or becomes unavailable.
//
// Typical usage: register callbacks, run Start in its own goroutine, and call
// Stop during shutdown.
type LeaderElection interface {
	// Start begins participating in leader election.
	// This method blocks and runs the election loop until ctx is cancelled or an error occurs.
	// The instance will automatically attempt to become leader and maintain leadership.
	Start(ctx context.Context) error

	// IsLeader returns true if this instance is currently the leader.
	// This is a local check and does not require network communication.
	IsLeader() bool

	// GetLeaderID returns the unique identifier of the current leader instance.
	// Returns empty string if no leader exists (should be rare).
	GetLeaderID() (string, error)

	// GetLeaderInfo returns detailed information about the current leader.
	// May return a nil *LeaderInfo with a nil error when no info is published.
	GetLeaderInfo() (*LeaderInfo, error)

	// OnBecomeLeader registers a callback function that will be executed when
	// this instance becomes the leader. Multiple callbacks can be registered.
	OnBecomeLeader(callback func())

	// OnLoseLeadership registers a callback function that will be executed when
	// this instance loses leadership (either voluntarily or due to failure).
	// Multiple callbacks can be registered.
	OnLoseLeadership(callback func())

	// Stop gracefully stops leader election participation.
	// If this instance is the leader, it releases leadership allowing another instance to take over.
	// This should be called during application shutdown.
	Stop() error

	// GetInstanceID returns the unique identifier for this instance.
	GetInstanceID() string
}
// LeaderInfo contains information about the current leader.
// It is serialized to JSON and stored in Redis (under RedisInfoKeyName) so
// any instance can inspect who currently leads.
type LeaderInfo struct {
	// InstanceID is the unique identifier of the leader instance
	InstanceID string `json:"instance_id"`
	// Hostname is the hostname of the leader instance
	Hostname string `json:"hostname"`
	// StartedAt is when this instance became the leader
	StartedAt time.Time `json:"started_at"`
	// LastHeartbeat is the last time the leader renewed its lock
	LastHeartbeat time.Time `json:"last_heartbeat"`
}
// Config contains configuration for leader election.
// Zero-valued fields are filled with defaults by Validate.
type Config struct {
	// RedisKeyName is the Redis key used for leader election.
	// Default: "maplefile:leader:lock"
	RedisKeyName string
	// RedisInfoKeyName is the Redis key used to store leader information.
	// Default: "maplefile:leader:info"
	RedisInfoKeyName string
	// LockTTL is how long the leader lock lasts before expiring.
	// The leader must renew the lock before this time expires.
	// Default: 10 seconds
	// Recommended: 10-30 seconds
	LockTTL time.Duration
	// HeartbeatInterval is how often the leader renews its lock.
	// This should be significantly less than LockTTL (e.g., LockTTL / 3).
	// Default: 3 seconds
	// Recommended: LockTTL / 3
	HeartbeatInterval time.Duration
	// RetryInterval is how often followers check for leadership opportunity.
	// Default: 2 seconds
	// Recommended: 1-5 seconds
	RetryInterval time.Duration
	// InstanceID uniquely identifies this application instance.
	// If empty, will be auto-generated from hostname + random suffix.
	// Default: auto-generated
	InstanceID string
	// Hostname is the hostname of this instance.
	// If empty, will be auto-detected.
	// Default: os.Hostname()
	Hostname string
}
// DefaultConfig returns a Config populated with sensible defaults:
// the standard "maplefile" Redis key names, a 10-second lock TTL, a
// 3-second heartbeat, and a 2-second follower retry interval.
func DefaultConfig() *Config {
	cfg := new(Config)
	cfg.RedisKeyName = "maplefile:leader:lock"
	cfg.RedisInfoKeyName = "maplefile:leader:info"
	cfg.LockTTL = 10 * time.Second
	cfg.HeartbeatInterval = 3 * time.Second
	cfg.RetryInterval = 2 * time.Second
	return cfg
}
// Validate normalizes the configuration in place: any unset (zero or
// negative) field is replaced with its default, and HeartbeatInterval is
// clamped below LockTTL. It currently always returns nil; the error result
// exists so callers can treat configuration problems uniformly.
func (c *Config) Validate() error {
	if len(c.RedisKeyName) == 0 {
		c.RedisKeyName = "maplefile:leader:lock"
	}
	if len(c.RedisInfoKeyName) == 0 {
		c.RedisInfoKeyName = "maplefile:leader:info"
	}
	if !(c.LockTTL > 0) {
		c.LockTTL = 10 * time.Second
	}
	if !(c.HeartbeatInterval > 0) {
		c.HeartbeatInterval = 3 * time.Second
	}
	if !(c.RetryInterval > 0) {
		c.RetryInterval = 2 * time.Second
	}
	// The heartbeat must fire well before the lock expires, otherwise
	// leadership would lapse between renewals; clamp to a third of the TTL.
	if c.HeartbeatInterval >= c.LockTTL {
		c.HeartbeatInterval = c.LockTTL / 3
	}
	return nil
}

View file

@ -0,0 +1,30 @@
package leaderelection
import (
"github.com/redis/go-redis/v9"
"go.uber.org/zap"
"codeberg.org/mapleopentech/monorepo/cloud/maplepress-backend/config"
)
// ProvideLeaderElection is the Wire provider that constructs a Redis-backed
// LeaderElection from the application configuration, Redis client, and logger.
func ProvideLeaderElection(
	cfg *config.Config,
	redisClient *redis.Client,
	logger *zap.Logger,
) (LeaderElection, error) {
	// Build the election configuration from application settings.
	// InstanceID and Hostname are left at their zero values so that
	// NewRedisLeaderElection auto-generates/auto-detects them.
	electionCfg := &Config{
		RedisKeyName:      "maplepress:leader:lock",
		RedisInfoKeyName:  "maplepress:leader:info",
		LockTTL:           cfg.LeaderElection.LockTTL,
		HeartbeatInterval: cfg.LeaderElection.HeartbeatInterval,
		RetryInterval:     cfg.LeaderElection.RetryInterval,
	}
	// *redis.Client satisfies the redis.UniversalClient interface, so it can
	// be passed through directly.
	return NewRedisLeaderElection(electionCfg, redisClient, logger)
}

View file

@ -0,0 +1,355 @@
package leaderelection
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"math/rand"
	"os"
	"sync"
	"time"

	"github.com/redis/go-redis/v9"
	"go.uber.org/zap"
)
// redisLeaderElection implements LeaderElection using Redis.
//
// Coordination happens through a single Redis key holding the leader's
// instance ID with a TTL: the leader renews the TTL on each heartbeat, and
// followers periodically attempt SETNX to take over once the key expires.
type redisLeaderElection struct {
	config *Config               // validated election settings
	redis  redis.UniversalClient // Redis connection used for coordination
	logger *zap.Logger

	// Identity of this instance; fixed at construction time.
	instanceID string
	hostname   string

	// isLeader is the locally cached leadership flag, guarded by leaderMutex.
	isLeader    bool
	leaderMutex sync.RWMutex

	// Registered leadership-change callbacks, guarded by callbackMutex.
	becomeLeaderCbs   []func()
	loseLeadershipCbs []func()
	callbackMutex     sync.RWMutex

	// stopChan signals the Start loop to exit; stoppedChan is closed by the
	// loop itself once it has fully terminated (used by Stop to wait).
	stopChan    chan struct{}
	stoppedChan chan struct{}

	// leaderStartTime is set when leadership is acquired.
	// NOTE(review): written without a lock — appears to be confined to the
	// election goroutine; confirm no concurrent readers exist.
	leaderStartTime time.Time

	// lastHeartbeat records the most recent successful lock renewal,
	// guarded by lastHeartbeatMutex.
	lastHeartbeat      time.Time
	lastHeartbeatMutex sync.RWMutex
}
// NewRedisLeaderElection creates a new Redis-based leader election instance.
//
// The configuration is validated (and defaulted) via config.Validate before
// use. If config.InstanceID is empty, an ID of the form
// "<hostname>-<random>" is generated; if config.Hostname is empty, the OS
// hostname is used. In both cases a failed hostname lookup falls back to
// "unknown".
//
// Fix over the original: os.Hostname() was called twice with a duplicated
// fallback; the lookup is now performed once and reused.
func NewRedisLeaderElection(
	config *Config,
	redisClient redis.UniversalClient,
	logger *zap.Logger,
) (LeaderElection, error) {
	logger = logger.Named("LeaderElection")

	// Validate configuration (fills in defaults for unset fields).
	if err := config.Validate(); err != nil {
		return nil, fmt.Errorf("invalid configuration: %w", err)
	}

	// Resolve the host name once; it is needed both for the auto-generated
	// instance ID and as the default Hostname.
	detectedHost, err := os.Hostname()
	if err != nil {
		detectedHost = "unknown"
	}

	// Generate instance ID if not provided.
	instanceID := config.InstanceID
	if instanceID == "" {
		// Random suffix keeps IDs distinct across instances on the same host.
		instanceID = fmt.Sprintf("%s-%d", detectedHost, rand.Intn(100000))
		logger.Info("Generated instance ID", zap.String("instance_id", instanceID))
	}

	// Auto-detect hostname if not provided.
	hostname := config.Hostname
	if hostname == "" {
		hostname = detectedHost
	}

	return &redisLeaderElection{
		config:            config,
		redis:             redisClient,
		logger:            logger,
		instanceID:        instanceID,
		hostname:          hostname,
		isLeader:          false,
		becomeLeaderCbs:   make([]func(), 0),
		loseLeadershipCbs: make([]func(), 0),
		stopChan:          make(chan struct{}),
		stoppedChan:       make(chan struct{}),
	}, nil
}
// Start begins participating in leader election.
//
// This call blocks, running the election loop until ctx is cancelled or
// Stop is invoked. On every RetryInterval tick the instance either renews
// its lock (when leader) or attempts to acquire it (when follower).
func (le *redisLeaderElection) Start(ctx context.Context) error {
	le.logger.Info("Starting leader election",
		zap.String("instance_id", le.instanceID),
		zap.String("hostname", le.hostname),
		zap.Duration("lock_ttl", le.config.LockTTL),
		zap.Duration("heartbeat_interval", le.config.HeartbeatInterval),
	)
	// Signal Stop() that the loop has fully exited.
	defer close(le.stoppedChan)
	// Main election loop
	ticker := time.NewTicker(le.config.RetryInterval)
	defer ticker.Stop()
	// Try to become leader immediately on startup
	le.tryBecomeLeader(ctx)
	for {
		select {
		case <-ctx.Done():
			le.logger.Info("Context cancelled, stopping leader election")
			// ctx is already cancelled, so release with a fresh context to
			// give the lock-release commands a chance to reach Redis.
			le.releaseLeadership(context.Background())
			return ctx.Err()
		case <-le.stopChan:
			le.logger.Info("Stop signal received, stopping leader election")
			le.releaseLeadership(context.Background())
			return nil
		case <-ticker.C:
			if le.IsLeader() {
				// If we're the leader, send heartbeat
				if err := le.sendHeartbeat(ctx); err != nil {
					// Renewal failed: another instance may hold the lock now,
					// so demote ourselves locally and notify listeners.
					le.logger.Error("Failed to send heartbeat, lost leadership",
						zap.Error(err))
					le.setLeaderStatus(false)
					le.executeCallbacks(le.loseLeadershipCbs)
				}
			} else {
				// If we're not the leader, try to become leader
				le.tryBecomeLeader(ctx)
			}
		}
	}
}
// tryBecomeLeader attempts to acquire leadership.
//
// It issues SET key value NX EX ttl, which succeeds only when no other
// instance currently holds the lock. On success it records the acquisition
// time, publishes the leader info, and fires the become-leader callbacks.
//
// Fix over the original: the last-heartbeat timestamp is now initialized at
// acquisition time, so the first LeaderInfo record published by
// updateLeaderInfo does not carry a zero LastHeartbeat until the first
// renewal tick.
func (le *redisLeaderElection) tryBecomeLeader(ctx context.Context) {
	// Try to set the leader key with NX (only if not exists) and EX (expiry)
	success, err := le.redis.SetNX(ctx, le.config.RedisKeyName, le.instanceID, le.config.LockTTL).Result()
	if err != nil {
		le.logger.Error("Failed to attempt leader election",
			zap.Error(err))
		return
	}
	if success {
		// We became the leader!
		le.logger.Info("🎉 Became the leader!",
			zap.String("instance_id", le.instanceID))
		now := time.Now()
		le.leaderStartTime = now
		// Seed the heartbeat clock so LeaderInfo.LastHeartbeat is meaningful
		// immediately after acquisition.
		le.setLastHeartbeat(now)
		le.setLeaderStatus(true)
		le.updateLeaderInfo(ctx)
		le.executeCallbacks(le.becomeLeaderCbs)
	} else {
		// Someone else is the leader
		if !le.IsLeader() {
			// Only log if we weren't already aware
			currentLeader, _ := le.GetLeaderID()
			le.logger.Debug("Another instance is the leader",
				zap.String("leader_id", currentLeader))
		}
	}
}
// sendHeartbeat renews the leader lock.
//
// The ownership check and TTL renewal are performed atomically in a single
// Lua script. The original GET-then-EXPIRE sequence had a race: if the lock
// expired and was re-acquired by another instance between the two calls,
// EXPIRE would wrongly extend the new owner's lock. The script only renews
// when the key still holds our instance ID.
//
// Returns an error when the lock is gone or held by a different instance,
// which the Start loop treats as loss of leadership.
func (le *redisLeaderElection) sendHeartbeat(ctx context.Context) error {
	// PEXPIRE only when we still own the lock; yields 1 on success and 0
	// when the key is missing or owned by someone else.
	script := `
if redis.call("GET", KEYS[1]) == ARGV[1] then
	return redis.call("PEXPIRE", KEYS[1], ARGV[2])
else
	return 0
end
`
	res, err := le.redis.Eval(ctx, script,
		[]string{le.config.RedisKeyName},
		le.instanceID,
		le.config.LockTTL.Milliseconds(),
	).Result()
	if err != nil {
		return fmt.Errorf("failed to renew lock: %w", err)
	}
	if renewed, ok := res.(int64); !ok || renewed != 1 {
		return fmt.Errorf("lock lost or held by different instance")
	}
	// Update heartbeat time
	le.setLastHeartbeat(time.Now())
	// Update leader info
	le.updateLeaderInfo(ctx)
	le.logger.Debug("Heartbeat sent",
		zap.String("instance_id", le.instanceID))
	return nil
}
// updateLeaderInfo publishes this instance's LeaderInfo record to Redis so
// any instance can inspect who currently leads. Failures are logged but not
// propagated: the lock key itself remains the source of truth.
func (le *redisLeaderElection) updateLeaderInfo(ctx context.Context) {
	payload, err := json.Marshal(&LeaderInfo{
		InstanceID:    le.instanceID,
		Hostname:      le.hostname,
		StartedAt:     le.leaderStartTime,
		LastHeartbeat: le.getLastHeartbeat(),
	})
	if err != nil {
		le.logger.Error("Failed to marshal leader info", zap.Error(err))
		return
	}
	// The info record expires together with the lock (same TTL).
	if err := le.redis.Set(ctx, le.config.RedisInfoKeyName, payload, le.config.LockTTL).Err(); err != nil {
		le.logger.Error("Failed to update leader info", zap.Error(err))
	}
}
// releaseLeadership voluntarily gives up the lock during shutdown, then
// clears the published leader info, updates local state, and fires the
// lose-leadership callbacks. It is a no-op when we are not the leader.
func (le *redisLeaderElection) releaseLeadership(ctx context.Context) {
	if !le.IsLeader() {
		return
	}
	le.logger.Info("Releasing leadership voluntarily",
		zap.String("instance_id", le.instanceID))
	// Compare-and-delete: remove the lock only if we are still its owner,
	// so we never delete a lock another instance has since acquired.
	const script = `
if redis.call("GET", KEYS[1]) == ARGV[1] then
	return redis.call("DEL", KEYS[1])
else
	return 0
end
`
	if _, err := le.redis.Eval(ctx, script, []string{le.config.RedisKeyName}, le.instanceID).Result(); err != nil {
		le.logger.Error("Failed to release leadership", zap.Error(err))
	}
	// Drop the published info record and demote ourselves locally.
	le.redis.Del(ctx, le.config.RedisInfoKeyName)
	le.setLeaderStatus(false)
	le.executeCallbacks(le.loseLeadershipCbs)
}
// IsLeader reports whether this instance currently holds leadership,
// based solely on locally cached state (no Redis round trip).
func (le *redisLeaderElection) IsLeader() bool {
	le.leaderMutex.RLock()
	leading := le.isLeader
	le.leaderMutex.RUnlock()
	return leading
}
// GetLeaderID returns the instance ID of the current leader as recorded in
// Redis, or an empty string (with a nil error) when no leader currently
// holds the lock.
//
// Fix over the original: the key-missing case is detected with
// errors.Is(err, redis.Nil) instead of ==, so wrapped errors (e.g. from
// client middleware) are still recognized, per go-redis v9 guidance.
func (le *redisLeaderElection) GetLeaderID() (string, error) {
	ctx := context.Background()
	leaderID, err := le.redis.Get(ctx, le.config.RedisKeyName).Result()
	if errors.Is(err, redis.Nil) {
		// An absent lock key means there is presently no leader.
		return "", nil
	}
	if err != nil {
		return "", fmt.Errorf("failed to get leader ID: %w", err)
	}
	return leaderID, nil
}
// GetLeaderInfo returns the leader's published LeaderInfo record from
// Redis, or (nil, nil) when no record exists.
//
// Fix over the original: the key-missing case is detected with
// errors.Is(err, redis.Nil) instead of ==, so wrapped errors (e.g. from
// client middleware) are still recognized, per go-redis v9 guidance.
func (le *redisLeaderElection) GetLeaderInfo() (*LeaderInfo, error) {
	ctx := context.Background()
	data, err := le.redis.Get(ctx, le.config.RedisInfoKeyName).Result()
	if errors.Is(err, redis.Nil) {
		// No info record published; callers must handle the nil, nil case.
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("failed to get leader info: %w", err)
	}
	var info LeaderInfo
	if err := json.Unmarshal([]byte(data), &info); err != nil {
		return nil, fmt.Errorf("failed to unmarshal leader info: %w", err)
	}
	return &info, nil
}
// OnBecomeLeader registers a callback that fires (in its own goroutine)
// each time this instance acquires leadership.
func (le *redisLeaderElection) OnBecomeLeader(callback func()) {
	le.callbackMutex.Lock()
	le.becomeLeaderCbs = append(le.becomeLeaderCbs, callback)
	le.callbackMutex.Unlock()
}
// OnLoseLeadership registers a callback that fires (in its own goroutine)
// each time this instance loses leadership, whether voluntarily or not.
func (le *redisLeaderElection) OnLoseLeadership(callback func()) {
	le.callbackMutex.Lock()
	le.loseLeadershipCbs = append(le.loseLeadershipCbs, callback)
	le.callbackMutex.Unlock()
}
// Stop gracefully stops leader election and waits up to 5 seconds for the
// Start loop to exit (which also releases any held leadership).
//
// Fix over the original: a repeated call to Stop no longer panics with
// "close of closed channel" — the channel is only closed once.
// NOTE(review): the guard below makes sequential re-calls safe; truly
// concurrent Stop calls would still need a sync.Once on the struct —
// confirm whether concurrent shutdown paths exist.
func (le *redisLeaderElection) Stop() error {
	le.logger.Info("Stopping leader election")
	// Close stopChan only if it has not been closed already.
	select {
	case <-le.stopChan:
		// Already stopping/stopped; fall through to the wait below.
	default:
		close(le.stopChan)
	}
	// Wait for the election loop to finish (with timeout)
	select {
	case <-le.stoppedChan:
		le.logger.Info("Leader election stopped successfully")
		return nil
	case <-time.After(5 * time.Second):
		le.logger.Warn("Timeout waiting for leader election to stop")
		return fmt.Errorf("timeout waiting for leader election to stop")
	}
}
// GetInstanceID returns this instance's unique identifier.
// The value is fixed at construction time, so no locking is required.
func (le *redisLeaderElection) GetInstanceID() string {
	return le.instanceID
}
// setLeaderStatus updates the cached leadership flag under the write lock.
func (le *redisLeaderElection) setLeaderStatus(isLeader bool) {
	le.leaderMutex.Lock()
	le.isLeader = isLeader
	le.leaderMutex.Unlock()
}
// setLastHeartbeat records t as the most recent lock renewal, guarded by
// lastHeartbeatMutex.
func (le *redisLeaderElection) setLastHeartbeat(t time.Time) {
	le.lastHeartbeatMutex.Lock()
	le.lastHeartbeat = t
	le.lastHeartbeatMutex.Unlock()
}
// getLastHeartbeat returns the most recently recorded lock-renewal time,
// guarded by lastHeartbeatMutex.
func (le *redisLeaderElection) getLastHeartbeat() time.Time {
	le.lastHeartbeatMutex.RLock()
	t := le.lastHeartbeat
	le.lastHeartbeatMutex.RUnlock()
	return t
}
// executeCallbacks runs each callback in its own goroutine, recovering and
// logging any panic so that one misbehaving callback cannot crash the
// process. Callbacks therefore run asynchronously and unordered.
//
// The read lock guards iteration against concurrent registration via
// OnBecomeLeader / OnLoseLeadership.
// NOTE(review): callers evaluate the slice argument (e.g.
// le.loseLeadershipCbs) before this function takes the lock, so that field
// read itself is unsynchronized — verify with the race detector.
func (le *redisLeaderElection) executeCallbacks(callbacks []func()) {
	le.callbackMutex.RLock()
	defer le.callbackMutex.RUnlock()
	for _, callback := range callbacks {
		go func(cb func()) {
			defer func() {
				// recover only works inside a deferred func in the same
				// goroutine as the panic.
				if r := recover(); r != nil {
					le.logger.Error("Panic in leader election callback",
						zap.Any("panic", r))
				}
			}()
			cb()
		}(callback)
	}
}