Initial commit: Open sourcing all of the Maple Open Technologies code.

This commit is contained in:
Bartlomiej Mika 2025-12-02 14:33:08 -05:00
commit 755d54a99d
2010 changed files with 448675 additions and 0 deletions

View file

@ -0,0 +1,453 @@
// codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/observability/health.go
package observability
import (
	"context"
	"encoding/json"
	"net"
	"net/http"
	"sync"
	"time"

	"github.com/gocql/gocql"
	"go.uber.org/zap"

	"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/storage/cache/twotiercache"
	"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/storage/object/s3"
)
// HealthStatus is the coarse health level reported for a single component or
// for the service as a whole.
type HealthStatus string

// Recognised health levels. The string values are emitted verbatim in the
// JSON responses.
const (
	HealthStatusHealthy   = HealthStatus("healthy")
	HealthStatusUnhealthy = HealthStatus("unhealthy")
	HealthStatusDegraded  = HealthStatus("degraded")
)
// HealthCheckResult represents the result of a health check for one
// component. Results are serialized as the values of HealthResponse.Services.
type HealthCheckResult struct {
// Status is the health level reported by the check.
Status HealthStatus `json:"status"`
// Message is a short human-readable summary; omitted from JSON when empty.
Message string `json:"message,omitempty"`
// Timestamp is the time at which the check produced this result.
Timestamp time.Time `json:"timestamp"`
// Duration is the elapsed time as a formatted string. CheckHealth overwrites
// it with the time it measured around the check call.
Duration string `json:"duration,omitempty"`
// Component is the identifier the check assigns to itself (for example
// "cassandra").
Component string `json:"component"`
// Details carries optional check-specific data; omitted from JSON when nil.
Details interface{} `json:"details,omitempty"`
}
// HealthResponse represents the overall health response returned by
// CheckHealth and encoded as JSON by HealthHandler.
type HealthResponse struct {
// Status is the aggregate status computed over all service results.
Status HealthStatus `json:"status"`
// Timestamp is the time at which the response was assembled.
Timestamp time.Time `json:"timestamp"`
// Services maps each registered check name to its result.
Services map[string]HealthCheckResult `json:"services"`
// Version is the reported service version; CheckHealth currently fills in a
// fixed value.
Version string `json:"version"`
// Uptime is the formatted duration since the HealthChecker was created.
Uptime string `json:"uptime"`
}
// HealthChecker manages health checks for various components. Create one
// with NewHealthChecker, which initializes the checks map and start time.
type HealthChecker struct {
// checks holds the registered check functions keyed by name.
checks map[string]HealthCheck
// mu guards checks: RegisterCheck takes the write lock, CheckHealth takes
// the read lock while copying the map.
mu sync.RWMutex
// logger receives handler-level errors such as JSON encoding failures.
logger *zap.Logger
// startTime is recorded at construction and used to compute uptime.
startTime time.Time
}
// HealthCheck represents a health check function. CheckHealth calls it with
// a context that carries the timeout for the whole health run and stores the
// returned result under the name the check was registered with.
type HealthCheck func(ctx context.Context) HealthCheckResult
// NewHealthChecker builds an empty HealthChecker that logs through logger.
// The construction time is recorded and later reported as the service uptime.
func NewHealthChecker(logger *zap.Logger) *HealthChecker {
	checker := new(HealthChecker)
	checker.checks = make(map[string]HealthCheck)
	checker.logger = logger
	checker.startTime = time.Now()
	return checker
}
// RegisterCheck stores check under name, replacing any check previously
// registered with the same name. It takes the write lock, so it may be called
// concurrently with CheckHealth.
func (hc *HealthChecker) RegisterCheck(name string, check HealthCheck) {
	hc.mu.Lock()
	defer hc.mu.Unlock()

	hc.checks[name] = check
}
// CheckHealth runs every registered check and folds the individual results
// into a single HealthResponse.
//
// The registered checks are copied under the read lock, so the lock is not
// held while the checks execute. Checks run one after another and share one
// context derived from ctx with a 30-second timeout. Each result's Duration
// is overwritten with the time measured around the call.
//
// The overall status is unhealthy if any check reports unhealthy, otherwise
// degraded if any check reports degraded, otherwise healthy.
func (hc *HealthChecker) CheckHealth(ctx context.Context) HealthResponse {
	// Snapshot the registry so slow checks never block RegisterCheck.
	hc.mu.RLock()
	snapshot := make(map[string]HealthCheck, len(hc.checks))
	for key, fn := range hc.checks {
		snapshot[key] = fn
	}
	hc.mu.RUnlock()

	timeoutCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	overall := HealthStatusHealthy
	services := make(map[string]HealthCheckResult)
	for key, fn := range snapshot {
		began := time.Now()
		outcome := fn(timeoutCtx)
		outcome.Duration = time.Since(began).String()
		services[key] = outcome

		switch outcome.Status {
		case HealthStatusUnhealthy:
			overall = HealthStatusUnhealthy
		case HealthStatusDegraded:
			// Degraded never masks an unhealthy result seen earlier.
			if overall == HealthStatusHealthy {
				overall = HealthStatusDegraded
			}
		}
	}

	return HealthResponse{
		Status:    overall,
		Timestamp: time.Now(),
		Services:  services,
		Version:   "1.0.0", // Could be injected
		Uptime:    time.Since(hc.startTime).String(),
	}
}
// HealthHandler returns an HTTP handler that runs all health checks and
// writes the full HealthResponse as JSON.
//
// Only GET is accepted; other methods receive 405. A healthy or degraded
// overall status is answered with 200, an unhealthy one with 503. Encoding
// failures are logged, not returned to the client.
func (hc *HealthChecker) HealthHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
			return
		}

		report := hc.CheckHealth(r.Context())

		w.Header().Set("Content-Type", "application/json")

		// Degraded is deliberately reported as 200: the service still works.
		if report.Status == HealthStatusUnhealthy {
			w.WriteHeader(http.StatusServiceUnavailable)
		} else if report.Status == HealthStatusHealthy || report.Status == HealthStatusDegraded {
			w.WriteHeader(http.StatusOK)
		}

		encodeErr := json.NewEncoder(w).Encode(report)
		if encodeErr != nil {
			hc.logger.Error("Failed to encode health response", zap.Error(encodeErr))
		}
	}
}
// ReadinessHandler returns an HTTP handler for a plain-text readiness probe.
//
// Only GET is accepted; other methods receive 405. The handler runs all
// health checks and answers 503 "NOT READY" when the overall status is
// unhealthy; any other overall status (including degraded) yields 200
// "READY".
func (hc *HealthChecker) ReadinessHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
			return
		}

		report := hc.CheckHealth(r.Context())

		code, body := http.StatusOK, "READY"
		if report.Status == HealthStatusUnhealthy {
			// Any unhealthy component makes the whole service not ready.
			code, body = http.StatusServiceUnavailable, "NOT READY"
		}

		w.WriteHeader(code)
		w.Write([]byte(body))
	}
}
// LivenessHandler returns an HTTP handler for a plain-text liveness probe.
// It runs no health checks: a GET is always answered with 200 "ALIVE", and
// any other method with 405.
func (hc *HealthChecker) LivenessHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if r.Method == http.MethodGet {
			// Being able to answer at all is the liveness signal.
			w.WriteHeader(http.StatusOK)
			w.Write([]byte("ALIVE"))
			return
		}
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
	}
}
// CassandraHealthCheck creates a health check for Cassandra database
// connectivity.
//
// The returned check runs "SELECT uuid() FROM system.local" on session and
// reports:
//   - unhealthy with error "session_nil" when session is nil,
//   - unhealthy with error "timeout" when ctx ends before the query succeeds,
//   - unhealthy with the driver error when the query fails for another reason
//     (this case is also logged at Warn level),
//   - healthy with the query result and elapsed time otherwise.
//
// The query is bound to ctx so that the driver abandons it once the health
// check deadline passes, instead of leaving the query and its goroutine
// running in the background after the check has already returned.
func CassandraHealthCheck(session *gocql.Session, logger *zap.Logger) HealthCheck {
	return func(ctx context.Context) HealthCheckResult {
		start := time.Now()

		if session == nil {
			return HealthCheckResult{
				Status:    HealthStatusUnhealthy,
				Message:   "Cassandra session is nil",
				Timestamp: time.Now(),
				Component: "cassandra",
				Details:   map[string]interface{}{"error": "session_nil"},
			}
		}

		// timedOut builds the result used whenever ctx ended first, whether
		// that was observed directly or via the error returned by the driver.
		timedOut := func() HealthCheckResult {
			return HealthCheckResult{
				Status:    HealthStatusUnhealthy,
				Message:   "Cassandra health check timed out",
				Timestamp: time.Now(),
				Component: "cassandra",
				Details: map[string]interface{}{
					"error":    "timeout",
					"duration": time.Since(start).String(),
				},
			}
		}

		var result string
		query := session.Query("SELECT uuid() FROM system.local").WithContext(ctx)

		// Run the query in a goroutine so the check still returns promptly on
		// ctx.Done even if the driver is slow to notice cancellation. The
		// channel is buffered so the goroutine never blocks on send.
		done := make(chan error, 1)
		go func() {
			done <- query.Scan(&result)
		}()

		select {
		case err := <-done:
			duration := time.Since(start)
			if err != nil {
				// A failure caused by ctx ending is a timeout, not a
				// Cassandra fault; report it the same way as the branch below.
				if ctx.Err() != nil {
					return timedOut()
				}
				logger.Warn("Cassandra health check failed",
					zap.Error(err),
					zap.Duration("duration", duration))
				return HealthCheckResult{
					Status:    HealthStatusUnhealthy,
					Message:   "Cassandra query failed: " + err.Error(),
					Timestamp: time.Now(),
					Component: "cassandra",
					Details: map[string]interface{}{
						"error":    err.Error(),
						"duration": duration.String(),
					},
				}
			}
			return HealthCheckResult{
				Status:    HealthStatusHealthy,
				Message:   "Cassandra connection healthy",
				Timestamp: time.Now(),
				Component: "cassandra",
				Details: map[string]interface{}{
					"query_result": result,
					"duration":     duration.String(),
				},
			}
		case <-ctx.Done():
			return timedOut()
		}
	}
}
// TwoTierCacheHealthCheck creates a health check for the two-tier cache
// system.
//
// The returned check writes a probe key, reads it back, deletes it, and
// reports:
//   - unhealthy with error "cache_nil" when cache is nil,
//   - unhealthy when Set or Get returns an error (also logged at Warn level),
//   - degraded when the value read back differs from the value written,
//   - healthy otherwise.
//
// The probe key includes nanoseconds so that checks running in the same
// second (for example /health and /health/ready, or several replicas sharing
// one cache) do not read or delete each other's key. The key is deleted on
// every path after a successful Set, so failed probes do not leave keys
// behind.
func TwoTierCacheHealthCheck(cache twotiercache.TwoTierCacher, logger *zap.Logger) HealthCheck {
	return func(ctx context.Context) HealthCheckResult {
		start := time.Now()

		if cache == nil {
			return HealthCheckResult{
				Status:    HealthStatusUnhealthy,
				Message:   "Cache instance is nil",
				Timestamp: time.Now(),
				Component: "two_tier_cache",
				Details:   map[string]interface{}{"error": "cache_nil"},
			}
		}

		// opFailed logs and builds the unhealthy result for a failed cache
		// operation; label is the upper-case name used in messages and op the
		// lower-case name used in the details.
		opFailed := func(label, op string, err error) HealthCheckResult {
			duration := time.Since(start)
			logger.Warn("Cache health check "+label+" failed",
				zap.Error(err),
				zap.Duration("duration", duration))
			return HealthCheckResult{
				Status:    HealthStatusUnhealthy,
				Message:   "Cache " + label + " operation failed: " + err.Error(),
				Timestamp: time.Now(),
				Component: "two_tier_cache",
				Details: map[string]interface{}{
					"error":     err.Error(),
					"operation": op,
					"duration":  duration.String(),
				},
			}
		}

		healthKey := "health_check_" + time.Now().Format("20060102150405.000000000")
		testValue := []byte("health_check_value")

		if err := cache.Set(ctx, healthKey, testValue); err != nil {
			return opFailed("SET", "set", err)
		}

		retrievedValue, err := cache.Get(ctx, healthKey)

		// Remove the probe key regardless of how the read went. Cleanup is
		// best effort: a failed delete must not change the health verdict.
		_ = cache.Delete(ctx, healthKey)

		if err != nil {
			return opFailed("GET", "get", err)
		}

		if string(retrievedValue) != string(testValue) {
			return HealthCheckResult{
				Status:    HealthStatusDegraded,
				Message:   "Cache value mismatch",
				Timestamp: time.Now(),
				Component: "two_tier_cache",
				Details: map[string]interface{}{
					"expected": string(testValue),
					"actual":   string(retrievedValue),
					"duration": time.Since(start).String(),
				},
			}
		}

		return HealthCheckResult{
			Status:    HealthStatusHealthy,
			Message:   "Two-tier cache healthy",
			Timestamp: time.Now(),
			Component: "two_tier_cache",
			Details: map[string]interface{}{
				"operations_tested": []string{"set", "get", "delete"},
				"duration":          time.Since(start).String(),
			},
		}
	}
}
// S3HealthCheck builds a health check for the S3 object storage.
//
// The returned check reports unhealthy with error "storage_nil" when
// s3Storage is nil. Otherwise it calls ListAllObjects and reports unhealthy
// (and logs at Warn level) if that call fails, or healthy if it succeeds.
// The listing itself is discarded; only the error matters.
//
// NOTE(review): the name ListAllObjects suggests the whole bucket is
// enumerated on every probe — confirm the cost against the storage
// implementation.
func S3HealthCheck(s3Storage s3.S3ObjectStorage, logger *zap.Logger) HealthCheck {
	return func(ctx context.Context) HealthCheckResult {
		began := time.Now()

		if s3Storage == nil {
			return HealthCheckResult{
				Status:    HealthStatusUnhealthy,
				Message:   "S3 storage instance is nil",
				Timestamp: time.Now(),
				Component: "s3_storage",
				Details:   map[string]interface{}{"error": "storage_nil"},
			}
		}

		_, listErr := s3Storage.ListAllObjects(ctx)
		elapsed := time.Since(began)

		if listErr == nil {
			return HealthCheckResult{
				Status:    HealthStatusHealthy,
				Message:   "S3 storage healthy",
				Timestamp: time.Now(),
				Component: "s3_storage",
				Details: map[string]interface{}{
					"operation": "list_objects",
					"duration":  elapsed.String(),
				},
			}
		}

		logger.Warn("S3 health check failed",
			zap.Error(listErr),
			zap.Duration("duration", elapsed))

		failure := HealthCheckResult{
			Status:    HealthStatusUnhealthy,
			Message:   "S3 connectivity failed: " + listErr.Error(),
			Timestamp: time.Now(),
			Component: "s3_storage",
		}
		failure.Details = map[string]interface{}{
			"error":     listErr.Error(),
			"operation": "list_objects",
			"duration":  elapsed.String(),
		}
		return failure
	}
}
// RegisterRealHealthChecks wires the Cassandra, cache and S3 checks into hc
// under the names "cassandra", "cache" and "s3_storage", then logs the list
// of registered components.
//
// Note: This function was previously used with Uber FX. It can be called
// directly or wired through Google Wire if needed.
func RegisterRealHealthChecks(
	hc *HealthChecker,
	logger *zap.Logger,
	cassandraSession *gocql.Session,
	cache twotiercache.TwoTierCacher,
	s3Storage s3.S3ObjectStorage,
) {
	registrations := []struct {
		name  string
		check HealthCheck
	}{
		{"cassandra", CassandraHealthCheck(cassandraSession, logger)},
		{"cache", TwoTierCacheHealthCheck(cache, logger)},
		{"s3_storage", S3HealthCheck(s3Storage, logger)},
	}

	names := make([]string, 0, len(registrations))
	for _, reg := range registrations {
		hc.RegisterCheck(reg.name, reg.check)
		names = append(names, reg.name)
	}

	logger.Info("Real infrastructure health checks registered",
		zap.Strings("components", names))
}
// StartObservabilityServer starts the observability HTTP server on a
// separate port (":8080") and returns it so the caller can shut it down.
//
// Routes: /health, /health/ready and /health/live are served by hc, and
// /metrics by ms.
//
// The listening socket is opened before the function returns, so a bind
// failure (for example the port already being in use) is reported through
// the error return instead of only being logged from the background
// goroutine. On error the returned server is nil. Requests are served in a
// background goroutine that ends when the server is shut down or closed;
// any other serve error is logged.
//
// Note: This function was previously integrated with Uber FX lifecycle.
// It should now be called manually or integrated with Google Wire if needed.
func StartObservabilityServer(
	hc *HealthChecker,
	ms *MetricsServer,
	logger *zap.Logger,
) (*http.Server, error) {
	mux := http.NewServeMux()

	// Health endpoints
	mux.HandleFunc("/health", hc.HealthHandler())
	mux.HandleFunc("/health/ready", hc.ReadinessHandler())
	mux.HandleFunc("/health/live", hc.LivenessHandler())

	// Metrics endpoint
	mux.Handle("/metrics", ms.Handler())

	// NOTE(review): CheckHealth bounds a health run at 30s, the same value as
	// WriteTimeout, so a check that runs to its deadline may hit the write
	// deadline before the response is sent — consider a larger WriteTimeout.
	server := &http.Server{
		Addr:         ":8080", // Separate port for observability
		Handler:      mux,
		ReadTimeout:  30 * time.Second,
		WriteTimeout: 30 * time.Second,
		IdleTimeout:  60 * time.Second,
	}

	// Bind synchronously so the caller learns about bind failures.
	listener, err := net.Listen("tcp", server.Addr)
	if err != nil {
		return nil, err
	}

	go func() {
		logger.Info("Starting observability server on :8080")
		if err := server.Serve(listener); err != nil && err != http.ErrServerClosed {
			logger.Error("Observability server failed", zap.Error(err))
		}
	}()

	return server, nil
}

View file

@ -0,0 +1,89 @@
// codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/observability/metrics.go
package observability
import (
"fmt"
"net/http"
"runtime"
"time"
"go.uber.org/zap"
)
// MetricsServer provides basic metrics endpoint. Create one with
// NewMetricsServer, which records the start time used for the uptime metric.
type MetricsServer struct {
// logger receives the debug output of RecordMetric.
logger *zap.Logger
// startTime is set at construction; uptime is measured from it.
startTime time.Time
}
// NewMetricsServer builds a MetricsServer that logs through logger. The
// construction time is recorded and used as the origin of the uptime metric.
func NewMetricsServer(logger *zap.Logger) *MetricsServer {
	server := new(MetricsServer)
	server.logger = logger
	server.startTime = time.Now()
	return server
}
// Handler returns an HTTP handler that writes the collected metrics as plain
// text, one line per entry. Only GET is accepted; other methods receive 405.
func (ms *MetricsServer) Handler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
			return
		}

		lines := ms.collectMetrics()

		w.Header().Set("Content-Type", "text/plain; charset=utf-8")
		w.WriteHeader(http.StatusOK)

		for i := range lines {
			fmt.Fprintln(w, lines[i])
		}
	}
}
// collectMetrics collects basic application metrics and returns them as
// text lines: for each metric a "# HELP" line, a "# TYPE" line and a sample
// line. It reports uptime since the MetricsServer was created, memory
// statistics from runtime.ReadMemStats, the number of GC runs and the current
// goroutine count.
//
// Constant HELP/TYPE lines are plain string literals; fmt.Sprintf is used
// only where a value is actually formatted.
func (ms *MetricsServer) collectMetrics() []string {
	var m runtime.MemStats
	runtime.ReadMemStats(&m)

	uptime := time.Since(ms.startTime).Seconds()

	return []string{
		"# HELP mapleopentech_uptime_seconds Total uptime of the service in seconds",
		"# TYPE mapleopentech_uptime_seconds counter",
		fmt.Sprintf("mapleopentech_uptime_seconds %.2f", uptime),

		"# HELP mapleopentech_memory_alloc_bytes Currently allocated memory in bytes",
		"# TYPE mapleopentech_memory_alloc_bytes gauge",
		fmt.Sprintf("mapleopentech_memory_alloc_bytes %d", m.Alloc),

		"# HELP mapleopentech_memory_total_alloc_bytes Total allocated memory in bytes",
		"# TYPE mapleopentech_memory_total_alloc_bytes counter",
		fmt.Sprintf("mapleopentech_memory_total_alloc_bytes %d", m.TotalAlloc),

		"# HELP mapleopentech_memory_sys_bytes Memory obtained from system in bytes",
		"# TYPE mapleopentech_memory_sys_bytes gauge",
		fmt.Sprintf("mapleopentech_memory_sys_bytes %d", m.Sys),

		"# HELP mapleopentech_gc_runs_total Total number of GC runs",
		"# TYPE mapleopentech_gc_runs_total counter",
		fmt.Sprintf("mapleopentech_gc_runs_total %d", m.NumGC),

		"# HELP mapleopentech_goroutines Current number of goroutines",
		"# TYPE mapleopentech_goroutines gauge",
		fmt.Sprintf("mapleopentech_goroutines %d", runtime.NumGoroutine()),
	}
}
// RecordMetric is a placeholder for custom metric recording. At present it
// only writes the metric name, value and labels to the debug log; nothing is
// stored or exposed through the metrics endpoint.
func (ms *MetricsServer) RecordMetric(name string, value float64, labels map[string]string) {
	fields := []zap.Field{
		zap.String("name", name),
		zap.Float64("value", value),
		zap.Any("labels", labels),
	}
	ms.logger.Debug("Recording metric", fields...)
}

View file

@ -0,0 +1,6 @@
// codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/observability/module.go
package observability
// Note: This file previously contained Uber FX module definitions.
// The application now uses Google Wire for dependency injection.
// Observability components should be wired through Wire providers if needed.

View file

@ -0,0 +1,92 @@
// codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/pkg/observability/routes.go
package observability
import (
"net/http"
"go.uber.org/zap"
)
// HealthRoute exposes the detailed health endpoint as an http.Handler paired
// with the URL pattern it is meant to be mounted on.
type HealthRoute struct {
	checker *HealthChecker
	logger  *zap.Logger
}

// NewHealthRoute builds a HealthRoute that serves requests through checker.
func NewHealthRoute(checker *HealthChecker, logger *zap.Logger) *HealthRoute {
	route := new(HealthRoute)
	route.checker = checker
	route.logger = logger
	return route
}

// ServeHTTP delegates to the handler produced by HealthChecker.HealthHandler.
func (h *HealthRoute) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	handle := h.checker.HealthHandler()
	handle(w, r)
}

// Pattern returns the URL path for this route.
func (h *HealthRoute) Pattern() string {
	const pattern = "/health"
	return pattern
}
// ReadinessRoute exposes the readiness probe as an http.Handler paired with
// the URL pattern it is meant to be mounted on.
type ReadinessRoute struct {
	checker *HealthChecker
	logger  *zap.Logger
}

// NewReadinessRoute builds a ReadinessRoute that serves requests through
// checker.
func NewReadinessRoute(checker *HealthChecker, logger *zap.Logger) *ReadinessRoute {
	route := new(ReadinessRoute)
	route.checker = checker
	route.logger = logger
	return route
}

// ServeHTTP delegates to the handler produced by
// HealthChecker.ReadinessHandler.
func (r *ReadinessRoute) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	handle := r.checker.ReadinessHandler()
	handle(w, req)
}

// Pattern returns the URL path for this route.
func (r *ReadinessRoute) Pattern() string {
	const pattern = "/health/ready"
	return pattern
}
// LivenessRoute exposes the liveness probe as an http.Handler paired with the
// URL pattern it is meant to be mounted on.
type LivenessRoute struct {
	checker *HealthChecker
	logger  *zap.Logger
}

// NewLivenessRoute builds a LivenessRoute that serves requests through
// checker.
func NewLivenessRoute(checker *HealthChecker, logger *zap.Logger) *LivenessRoute {
	route := new(LivenessRoute)
	route.checker = checker
	route.logger = logger
	return route
}

// ServeHTTP delegates to the handler produced by
// HealthChecker.LivenessHandler.
func (l *LivenessRoute) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	handle := l.checker.LivenessHandler()
	handle(w, r)
}

// Pattern returns the URL path for this route.
func (l *LivenessRoute) Pattern() string {
	const pattern = "/health/live"
	return pattern
}
// MetricsRoute exposes the metrics endpoint as an http.Handler paired with
// the URL pattern it is meant to be mounted on.
type MetricsRoute struct {
	server *MetricsServer
	logger *zap.Logger
}

// NewMetricsRoute builds a MetricsRoute that serves requests through server.
func NewMetricsRoute(server *MetricsServer, logger *zap.Logger) *MetricsRoute {
	route := new(MetricsRoute)
	route.server = server
	route.logger = logger
	return route
}

// ServeHTTP delegates to the handler produced by MetricsServer.Handler.
func (m *MetricsRoute) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	handle := m.server.Handler()
	handle(w, r)
}

// Pattern returns the URL path for this route.
func (m *MetricsRoute) Pattern() string {
	const pattern = "/metrics"
	return pattern
}