Skip to content

3.3 Health Checks & Heartbeats

Overview

Health checks and heartbeats are essential monitoring mechanisms that ensure system components are functioning correctly. They enable early detection of issues, automated recovery, and informed load balancing decisions.

Spring Boot Actuator Health Checks

Basic Health Check Configuration

java
/**
 * Spring configuration that declares a health contributor registry and a
 * {@code HealthEndpoint} exposing two named groups, "liveness" and "readiness".
 *
 * NOTE(review): the {@code HealthEndpointGroups.of(...)} and
 * {@code HealthEndpointGroup.of(...)} factory calls below should be verified against
 * the Spring Boot version in use; health groups are commonly declared through the
 * {@code management.endpoint.health.group.*} properties instead — confirm.
 */
@Configuration
public class HealthCheckConfig {
    
    /**
     * Creates the registry via the no-argument {@code DefaultHealthContributorRegistry}
     * constructor.
     */
    @Bean
    public HealthContributorRegistry healthContributorRegistry() {
        return new DefaultHealthContributorRegistry();
    }
    
    /**
     * Builds the health endpoint on top of the given registry.
     *
     * The "liveness" group lists only "livenessStateHealthIndicator"; the "readiness"
     * group lists "readinessStateHealthIndicator", "db" and "redis". Both groups use
     * the default status aggregator and the default HTTP status mapper.
     *
     * @param registry registry handed to the endpoint
     * @return the configured endpoint
     */
    @Bean
    public HealthEndpoint healthEndpoint(HealthContributorRegistry registry) {
        return new HealthEndpoint(registry, HealthEndpointGroups.of(
            Map.of("liveness", HealthEndpointGroup.of(
                Set.of("livenessStateHealthIndicator"), 
                StatusAggregator.getDefault(), 
                HttpCodeStatusMapper.DEFAULT, 
                // presumably the "show details" setting ("never") — TODO confirm
                Set.of("never")
            ),
            "readiness", HealthEndpointGroup.of(
                Set.of("readinessStateHealthIndicator", "db", "redis"), 
                StatusAggregator.getDefault(), 
                HttpCodeStatusMapper.DEFAULT, 
                Set.of("never")
            ))
        ));
    }
}

Custom Health Indicators

java
/**
 * Health indicator that validates a pooled database connection and, when the pool is
 * HikariCP, reports pool statistics alongside the connectivity result.
 */
@Component
public class DatabaseHealthIndicator implements HealthIndicator {

    /** Timeout, in seconds, handed to {@link Connection#isValid(int)}. */
    private static final int VALIDATION_TIMEOUT_SECONDS = 5;

    /** Value reported when a statistic cannot be determined. */
    private static final int UNKNOWN = -1;

    private final DataSource dataSource;

    public DatabaseHealthIndicator(DataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Borrows a connection, validates it and reports UP with timing and pool details,
     * or DOWN when the connection is invalid or cannot be obtained.
     *
     * @return the resulting health; never throws for database failures
     */
    @Override
    public Health health() {
        Health.Builder builder = new Health.Builder();

        try (Connection connection = dataSource.getConnection()) {
            if (connection.isValid(VALIDATION_TIMEOUT_SECONDS)) {
                long responseTime = measureResponseTime(connection);

                builder.up()
                    // Ask the driver instead of hard-coding the product name.
                    .withDetail("database", connection.getMetaData().getDatabaseProductName())
                    .withDetail("responseTime", responseTime + "ms")
                    .withDetail("activeConnections", getActiveConnections())
                    .withDetail("maxConnections", getMaxConnections());
            } else {
                builder.down()
                    .withDetail("error", "Database connection is not valid");
            }

        } catch (SQLException e) {
            // down(Throwable) records the "error" detail itself. A separate
            // withDetail("error", e.getMessage()) would be overwritten anyway and
            // rejects a null message.
            builder.down(e);
        }

        return builder.build();
    }

    /**
     * Times a trivial {@code SELECT 1} round trip on the given connection.
     *
     * @return elapsed milliseconds, or -1 when the query fails
     */
    private long measureResponseTime(Connection connection) {
        long startTime = System.currentTimeMillis();
        try (PreparedStatement stmt = connection.prepareStatement("SELECT 1")) {
            stmt.executeQuery();
            return System.currentTimeMillis() - startTime;
        } catch (SQLException e) {
            return UNKNOWN;
        }
    }

    /**
     * @return connections currently in use, or -1 when the pool is not HikariCP or its
     *         management bean is not available
     */
    private int getActiveConnections() {
        if (dataSource instanceof HikariDataSource) {
            HikariPoolMXBean poolBean = ((HikariDataSource) dataSource).getHikariPoolMXBean();
            if (poolBean != null) {
                return poolBean.getActiveConnections();
            }
        }
        return UNKNOWN;
    }

    /**
     * @return the configured maximum pool size, or -1 when the pool is not HikariCP
     */
    private int getMaxConnections() {
        if (dataSource instanceof HikariDataSource) {
            // The configured ceiling, not getTotalConnections(), which is only the
            // number of connections the pool holds right now.
            return ((HikariDataSource) dataSource).getMaximumPoolSize();
        }
        return UNKNOWN;
    }
}

Redis Health Indicator

java
@Component
@ConditionalOnProperty(name = "spring.redis.host")
public class RedisHealthIndicator implements HealthIndicator {
    
    private final RedisTemplate<String, Object> redisTemplate;
    
    public RedisHealthIndicator(RedisTemplate<String, Object> redisTemplate) {
        this.redisTemplate = redisTemplate;
    }
    
    @Override
    public Health health() {
        Health.Builder builder = new Health.Builder();
        
        try {
            // Test Redis connectivity with PING command
            String response = redisTemplate.getConnectionFactory()
                .getConnection()
                .ping();
                
            if ("PONG".equals(response)) {
                RedisConnectionInfo info = getRedisInfo();
                
                builder.up()
                    .withDetail("redis", "Available")
                    .withDetail("version", info.getVersion())
                    .withDetail("mode", info.getMode())
                    .withDetail("connectedClients", info.getConnectedClients())
                    .withDetail("usedMemory", info.getUsedMemoryHuman())
                    .withDetail("responseTime", measureRedisResponseTime() + "ms");
            } else {
                builder.down()
                    .withDetail("error", "Unexpected response: " + response);
            }
            
        } catch (Exception e) {
            builder.down()
                .withDetail("error", e.getMessage())
                .withException(e);
        }
        
        return builder.build();
    }
    
    private long measureRedisResponseTime() {
        long startTime = System.currentTimeMillis();
        try {
            redisTemplate.opsForValue().get("health-check-key");
            return System.currentTimeMillis() - startTime;
        } catch (Exception e) {
            return -1;
        }
    }
    
    private RedisConnectionInfo getRedisInfo() {
        // Implementation to get Redis server info
        return new RedisConnectionInfo("6.2.6", "standalone", 1, "1MB");
    }
    
    private static class RedisConnectionInfo {
        private final String version;
        private final String mode;
        private final int connectedClients;
        private final String usedMemoryHuman;
        
        public RedisConnectionInfo(String version, String mode, 
                                 int connectedClients, String usedMemoryHuman) {
            this.version = version;
            this.mode = mode;
            this.connectedClients = connectedClients;
            this.usedMemoryHuman = usedMemoryHuman;
        }
        
        // Getters...
        public String getVersion() { return version; }
        public String getMode() { return mode; }
        public int getConnectedClients() { return connectedClients; }
        public String getUsedMemoryHuman() { return usedMemoryHuman; }
    }
}

External Service Health Indicator

java
/**
 * Health indicator that calls an external service's health URL through a circuit
 * breaker named {@code external-service-health}.
 */
@Component
public class ExternalServiceHealthIndicator implements HealthIndicator {

    private final RestTemplate restTemplate;
    private final CircuitBreaker circuitBreaker;

    /** URL of the remote health endpoint, injected from {@code external.service.health.url}. */
    @Value("${external.service.health.url}")
    private String healthCheckUrl;

    public ExternalServiceHealthIndicator(RestTemplate restTemplate,
                                        CircuitBreakerRegistry registry) {
        this.restTemplate = restTemplate;
        this.circuitBreaker = registry.circuitBreaker("external-service-health");
    }

    /**
     * Performs one guarded GET against the health URL and reports the result.
     *
     * The response time is that of the guarded call itself; no second request is sent.
     * The {@code status} and {@code version} details are added only when the remote
     * body provides them, so a sparse or empty body no longer turns a healthy service
     * into DOWN.
     *
     * @return UP on a 2xx response, DOWN otherwise or when the circuit breaker is open
     */
    @Override
    @SuppressWarnings("rawtypes") // getForEntity(url, Map.class) can only yield ResponseEntity<Map>
    public Health health() {
        Health.Builder builder = new Health.Builder();

        try {
            Supplier<ResponseEntity<Map>> healthCheckSupplier = CircuitBreaker
                .decorateSupplier(circuitBreaker,
                    () -> restTemplate.getForEntity(healthCheckUrl, Map.class));

            long startTime = System.currentTimeMillis();
            ResponseEntity<Map> response = healthCheckSupplier.get();
            long responseTime = System.currentTimeMillis() - startTime;

            if (response.getStatusCode().is2xxSuccessful()) {
                builder.up()
                    .withDetail("externalService", "Available")
                    .withDetail("responseTime", responseTime);

                Map<?, ?> body = response.getBody();
                if (body != null) {
                    addDetailIfPresent(builder, "status", body.get("status"));
                    addDetailIfPresent(builder, "version", body.get("version"));
                }
            } else {
                builder.down()
                    .withDetail("error", "HTTP " + response.getStatusCode());
            }

        } catch (CallNotPermittedException e) {
            builder.down()
                .withDetail("error", "Circuit breaker is open")
                .withDetail("circuitBreakerState", circuitBreaker.getState());

        } catch (Exception e) {
            // down(Throwable) records the "error" detail itself and copes with
            // exceptions that carry no message.
            builder.down(e);
        }

        return builder.build();
    }

    /** Adds a detail only when a value exists; the builder rejects null values. */
    private static void addDetailIfPresent(Health.Builder builder, String key, Object value) {
        if (value != null) {
            builder.withDetail(key, value);
        }
    }
}

Kubernetes Health Checks

Liveness and Readiness Probes

yaml
# kubernetes/deployment.yaml
# Deployment of three replicas of the "spring-boot-app" container, probed over HTTP on
# port 8080 using the Actuator liveness and readiness paths.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: spring-boot-app
spec:
  replicas: 3
  selector:
    matchLabels:
      app: spring-boot-app
  template:
    metadata:
      labels:
        app: spring-boot-app
    spec:
      containers:
      - name: app
        # NOTE(review): a mutable "latest" tag makes rollouts hard to reproduce —
        # consider pinning a version.
        image: spring-boot-app:latest
        ports:
        - containerPort: 8080
        
        # Liveness probe - checks if container should be restarted
        # First check after 30s, then every 10s; 3 consecutive failures (5s timeout
        # each) mark the container as failed.
        livenessProbe:
          httpGet:
            path: /actuator/health/liveness
            port: 8080
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
          successThreshold: 1
        
        # Readiness probe - checks if container is ready to serve traffic
        # First check after 15s, then every 5s with a 3s timeout.
        readinessProbe:
          httpGet:
            path: /actuator/health/readiness
            port: 8080
          initialDelaySeconds: 15
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 3
          successThreshold: 1
        
        # Startup probe - gives more time for slow-starting containers
        # Uses the liveness path; 30 failures x 5s period allows roughly 150s of
        # startup after the initial 10s delay.
        # NOTE(review): presumably the other probes are held back until this one
        # succeeds — confirm against the Kubernetes probe documentation.
        startupProbe:
          httpGet:
            path: /actuator/health/liveness
            port: 8080
          initialDelaySeconds: 10
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 30
          successThreshold: 1
        
        # Requested resources and the hard limits (twice the requests).
        resources:
          requests:
            memory: "256Mi"
            cpu: "250m"
          limits:
            memory: "512Mi"
            cpu: "500m"
        
        # Activates the "kubernetes" Spring profile inside the container.
        env:
        - name: SPRING_PROFILES_ACTIVE
          value: "kubernetes"

Health Check Controller

java
@RestController
@RequestMapping("/health")
public class HealthController {
    
    private final ApplicationReadinessChecker readinessChecker;
    private final ApplicationLivenessChecker livenessChecker;
    
    public HealthController(ApplicationReadinessChecker readinessChecker,
                           ApplicationLivenessChecker livenessChecker) {
        this.readinessChecker = readinessChecker;
        this.livenessChecker = livenessChecker;
    }
    
    @GetMapping("/liveness")
    public ResponseEntity<Map<String, Object>> liveness() {
        Map<String, Object> response = new HashMap<>();
        
        try {
            boolean isAlive = livenessChecker.check();
            
            if (isAlive) {
                response.put("status", "UP");
                response.put("timestamp", Instant.now());
                response.put("uptime", getUptime());
                return ResponseEntity.ok(response);
            } else {
                response.put("status", "DOWN");
                response.put("reason", "Application is not alive");
                return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                    .body(response);
            }
            
        } catch (Exception e) {
            response.put("status", "DOWN");
            response.put("error", e.getMessage());
            return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                .body(response);
        }
    }
    
    @GetMapping("/readiness")
    public ResponseEntity<Map<String, Object>> readiness() {
        Map<String, Object> response = new HashMap<>();
        
        try {
            ReadinessResult result = readinessChecker.check();
            
            if (result.isReady()) {
                response.put("status", "UP");
                response.put("dependencies", result.getDependencyStatus());
                response.put("timestamp", Instant.now());
                return ResponseEntity.ok(response);
            } else {
                response.put("status", "DOWN");
                response.put("failedDependencies", result.getFailedDependencies());
                response.put("dependencies", result.getDependencyStatus());
                return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                    .body(response);
            }
            
        } catch (Exception e) {
            response.put("status", "DOWN");
            response.put("error", e.getMessage());
            return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                .body(response);
        }
    }
    
    private String getUptime() {
        long uptimeMs = ManagementFactory.getRuntimeMXBean().getUptime();
        Duration uptime = Duration.ofMillis(uptimeMs);
        
        long days = uptime.toDays();
        long hours = uptime.toHours() % 24;
        long minutes = uptime.toMinutes() % 60;
        
        return String.format("%dd %dh %dm", days, hours, minutes);
    }
}

Readiness and Liveness Checkers

java
@Component
public class ApplicationReadinessChecker {
    
    private final List<HealthIndicator> healthIndicators;
    
    public ApplicationReadinessChecker(List<HealthIndicator> healthIndicators) {
        this.healthIndicators = healthIndicators;
    }
    
    public ReadinessResult check() {
        Map<String, Health> dependencyStatus = new HashMap<>();
        List<String> failedDependencies = new ArrayList<>();
        
        for (HealthIndicator indicator : healthIndicators) {
            String name = indicator.getClass().getSimpleName()
                .replace("HealthIndicator", "");
            
            try {
                Health health = indicator.health();
                dependencyStatus.put(name, health);
                
                if (health.getStatus() != Status.UP) {
                    failedDependencies.add(name);
                }
                
            } catch (Exception e) {
                Health errorHealth = Health.down()
                    .withDetail("error", e.getMessage())
                    .build();
                dependencyStatus.put(name, errorHealth);
                failedDependencies.add(name);
            }
        }
        
        boolean isReady = failedDependencies.isEmpty();
        return new ReadinessResult(isReady, dependencyStatus, failedDependencies);
    }
    
    public static class ReadinessResult {
        private final boolean ready;
        private final Map<String, Health> dependencyStatus;
        private final List<String> failedDependencies;
        
        public ReadinessResult(boolean ready, 
                             Map<String, Health> dependencyStatus,
                             List<String> failedDependencies) {
            this.ready = ready;
            this.dependencyStatus = dependencyStatus;
            this.failedDependencies = failedDependencies;
        }
        
        // Getters...
        public boolean isReady() { return ready; }
        public Map<String, Health> getDependencyStatus() { return dependencyStatus; }
        public List<String> getFailedDependencies() { return failedDependencies; }
    }
}

/**
 * Decides whether the application should be considered alive, based on the context
 * lifecycle, heap usage and live thread count.
 */
@Component
public class ApplicationLivenessChecker {

    /** Heap usage ratio at or above which liveness fails. */
    private static final double HEAP_USAGE_LIMIT = 0.95;

    /** Live thread count at or above which liveness fails. */
    private static final int THREAD_COUNT_LIMIT = 1000;

    private final AtomicBoolean alive = new AtomicBoolean(true);

    /** Marks the application as no longer alive once the context is closed. */
    @EventListener
    public void handleContextClosedEvent(ContextClosedEvent event) {
        alive.set(false);
    }

    /**
     * @return false once the context has closed; otherwise true only when both the
     *         memory check and the thread check pass
     */
    public boolean check() {
        if (alive.get()) {
            return checkMemoryUsage() && checkThreads();
        }
        return false;
    }

    /**
     * Passes while used heap divided by max heap stays below the limit.
     * NOTE(review): if the JVM reports max heap as undefined (-1) the ratio is
     * negative and this check passes — confirm that is intended.
     */
    private boolean checkMemoryUsage() {
        MemoryUsage heap = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
        double usedRatio = (double) heap.getUsed() / heap.getMax();
        return usedRatio < HEAP_USAGE_LIMIT;
    }

    /** Passes while the live thread count stays below the limit. */
    private boolean checkThreads() {
        int liveThreads = ManagementFactory.getThreadMXBean().getThreadCount();
        return liveThreads < THREAD_COUNT_LIMIT;
    }
}

AWS Load Balancer Health Checks

Application Load Balancer Configuration

java
@RestController
@RequestMapping("/health")
public class ALBHealthController {
    
    private final HealthService healthService;
    
    public ALBHealthController(HealthService healthService) {
        this.healthService = healthService;
    }
    
    @GetMapping("/alb")
    public ResponseEntity<String> albHealthCheck(
            @RequestHeader(value = "User-Agent", required = false) String userAgent) {
        
        // AWS ALB health check user agent
        if ("ELB-HealthChecker/2.0".equals(userAgent)) {
            log.debug("ALB health check request received");
        }
        
        try {
            HealthStatus status = healthService.getOverallHealth();
            
            if (status.isHealthy()) {
                return ResponseEntity.ok("OK");
            } else {
                return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                    .body("UNHEALTHY: " + status.getMessage());
            }
            
        } catch (Exception e) {
            log.error("Health check failed", e);
            return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                .body("ERROR: " + e.getMessage());
        }
    }
    
    @GetMapping("/detailed")
    public ResponseEntity<Map<String, Object>> detailedHealthCheck() {
        Map<String, Object> health = healthService.getDetailedHealth();
        
        boolean isHealthy = (Boolean) health.getOrDefault("healthy", false);
        
        if (isHealthy) {
            return ResponseEntity.ok(health);
        } else {
            return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                .body(health);
        }
    }
}

Health Service Implementation

java
@Service
public class HealthService {
    
    private final DataSource dataSource;
    private final RedisTemplate<String, Object> redisTemplate;
    private final RestTemplate restTemplate;
    
    @Value("${health.check.timeout:5000}")
    private long healthCheckTimeout;
    
    public HealthService(DataSource dataSource,
                        RedisTemplate<String, Object> redisTemplate,
                        RestTemplate restTemplate) {
        this.dataSource = dataSource;
        this.redisTemplate = redisTemplate;
        this.restTemplate = restTemplate;
    }
    
    public HealthStatus getOverallHealth() {
        List<CompletableFuture<HealthCheck>> healthChecks = Arrays.asList(
            checkDatabaseAsync(),
            checkRedisAsync(),
            checkExternalServicesAsync()
        );
        
        try {
            CompletableFuture.allOf(healthChecks.toArray(new CompletableFuture[0]))
                .get(healthCheckTimeout, TimeUnit.MILLISECONDS);
            
            List<HealthCheck> results = healthChecks.stream()
                .map(CompletableFuture::join)
                .collect(Collectors.toList());
            
            boolean allHealthy = results.stream().allMatch(HealthCheck::isHealthy);
            
            if (allHealthy) {
                return HealthStatus.healthy();
            } else {
                String failedChecks = results.stream()
                    .filter(check -> !check.isHealthy())
                    .map(HealthCheck::getName)
                    .collect(Collectors.joining(", "));
                    
                return HealthStatus.unhealthy("Failed checks: " + failedChecks);
            }
            
        } catch (TimeoutException e) {
            return HealthStatus.unhealthy("Health check timeout");
        } catch (Exception e) {
            return HealthStatus.unhealthy("Health check error: " + e.getMessage());
        }
    }
    
    public Map<String, Object> getDetailedHealth() {
        Map<String, Object> health = new HashMap<>();
        health.put("timestamp", Instant.now());
        health.put("service", "spring-boot-app");
        health.put("version", getClass().getPackage().getImplementationVersion());
        
        Map<String, Object> checks = new HashMap<>();
        
        // Database check
        HealthCheck dbCheck = checkDatabase();
        checks.put("database", Map.of(
            "status", dbCheck.isHealthy() ? "UP" : "DOWN",
            "responseTime", dbCheck.getResponseTime() + "ms",
            "details", dbCheck.getDetails()
        ));
        
        // Redis check
        HealthCheck redisCheck = checkRedis();
        checks.put("redis", Map.of(
            "status", redisCheck.isHealthy() ? "UP" : "DOWN",
            "responseTime", redisCheck.getResponseTime() + "ms",
            "details", redisCheck.getDetails()
        ));
        
        health.put("checks", checks);
        
        boolean allHealthy = checks.values().stream()
            .allMatch(check -> "UP".equals(((Map<String, Object>) check).get("status")));
            
        health.put("healthy", allHealthy);
        health.put("status", allHealthy ? "UP" : "DOWN");
        
        return health;
    }
    
    private CompletableFuture<HealthCheck> checkDatabaseAsync() {
        return CompletableFuture.supplyAsync(this::checkDatabase);
    }
    
    private CompletableFuture<HealthCheck> checkRedisAsync() {
        return CompletableFuture.supplyAsync(this::checkRedis);
    }
    
    private CompletableFuture<HealthCheck> checkExternalServicesAsync() {
        return CompletableFuture.supplyAsync(this::checkExternalServices);
    }
    
    private HealthCheck checkDatabase() {
        long startTime = System.currentTimeMillis();
        
        try (Connection connection = dataSource.getConnection()) {
            boolean isValid = connection.isValid(3);
            long responseTime = System.currentTimeMillis() - startTime;
            
            if (isValid) {
                return HealthCheck.healthy("database", responseTime, 
                    Map.of("driver", connection.getMetaData().getDriverName()));
            } else {
                return HealthCheck.unhealthy("database", responseTime, 
                    Map.of("error", "Connection not valid"));
            }
            
        } catch (SQLException e) {
            long responseTime = System.currentTimeMillis() - startTime;
            return HealthCheck.unhealthy("database", responseTime, 
                Map.of("error", e.getMessage()));
        }
    }
    
    private HealthCheck checkRedis() {
        long startTime = System.currentTimeMillis();
        
        try {
            String response = redisTemplate.getConnectionFactory()
                .getConnection()
                .ping();
            
            long responseTime = System.currentTimeMillis() - startTime;
            
            if ("PONG".equals(response)) {
                return HealthCheck.healthy("redis", responseTime, 
                    Map.of("response", response));
            } else {
                return HealthCheck.unhealthy("redis", responseTime, 
                    Map.of("error", "Unexpected response: " + response));
            }
            
        } catch (Exception e) {
            long responseTime = System.currentTimeMillis() - startTime;
            return HealthCheck.unhealthy("redis", responseTime, 
                Map.of("error", e.getMessage()));
        }
    }
    
    private HealthCheck checkExternalServices() {
        // Implementation for external service checks
        return HealthCheck.healthy("external-services", 100, Map.of());
    }
    
    // Helper classes
    public static class HealthStatus {
        private final boolean healthy;
        private final String message;
        
        private HealthStatus(boolean healthy, String message) {
            this.healthy = healthy;
            this.message = message;
        }
        
        public static HealthStatus healthy() {
            return new HealthStatus(true, "All systems operational");
        }
        
        public static HealthStatus unhealthy(String message) {
            return new HealthStatus(false, message);
        }
        
        public boolean isHealthy() { return healthy; }
        public String getMessage() { return message; }
    }
    
    public static class HealthCheck {
        private final String name;
        private final boolean healthy;
        private final long responseTime;
        private final Map<String, Object> details;
        
        private HealthCheck(String name, boolean healthy, long responseTime, 
                          Map<String, Object> details) {
            this.name = name;
            this.healthy = healthy;
            this.responseTime = responseTime;
            this.details = details;
        }
        
        public static HealthCheck healthy(String name, long responseTime, 
                                        Map<String, Object> details) {
            return new HealthCheck(name, true, responseTime, details);
        }
        
        public static HealthCheck unhealthy(String name, long responseTime, 
                                          Map<String, Object> details) {
            return new HealthCheck(name, false, responseTime, details);
        }
        
        // Getters...
        public String getName() { return name; }
        public boolean isHealthy() { return healthy; }
        public long getResponseTime() { return responseTime; }
        public Map<String, Object> getDetails() { return details; }
    }
}

This comprehensive health check implementation provides robust monitoring capabilities for Spring Boot applications across various deployment environments including Kubernetes and AWS.

Created by Eren Demir.