Skip to content

Metrics (Prometheus, Grafana) - Metric Collection and Monitoring

Collecting and visualizing metrics is critical for monitoring application performance and system health.

Spring Boot Actuator Metrics

Micrometer Integration

Micrometer provides a vendor-neutral metrics facade that integrates with a variety of monitoring systems:

xml
<!-- pom.xml - Micrometer dependencies -->
<!-- No <version> elements are given: this assumes versions are managed by the
     Spring Boot parent POM / dependency management (TODO confirm in the full pom.xml). -->
<dependencies>
    <!-- Spring Boot Actuator starter: provides the management endpoints that
         application.yml exposes (health, info, metrics, prometheus). -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-actuator</artifactId>
    </dependency>
    <!-- Micrometer registry that publishes metrics in the Prometheus format. -->
    <dependency>
        <groupId>io.micrometer</groupId>
        <artifactId>micrometer-registry-prometheus</artifactId>
    </dependency>
    <!-- Micrometer Tracing bridge to Brave. This is a tracing dependency, not a
         metrics one; it is not used by the metrics examples in this document. -->
    <dependency>
        <groupId>io.micrometer</groupId>
        <artifactId>micrometer-tracing-bridge-brave</artifactId>
    </dependency>
</dependencies>

Metrics Configuration

yaml
# application.yml - Metrics configuration
# Property names below target Spring Boot 3.x (implied by the Micrometer Tracing
# dependency in the pom). On Spring Boot 2.x the Prometheus export settings live
# under management.metrics.export.prometheus instead.
management:
  endpoints:
    web:
      exposure:
        # Actuator endpoints reachable over HTTP; /actuator/prometheus is the scrape target.
        include: health,info,metrics,prometheus
  endpoint:
    health:
      # NOTE(review): "always" exposes component details to every caller;
      # consider "when-authorized" outside of local development.
      show-details: always
    metrics:
      enabled: true
    prometheus:
      enabled: true
  # Spring Boot 3 moved exporter settings from management.metrics.export.<registry>
  # to management.<registry>.metrics.export; the old location is no longer bound.
  prometheus:
    metrics:
      export:
        enabled: true
        descriptions: true
        step: 10s
  metrics:
    distribution:
      # Client-side percentiles for HTTP server requests. This replaces
      # management.metrics.web.server.request.autotime.percentiles; request timing
      # itself is enabled by default and needs no explicit switch.
      percentiles:
        http.server.requests: 0.5, 0.9, 0.95, 0.99
      # Publish histogram buckets so histogram_quantile() can be used in PromQL.
      percentiles-histogram:
        http.server.requests: true
      # "slo" replaces the removed "sla" key; adds explicit bucket boundaries.
      slo:
        http.server.requests: 50ms,100ms,200ms,500ms,1s,2s
    # Common tags added to every meter.
    tags:
      application: ${spring.application.name}
      instance: ${spring.cloud.client.hostname:${spring.application.name}}
      version: ${app.version:unknown}

Custom Metrics Service

java
/**
 * Records application-level order and user metrics through Micrometer.
 *
 * <p>Meters that carry per-call tags are looked up (or created) at recording time via the
 * builder API; {@code register} hands back the already-registered meter when one with the
 * same name and tags exists. Every meter name is always registered with the same set of tag
 * keys, which the Prometheus registry requires.
 *
 * <p>Tag values should come from small, bounded sets (for example an enum name): every
 * distinct combination of tag values creates a separate time series.
 */
@Service
public class ApplicationMetricsService {

    private static final String ORDERS_CREATED = "orders.created.total";
    private static final String ORDERS_FAILED = "orders.failed.total";
    private static final String ORDER_PROCESSING = "order.processing.duration";
    private static final String ORDER_AMOUNT = "order.amount";
    private static final String UNKNOWN_TAG_VALUE = "unknown";
    private static final double[] PERCENTILES = {0.5, 0.9, 0.95, 0.99};

    private final MeterRegistry meterRegistry;
    private final Gauge activeUsersGauge;

    /** Backing value of the {@code users.active.current} gauge. */
    private final AtomicInteger activeUsers = new AtomicInteger(0);

    /**
     * Value holders for gauges created by {@link #recordBusinessMetric}, keyed by metric name
     * plus tags. The map keeps the holders strongly reachable (gauges only hold weak
     * references by default) and lets later calls update an existing gauge.
     */
    private final java.util.Map<String, java.util.concurrent.atomic.AtomicReference<Double>> businessGauges =
            new java.util.concurrent.ConcurrentHashMap<>();

    /**
     * Creates the service and registers the active-users gauge.
     *
     * @param meterRegistry registry that all meters of this service are registered with
     */
    public ApplicationMetricsService(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;

        // The gauge samples the AtomicInteger whenever the registry is read.
        this.activeUsersGauge = Gauge.builder("users.active.current", activeUsers, AtomicInteger::get)
                .description("Current number of active users")
                .register(meterRegistry);
    }

    /**
     * Counts one successfully created order.
     *
     * @param customerType  value of the {@code customer.type} tag; {@code null}/blank becomes "unknown"
     * @param paymentMethod value of the {@code payment.method} tag; {@code null}/blank becomes "unknown"
     */
    public void incrementOrderCreated(String customerType, String paymentMethod) {
        Counter.builder(ORDERS_CREATED)
                .description("Total number of orders created")
                .tag("status", "success")
                .tag("customer.type", tagValue(customerType))
                .tag("payment.method", tagValue(paymentMethod))
                .register(meterRegistry)
                .increment();
    }

    /**
     * Counts one failed order.
     *
     * @param reason       value of the {@code failure.reason} tag; {@code null}/blank becomes "unknown"
     * @param customerType value of the {@code customer.type} tag; {@code null}/blank becomes "unknown"
     */
    public void incrementOrderFailed(String reason, String customerType) {
        Counter.builder(ORDERS_FAILED)
                .description("Total number of failed orders")
                .tag("failure.reason", tagValue(reason))
                .tag("customer.type", tagValue(customerType))
                .register(meterRegistry)
                .increment();
    }

    /**
     * Starts timing an order.
     *
     * @return a sample to pass to {@link #recordOrderProcessingTime} once processing has finished
     */
    public Timer.Sample startOrderProcessing() {
        return Timer.start(meterRegistry);
    }

    /**
     * Stops the sample and records the elapsed time on the order-processing timer.
     *
     * @param sample    sample obtained from {@link #startOrderProcessing()}
     * @param orderType value of the {@code order.type} tag; {@code null}/blank becomes "unknown"
     */
    public void recordOrderProcessingTime(Timer.Sample sample, String orderType) {
        sample.stop(Timer.builder(ORDER_PROCESSING)
                .description("Time spent processing orders")
                .tag("order.type", tagValue(orderType))
                .publishPercentiles(PERCENTILES)
                .register(meterRegistry));
    }

    /**
     * Records the amount of one order.
     *
     * @param amount   order amount
     * @param currency value of the {@code currency} tag; {@code null}/blank becomes "unknown"
     */
    public void recordOrderAmount(double amount, String currency) {
        DistributionSummary.builder(ORDER_AMOUNT)
                .description("Distribution of order amounts")
                .baseUnit("currency")
                .tag("currency", tagValue(currency))
                .publishPercentiles(PERCENTILES)
                .register(meterRegistry)
                .record(amount);
    }

    /** Increases the active-user count by one. */
    public void incrementActiveUsers() {
        activeUsers.incrementAndGet();
    }

    /** Decreases the active-user count by one. */
    public void decrementActiveUsers() {
        activeUsers.decrementAndGet();
    }

    /**
     * Sets the current value of a named business gauge, creating the gauge on first use.
     *
     * @param metricName gauge name
     * @param value      value the gauge reports until the next call for the same name and tags
     * @param tags       alternating tag keys and values (must have even length)
     */
    public void recordBusinessMetric(String metricName, double value, String... tags) {
        String key = metricName + '|' + String.join(",", tags);
        businessGauges
                .computeIfAbsent(key, ignored -> registerBusinessGauge(metricName, tags))
                .set(value);
    }

    /** Registers a gauge backed by a fresh holder and returns that holder. */
    private java.util.concurrent.atomic.AtomicReference<Double> registerBusinessGauge(
            String metricName, String[] tags) {
        java.util.concurrent.atomic.AtomicReference<Double> holder =
                new java.util.concurrent.atomic.AtomicReference<>(Double.NaN);
        Gauge.builder(metricName, holder, ref -> ref.get())
                .tags(tags)
                .register(meterRegistry);
        return holder;
    }

    /** Micrometer rejects null tag values; substitute a fixed placeholder instead. */
    private static String tagValue(String raw) {
        return (raw == null || raw.trim().isEmpty()) ? UNKNOWN_TAG_VALUE : raw;
    }
}

Metrics Aspects

java
@Aspect
@Component
public class MetricsAspect {
    
    private final MeterRegistry meterRegistry;
    
    public MetricsAspect(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }
    
    @Around("@annotation(Timed)")
    public Object measureExecutionTime(ProceedingJoinPoint joinPoint, Timed timed) throws Throwable {
        String methodName = joinPoint.getSignature().getName();
        String className = joinPoint.getTarget().getClass().getSimpleName();
        
        return Timer.builder("method.execution.time")
                .description("Method execution time")
                .tag("class", className)
                .tag("method", methodName)
                .register(meterRegistry)
                .recordCallable(() -> {
                    try {
                        return joinPoint.proceed();
                    } catch (Throwable throwable) {
                        throw new RuntimeException(throwable);
                    }
                });
    }
    
    @Around("@annotation(Counted)")
    public Object countMethodCalls(ProceedingJoinPoint joinPoint, Counted counted) throws Throwable {
        String methodName = joinPoint.getSignature().getName();
        String className = joinPoint.getTarget().getClass().getSimpleName();
        
        Counter counter = Counter.builder("method.calls.total")
                .description("Method call count")
                .tag("class", className)
                .tag("method", methodName)
                .register(meterRegistry);
        
        try {
            Object result = joinPoint.proceed();
            counter.increment(Tags.of(Tag.of("result", "success")));
            return result;
        } catch (Exception e) {
            counter.increment(Tags.of(Tag.of("result", "error")));
            throw e;
        }
    }
}

Prometheus Integration

Prometheus Configuration

yaml
# prometheus.yml
# Prometheus server configuration: global defaults, rule files, scrape jobs and
# the Alertmanager endpoint.
global:
  # Defaults applied to every job that does not override them.
  scrape_interval: 15s
  evaluation_interval: 15s

# Alerting rules are loaded from this file (path as written here, relative).
rule_files:
  - "alert_rules.yml"

scrape_configs:
  # Statically configured Spring Boot instances, scraped via the Actuator endpoint.
  # This job overrides the global interval (5s instead of 15s).
  # NOTE(review): when Prometheus itself runs in a container (see the Docker Compose
  # section), "localhost" resolves to the Prometheus container, not the host -
  # confirm these targets are reachable from where Prometheus runs.
  - job_name: 'spring-boot-apps'
    static_configs:
      - targets: ['localhost:8080', 'localhost:8081']
    metrics_path: '/actuator/prometheus'
    scrape_interval: 5s
    scrape_timeout: 3s
    
  # Pods discovered through the Kubernetes API, filtered and rewritten using
  # prometheus.io/* pod annotations.
  - job_name: 'kubernetes-pods'
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      # Keep only pods whose prometheus.io/scrape annotation is "true".
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      # If prometheus.io/path is set (non-empty), use it as the metrics path.
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      # Rewrite the scrape address to <host>:<prometheus.io/port annotation>,
      # dropping any port already present in __address__.
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__

# Alertmanager instance(s) that fired alerts are sent to.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - alertmanager:9093

Alert Rules

yaml
# alert_rules.yml
# Metric names follow what Micrometer's Prometheus registry exports: the
# http.server.requests timer becomes http_server_requests_seconds_{count,sum,bucket}.
groups:
  - name: application_alerts
    rules:
      # More than 0.1 server errors per second per instance, sustained for 5 minutes.
      - alert: HighErrorRate
        expr: sum by (job, instance) (rate(http_server_requests_seconds_count{status=~"5.."}[5m])) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value }} errors per second"

      # histogram_quantile needs the bucket rates aggregated by "le"; this requires
      # percentile histograms to be enabled for http.server.requests.
      - alert: HighResponseTime
        expr: histogram_quantile(0.95, sum by (le, job, instance) (rate(http_server_requests_seconds_bucket[5m]))) > 1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High response time detected"
          description: "95th percentile response time is {{ $value }} seconds"

      - alert: DatabaseConnectionPoolExhausted
        expr: hikaricp_connections_active / hikaricp_connections_max > 0.9
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Database connection pool nearly exhausted"
          # The expression yields a 0-1 ratio; humanizePercentage renders it as a percentage.
          description: "{{ $value | humanizePercentage }} of database connections are in use"

      # Heap only, summed over memory pools. Pools without a defined maximum report
      # -1 and are excluded from the denominator.
      - alert: JVMMemoryHigh
        expr: >-
          sum by (job, instance) (jvm_memory_used_bytes{area="heap"})
          / sum by (job, instance) (jvm_memory_max_bytes{area="heap"} > 0) > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "JVM memory usage high"
          description: "JVM heap usage is {{ $value | humanizePercentage }}"

      # Both counters carry different labels, so each side is aggregated before
      # dividing; otherwise the division matches no series and never fires.
      - alert: OrderProcessingFailureRateHigh
        expr: sum by (job) (rate(orders_failed_total[5m])) / sum by (job) (rate(orders_created_total[5m])) > 0.05
        for: 3m
        labels:
          severity: critical
        annotations:
          summary: "Order processing failure rate high"
          description: "Order failure rate is {{ $value | humanizePercentage }}"

Docker Compose Setup

yaml
# docker-compose.yml - Prometheus & Grafana
# NOTE: images use the "latest" tag; pin explicit versions for reproducible deployments.
version: '3.8'
services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      # Configuration files are mounted read-only; only the data volume is writable.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./alert_rules.yml:/etc/prometheus/alert_rules.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      # Enables the HTTP reload/quit endpoints; do not expose port 9090 publicly with this on.
      - '--web.enable-lifecycle'
      - '--storage.tsdb.retention.time=30d'

  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # Set GRAFANA_ADMIN_PASSWORD in the environment or an .env file.
      # The "admin" fallback is for local development only.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
      - ./grafana/provisioning:/etc/grafana/provisioning:ro

  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro

volumes:
  prometheus_data:
  grafana_data:

Grafana Dashboards

Application Overview Dashboard

json
{
  "dashboard": {
    "title": "Spring Boot Application Overview",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (method, uri) (rate(http_server_requests_seconds_count[5m]))",
            "legendFormat": "{{method}} {{uri}}"
          }
        ]
      },
      {
        "title": "Response Time Percentiles",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.50, sum by (le) (rate(http_server_requests_seconds_bucket[5m])))",
            "legendFormat": "50th percentile"
          },
          {
            "expr": "histogram_quantile(0.95, sum by (le) (rate(http_server_requests_seconds_bucket[5m])))",
            "legendFormat": "95th percentile"
          },
          {
            "expr": "histogram_quantile(0.99, sum by (le) (rate(http_server_requests_seconds_bucket[5m])))",
            "legendFormat": "99th percentile"
          }
        ]
      },
      {
        "title": "Error Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (status) (rate(http_server_requests_seconds_count{status=~\"4..|5..\"}[5m]))",
            "legendFormat": "{{status}}"
          }
        ]
      },
      {
        "title": "JVM Memory Usage",
        "type": "graph",
        "targets": [
          {
            "expr": "sum(jvm_memory_used_bytes{area=\"heap\"}) / 1024 / 1024",
            "legendFormat": "Heap Used (MB)"
          },
          {
            "expr": "sum(jvm_memory_max_bytes{area=\"heap\"} > 0) / 1024 / 1024",
            "legendFormat": "Heap Max (MB)"
          }
        ]
      }
    ]
  }
}

Business Metrics Dashboard

json
{
  "dashboard": {
    "title": "Business Metrics Dashboard",
    "panels": [
      {
        "title": "Orders Created per Minute",
        "type": "graph",
        "targets": [
          {
            "expr": "sum(rate(orders_created_total[1m])) * 60",
            "legendFormat": "Orders/min"
          }
        ]
      },
      {
        "title": "Order Processing Time",
        "type": "graph",
        "targets": [
          {
            "expr": "max(order_processing_duration_seconds{quantile=\"0.95\"})",
            "legendFormat": "95th percentile"
          }
        ]
      },
      {
        "title": "Active Users",
        "type": "singlestat",
        "targets": [
          {
            "expr": "users_active_current",
            "legendFormat": "Active Users"
          }
        ]
      },
      {
        "title": "Revenue per Hour",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (currency) (increase(order_amount_currency_sum[1h]))",
            "legendFormat": "Revenue/hour {{currency}}"
          }
        ]
      }
    ]
  }
}

Infrastructure Monitoring

java
/**
 * Registers gauges for basic infrastructure readings (CPU, disk, network, DB connections).
 *
 * <p>Each gauge keeps a reference to this bean and calls the matching private method
 * whenever the registry is sampled.
 *
 * <p>NOTE(review): Spring Boot Actuator normally auto-registers its own
 * {@code system.cpu.usage} gauge; a second registration with the same name and tags returns
 * the existing meter, so the CPU gauge below may never be used — confirm and rename if so.
 */
@Component
public class InfrastructureMetrics {

    private final MeterRegistry meterRegistry;

    /**
     * @param meterRegistry registry the gauges are registered with
     */
    public InfrastructureMetrics(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        initializeSystemMetrics();
    }

    /** Registers one gauge per reading; values are computed on each sample. */
    private void initializeSystemMetrics() {
        // CPU usage
        Gauge.builder("system.cpu.usage", this, InfrastructureMetrics::getCpuUsage)
                .register(meterRegistry);

        // Disk usage
        Gauge.builder("system.disk.usage", this, InfrastructureMetrics::getDiskUsage)
                .tag("path", "/")
                .register(meterRegistry);

        // Network I/O
        Gauge.builder("system.network.bytes.sent", this, InfrastructureMetrics::getNetworkBytesSent)
                .register(meterRegistry);

        // Database connection pool
        Gauge.builder("database.connections.active", this, InfrastructureMetrics::getActiveConnections)
                .register(meterRegistry);
    }

    /**
     * @return the JVM process CPU load as reported by the platform MXBean, or 0.0 when the
     *         {@code com.sun.management} extension is not available
     */
    private double getCpuUsage() {
        OperatingSystemMXBean osBean = ManagementFactory.getOperatingSystemMXBean();
        if (osBean instanceof com.sun.management.OperatingSystemMXBean) {
            return ((com.sun.management.OperatingSystemMXBean) osBean).getProcessCpuLoad();
        }
        return 0.0;
    }

    /**
     * @return the fraction (0-1) of the root partition that is not usable, or NaN when the
     *         total size cannot be determined
     */
    private double getDiskUsage() {
        File root = new File("/");
        long totalSpace = root.getTotalSpace();
        if (totalSpace <= 0) {
            // getTotalSpace() returns 0 when the path does not name a partition; avoid 0/0.
            return Double.NaN;
        }
        long usableSpace = root.getUsableSpace();
        return (double) (totalSpace - usableSpace) / totalSpace;
    }

    /** Placeholder: always reports 0.0 until a real network reading is implemented. */
    private double getNetworkBytesSent() {
        // Network metrics implementation
        return 0.0;
    }

    /** Placeholder: always reports 0.0 until a real connection-pool reading is implemented. */
    private double getActiveConnections() {
        // Database connection pool metrics
        return 0.0;
    }
}

This documentation covers Spring Boot Actuator, Micrometer, Prometheus, and Grafana integration, providing a foundation for a production-ready monitoring solution.

Created by Eren Demir.