Skip to content

5.2. Rate Limiting & Throttling

Rate Limiting Strategies

Token Bucket

  • Burst handling
  • Rate configuration
  • Token distribution
  • Overflow handling

Leaky Bucket

  • Constant rate processing
  • Queue management
  • Overflow handling
  • Rate smoothing

Fixed Window

  • Time-based limits
  • Request counting
  • Reset intervals
  • Simple implementation

Throttling Mechanisms

Request Throttling

  • Concurrent request limits
  • Queue depth monitoring
  • Rejection policies
  • Retry mechanisms

Resource Throttling

  • CPU/Memory limits
  • Connection pool limits
  • Thread pool management
  • Database connection limits

Implementation Tools

NGINX

  • limit_req_zone
  • limit_conn_zone
  • Custom rate limiting
  • IP-based limiting

Kong

  • Rate limiting plugin
  • Response rate limiting
  • Custom policies
  • Cluster-wide limiting

AWS API Gateway

  • Usage plans
  • API keys
  • Throttling settings
  • Burst handling

Spring Boot Rate Limiting Implementation

Redis-Based Token Bucket Implementation

java
/**
 * Token bucket rate limiter whose state lives in a Redis hash (fields {@code tokens} and
 * {@code last_refill}) and is read and updated atomically by Lua scripts.
 *
 * <p>Time is taken from the Redis server ({@code TIME}) at one-second resolution, so refills
 * happen in whole-second steps.
 *
 * <p>NOTE(review): the consume script calls {@code TIME} before writing. Redis servers that
 * replicate scripts verbatim reject writes after a non-deterministic command unless
 * {@code redis.replicate_commands()} is called first — confirm the target Redis version.
 */
@Component
@Slf4j
public class RedisTokenBucketRateLimiter {

    /** Refill interval, in seconds, handed to the consume script. */
    private static final String REFILL_INTERVAL_SECONDS = "1";

    /**
     * Refills the bucket, tries to take the requested tokens and persists the new state.
     * Returns 1 when the tokens were taken and 0 otherwise.
     */
    private static final String CONSUME_SCRIPT = """
            local key = KEYS[1]
            local capacity = tonumber(ARGV[1])
            local tokens = tonumber(ARGV[2])
            local interval = tonumber(ARGV[3])
            local requested = tonumber(ARGV[4])

            -- TIME replies with strings; convert once so arithmetic uses numbers
            local now = tonumber(redis.call('TIME')[1])

            local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
            local current_tokens = tonumber(bucket[1]) or capacity
            local last_refill = tonumber(bucket[2]) or now

            -- Refill tokens based on elapsed time (never negative if the clock steps back)
            local elapsed = math.max(0, now - last_refill)
            local new_tokens = math.min(capacity, current_tokens + (elapsed * tokens / interval))

            local allowed = 0
            if new_tokens >= requested then
                new_tokens = new_tokens - requested
                allowed = 1
            end

            redis.call('HMSET', key, 'tokens', new_tokens, 'last_refill', now)

            -- Keep the key at least as long as an empty bucket needs to refill completely.
            -- A shorter TTL would hand out a fresh, full bucket after a brief pause.
            local ttl = interval * 2
            if tokens > 0 then
                ttl = math.max(ttl, math.ceil(capacity * interval / tokens) + interval)
            end
            redis.call('EXPIRE', key, ttl)

            return allowed
        """;

    /** Computes the current bucket level without consuming tokens or writing to Redis. */
    private static final String INFO_SCRIPT = """
            local key = KEYS[1]
            local capacity = tonumber(ARGV[1])
            local tokens_per_second = tonumber(ARGV[2])

            -- Every returned element must be a Lua number: Lua strings come back as bulk
            -- strings, which the Java side cannot cast to Number
            local now = tonumber(redis.call('TIME')[1])

            local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
            local current_tokens = tonumber(bucket[1]) or capacity
            local last_refill = tonumber(bucket[2]) or now

            local elapsed = math.max(0, now - last_refill)
            local new_tokens = math.min(capacity, current_tokens + (elapsed * tokens_per_second))

            return {new_tokens, capacity, now, last_refill}
        """;

    @Autowired
    private StringRedisTemplate redisTemplate;

    @Value("${rate-limiter.default-capacity:100}")
    private int defaultCapacity;

    @Value("${rate-limiter.default-tokens-per-second:10}")
    private int defaultTokensPerSecond;

    /**
     * Tries to take {@code tokens} from the bucket using the configured default capacity and
     * refill rate.
     *
     * @param key    Redis key identifying the bucket
     * @param tokens number of tokens to take
     * @return {@code true} if the tokens were taken, {@code false} if the bucket held too few
     */
    public boolean tryConsume(String key, int tokens) {
        return tryConsume(key, tokens, defaultCapacity, defaultTokensPerSecond);
    }

    /**
     * Tries to take {@code tokens} from the bucket stored under {@code key}.
     *
     * <p>The bucket is refilled for the time elapsed since the last call before the check. The
     * refreshed level and timestamp are stored whether or not the tokens were taken.
     *
     * @param key             Redis key identifying the bucket
     * @param tokens          number of tokens to take
     * @param capacity        maximum number of tokens the bucket can hold
     * @param tokensPerSecond tokens added per second
     * @return {@code true} if the tokens were taken, {@code false} otherwise
     */
    public boolean tryConsume(String key, int tokens, int capacity, int tokensPerSecond) {
        List<String> keys = Arrays.asList(key);
        List<String> args = Arrays.asList(
            String.valueOf(capacity),
            String.valueOf(tokensPerSecond),
            REFILL_INTERVAL_SECONDS,
            String.valueOf(tokens)
        );

        Object result = evalScript(CONSUME_SCRIPT, ReturnType.INTEGER, keys, args);

        return Long.valueOf(1).equals(result);
    }

    /**
     * Returns the current bucket state using the configured default capacity and refill rate.
     *
     * @param key Redis key identifying the bucket
     * @return snapshot of the bucket
     */
    public RateLimitInfo getRateLimitInfo(String key) {
        return getRateLimitInfo(key, defaultCapacity, defaultTokensPerSecond);
    }

    /**
     * Returns the current bucket state without consuming tokens and without modifying Redis.
     *
     * @param key             Redis key identifying the bucket
     * @param capacity        maximum number of tokens the bucket can hold
     * @param tokensPerSecond tokens added per second
     * @return snapshot of the bucket; a full bucket stamped with the local clock when the
     *         script returns no usable reply
     */
    public RateLimitInfo getRateLimitInfo(String key, int capacity, int tokensPerSecond) {
        List<String> keys = Arrays.asList(key);
        List<String> args = Arrays.asList(
            String.valueOf(capacity),
            String.valueOf(tokensPerSecond)
        );

        @SuppressWarnings("unchecked") // ReturnType.MULTI yields a list of reply elements
        List<Object> result = (List<Object>) evalScript(INFO_SCRIPT, ReturnType.MULTI, keys, args);

        if (result != null && result.size() >= 4) {
            double currentTokens = ((Number) result.get(0)).doubleValue();
            int maxCapacity = ((Number) result.get(1)).intValue();
            long now = ((Number) result.get(2)).longValue();
            long lastRefill = ((Number) result.get(3)).longValue();

            return new RateLimitInfo(
                (int) currentTokens,
                maxCapacity,
                tokensPerSecond,
                Instant.ofEpochSecond(now),
                Instant.ofEpochSecond(lastRefill)
            );
        }

        return new RateLimitInfo(capacity, capacity, tokensPerSecond, Instant.now(), Instant.now());
    }

    /**
     * Runs a Lua script on the Redis connection.
     *
     * <p>The connection-level {@code eval} takes keys and arguments as a single
     * {@code byte[]...} sequence (keys first), so both lists are encoded as UTF-8 and
     * concatenated here.
     */
    private Object evalScript(String script, ReturnType returnType,
                              List<String> keys, List<String> args) {
        byte[] scriptBytes = script.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[][] keysAndArgs = java.util.stream.Stream.concat(keys.stream(), args.stream())
            .map(value -> value.getBytes(java.nio.charset.StandardCharsets.UTF_8))
            .toArray(byte[][]::new);

        return redisTemplate.execute(
            (RedisCallback<Object>) connection ->
                connection.eval(scriptBytes, returnType, keys.size(), keysAndArgs)
        );
    }
}

/**
 * Point-in-time view of a token bucket, as built by
 * {@code RedisTokenBucketRateLimiter#getRateLimitInfo}.
 */
@Data
@AllArgsConstructor
public class RateLimitInfo {
    /** Whole tokens available at {@link #currentTime}. */
    private int currentTokens;
    /** Maximum number of tokens the bucket can hold. */
    private int capacity;
    /** Refill rate in tokens per second. */
    private int tokensPerSecond;
    /** Time at which the snapshot was taken. */
    private Instant currentTime;
    /** Time of the last stored refill. */
    private Instant lastRefill;

    /**
     * Estimates how long the bucket needs to become full again.
     *
     * @return whole seconds (rounded up) until {@code currentTokens} reaches {@code capacity}
     *         at the configured rate; {@code 0} when the bucket is already full
     */
    public int getSecondsUntilRefill() {
        if (capacity <= currentTokens) {
            return 0;
        }
        double missingTokens = capacity - currentTokens;
        return (int) Math.ceil(missingTokens / tokensPerSecond);
    }
}

Sliding Window Rate Limiter

java
/**
 * Sliding window rate limiter backed by a Redis sorted set.
 *
 * <p>Every accepted request is stored as one sorted-set entry scored with its timestamp in
 * milliseconds. Entries whose score is at or before the window start are dropped on each call,
 * so the set cardinality is the number of requests in the current window.
 *
 * <p>Timestamps come from {@link System#currentTimeMillis()} on the calling JVM.
 */
@Component
@Slf4j
public class SlidingWindowRateLimiter {

    /** Trims the window, then records the request if the window is below the limit. */
    private static final String CHECK_SCRIPT = """
            local key = KEYS[1]
            local window_start = tonumber(ARGV[1])
            local current_time = tonumber(ARGV[2])
            local limit = tonumber(ARGV[3])
            local member = ARGV[4]

            -- Remove old entries outside the window
            redis.call('ZREMRANGEBYSCORE', key, 0, window_start)

            -- Count current requests in the window
            local current_count = redis.call('ZCARD', key)

            if current_count < limit then
                -- Add current request under a unique member so that two requests in the
                -- same millisecond are counted separately
                redis.call('ZADD', key, ARGV[2], member)
                redis.call('EXPIRE', key, math.ceil((current_time - window_start) / 1000))
                return {1, current_count + 1, limit}
            else
                return {0, current_count, limit}
            end
        """;

    /** Trims the window and reports its size plus the score of the oldest entry, if any. */
    private static final String STATUS_SCRIPT = """
            local key = KEYS[1]
            local window_start = tonumber(ARGV[1])

            -- Remove old entries
            redis.call('ZREMRANGEBYSCORE', key, 0, window_start)

            -- Get current count and oldest entry
            local current_count = redis.call('ZCARD', key)
            local oldest_entries = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')

            if #oldest_entries >= 2 then
                -- Scores arrive as strings; return a number so the reply is an integer
                return {current_count, tonumber(oldest_entries[2])}
            end

            return {current_count}
        """;

    @Autowired
    private StringRedisTemplate redisTemplate;

    /**
     * Records a request and reports whether it fits into the current window.
     *
     * @param key    Redis key identifying the window
     * @param limit  maximum number of requests per window
     * @param window window length
     * @return {@code true} if the request was recorded, {@code false} if the window is already
     *         at {@code limit} or the script returned no usable reply
     */
    public boolean isAllowed(String key, int limit, Duration window) {
        long windowSizeMs = window.toMillis();
        long currentTimeMs = System.currentTimeMillis();
        long windowStart = currentTimeMs - windowSizeMs;

        // The timestamp alone is not unique; ZADD would overwrite an equal member
        String member = currentTimeMs + ":" + java.util.UUID.randomUUID();

        List<String> keys = Arrays.asList(key);
        List<String> args = Arrays.asList(
            String.valueOf(windowStart),
            String.valueOf(currentTimeMs),
            String.valueOf(limit),
            member
        );

        @SuppressWarnings("unchecked") // ReturnType.MULTI yields a list of reply elements
        List<Object> result = (List<Object>) evalScript(CHECK_SCRIPT, keys, args);

        if (result != null && result.size() >= 3) {
            boolean allowed = Long.valueOf(1).equals(result.get(0));
            long currentCount = ((Number) result.get(1)).longValue();
            long maxLimit = ((Number) result.get(2)).longValue();

            log.debug("Rate limit check for key {}: allowed={}, count={}/{}",
                key, allowed, currentCount, maxLimit);

            return allowed;
        }

        return false;
    }

    /**
     * Reports how many requests are in the current window and when the oldest one was made.
     *
     * <p>Expired entries are removed as a side effect; no request is recorded.
     *
     * @param key    Redis key identifying the window
     * @param window window length
     * @return current window status; count {@code 0} and no oldest request when the window is
     *         empty or the script returned no usable reply
     */
    public RateLimitStatus getRateLimitStatus(String key, Duration window) {
        long windowSizeMs = window.toMillis();
        long currentTimeMs = System.currentTimeMillis();
        long windowStart = currentTimeMs - windowSizeMs;

        List<String> keys = Arrays.asList(key);
        List<String> args = Arrays.asList(String.valueOf(windowStart));

        @SuppressWarnings("unchecked") // ReturnType.MULTI yields a list of reply elements
        List<Object> result = (List<Object>) evalScript(STATUS_SCRIPT, keys, args);

        if (result != null && !result.isEmpty()) {
            long currentCount = ((Number) result.get(0)).longValue();
            // The second element is present only when the window holds at least one entry
            Instant oldestRequest = result.size() >= 2
                ? Instant.ofEpochMilli(((Number) result.get(1)).longValue())
                : null;

            return new RateLimitStatus(
                (int) currentCount,
                oldestRequest,
                Instant.ofEpochMilli(currentTimeMs),
                window
            );
        }

        return new RateLimitStatus(0, null, Instant.ofEpochMilli(currentTimeMs), window);
    }

    /**
     * Runs a Lua script with a multi-element reply on the Redis connection.
     *
     * <p>The connection-level {@code eval} takes keys and arguments as a single
     * {@code byte[]...} sequence (keys first), so both lists are encoded as UTF-8 and
     * concatenated here.
     */
    private Object evalScript(String script, List<String> keys, List<String> args) {
        byte[] scriptBytes = script.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[][] keysAndArgs = java.util.stream.Stream.concat(keys.stream(), args.stream())
            .map(value -> value.getBytes(java.nio.charset.StandardCharsets.UTF_8))
            .toArray(byte[][]::new);

        return redisTemplate.execute(
            (RedisCallback<Object>) connection ->
                connection.eval(scriptBytes, ReturnType.MULTI, keys.size(), keysAndArgs)
        );
    }
}

/**
 * Point-in-time view of a sliding window, as built by
 * {@code SlidingWindowRateLimiter#getRateLimitStatus}.
 */
@Data
@AllArgsConstructor
public class RateLimitStatus {
    /** Number of requests currently inside the window. */
    private int currentCount;
    /** Time of the oldest request still inside the window; {@code null} if there is none. */
    private Instant oldestRequest;
    /** Time at which the status was taken. */
    private Instant currentTime;
    /** Length of the window. */
    private Duration windowSize;

    /**
     * Returns the moment the oldest request leaves the window.
     *
     * @return {@code oldestRequest + windowSize}, or {@code currentTime + windowSize} when no
     *         oldest request is known
     */
    public Instant getWindowResetTime() {
        Instant windowAnchor = (oldestRequest == null) ? currentTime : oldestRequest;
        return windowAnchor.plus(windowSize);
    }

    /**
     * Returns the time remaining until {@link #getWindowResetTime()}.
     *
     * @return duration from {@code currentTime} to the window reset time
     */
    public Duration getTimeUntilReset() {
        return Duration.between(currentTime, getWindowResetTime());
    }
}

Rate Limiting Interceptor

java
@Component
@Order(1)
public class RateLimitingInterceptor implements HandlerInterceptor {
    
    @Autowired
    private RedisTokenBucketRateLimiter tokenBucketLimiter;
    
    @Autowired
    private SlidingWindowRateLimiter slidingWindowLimiter;
    
    @Autowired
    private RateLimitConfigurationService configService;
    
    @Override
    public boolean preHandle(HttpServletRequest request, HttpServletResponse response, 
                           Object handler) throws Exception {
        
        // Skip rate limiting for health checks
        if (request.getRequestURI().startsWith("/actuator/health")) {
            return true;
        }
        
        String clientId = getClientIdentifier(request);
        String endpoint = getEndpointIdentifier(request);
        
        // Get rate limit configuration
        RateLimitConfig config = configService.getRateLimitConfig(clientId, endpoint);
        
        if (config == null) {
            return true; // No rate limiting configured
        }
        
        boolean allowed = false;
        RateLimitResult result = null;
        
        switch (config.getStrategy()) {
            case TOKEN_BUCKET:
                allowed = tokenBucketLimiter.tryConsume(
                    "rate_limit:tb:" + clientId + ":" + endpoint,
                    1,
                    config.getCapacity(),
                    config.getTokensPerSecond()
                );
                
                if (!allowed) {
                    RateLimitInfo info = tokenBucketLimiter.getRateLimitInfo(
                        "rate_limit:tb:" + clientId + ":" + endpoint,
                        config.getCapacity(),
                        config.getTokensPerSecond()
                    );
                    result = new RateLimitResult(false, info.getCurrentTokens(), 
                        config.getCapacity(), info.getSecondsUntilRefill());
                }
                break;
                
            case SLIDING_WINDOW:
                allowed = slidingWindowLimiter.isAllowed(
                    "rate_limit:sw:" + clientId + ":" + endpoint,
                    config.getLimit(),
                    config.getWindow()
                );
                
                if (!allowed) {
                    RateLimitStatus status = slidingWindowLimiter.getRateLimitStatus(
                        "rate_limit:sw:" + clientId + ":" + endpoint,
                        config.getWindow()
                    );
                    result = new RateLimitResult(false, status.getCurrentCount(),
                        config.getLimit(), (int) status.getTimeUntilReset().getSeconds());
                }
                break;
        }
        
        if (!allowed) {
            handleRateLimitExceeded(response, result, config);
            return false;
        }
        
        // Add rate limit headers
        addRateLimitHeaders(response, config, clientId, endpoint);
        
        return true;
    }
    
    private String getClientIdentifier(HttpServletRequest request) {
        // Try API key first
        String apiKey = request.getHeader("X-API-Key");
        if (apiKey != null) {
            return "api_key:" + apiKey;
        }
        
        // Try authenticated user
        String userId = request.getHeader("X-User-ID");
        if (userId != null) {
            return "user:" + userId;
        }
        
        // Fall back to IP address
        String clientIp = getClientIpAddress(request);
        return "ip:" + clientIp;
    }
    
    private String getClientIpAddress(HttpServletRequest request) {
        String xForwardedFor = request.getHeader("X-Forwarded-For");
        if (xForwardedFor != null && !xForwardedFor.isEmpty()) {
            return xForwardedFor.split(",")[0].trim();
        }
        
        String xRealIp = request.getHeader("X-Real-IP");
        if (xRealIp != null && !xRealIp.isEmpty()) {
            return xRealIp;
        }
        
        return request.getRemoteAddr();
    }
    
    private String getEndpointIdentifier(HttpServletRequest request) {
        String method = request.getMethod();
        String path = request.getRequestURI();
        
        // Normalize path (remove path parameters, query parameters)
        path = path.replaceAll("/\\d+", "/{id}");
        path = path.replaceAll("\\?.*", "");
        
        return method + ":" + path;
    }
    
    private void handleRateLimitExceeded(HttpServletResponse response, 
                                       RateLimitResult result, 
                                       RateLimitConfig config) throws IOException {
        response.setStatus(HttpStatus.TOO_MANY_REQUESTS.value());
        response.setContentType("application/json");
        response.setHeader("Retry-After", String.valueOf(result.getRetryAfterSeconds()));
        
        RateLimitErrorResponse errorResponse = new RateLimitErrorResponse(
            "Rate limit exceeded",
            "Too many requests. Current: " + result.getCurrentCount() + 
            ", Limit: " + result.getLimit(),
            result.getRetryAfterSeconds(),
            config.getStrategy().name().toLowerCase()
        );
        
        ObjectMapper objectMapper = new ObjectMapper();
        response.getWriter().write(objectMapper.writeValueAsString(errorResponse));
    }
    
    private void addRateLimitHeaders(HttpServletResponse response, 
                                   RateLimitConfig config, 
                                   String clientId, 
                                   String endpoint) {
        try {
            switch (config.getStrategy()) {
                case TOKEN_BUCKET:
                    RateLimitInfo info = tokenBucketLimiter.getRateLimitInfo(
                        "rate_limit:tb:" + clientId + ":" + endpoint,
                        config.getCapacity(),
                        config.getTokensPerSecond()
                    );
                    response.setHeader("X-RateLimit-Limit", String.valueOf(config.getCapacity()));
                    response.setHeader("X-RateLimit-Remaining", String.valueOf(info.getCurrentTokens()));
                    response.setHeader("X-RateLimit-Reset", String.valueOf(info.getSecondsUntilRefill()));
                    break;
                    
                case SLIDING_WINDOW:
                    RateLimitStatus status = slidingWindowLimiter.getRateLimitStatus(
                        "rate_limit:sw:" + clientId + ":" + endpoint,
                        config.getWindow()
                    );
                    response.setHeader("X-RateLimit-Limit", String.valueOf(config.getLimit()));
                    response.setHeader("X-RateLimit-Remaining", 
                        String.valueOf(config.getLimit() - status.getCurrentCount()));
                    response.setHeader("X-RateLimit-Reset", 
                        String.valueOf(status.getWindowResetTime().getEpochSecond()));
                    break;
            }
        } catch (Exception e) {
            log.warn("Failed to add rate limit headers", e);
        }
    }
}

/**
 * Details of a rejected request, built by {@code RateLimitingInterceptor} and used to fill the
 * 429 response.
 */
@Data
@AllArgsConstructor
public class RateLimitResult {
    /** Whether the request was allowed; the interceptor only creates instances with {@code false}. */
    private boolean allowed;
    /**
     * Current usage figure. The interceptor stores the remaining tokens here for the token
     * bucket and the number of requests in the window for the sliding window.
     */
    private int currentCount;
    /** Configured limit: bucket capacity or requests per window. */
    private int limit;
    /** Value sent in the {@code Retry-After} header, in seconds. */
    private int retryAfterSeconds;
}

/** JSON body written by {@code RateLimitingInterceptor} when a request is rejected. */
@Data
@AllArgsConstructor
public class RateLimitErrorResponse {
    /** Short error title. */
    private String error;
    /** Human-readable detail including the current count and the limit. */
    private String message;
    /** Seconds the client should wait before retrying; mirrors the {@code Retry-After} header. */
    private int retryAfterSeconds;
    /** Lower-cased name of the strategy that rejected the request. */
    private String strategy;
}

Rate Limit Configuration Service

java
/**
 * Resolves the rate limit configuration for a client and endpoint.
 *
 * <p>Overrides are stored in Redis per client and per endpoint. When neither exists, a
 * built-in default is chosen from the prefix of the client identifier.
 */
@Service
@Slf4j
public class RateLimitConfigurationService {

    @Autowired
    private RedisTemplate<String, Object> redisTemplate;

    private final Map<String, RateLimitConfig> defaultConfigs = new HashMap<>();

    /** Registers the built-in {@code default}, {@code premium} and {@code public} tiers. */
    @PostConstruct
    public void initializeDefaultConfigs() {
        Duration oneMinute = Duration.ofMinutes(1);

        // Default configurations
        RateLimitConfig standardTier =
            new RateLimitConfig(RateLimitStrategy.TOKEN_BUCKET, 100, 10, oneMinute);
        RateLimitConfig premiumTier =
            new RateLimitConfig(RateLimitStrategy.TOKEN_BUCKET, 1000, 100, oneMinute);
        RateLimitConfig publicTier =
            new RateLimitConfig(RateLimitStrategy.SLIDING_WINDOW, 50, 0, oneMinute);

        defaultConfigs.put("default", standardTier);
        defaultConfigs.put("premium", premiumTier);
        defaultConfigs.put("public", publicTier);

        log.info("Rate limit configurations initialized: {}", defaultConfigs.keySet());
    }

    /**
     * Looks up the configuration to apply, in order of precedence: client override, endpoint
     * override, built-in tier.
     *
     * @param clientId client identifier; its prefix selects the built-in tier
     * @param endpoint endpoint identifier
     * @return the configuration to apply
     */
    public RateLimitConfig getRateLimitConfig(String clientId, String endpoint) {
        // Client-specific configuration wins
        RateLimitConfig clientOverride = readOverride(clientConfigKey(clientId));
        if (clientOverride != null) {
            return clientOverride;
        }

        // The endpoint override is consulted only when no client override exists
        RateLimitConfig endpointOverride = readOverride(endpointConfigKey(endpoint));
        if (endpointOverride != null) {
            return endpointOverride;
        }

        // Determine default based on client type
        String tier;
        if (clientId.startsWith("api_key:")) {
            tier = "premium";
        } else if (clientId.startsWith("user:")) {
            tier = "default";
        } else {
            tier = "public";
        }
        return defaultConfigs.get(tier);
    }

    /**
     * Stores a client-specific override; the entry is written with a one-day expiry.
     *
     * @param clientId client identifier
     * @param config   configuration to store
     */
    public void setClientRateLimit(String clientId, RateLimitConfig config) {
        redisTemplate.opsForValue().set(clientConfigKey(clientId), config, Duration.ofDays(1));
        log.info("Rate limit configuration set for client {}: {}", clientId, config);
    }

    /**
     * Stores an endpoint-specific override; the entry is written with a one-day expiry.
     *
     * @param endpoint endpoint identifier
     * @param config   configuration to store
     */
    public void setEndpointRateLimit(String endpoint, RateLimitConfig config) {
        redisTemplate.opsForValue().set(endpointConfigKey(endpoint), config, Duration.ofDays(1));
        log.info("Rate limit configuration set for endpoint {}: {}", endpoint, config);
    }

    /**
     * Deletes the client-specific override, if any.
     *
     * @param clientId client identifier
     */
    public void removeClientRateLimit(String clientId) {
        redisTemplate.delete(clientConfigKey(clientId));
        log.info("Rate limit configuration removed for client: {}", clientId);
    }

    /** Reads a stored override; {@code null} when the key holds no value. */
    private RateLimitConfig readOverride(String configKey) {
        return (RateLimitConfig) redisTemplate.opsForValue().get(configKey);
    }

    /** Redis key of the override for a client. */
    private static String clientConfigKey(String clientId) {
        return "rate_limit:config:client:" + clientId;
    }

    /** Redis key of the override for an endpoint. */
    private static String endpointConfigKey(String endpoint) {
        return "rate_limit:config:endpoint:" + endpoint;
    }
}

/**
 * Rate limit settings for one client or endpoint.
 *
 * <p>One class serves both implemented strategies: {@code capacity} doubles as the request
 * limit for the sliding window (see {@link #getLimit()}), while {@code tokensPerSecond} and
 * {@code window} are each used by one strategy only.
 *
 * <p>NOTE(review): with {@code @Data}, {@code getLimit()} looks like a read-only {@code limit}
 * property to bean-based serializers — confirm it round-trips with the serializer configured
 * on the {@code RedisTemplate} that stores these objects.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class RateLimitConfig implements Serializable {
    /** Algorithm that enforces this configuration. */
    private RateLimitStrategy strategy;
    private int capacity; // For token bucket or limit for sliding window
    private int tokensPerSecond; // For token bucket only
    private Duration window; // For sliding window only
    
    // Convenience constructors

    /**
     * Creates a sliding window configuration.
     *
     * @param limit  maximum number of requests per window (stored in {@code capacity})
     * @param window window length
     */
    public RateLimitConfig(int limit, Duration window) {
        this(RateLimitStrategy.SLIDING_WINDOW, limit, 0, window);
    }
    
    /**
     * Creates a token bucket configuration; {@code window} is set to one minute.
     *
     * @param capacity        maximum number of tokens in the bucket
     * @param tokensPerSecond refill rate
     */
    public RateLimitConfig(int capacity, int tokensPerSecond) {
        this(RateLimitStrategy.TOKEN_BUCKET, capacity, tokensPerSecond, Duration.ofMinutes(1));
    }
    
    /**
     * Returns the request limit for the sliding window strategy.
     *
     * @return the value of {@code capacity}
     */
    public int getLimit() {
        return capacity;
    }
}

/** Rate limiting algorithms a configuration or annotation can select. */
public enum RateLimitStrategy {
    /** Token bucket, implemented by {@code RedisTokenBucketRateLimiter}. */
    TOKEN_BUCKET,
    /** Sliding window, implemented by {@code SlidingWindowRateLimiter}. */
    SLIDING_WINDOW,
    /** Fixed window. NOTE(review): no limiter for this strategy is visible in this file. */
    FIXED_WINDOW
}

Custom Rate Limiting Annotations

java
/**
 * Marks a method as rate limited. Calls are intercepted by {@code RateLimitingAspect}, which
 * throws {@code RateLimitExceededException} when the limit is exceeded.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface RateLimit {
    /** Number of requests allowed per {@link #period()}. */
    int value() default 100; // requests per period
    /** Length of the period, in seconds. */
    int period() default 60; // period in seconds
    /** Algorithm used to enforce the limit. */
    RateLimitStrategy strategy() default RateLimitStrategy.TOKEN_BUCKET;
    /**
     * Optional SpEL expression producing the rate limit key. The aspect exposes the variables
     * {@code #request}, {@code #method} and {@code #args}. When empty, a default key is used.
     */
    String key() default ""; // custom key expression
    /** Message of the exception thrown when the limit is exceeded. */
    String message() default "Rate limit exceeded";
}

/**
 * Aspect that enforces {@link RateLimit} on annotated methods.
 *
 * <p>The call proceeds when the limiter allows it, and a {@link RateLimitExceededException} is
 * thrown otherwise. Calls made outside a servlet request are never limited. A strategy this
 * aspect has no limiter for is logged and the call is allowed.
 */
@Aspect
@Component
@Slf4j
public class RateLimitingAspect {

    /** Shared parser for key expressions. */
    private static final SpelExpressionParser SPEL_PARSER = new SpelExpressionParser();

    /** Parsed key expressions by source text, so each annotation value is parsed only once. */
    private final java.util.Map<String, Expression> parsedKeyExpressions =
        new java.util.concurrent.ConcurrentHashMap<>();

    @Autowired
    private RedisTokenBucketRateLimiter tokenBucketLimiter;

    @Autowired
    private SlidingWindowRateLimiter slidingWindowLimiter;

    /**
     * Checks the rate limit for the intercepted call and proceeds if it is within the limit.
     *
     * @param joinPoint the intercepted method call
     * @param rateLimit the annotation on the method
     * @return the result of the intercepted method
     * @throws RateLimitExceededException if the limit is exceeded
     * @throws IllegalArgumentException   if a token bucket limit declares a non-positive period
     * @throws Throwable                  whatever the intercepted method throws
     */
    @Around("@annotation(rateLimit)")
    public Object handleRateLimit(ProceedingJoinPoint joinPoint, RateLimit rateLimit) throws Throwable {
        HttpServletRequest request = getCurrentRequest();
        if (request == null) {
            return joinPoint.proceed();
        }

        String key = buildRateLimitKey(joinPoint, rateLimit, request);
        boolean allowed;

        switch (rateLimit.strategy()) {
            case TOKEN_BUCKET:
                allowed = tokenBucketLimiter.tryConsume(key, 1, rateLimit.value(),
                    tokensPerSecond(rateLimit));
                break;

            case SLIDING_WINDOW:
                allowed = slidingWindowLimiter.isAllowed(key, rateLimit.value(),
                    Duration.ofSeconds(rateLimit.period()));
                break;

            default:
                // Without this branch the call would be rejected unconditionally
                log.warn("No rate limiter available for strategy {}; call to {} allowed",
                    rateLimit.strategy(), joinPoint.getSignature().toShortString());
                allowed = true;
                break;
        }

        if (!allowed) {
            throw new RateLimitExceededException(rateLimit.message());
        }

        return joinPoint.proceed();
    }

    /**
     * Converts {@code value} requests per {@code period} seconds into whole tokens per second.
     *
     * <p>The limiter only accepts whole numbers. Integer division yields 0 whenever
     * {@code value < period}, which would leave the bucket empty for good, so the rate is
     * floored at one token per second.
     */
    private static int tokensPerSecond(RateLimit rateLimit) {
        int period = rateLimit.period();
        if (period <= 0) {
            throw new IllegalArgumentException(
                "@RateLimit period must be positive, but was " + period);
        }
        return Math.max(1, rateLimit.value() / period);
    }

    /**
     * Builds the Redis key for the call: the custom SpEL key when one is declared and yields a
     * non-empty value, otherwise a key derived from the method and the client.
     */
    private String buildRateLimitKey(ProceedingJoinPoint joinPoint, RateLimit rateLimit,
                                   HttpServletRequest request) {
        if (!rateLimit.key().isEmpty()) {
            // Use SpEL for custom key
            String customKey = evaluateSpelExpression(rateLimit.key(), joinPoint, request);
            if (customKey != null && !customKey.isEmpty()) {
                return customKey;
            }
            log.debug("Rate limit key expression '{}' produced no value; using default key",
                rateLimit.key());
        }

        // Default key: declaring type + method + client identifier. The type is included so
        // that equally named methods of different classes do not share a limit.
        String methodName = joinPoint.getSignature().getDeclaringTypeName()
            + "." + joinPoint.getSignature().getName();
        String clientId = getClientIdentifier(request);
        return "rate_limit:" + methodName + ":" + clientId;
    }

    /**
     * Evaluates a key expression with the variables {@code #request} (the servlet request),
     * {@code #method} (the method name) and {@code #args} (the call arguments).
     *
     * @return the expression result as a string; may be {@code null}
     */
    private String evaluateSpelExpression(String expression, ProceedingJoinPoint joinPoint,
                                        HttpServletRequest request) {
        Expression exp = parsedKeyExpressions.computeIfAbsent(
            expression, SPEL_PARSER::parseExpression);

        StandardEvaluationContext context = new StandardEvaluationContext();
        context.setVariable("request", request);
        context.setVariable("method", joinPoint.getSignature().getName());
        context.setVariable("args", joinPoint.getArgs());

        return exp.getValue(context, String.class);
    }

    /** Returns the servlet request bound to the current thread, or {@code null} if none. */
    private HttpServletRequest getCurrentRequest() {
        RequestAttributes requestAttributes = RequestContextHolder.getRequestAttributes();
        if (requestAttributes instanceof ServletRequestAttributes) {
            return ((ServletRequestAttributes) requestAttributes).getRequest();
        }
        return null;
    }

    /**
     * Derives the client identity: API key, then user id, then the socket address.
     *
     * <p>NOTE(review): the two headers are supplied with the request, so a caller can change
     * identity at will unless a trusted gateway sets or strips them. Behind a proxy,
     * {@code getRemoteAddr()} is the proxy address — confirm the deployment.
     */
    private String getClientIdentifier(HttpServletRequest request) {
        String apiKey = request.getHeader("X-API-Key");
        if (apiKey != null) {
            return "api_key:" + apiKey;
        }

        String userId = request.getHeader("X-User-ID");
        if (userId != null) {
            return "user:" + userId;
        }

        return "ip:" + request.getRemoteAddr();
    }
}

/**
 * Thrown by {@code RateLimitingAspect} when a rate limited method is called too often.
 * The {@code @ResponseStatus} annotation maps it to HTTP 429 (Too Many Requests).
 */
@ResponseStatus(HttpStatus.TOO_MANY_REQUESTS)
public class RateLimitExceededException extends RuntimeException {
    /**
     * Creates the exception.
     *
     * @param message detail message, taken from {@code RateLimit#message()} by the aspect
     */
    public RateLimitExceededException(String message) {
        super(message);
    }
}

// Usage example
/**
 * Example controller showing {@code @RateLimit} on handler methods.
 *
 * <p>NOTE(review): {@code userService} is used below but not declared in this snippet; the
 * field and its injection must be added before this compiles.
 */
@RestController
@RequestMapping("/api/users")
public class RateLimitedUserController {
    
    /**
     * Returns a user by id. Limited to 100 requests per 60 seconds with the token bucket
     * strategy, using the aspect's default key.
     */
    @GetMapping("/{id}")
    @RateLimit(value = 100, period = 60, strategy = RateLimitStrategy.TOKEN_BUCKET)
    public ResponseEntity<User> getUser(@PathVariable Long id) {
        // Method implementation
        return ResponseEntity.ok(userService.findById(id));
    }
    
    /**
     * Creates a user. Limited to 10 requests per 60 seconds with the sliding window strategy,
     * keyed on the {@code X-User-ID} header.
     *
     * <p>In the key expression, {@code #request} is the variable the aspect binds to the
     * servlet request, not this method's {@code request} parameter.
     */
    @PostMapping
    @RateLimit(value = 10, period = 60, strategy = RateLimitStrategy.SLIDING_WINDOW,
               key = "'create_user:' + #request.getHeader('X-User-ID')")
    public ResponseEntity<User> createUser(@RequestBody UserRequest request) {
        // Method implementation
        return ResponseEntity.status(HttpStatus.CREATED).body(userService.create(request));
    }
}

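This implementation integrates token bucket and sliding window rate limiting with Spring Boot and Redis, providing a foundation for production use. The FIXED_WINDOW strategy is declared in RateLimitStrategy, but this section does not include a limiter that implements it.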

Created by Eren Demir.