Database Sharding and Partitioning - Spring Boot Data Management
Database sharding and partitioning are key techniques for managing data in large-scale applications. By splitting data across multiple databases or tables, they improve performance, provide scalability, and enhance system resilience.
Horizontal Sharding Strategies
1. Range-Based Sharding
Data is distributed based on value ranges of the shard key.
Advantages:
- Simple to implement and understand
- Range queries are efficient
- Natural data organization
Disadvantages:
- Risk of hotspots when traffic concentrates in one key range
- Uneven data distribution if key values are skewed
- Manual rebalancing may be required as shards grow
2. Hash-Based Sharding
Data is distributed using a hash function applied to the shard key.
Advantages:
- Uniform data distribution
- Automatic load balancing
- Reduces hotspots caused by skewed key ranges
Disadvantages:
- Range queries require scanning all shards
- Resharding is complex
- Hash function changes affect all data
3. Directory-Based Sharding
A lookup service maintains the mapping between shard keys and shard locations.
Advantages:
- Flexible shard assignment
- Easy to add/remove shards
- Custom distribution logic
Disadvantages:
- Additional lookup overhead
- Directory service becomes a potential bottleneck
- Increased system complexity
yaml
# Sharding Strategies
strategies:
  range_based:
    description: "Distribute data based on key ranges"
    example:
      - key_range: "A-M"
        shard: "shard_1"
        users: ["Alice", "Bob", "Michael"]
      - key_range: "N-Z"
        shard: "shard_2"
        users: ["Nancy", "Peter", "Zoe"]
  hash_based:
    description: "Use hash function for uniform distribution"
    example:
      hash_function: "user_id % 4"
      shards: ["shard_0", "shard_1", "shard_2", "shard_3"]
      distribution: "Even distribution regardless of user_id values"
  directory_based:
    description: "Lookup service manages shard mapping"
    example:
      lookup_service: "shard_coordinator"
      mapping: "user_id -> shard_id"
      flexibility: "Custom rules, geographic placement, load balancing"
Sharding Implementation with Spring Boot
1. Custom Sharding Configuration
java
@Configuration
@EnableJpaRepositories(
    basePackages = "com.example.sharding.repository",
    repositoryFactoryBeanClass = ShardingRepositoryFactoryBean.class
)
public class ShardingConfiguration {

    @Bean
    @Primary
    public ShardingDataSource shardingDataSource() {
        Map<String, DataSource> dataSourceMap = new HashMap<>();

        // Shard 1 - Users A-M
        dataSourceMap.put("shard1", createDataSource(
            "jdbc:postgresql://shard1-db:5432/users",
            "shard1_user", "shard1_pass"
        ));

        // Shard 2 - Users N-Z
        dataSourceMap.put("shard2", createDataSource(
            "jdbc:postgresql://shard2-db:5432/users",
            "shard2_user", "shard2_pass"
        ));

        return new ShardingDataSource(dataSourceMap);
    }

    private DataSource createDataSource(String url, String username, String password) {
        HikariConfig config = new HikariConfig();
        config.setJdbcUrl(url);
        config.setUsername(username);
        config.setPassword(password);
        config.setMaximumPoolSize(20);
        config.setMinimumIdle(5);
        config.setConnectionTimeout(30000);
        config.setIdleTimeout(600000);
        config.setMaxLifetime(1800000);
        return new HikariDataSource(config);
    }
}
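The ShardingDataSource used above is not a Spring class; it is a small custom holder for the per-shard data sources. A minimal sketch, assuming it only needs to expose the shard map to callers such as the repository shown later. (The ShardingRepositoryFactoryBean referenced in @EnableJpaRepositories is likewise a custom class and is not shown here.)
java
import java.util.Collections;
import java.util.Map;
import javax.sql.DataSource;

// Minimal custom holder for the shard DataSources created in ShardingConfiguration.
// This is an illustrative sketch, not a framework class.
public class ShardingDataSource {

    private final Map<String, DataSource> dataSources;

    public ShardingDataSource(Map<String, DataSource> dataSources) {
        this.dataSources = Collections.unmodifiableMap(dataSources);
    }

    // Used to build one JdbcTemplate per shard
    public Map<String, DataSource> getDataSources() {
        return dataSources;
    }

    public DataSource getDataSource(String shardName) {
        DataSource dataSource = dataSources.get(shardName);
        if (dataSource == null) {
            throw new IllegalArgumentException("Unknown shard: " + shardName);
        }
        return dataSource;
    }
}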
2. Sharding Strategy Interface
java
public interface ShardingStrategy {
    String determineShard(Object shardingKey);
}

@Component
public class UserIdHashShardingStrategy implements ShardingStrategy {

    private static final int SHARD_COUNT = 4;

    @Override
    public String determineShard(Object shardingKey) {
        if (shardingKey instanceof Long) {
            Long userId = (Long) shardingKey;
            // floorMod avoids negative indices for negative hash codes
            int shardIndex = Math.floorMod(userId.hashCode(), SHARD_COUNT);
            // The returned names must match the keys registered in ShardingDataSource
            return "shard" + shardIndex;
        }
        throw new IllegalArgumentException("Invalid sharding key type");
    }
}

@Component
public class UserNameRangeShardingStrategy implements ShardingStrategy {

    @Override
    public String determineShard(Object shardingKey) {
        if (shardingKey instanceof String) {
            String username = (String) shardingKey;
            char firstChar = Character.toLowerCase(username.charAt(0));
            if (firstChar >= 'a' && firstChar <= 'm') {
                return "shard1";
            } else {
                return "shard2";
            }
        }
        throw new IllegalArgumentException("Invalid sharding key type");
    }
}
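To make the routing concrete, here is a small standalone demo of the two strategies. It is illustrative only; the printed shard names follow directly from the code above.
java
// Illustrative only: routes a few sample keys through the strategies above.
public class ShardingStrategyDemo {

    public static void main(String[] args) {
        ShardingStrategy hashStrategy = new UserIdHashShardingStrategy();
        ShardingStrategy rangeStrategy = new UserNameRangeShardingStrategy();

        System.out.println(hashStrategy.determineShard(42L));      // "shard2" (floorMod(42, 4) = 2)
        System.out.println(rangeStrategy.determineShard("Alice")); // "shard1" (first letter in A-M)
        System.out.println(rangeStrategy.determineShard("Zoe"));   // "shard2" (first letter in N-Z)
    }
}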
3. Sharding Aware Repository
java
@Repository
public class ShardedUserRepository {

    private static final Logger log = LoggerFactory.getLogger(ShardedUserRepository.class);

    private final Map<String, JdbcTemplate> shardTemplates;
    private final ShardingStrategy shardingStrategy;

    public ShardedUserRepository(
            ShardingDataSource shardingDataSource,
            @Qualifier("userIdHashShardingStrategy") ShardingStrategy shardingStrategy) {
        this.shardingStrategy = shardingStrategy;
        this.shardTemplates = new HashMap<>();
        shardingDataSource.getDataSources().forEach((shardName, dataSource) ->
            shardTemplates.put(shardName, new JdbcTemplate(dataSource)));
    }

    public User findById(Long userId) {
        String shardName = shardingStrategy.determineShard(userId);
        JdbcTemplate template = shardTemplates.get(shardName);
        try {
            return template.queryForObject(
                "SELECT * FROM users WHERE id = ?",
                new UserRowMapper(),
                userId
            );
        } catch (EmptyResultDataAccessException e) {
            return null;
        }
    }

    public void save(User user) {
        String shardName = shardingStrategy.determineShard(user.getId());
        JdbcTemplate template = shardTemplates.get(shardName);
        template.update(
            "INSERT INTO users (id, username, email, created_at) VALUES (?, ?, ?, ?)",
            user.getId(), user.getUsername(), user.getEmail(), user.getCreatedAt()
        );
    }

    public List<User> findAll() {
        List<User> allUsers = new ArrayList<>();

        // Query every shard in parallel, then merge the results
        List<CompletableFuture<List<User>>> futures = shardTemplates.values()
            .stream()
            .map(template -> CompletableFuture.supplyAsync(() ->
                template.query(
                    "SELECT * FROM users ORDER BY created_at DESC",
                    new UserRowMapper()
                )))
            .collect(Collectors.toList());

        futures.forEach(future -> {
            try {
                allUsers.addAll(future.get());
            } catch (Exception e) {
                log.error("Error fetching from shard", e);
            }
        });

        return allUsers.stream()
            .sorted(Comparator.comparing(User::getCreatedAt).reversed())
            .collect(Collectors.toList());
    }
}
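The UserRowMapper used in these queries is not shown above. A minimal sketch, assuming a User class with the usual getters and setters for id, username, email and createdAt:
java
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import org.springframework.jdbc.core.RowMapper;

// Maps rows of the users table (id, username, email, created_at) to User objects.
public class UserRowMapper implements RowMapper<User> {

    @Override
    public User mapRow(ResultSet rs, int rowNum) throws SQLException {
        User user = new User();
        user.setId(rs.getLong("id"));
        user.setUsername(rs.getString("username"));
        user.setEmail(rs.getString("email"));
        Timestamp createdAt = rs.getTimestamp("created_at");
        user.setCreatedAt(createdAt != null ? createdAt.toLocalDateTime() : null);
        return user;
    }
}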
Partitioning Strategies
1. Horizontal Partitioning (Sharding)
sql
-- Range-based partitioning (PostgreSQL declarative partitioning)
-- Note: on a partitioned table, the primary key must include the partition key
CREATE TABLE users_2023 (
    id BIGSERIAL,
    username VARCHAR(50) NOT NULL,
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);

CREATE TABLE users_2023_q1 PARTITION OF users_2023
    FOR VALUES FROM ('2023-01-01') TO ('2023-04-01');

CREATE TABLE users_2023_q2 PARTITION OF users_2023
    FOR VALUES FROM ('2023-04-01') TO ('2023-07-01');
2. Vertical Partitioning
java
// Basic user information (frequently accessed)
@Entity
@Table(name = "user_profiles")
public class UserProfile {

    @Id
    private Long userId;

    private String username;
    private String email;
    private LocalDateTime createdAt;
}

// Detailed user information (less frequently accessed)
@Entity
@Table(name = "user_details")
public class UserDetail {

    @Id
    private Long userId;

    private String bio;
    private String preferences;

    @Lob // large binary payload kept out of the hot user_profiles table
    private byte[] profileImage;
}
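The benefit of this split shows up in the access paths: listing screens only need the narrow user_profiles table, while the wide user_details row is loaded on demand. A sketch, assuming Spring Data JPA; the repository and service names are illustrative:
java
import java.util.List;
import java.util.Optional;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Service;

// Separate repositories so listing queries never touch the wide table.
interface UserProfileRepository extends JpaRepository<UserProfile, Long> {
}

interface UserDetailRepository extends JpaRepository<UserDetail, Long> {
}

@Service
public class UserQueryService {

    private final UserProfileRepository profileRepository;
    private final UserDetailRepository detailRepository;

    public UserQueryService(UserProfileRepository profileRepository,
                            UserDetailRepository detailRepository) {
        this.profileRepository = profileRepository;
        this.detailRepository = detailRepository;
    }

    // Listing pages read only the narrow user_profiles table
    public List<UserProfile> listProfiles() {
        return profileRepository.findAll();
    }

    // bio, preferences and profileImage are loaded only when actually needed
    public Optional<UserDetail> loadDetails(Long userId) {
        return detailRepository.findById(userId);
    }
}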
Advanced Sharding Patterns
1. Consistent Hashing
java
// Uses Murmur3 from Google Guava (com.google.common.hash.Hashing)
@Component
public class ConsistentHashingStrategy implements ShardingStrategy {

    private final TreeMap<Long, String> ring = new TreeMap<>();
    private final int virtualNodes = 100;

    @PostConstruct
    public void initializeRing() {
        List<String> shards = Arrays.asList("shard1", "shard2", "shard3", "shard4");
        for (String shard : shards) {
            // Virtual nodes spread each physical shard around the ring
            for (int i = 0; i < virtualNodes; i++) {
                String virtualNode = shard + ":" + i;
                long hash = hashFunction(virtualNode);
                ring.put(hash, shard);
            }
        }
    }

    @Override
    public String determineShard(Object shardingKey) {
        long hash = hashFunction(shardingKey.toString());
        // First ring position at or after the key's hash; wrap around if needed
        Map.Entry<Long, String> entry = ring.ceilingEntry(hash);
        if (entry == null) {
            entry = ring.firstEntry();
        }
        return entry.getValue();
    }

    private long hashFunction(String input) {
        return Hashing.murmur3_128().hashString(input, StandardCharsets.UTF_8)
            .asLong();
    }
}
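The payoff of the ring with virtual nodes is that adding or removing a shard remaps only a fraction of the keys instead of nearly all of them, and routing stays deterministic. A quick manual check outside the Spring container (illustrative only):
java
// Illustrative only: the same key always resolves to the same shard.
public class ConsistentHashingDemo {

    public static void main(String[] args) {
        ConsistentHashingStrategy strategy = new ConsistentHashingStrategy();
        strategy.initializeRing(); // normally invoked by Spring via @PostConstruct

        System.out.println(strategy.determineShard(12345L));
        System.out.println(strategy.determineShard(12345L)); // identical to the line above
        System.out.println(strategy.determineShard("user-42"));
    }
}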
2. Shard Coordinator Service
java
@Service
public class ShardCoordinatorService {

    private static final Logger log = LoggerFactory.getLogger(ShardCoordinatorService.class);

    private final RedisTemplate<String, String> redisTemplate;
    private final Map<String, DataSource> shardDataSources;

    public ShardCoordinatorService(RedisTemplate<String, String> redisTemplate,
                                   ShardingDataSource shardingDataSource) {
        this.redisTemplate = redisTemplate;
        this.shardDataSources = shardingDataSource.getDataSources();
    }

    public String getShardForUser(Long userId) {
        String cacheKey = "user:shard:" + userId;
        String cachedShard = redisTemplate.opsForValue().get(cacheKey);
        if (cachedShard != null) {
            return cachedShard;
        }

        // Cache miss: fall back to calculating the shard
        String shard = calculateShard(userId);
        redisTemplate.opsForValue().set(cacheKey, shard, Duration.ofHours(24));
        return shard;
    }

    public void reshardUser(Long userId, String fromShard, String toShard) {
        try {
            // Copy-then-delete; without a distributed transaction this move is not atomic
            User user = fetchUserFromShard(userId, fromShard);
            saveUserToShard(user, toShard);
            deleteUserFromShard(userId, fromShard);

            // Update the cached mapping
            String cacheKey = "user:shard:" + userId;
            redisTemplate.opsForValue().set(cacheKey, toShard);

            log.info("Successfully resharded user {} from {} to {}",
                userId, fromShard, toShard);
        } catch (Exception e) {
            log.error("Failed to reshard user {}", userId, e);
            throw new ShardingException("Resharding failed", e);
        }
    }

    // calculateShard, fetchUserFromShard, saveUserToShard and deleteUserFromShard
    // are the service's own helpers and are omitted here
}
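The ShardingException thrown above would typically be a small custom unchecked exception; a minimal sketch:
java
// Hypothetical unchecked exception used to signal sharding failures.
public class ShardingException extends RuntimeException {

    public ShardingException(String message) {
        super(message);
    }

    public ShardingException(String message, Throwable cause) {
        super(message, cause);
    }
}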
Monitoring and Health Checks
1. Shard Health Monitoring
java
@Component
public class ShardHealthMonitor {

    private static final Logger log = LoggerFactory.getLogger(ShardHealthMonitor.class);

    private final Map<String, DataSource> shardDataSources;
    private final MeterRegistry meterRegistry;
    // One gauge state object per shard; Micrometer holds gauge values weakly,
    // so they must be kept here rather than recreated on every check
    private final Map<String, AtomicInteger> shardHealthGauges = new ConcurrentHashMap<>();

    public ShardHealthMonitor(ShardingDataSource shardingDataSource, MeterRegistry meterRegistry) {
        this.shardDataSources = shardingDataSource.getDataSources();
        this.meterRegistry = meterRegistry;
    }

    @Scheduled(fixedRate = 30000) // every 30 seconds
    public void checkShardHealth() {
        shardDataSources.forEach((shardName, dataSource) -> {
            AtomicInteger healthGauge = shardHealthGauges.computeIfAbsent(shardName,
                name -> meterRegistry.gauge("shard.health", Tags.of("shard", name), new AtomicInteger(1)));

            Timer.Sample sample = Timer.start(meterRegistry);
            try (Connection connection = dataSource.getConnection()) {
                boolean isHealthy = connection.isValid(5); // 5 second timeout
                healthGauge.set(isHealthy ? 1 : 0);

                if (!isHealthy) {
                    log.warn("Shard {} is unhealthy", shardName);
                    // Trigger alerts
                    triggerShardAlert(shardName, "Shard health check failed");
                }
            } catch (SQLException e) {
                log.error("Failed to check health for shard {}", shardName, e);
                healthGauge.set(0);
            } finally {
                sample.stop(Timer.builder("shard.health.check.duration")
                    .tag("shard", shardName)
                    .register(meterRegistry));
            }
        });
    }

    private void triggerShardAlert(String shardName, String message) {
        // Hook into the alerting channel of your choice (pager, chat, e-mail, ...)
    }
}
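If the application already uses Spring Boot Actuator, the same per-shard check can additionally be exposed on the /actuator/health endpoint. A sketch, assuming spring-boot-starter-actuator is on the classpath; the class name is illustrative:
java
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Map;
import javax.sql.DataSource;
import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.actuate.health.HealthIndicator;
import org.springframework.stereotype.Component;

// Reports per-shard connectivity under /actuator/health.
@Component
public class ShardHealthIndicator implements HealthIndicator {

    private final Map<String, DataSource> shardDataSources;

    public ShardHealthIndicator(ShardingDataSource shardingDataSource) {
        this.shardDataSources = shardingDataSource.getDataSources();
    }

    @Override
    public Health health() {
        Health.Builder builder = Health.up();
        shardDataSources.forEach((shardName, dataSource) -> {
            try (Connection connection = dataSource.getConnection()) {
                boolean isHealthy = connection.isValid(5);
                builder.withDetail(shardName, isHealthy ? "UP" : "DOWN");
                if (!isHealthy) {
                    builder.down();
                }
            } catch (SQLException e) {
                builder.down().withDetail(shardName, "DOWN: " + e.getMessage());
            }
        });
        return builder.build();
    }
}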
2. Performance Metrics
java
@Aspect
@Component
public class ShardingMetricsAspect {

    private final MeterRegistry meterRegistry;

    public ShardingMetricsAspect(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    // Matches methods annotated with the custom @Sharded marker annotation
    @Around("@annotation(Sharded)")
    public Object measureShardingOperation(ProceedingJoinPoint joinPoint) throws Throwable {
        String operation = joinPoint.getSignature().getName();
        Timer.Sample sample = Timer.start(meterRegistry);
        try {
            Object result = joinPoint.proceed();
            meterRegistry.counter("sharding.operation.success",
                "operation", operation).increment();
            return result;
        } catch (Exception e) {
            meterRegistry.counter("sharding.operation.error",
                "operation", operation,
                "error", e.getClass().getSimpleName()).increment();
            throw e;
        } finally {
            sample.stop(Timer.builder("sharding.operation.duration")
                .tag("operation", operation)
                .register(meterRegistry));
        }
    }
}
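The @Sharded annotation targeted by the pointcut would typically be a plain marker annotation defined in the application; a minimal sketch:
java
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

// Marker annotation for shard-aware operations that should be measured.
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface Sharded {
}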
Multi-Shard Setup with Docker Compose
yaml
version: '3.8'

services:
  shard1-db:
    image: postgres:15
    environment:
      POSTGRES_DB: users_shard1
      POSTGRES_USER: shard1_user
      POSTGRES_PASSWORD: shard1_pass
    ports:
      - "5432:5432"
    volumes:
      - shard1_data:/var/lib/postgresql/data

  shard2-db:
    image: postgres:15
    environment:
      POSTGRES_DB: users_shard2
      POSTGRES_USER: shard2_user
      POSTGRES_PASSWORD: shard2_pass
    ports:
      - "5433:5432"
    volumes:
      - shard2_data:/var/lib/postgresql/data

  shard3-db:
    image: postgres:15
    environment:
      POSTGRES_DB: users_shard3
      POSTGRES_USER: shard3_user
      POSTGRES_PASSWORD: shard3_pass
    ports:
      - "5434:5432"
    volumes:
      - shard3_data:/var/lib/postgresql/data

  redis-coordinator:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data

  application:
    build: .
    ports:
      - "8080:8080"
    environment:
      SPRING_PROFILES_ACTIVE: sharded
      SHARD1_URL: jdbc:postgresql://shard1-db:5432/users_shard1
      SHARD2_URL: jdbc:postgresql://shard2-db:5432/users_shard2
      SHARD3_URL: jdbc:postgresql://shard3-db:5432/users_shard3
      REDIS_URL: redis://redis-coordinator:6379
    depends_on:
      - shard1-db
      - shard2-db
      - shard3-db
      - redis-coordinator

volumes:
  shard1_data:
  shard2_data:
  shard3_data:
  redis_data:
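The SHARD*_URL and REDIS_URL variables set in the compose file still have to reach the Spring configuration; one simple option is @Value injection instead of the hard-coded JDBC URLs used earlier. A sketch, where the property names are simply the environment variable names above:
java
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

// Binds the connection settings provided by docker-compose to the application.
@Configuration
public class ShardConnectionProperties {

    @Value("${SHARD1_URL}")
    private String shard1Url;

    @Value("${SHARD2_URL}")
    private String shard2Url;

    @Value("${SHARD3_URL}")
    private String shard3Url;

    @Value("${REDIS_URL}")
    private String redisUrl;

    public String getShard1Url() { return shard1Url; }

    public String getShard2Url() { return shard2Url; }

    public String getShard3Url() { return shard3Url; }

    public String getRedisUrl() { return redisUrl; }
}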
Sharding is often unavoidable in large-scale systems, but it adds significant development and operational complexity. With the right strategy and a careful implementation, it can deliver substantial gains in performance and scalability.