📈 Cloud Scalability Patterns
1. 🔄 Horizontal Scaling Pattern
1.1 Auto-Scaling Groups
Concept: Automatically adjusts the number of instances based on demand.
AWS Auto-Scaling Example:
@Configuration
public class AutoScalingConfig {
@Bean
public AmazonAutoScaling autoScalingClient() {
return AmazonAutoScalingClientBuilder.standard()
.withRegion(Regions.US_EAST_1)
.build();
}
}
@Service
public class AutoScalingService {

    private final AmazonAutoScaling autoScalingClient;

    public AutoScalingService(AmazonAutoScaling autoScalingClient) {
        this.autoScalingClient = autoScalingClient;
    }

    public void configureAutoScaling(String groupName) {
        // NOTE: a real group also needs a launch template (or launch configuration)
        // and VPC subnets / availability zones; omitted here for brevity.
        CreateAutoScalingGroupRequest request = new CreateAutoScalingGroupRequest()
                .withAutoScalingGroupName(groupName)
                .withMinSize(1)
                .withMaxSize(10)
                .withDesiredCapacity(2)
                .withHealthCheckType("EC2")
                .withHealthCheckGracePeriod(300);

        // Target-tracking policy: keep average CPU utilization around 70%
        PutScalingPolicyRequest policyRequest = new PutScalingPolicyRequest()
                .withAutoScalingGroupName(groupName)
                .withPolicyName("CPUUtilizationScaling")
                .withPolicyType("TargetTrackingScaling")
                .withTargetTrackingConfiguration(new TargetTrackingConfiguration()
                        .withTargetValue(70.0)
                        .withPredefinedMetricSpecification(
                                new PredefinedMetricSpecification()
                                        .withPredefinedMetricType("ASGAverageCPUUtilization")));

        autoScalingClient.createAutoScalingGroup(request);
        autoScalingClient.putScalingPolicy(policyRequest);
    }
}
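A minimal usage sketch that could be added to the service above, assuming the same client; it confirms the group exists and prints its current capacity (the output format is illustrative):
    // Possible addition to AutoScalingService: confirm the group and its capacity
    public void printGroupCapacity(String groupName) {
        DescribeAutoScalingGroupsResult result = autoScalingClient.describeAutoScalingGroups(
                new DescribeAutoScalingGroupsRequest().withAutoScalingGroupNames(groupName));
        result.getAutoScalingGroups().forEach(group ->
                System.out.printf("%s desired=%d min=%d max=%d%n",
                        group.getAutoScalingGroupName(),
                        group.getDesiredCapacity(),
                        group.getMinSize(),
                        group.getMaxSize()));
    }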
1.2 Kubernetes Horizontal Pod Autoscaling
Example (CPU and memory utilization targets require resource requests to be set on the target pods):
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: app-deployment
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 30
      policies:
        - type: Percent
          value: 100
          periodSeconds: 15
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 100
          periodSeconds: 15
2. 🔀 Load Balancing Pattern
2.1 Application Load Balancer
Concept: Distributes incoming traffic across multiple instances.
Client-side implementation with Spring Cloud (complementing an infrastructure load balancer such as an ALB):
@Configuration
public class LoadBalancerConfig {
@Bean
@LoadBalanced
public RestTemplate loadBalancedRestTemplate() {
return new RestTemplate();
}
}
@Service
public class LoadBalancedService {

    private final RestTemplate restTemplate;
    private final LoadBalancerClient loadBalancerClient;

    public LoadBalancedService(RestTemplate restTemplate,
                               LoadBalancerClient loadBalancerClient) {
        this.restTemplate = restTemplate;
        this.loadBalancerClient = loadBalancerClient;
    }

    public Response callService(String serviceId) {
        // A @LoadBalanced RestTemplate resolves the logical service id to a
        // concrete instance on every request, so the service id is used as the host.
        return restTemplate.getForObject(
                "http://" + serviceId + "/api/endpoint", Response.class);
    }

    public String resolveInstanceUrl(String serviceId) {
        // Manual instance selection, useful for clients that are not
        // load-balancer aware; the chosen instance can differ per call.
        ServiceInstance instance = loadBalancerClient.choose(serviceId);
        return String.format("http://%s:%d", instance.getHost(), instance.getPort());
    }

    // Custom balancing strategies (e.g. Ribbon's WeightedResponseTimeRule) are
    // registered on the load balancer itself rather than by casting the
    // LoadBalancerClient; see the configuration sketch below.
}
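For the legacy Netflix Ribbon stack that WeightedResponseTimeRule belongs to, a custom rule is supplied as a bean in a per-client configuration class. A minimal sketch, with a placeholder service name:
    import com.netflix.loadbalancer.IRule;
    import com.netflix.loadbalancer.WeightedResponseTimeRule;
    import org.springframework.cloud.netflix.ribbon.RibbonClient;
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;

    @Configuration
    @RibbonClient(name = "inventory-service", configuration = WeightedRuleConfiguration.class)
    public class RibbonCustomizationConfig {
    }

    // Note: per the Spring Cloud docs, this class should live outside the main
    // component-scan path so it does not become the default for every client.
    class WeightedRuleConfiguration {

        @Bean
        public IRule ribbonRule() {
            // Prefer instances with lower average response times
            return new WeightedResponseTimeRule();
        }
    }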
3. 📊 Database Scalability Pattern
3.1 Read Replicas
Concept: Scales read operations by creating database replicas.
Implementation:
@Configuration
public class DatabaseConfig {
@Bean
@Primary
@ConfigurationProperties(prefix = "spring.datasource.writer")
public DataSource writerDataSource() {
return DataSourceBuilder.create().build();
}
@Bean
@ConfigurationProperties(prefix = "spring.datasource.reader")
public DataSource readerDataSource() {
return DataSourceBuilder.create().build();
}
}
@Service
public class DatabaseService {

    @Autowired
    @Qualifier("writerDataSource")
    private DataSource writerDataSource;

    @Autowired
    @Qualifier("readerDataSource")
    private DataSource readerDataSource;

    @Transactional(readOnly = true)
    public Data readData(String id) throws SQLException {
        try (Connection conn = readerDataSource.getConnection()) {
            // Read operations go to the replica; map the result set into Data here
            return null; // placeholder for the mapped result
        }
    }

    @Transactional
    public void writeData(Data data) throws SQLException {
        try (Connection conn = writerDataSource.getConnection()) {
            // Write operations go to the primary
        }
    }
}
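A common refinement is to route connections automatically instead of choosing a DataSource by hand. A minimal sketch using Spring's AbstractRoutingDataSource, assuming read-only transactions should go to the replica (class and key names are illustrative):
    import org.springframework.jdbc.datasource.lookup.AbstractRoutingDataSource;
    import org.springframework.transaction.support.TransactionSynchronizationManager;

    import javax.sql.DataSource;
    import java.util.HashMap;
    import java.util.Map;

    // Illustrative sketch: pick the replica for read-only transactions and the
    // primary otherwise. Wrap it in a LazyConnectionDataSourceProxy when wiring
    // it up, so the connection (and therefore this lookup) is resolved only
    // after the read-only flag has been set for the current transaction.
    public class ReadWriteRoutingDataSource extends AbstractRoutingDataSource {

        public ReadWriteRoutingDataSource(DataSource writer, DataSource reader) {
            Map<Object, Object> targets = new HashMap<>();
            targets.put("writer", writer);
            targets.put("reader", reader);
            setTargetDataSources(targets);
            setDefaultTargetDataSource(writer);
        }

        @Override
        protected Object determineCurrentLookupKey() {
            return TransactionSynchronizationManager.isCurrentTransactionReadOnly()
                    ? "reader" : "writer";
        }
    }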
3.2 Database Sharding
Concept: Partitions data across multiple databases.
Implementation:
@Configuration
public class ShardingConfig {

    @Bean
    public DataSource shardingDataSource() throws SQLException {
        // createDataSource(...) is a local helper that builds the connection
        // pool for each physical shard (omitted here for brevity).
        Map<String, DataSource> dataSourceMap = new HashMap<>();
        dataSourceMap.put("shard0", createDataSource("shard0"));
        dataSourceMap.put("shard1", createDataSource("shard1"));

        ShardingRuleConfiguration shardingRuleConfig = new ShardingRuleConfiguration();

        // The users table exists once in each shard database
        TableRuleConfiguration tableRuleConfig =
                new TableRuleConfiguration("users", "shard${0..1}.users");

        // Route rows to shard0 or shard1 based on user_id
        tableRuleConfig.setDatabaseShardingStrategyConfig(
                new StandardShardingStrategyConfiguration("user_id",
                        new ModuloShardingAlgorithm()));
        shardingRuleConfig.getTableRuleConfigs().add(tableRuleConfig);

        return ShardingDataSourceFactory.createDataSource(
                dataSourceMap,
                shardingRuleConfig,
                new Properties());
    }
}

public class ModuloShardingAlgorithm implements PreciseShardingAlgorithm<Long> {

    @Override
    public String doSharding(
            Collection<String> availableTargetNames,
            PreciseShardingValue<Long> shardingValue) {
        // Pick the shard whose name ends with user_id % 2 (shard0 or shard1)
        String suffix = String.valueOf(shardingValue.getValue() % 2);
        for (String targetName : availableTargetNames) {
            if (targetName.endsWith(suffix)) {
                return targetName;
            }
        }
        throw new UnsupportedOperationException(
                "No shard found for value " + shardingValue.getValue());
    }
}
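A usage sketch, assuming the sharded DataSource above is wired into a JdbcTemplate (the name column is illustrative): queries route to shard0 or shard1 by user_id without the caller knowing about the shards, e.g. user_id 4 lands on shard0 and user_id 7 on shard1.
    import org.springframework.jdbc.core.JdbcTemplate;

    import javax.sql.DataSource;

    // Illustrative sketch: SQL is written against the logical "users" table;
    // the sharding layer rewrites and routes it to the right physical shard.
    public class UserDao {

        private final JdbcTemplate jdbcTemplate;

        public UserDao(DataSource shardingDataSource) {
            this.jdbcTemplate = new JdbcTemplate(shardingDataSource);
        }

        public void insertUser(long userId, String name) {
            jdbcTemplate.update(
                    "INSERT INTO users (user_id, name) VALUES (?, ?)", userId, name);
        }

        public String findName(long userId) {
            return jdbcTemplate.queryForObject(
                    "SELECT name FROM users WHERE user_id = ?", String.class, userId);
        }
    }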
4. 🗄️ Caching Patterns
4.1 Distributed Cache
Concept: Improves performance by caching data across multiple nodes.
Implementation:
@Configuration
@EnableCaching
public class CacheConfig extends CachingConfigurerSupport {
@Bean
public RedisCacheManager cacheManager(
RedisConnectionFactory connectionFactory) {
RedisCacheConfiguration config = RedisCacheConfiguration
.defaultCacheConfig()
.prefixCacheNameWith("cache:")
.entryTtl(Duration.ofMinutes(60))
.disableCachingNullValues();
return RedisCacheManager.builder(connectionFactory)
.cacheDefaults(config)
.build();
}
}
@Service
public class CachingService {

    private final ItemRepository itemRepository;

    public CachingService(ItemRepository itemRepository) {
        this.itemRepository = itemRepository;
    }

    @Cacheable(value = "items",
               key = "#id",
               unless = "#result == null")
    public Item getItem(String id) {
        // Expensive operation: hits the repository only on a cache miss
        return itemRepository.findById(id);
    }
@CachePut(value = "items", key = "#item.id")
public Item updateItem(Item item) {
return itemRepository.save(item);
}
@CacheEvict(value = "items", key = "#id")
public void deleteItem(String id) {
itemRepository.deleteById(id);
}
}
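The cache manager above needs a RedisConnectionFactory. A minimal sketch using Lettuce, with placeholder connection details that would normally come from configuration properties:
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;
    import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
    import org.springframework.data.redis.connection.lettuce.LettuceConnectionFactory;

    @Configuration
    public class RedisConnectionConfig {

        @Bean
        public LettuceConnectionFactory redisConnectionFactory() {
            // Host and port are placeholders; all application instances share
            // the same Redis endpoint, which is what makes the cache distributed.
            return new LettuceConnectionFactory(
                    new RedisStandaloneConfiguration("redis.internal", 6379));
        }
    }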
5. 📤 Message Queue Pattern
5.1 Asynchronous Processing
Concept: Handles high load through asynchronous message processing.
Implementation:
@Configuration
public class MessagingConfig {
@Bean
public Queue queue() {
return new Queue("taskQueue", true);
}
@Bean
public RabbitTemplate rabbitTemplate(
ConnectionFactory connectionFactory) {
RabbitTemplate template = new RabbitTemplate(connectionFactory);
template.setMessageConverter(new Jackson2JsonMessageConverter());
return template;
}
}
@Service
public class MessageProcessingService {

    private final RabbitTemplate rabbitTemplate;

    public MessageProcessingService(RabbitTemplate rabbitTemplate) {
        this.rabbitTemplate = rabbitTemplate;
    }

    // Producer side: publish the task and return immediately
    // (requires @EnableAsync on a configuration class for @Async to apply)
    @Async
    public void processTask(Task task) {
        rabbitTemplate.convertAndSend("taskQueue", task);
    }

    // Consumer side: invoked by the listener container for each message
    @RabbitListener(queues = "taskQueue")
    public void handleTask(Task task) {
        processTaskAsync(task);
    }

    private void processTaskAsync(Task task) {
        CompletableFuture.runAsync(() -> {
            // Task processing logic
        }).thenRun(() -> {
            // Handle successful completion
        }).exceptionally(throwable -> {
            // Handle errors (log, retry, or dead-letter the task)
            return null;
        });
    }
}
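Consumer throughput scales with listener concurrency, not producer speed. A minimal sketch of a listener container factory with illustrative concurrency settings to tune against queue depth:
    import org.springframework.amqp.rabbit.config.SimpleRabbitListenerContainerFactory;
    import org.springframework.amqp.rabbit.connection.ConnectionFactory;
    import org.springframework.amqp.support.converter.Jackson2JsonMessageConverter;
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;

    @Configuration
    public class ListenerScalingConfig {

        @Bean
        public SimpleRabbitListenerContainerFactory rabbitListenerContainerFactory(
                ConnectionFactory connectionFactory) {
            SimpleRabbitListenerContainerFactory factory =
                    new SimpleRabbitListenerContainerFactory();
            factory.setConnectionFactory(connectionFactory);
            factory.setMessageConverter(new Jackson2JsonMessageConverter());
            factory.setConcurrentConsumers(5);     // baseline consumers per instance
            factory.setMaxConcurrentConsumers(20); // grow under sustained load
            factory.setPrefetchCount(10);          // messages in flight per consumer
            return factory;
        }
    }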
6. 🌊 Rate Limiting Pattern
6.1 API Rate Limiting
Concept: Controls request rates to prevent overload.
Implementation:
@Configuration
public class RateLimitConfig {
@Bean
public RateLimiter rateLimiter() {
return RateLimiter.create(100.0); // 100 requests per second
}
}
@Component
public class RateLimitingFilter implements Filter {

    private final RateLimiter globalRateLimiter;
    private final Cache<String, RateLimiter> userRateLimiters;

    public RateLimitingFilter(RateLimiter globalRateLimiter) {
        this.globalRateLimiter = globalRateLimiter;
        this.userRateLimiters = CacheBuilder.newBuilder()
                .expireAfterWrite(1, TimeUnit.HOURS)
                .build();
    }

    @Override
    public void doFilter(
            ServletRequest request,
            ServletResponse response,
            FilterChain chain) throws IOException, ServletException {
        HttpServletRequest httpRequest = (HttpServletRequest) request;
        // Fall back to the client address when no User-Id header is present
        String user = httpRequest.getHeader("User-Id");
        String key = (user != null) ? user : request.getRemoteAddr();

        RateLimiter userLimiter;
        try {
            userLimiter = userRateLimiters.get(key,
                    () -> RateLimiter.create(10.0)); // 10 requests per second per user
        } catch (ExecutionException e) {
            throw new ServletException(e);
        }

        // Both the global and the per-user budget must allow the request
        if (globalRateLimiter.tryAcquire() && userLimiter.tryAcquire()) {
            chain.doFilter(request, response);
        } else {
            HttpServletResponse httpResponse = (HttpServletResponse) response;
            httpResponse.setStatus(HttpStatus.TOO_MANY_REQUESTS.value());
            httpResponse.getWriter().write("Too many requests");
        }
    }
}
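A sketch for scoping the rate limiter to API routes and placing it early in the filter chain; the URL pattern and order are placeholders:
    import org.springframework.boot.web.servlet.FilterRegistrationBean;
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;

    @Configuration
    public class RateLimitFilterRegistration {

        @Bean
        public FilterRegistrationBean<RateLimitingFilter> rateLimitingFilterRegistration(
                RateLimitingFilter filter) {
            // Registering explicitly narrows the filter to API routes and fixes its order
            FilterRegistrationBean<RateLimitingFilter> registration =
                    new FilterRegistrationBean<>(filter);
            registration.addUrlPatterns("/api/*");
            registration.setOrder(1); // run early, before expensive processing
            return registration;
        }
    }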
7. 📊 Monitoring and Metrics
7.1 Scalability Metrics Collection
Implementation:
@Configuration
public class MetricsConfig {
@Bean
public MeterRegistry meterRegistry() {
return new SimpleMeterRegistry();
}
}
@Service
public class MetricsService {
private final MeterRegistry registry;
private final Counter requestCounter;
private final Timer responseTimer;
public MetricsService(MeterRegistry registry) {
this.registry = registry;
this.requestCounter = Counter
.builder("app.requests")
.description("Total requests")
.register(registry);
this.responseTimer = Timer
.builder("app.response.time")
.description("Response time")
.register(registry);
}
public void recordMetrics(Runnable operation) {
requestCounter.increment();
Timer.Sample sample = Timer.start(registry);
try {
operation.run();
} finally {
sample.stop(responseTimer);
}
}
}
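Counters and timers cover traffic and latency; a gauge over a backlog is often what actually drives scaling decisions. A minimal sketch with a placeholder metric name and an illustrative backlog source:
    import io.micrometer.core.instrument.Gauge;
    import io.micrometer.core.instrument.MeterRegistry;

    import java.util.concurrent.BlockingQueue;

    // Illustrative sketch: expose a backlog gauge that an autoscaler or alert
    // can act on. The queue stands in for whatever backlog the service keeps.
    public class BacklogMetrics {

        public BacklogMetrics(MeterRegistry registry, BlockingQueue<?> taskBacklog) {
            Gauge.builder("app.task.backlog", taskBacklog, q -> q.size())
                    .description("Tasks waiting to be processed")
                    .register(registry);
        }
    }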
8. Best Practices
- Design for Scalability
  - Use stateless services
  - Implement proper caching
  - Design for failure
- Monitoring and Alerting
  - Track key metrics
  - Set up alerts
  - Monitor resource usage
- Performance Optimization
  - Use asynchronous operations
  - Implement caching
  - Optimize database queries
- Cost Management
  - Monitor resource usage
  - Implement auto-scaling
  - Use appropriate instance types