Skip to main content

📈 Cloud Scalability Patterns

1. 🔄 Horizontal Scaling Pattern

1.1 Auto-Scaling Groups

Concept: Automatically adjusts the number of instances based on demand.

AWS Auto-Scaling Example:

@Configuration
public class AutoScalingConfig {
@Bean
public AmazonAutoScaling autoScalingClient() {
return AmazonAutoScalingClientBuilder.standard()
.withRegion(Regions.US_EAST_1)
.build();
}
}

@Service
public class AutoScalingService {
private final AmazonAutoScaling autoScalingClient;

public void configureAutoScaling(String groupName) {
CreateAutoScalingGroupRequest request = new CreateAutoScalingGroupRequest()
.withAutoScalingGroupName(groupName)
.withMinSize(1)
.withMaxSize(10)
.withDesiredCapacity(2)
.withHealthCheckType("EC2")
.withHealthCheckGracePeriod(300);

// CPU Utilization scaling policy
PutScalingPolicyRequest policyRequest = new PutScalingPolicyRequest()
.withAutoScalingGroupName(groupName)
.withPolicyName("CPUUtilizationScaling")
.withPolicyType(PolicyType.TargetTrackingScaling)
.withTargetTrackingConfiguration(new TargetTrackingConfiguration()
.withTargetValue(70.0)
.withPredefinedMetricSpecification(
new PredefinedMetricSpecification()
.withPredefinedMetricType("ASGAverageCPUUtilization")
));

autoScalingClient.createAutoScalingGroup(request);
autoScalingClient.putScalingPolicy(policyRequest);
}
}

1.2 Kubernetes Horizontal Pod Autoscaling

Example:

apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: app-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: app-deployment
minReplicas: 2
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
behavior:
scaleUp:
stabilizationWindowSeconds: 30
policies:
- type: Percent
value: 100
periodSeconds: 15
scaleDown:
stabilizationWindowSeconds: 300
policies:
- type: Percent
value: 100
periodSeconds: 15

2. 🔀 Load Balancing Pattern

2.1 Application Load Balancer

Concept: Distributes incoming traffic across multiple instances.

Implementation:

@Configuration
public class LoadBalancerConfig {
@Bean
@LoadBalanced
public RestTemplate loadBalancedRestTemplate() {
return new RestTemplate();
}
}

@Service
public class LoadBalancedService {
private final RestTemplate restTemplate;
private final LoadBalancerClient loadBalancerClient;

public Response callService(String serviceId) {
ServiceInstance instance = loadBalancerClient.choose(serviceId);
String url = String.format("http://%s:%s",
instance.getHost(),
instance.getPort());

return restTemplate.getForObject(url + "/api/endpoint",
Response.class);
}

@PostConstruct
public void configureLoadBalancer() {
// Configure custom load balancing strategy
IRule loadBalancingRule = new WeightedResponseTimeRule();
DynamicServerListLoadBalancer<?> loadBalancer =
(DynamicServerListLoadBalancer<?>) loadBalancerClient;
loadBalancer.setRule(loadBalancingRule);
}
}

3. 📊 Database Scalability Pattern

3.1 Read Replicas

Concept: Scales read operations by creating database replicas.

Implementation:

@Configuration
public class DatabaseConfig {
@Bean
@Primary
@ConfigurationProperties(prefix = "spring.datasource.writer")
public DataSource writerDataSource() {
return DataSourceBuilder.create().build();
}

@Bean
@ConfigurationProperties(prefix = "spring.datasource.reader")
public DataSource readerDataSource() {
return DataSourceBuilder.create().build();
}
}

@Service
public class DatabaseService {
@Autowired
@Qualifier("writerDataSource")
private DataSource writerDataSource;

@Autowired
@Qualifier("readerDataSource")
private DataSource readerDataSource;

@Transactional(readOnly = true)
public Data readData(String id) {
try (Connection conn = readerDataSource.getConnection()) {
// Read operations
}
}

@Transactional
public void writeData(Data data) {
try (Connection conn = writerDataSource.getConnection()) {
// Write operations
}
}
}

3.2 Database Sharding

Concept: Partitions data across multiple databases.

Implementation:

@Configuration
public class ShardingConfig {
@Bean
public ShardingDataSource shardingDataSource() {
Map<String, DataSource> dataSourceMap = new HashMap<>();
dataSourceMap.put("shard0", createDataSource("shard0"));
dataSourceMap.put("shard1", createDataSource("shard1"));

ShardingRuleConfiguration shardingRuleConfig = new ShardingRuleConfiguration();

// Configure sharding rule
TableRuleConfiguration tableRuleConfig = new TableRuleConfiguration("users");
tableRuleConfig.setActualDataNodes("shard${0..1}.users");

// Sharding strategy
tableRuleConfig.setTableShardingStrategyConfig(
new StandardShardingStrategyConfiguration("user_id",
new ModuloShardingTableAlgorithm()));

shardingRuleConfig.getTableRuleConfigs().add(tableRuleConfig);

return ShardingDataSourceFactory.createDataSource(
dataSourceMap,
shardingRuleConfig,
new Properties());
}
}

public class ModuloShardingTableAlgorithm
implements PreciseShardingAlgorithm<Long> {

@Override
public String doSharding(
Collection<String> availableTargetNames,
PreciseShardingValue<Long> shardingValue) {

for (String targetName : availableTargetNames) {
if (targetName.endsWith(shardingValue.getValue() % 2 + "")) {
return targetName;
}
}
throw new UnsupportedOperationException();
}
}

4. 🗄️ Caching Patterns

4.1 Distributed Cache

Concept: Improves performance by caching data across multiple nodes.

Implementation:

@Configuration
@EnableCaching
public class CacheConfig extends CachingConfigurerSupport {
@Bean
public RedisCacheManager cacheManager(
RedisConnectionFactory connectionFactory) {
RedisCacheConfiguration config = RedisCacheConfiguration
.defaultCacheConfig()
.prefixCacheNameWith("cache:")
.entryTtl(Duration.ofMinutes(60))
.disableCachingNullValues();

return RedisCacheManager.builder(connectionFactory)
.cacheDefaults(config)
.build();
}
}

@Service
public class CachingService {
private final CacheManager cacheManager;

@Cacheable(value = "items",
key = "#id",
unless = "#result == null")
public Item getItem(String id) {
// Expensive operation to fetch item
return itemRepository.findById(id);
}

@CachePut(value = "items", key = "#item.id")
public Item updateItem(Item item) {
return itemRepository.save(item);
}

@CacheEvict(value = "items", key = "#id")
public void deleteItem(String id) {
itemRepository.deleteById(id);
}
}

5. 📤 Message Queue Pattern

5.1 Asynchronous Processing

Concept: Handles high load through asynchronous message processing.

Implementation:

@Configuration
public class MessagingConfig {
@Bean
public Queue queue() {
return new Queue("taskQueue", true);
}

@Bean
public RabbitTemplate rabbitTemplate(
ConnectionFactory connectionFactory) {
RabbitTemplate template = new RabbitTemplate(connectionFactory);
template.setMessageConverter(new Jackson2JsonMessageConverter());
return template;
}
}

@Service
public class MessageProcessingService {
private final RabbitTemplate rabbitTemplate;

@Async
public void processTask(Task task) {
rabbitTemplate.convertAndSend("taskQueue", task);
}

@RabbitListener(queues = "taskQueue")
public void handleTask(Task task) {
// Process task asynchronously
processTaskAsync(task);
}

private void processTaskAsync(Task task) {
CompletableFuture.runAsync(() -> {
// Task processing logic
}).thenAccept(result -> {
// Handle completion
}).exceptionally(throwable -> {
// Handle errors
return null;
});
}
}

6. 🌊 Rate Limiting Pattern

6.1 API Rate Limiting

Concept: Controls request rates to prevent overload.

Implementation:

@Configuration
public class RateLimitConfig {
@Bean
public RateLimiter rateLimiter() {
return RateLimiter.create(100.0); // 100 requests per second
}
}

@Component
public class RateLimitingFilter implements Filter {
private final RateLimiter rateLimiter;
private final Cache<String, RateLimiter> userRateLimiters;

public RateLimitingFilter() {
this.userRateLimiters = CacheBuilder.newBuilder()
.expireAfterWrite(1, TimeUnit.HOURS)
.build();
}

@Override
public void doFilter(
ServletRequest request,
ServletResponse response,
FilterChain chain) throws IOException, ServletException {

HttpServletRequest httpRequest = (HttpServletRequest) request;
String user = httpRequest.getHeader("User-Id");

RateLimiter limiter = userRateLimiters.get(user, () ->
RateLimiter.create(10.0)); // 10 requests per second per user

if (limiter.tryAcquire()) {
chain.doFilter(request, response);
} else {
HttpServletResponse httpResponse =
(HttpServletResponse) response;
httpResponse.setStatus(
HttpStatus.TOO_MANY_REQUESTS.value());
httpResponse.getWriter()
.write("Too many requests");
}
}
}

7. 📊 Monitoring and Metrics

7.1 Scalability Metrics Collection

Implementation:

@Configuration
public class MetricsConfig {
@Bean
public MeterRegistry meterRegistry() {
return new SimpleMeterRegistry();
}
}

@Service
public class MetricsService {
private final MeterRegistry registry;
private final Counter requestCounter;
private final Timer responseTimer;

public MetricsService(MeterRegistry registry) {
this.registry = registry;
this.requestCounter = Counter
.builder("app.requests")
.description("Total requests")
.register(registry);
this.responseTimer = Timer
.builder("app.response.time")
.description("Response time")
.register(registry);
}

public void recordMetrics(Runnable operation) {
requestCounter.increment();
Timer.Sample sample = Timer.start(registry);

try {
operation.run();
} finally {
sample.stop(responseTimer);
}
}
}

8. Best Practices

  1. Design for Scalability

    • Use stateless services
    • Implement proper caching
    • Design for failure
  2. Monitoring and Alerting

    • Track key metrics
    • Set up alerts
    • Monitor resource usage
  3. Performance Optimization

    • Use asynchronous operations
    • Implement caching
    • Optimize database queries
  4. Cost Management

    • Monitor resource usage
    • Implement auto-scaling
    • Use appropriate instance types

9. References

  • "Patterns of Scalable Systems" by Martin Kleppmann
  • "Building Microservices" by Sam Newman
  • AWS Well-Architected Framework
  • Azure Architecture Center
  • Google Cloud Architecture Framework