Master AWS VPC design with proven patterns for multi-region deployments, network segmentation, and secure connectivity. Discover expert insights and best practices.
After architecting VPCs for everything from startups to Fortune 500 companies, I've learned that network design decisions made on day one impact you for years. Here's how to build VPC architectures that scale, secure, and simplify your AWS infrastructure.
VPC Design Principles
The Hub-and-Spoke Model
# hub-vpc.tf - Central connectivity hub
# Hub VPC on 10.0.0.0/16, tagged as the transit network, with both DNS
# resolution and DNS hostnames switched on.
resource "aws_vpc" "hub" {
  cidr_block = "10.0.0.0/16"

  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Type = "transit"
    Name = "hub-vpc"
  }
}
# Transit Gateway for inter-VPC communication.
# Automatic association with, and propagation to, the default route table
# are both disabled, so route tables have to be wired up explicitly.
resource "aws_ec2_transit_gateway" "main" {
  description = "Main Transit Gateway"

  # Routing behaviour
  default_route_table_propagation = "disable"
  default_route_table_association = "disable"

  # Feature toggles
  vpn_ecmp_support = "enable"
  dns_support      = "enable"

  tags = {
    Name = "main-tgw"
  }
}
# Spoke VPCs for different environments.
# Production spoke: 10.1.0.0/16.
resource "aws_vpc" "production" {
  cidr_block = "10.1.0.0/16"

  tags = {
    Environment = "production"
    Name        = "production-vpc"
  }
}
# Staging spoke: 10.2.0.0/16.
resource "aws_vpc" "staging" {
  cidr_block = "10.2.0.0/16"

  tags = {
    Environment = "staging"
    Name        = "staging-vpc"
  }
}
IP Address Planning
# ip_planner.py - Generate subnet ranges
import ipaddress


class IPPlanner:
    """Carve a VPC CIDR into equally sized subnets, one per tier and AZ."""

    def __init__(self, vpc_cidr):
        """Parse ``vpc_cidr`` (e.g. ``'10.0.0.0/16'``) into a network object.

        Raises:
            ValueError: if ``vpc_cidr`` is not a valid network.
        """
        self.vpc_network = ipaddress.ip_network(vpc_cidr)
        # Kept for callers that read the attribute; plan_subnets() does not
        # populate it.
        self.subnets = []

    def plan_subnets(self, zones=3, tiers=('public', 'private', 'data'),
                     subnet_prefix=24):
        """Generate a subnet plan for a multi-AZ deployment.

        Subnets are handed out sequentially from the start of the VPC range:
        every zone of the first tier, then every zone of the second tier,
        and so on.

        Args:
            zones: number of availability zones per tier.
            tiers: iterable of tier names; order determines allocation order.
            subnet_prefix: prefix length of each generated subnet.

        Returns:
            ``{tier: {'az-a': cidr, 'az-b': cidr, ...}}`` with CIDRs as strings.

        Raises:
            ValueError: if ``subnet_prefix`` is shorter than the VPC prefix or
                the VPC cannot hold ``zones * len(tiers)`` such subnets.
        """
        tier_names = list(tiers)
        vpc_prefix = self.vpc_network.prefixlen
        if subnet_prefix < vpc_prefix:
            raise ValueError(
                f"subnet_prefix /{subnet_prefix} is larger than the VPC "
                f"network {self.vpc_network}"
            )

        # Fail early with a clear message instead of a bare StopIteration
        # halfway through the allocation.
        required = zones * len(tier_names)
        available = 2 ** (subnet_prefix - vpc_prefix)
        if required > available:
            raise ValueError(
                f"{self.vpc_network} holds only {available} /{subnet_prefix} "
                f"subnets but {required} are required"
            )

        subnet_iter = self.vpc_network.subnets(new_prefix=subnet_prefix)
        subnet_plan = {}
        for tier in tier_names:
            subnet_plan[tier] = {
                # chr(97) == 'a', so zones map to az-a, az-b, az-c, ...
                f'az-{chr(97 + zone)}': str(next(subnet_iter))
                for zone in range(zones)
            }
        return subnet_plan


# Example usage
planner = IPPlanner('10.0.0.0/16')
plan = planner.plan_subnets()
# Result (subnets are allocated sequentially across tiers):
# {
#     'public': {
#         'az-a': '10.0.0.0/24',
#         'az-b': '10.0.1.0/24',
#         'az-c': '10.0.2.0/24'
#     },
#     'private': {
#         'az-a': '10.0.3.0/24',
#         'az-b': '10.0.4.0/24',
#         'az-c': '10.0.5.0/24'
#     },
#     'data': {
#         'az-a': '10.0.6.0/24',
#         'az-b': '10.0.7.0/24',
#         'az-c': '10.0.8.0/24'
#     }
# }
Multi-Region Architecture
Global Network Design
# Global network with peering.
# One instance of the regional-vpc module per region, each with its own /16.
module "vpc_us_east_1" {
  source = "./modules/regional-vpc"

  region     = "us-east-1"
  az_count   = 3
  cidr_block = "10.0.0.0/16"
}
# Regional VPC for eu-west-1 (10.1.0.0/16, three AZs).
module "vpc_eu_west_1" {
  source = "./modules/regional-vpc"

  region     = "eu-west-1"
  az_count   = 3
  cidr_block = "10.1.0.0/16"
}
# Regional VPC for ap-southeast-1 (10.2.0.0/16, three AZs).
module "vpc_ap_southeast_1" {
  source = "./modules/regional-vpc"

  region     = "ap-southeast-1"
  az_count   = 3
  cidr_block = "10.2.0.0/16"
}
# Inter-region peering: requested from the us-east-1 provider towards the
# eu-west-1 VPC.
# NOTE(review): nothing here accepts the request on the eu-west-1 side;
# confirm that is handled elsewhere.
resource "aws_vpc_peering_connection" "us_to_eu" {
  provider = aws.us_east_1

  # Requester side
  vpc_id = module.vpc_us_east_1.vpc_id

  # Peer (accepter) side
  peer_region = "eu-west-1"
  peer_vpc_id = module.vpc_eu_west_1.vpc_id

  tags = {
    Name = "us-east-1-to-eu-west-1"
  }
}
AWS Global Accelerator Integration
# Global Accelerator (IPv4, enabled) with flow logs delivered to the
# flow_logs S3 bucket under the "global-accelerator/" prefix.
resource "aws_globalaccelerator_accelerator" "main" {
  name            = "global-app-accelerator"
  enabled         = true
  ip_address_type = "IPV4"

  attributes {
    flow_logs_enabled   = true
    flow_logs_s3_prefix = "global-accelerator/"
    flow_logs_s3_bucket = aws_s3_bucket.flow_logs.id
  }
}
# Listener on the accelerator: TCP, port 443 only.
resource "aws_globalaccelerator_listener" "main" {
  protocol        = "TCP"
  accelerator_arn = aws_globalaccelerator_accelerator.main.id

  port_range {
    to_port   = 443
    from_port = 443
  }
}
# us-east-1 endpoint group: routes listener traffic to the regional load
# balancer (weight 100) and health-checks it over TCP on port 443.
resource "aws_globalaccelerator_endpoint_group" "us_east_1" {
  listener_arn          = aws_globalaccelerator_listener.main.id
  endpoint_group_region = "us-east-1"

  health_check_protocol = "TCP"
  health_check_port     = 443

  endpoint_configuration {
    weight      = 100
    endpoint_id = aws_lb.us_east_1.arn
  }
}
Security Architecture
Network Segmentation
# Security groups with least privilege.
#
# Rules are declared as standalone aws_security_group_rule resources instead
# of inline ingress/egress blocks. web_tier and app_tier reference each other
# (web egress -> app, app ingress <- web); with inline rules that mutual
# reference is a dependency cycle and Terraform refuses to plan. Inline and
# standalone rules must not be mixed on one group, so both groups use
# standalone rules throughout.
resource "aws_security_group" "web_tier" {
  name_prefix = "web-tier-"
  vpc_id      = aws_vpc.main.id

  # name_prefix yields a fresh name on replacement, so the new group can be
  # created before the old one is destroyed.
  lifecycle {
    create_before_destroy = true
  }
}

resource "aws_security_group" "app_tier" {
  name_prefix = "app-tier-"
  vpc_id      = aws_vpc.main.id

  lifecycle {
    create_before_destroy = true
  }
}

# Web tier: HTTPS in from the load balancer only.
resource "aws_security_group_rule" "web_tier_ingress_from_alb" {
  type                     = "ingress"
  security_group_id        = aws_security_group.web_tier.id
  source_security_group_id = aws_security_group.alb.id
  protocol                 = "tcp"
  from_port                = 443
  to_port                  = 443
}

# Web tier: out to the app tier only. The port is 8080 to match the app
# tier's ingress rule below; the previous 443 egress could never reach it.
resource "aws_security_group_rule" "web_tier_egress_to_app" {
  type                     = "egress"
  security_group_id        = aws_security_group.web_tier.id
  source_security_group_id = aws_security_group.app_tier.id
  protocol                 = "tcp"
  from_port                = 8080
  to_port                  = 8080
}

# App tier: 8080 in from the web tier only.
resource "aws_security_group_rule" "app_tier_ingress_from_web" {
  type                     = "ingress"
  security_group_id        = aws_security_group.app_tier.id
  source_security_group_id = aws_security_group.web_tier.id
  protocol                 = "tcp"
  from_port                = 8080
  to_port                  = 8080
}

# App tier: 3306 out to the data tier only.
resource "aws_security_group_rule" "app_tier_egress_to_data" {
  type                     = "egress"
  security_group_id        = aws_security_group.app_tier.id
  source_security_group_id = aws_security_group.data_tier.id
  protocol                 = "tcp"
  from_port                = 3306
  to_port                  = 3306
}
# NACLs for additional layer.
# Rule 100 on the data-tier NACL: allow TCP 3306 from 10.0.10.0/24.
resource "aws_network_acl_rule" "data_tier_ingress" {
  network_acl_id = aws_network_acl.data_tier.id

  rule_number = 100
  rule_action = "allow"

  protocol   = "tcp"
  cidr_block = "10.0.10.0/24" # App tier subnet
  to_port    = 3306
  from_port  = 3306
}
VPC Flow Logs Analysis
# flow_logs_analyzer.py
import time
from datetime import datetime, timedelta

import boto3
import pandas as pd


class FlowLogsAnalyzer:
    """Run CloudWatch Logs Insights queries against a VPC Flow Logs log group."""

    # Query states that mean "not finished yet". A freshly started query is
    # usually 'Scheduled' before it becomes 'Running'.
    _PENDING_STATUSES = ('Scheduled', 'Running')

    def __init__(self, log_group_name):
        """Create the CloudWatch Logs client and remember the log group name."""
        self.logs_client = boto3.client('logs')
        self.log_group = log_group_name

    def analyze_suspicious_traffic(self, hours=24):
        """Identify suspicious network patterns.

        Returns the 20 (srcaddr, dstport) pairs with the most REJECT records
        over the last ``hours`` hours, as a DataFrame.
        """
        query = """
        fields srcaddr, dstaddr, dstport, protocol, action, packets, bytes
        | filter action = "REJECT"
        | stats count() as rejection_count by srcaddr, dstport
        | sort rejection_count desc
        | limit 20
        """
        return self.execute_query(query, hours=hours)

    def identify_data_exfiltration(self, hours=24):
        """Detect potential data exfiltration.

        Returns (srcaddr, dstaddr) pairs that sent more than 1,000,000,000
        bytes to a destination outside the RFC 1918 private ranges over the
        last ``hours`` hours, as a DataFrame.
        """
        # `in [...]` compares strings literally, so it can never match an
        # address against a CIDR; isIpv4InSubnet performs the subnet test.
        query = """
        fields srcaddr, dstaddr, bytes
        | filter not isIpv4InSubnet(dstaddr, "10.0.0.0/8") and not isIpv4InSubnet(dstaddr, "172.16.0.0/12") and not isIpv4InSubnet(dstaddr, "192.168.0.0/16")
        | stats sum(bytes) as total_bytes by srcaddr, dstaddr
        | filter total_bytes > 1000000000
        | sort total_bytes desc
        """
        return self.execute_query(query, hours=hours)

    def execute_query(self, query, hours=24, poll_interval=1, timeout=300):
        """Start a Logs Insights query, wait for it, and return the rows.

        Args:
            query: Logs Insights query string.
            hours: size of the look-back window ending now.
            poll_interval: seconds to sleep between status checks.
            timeout: maximum seconds to wait for the query to finish.

        Returns:
            pandas.DataFrame with one column per result field.

        Raises:
            TimeoutError: if the query is still pending after ``timeout``.
            RuntimeError: if the query ends in any state other than 'Complete'.
        """
        end_time = datetime.now()
        start_time = end_time - timedelta(hours=hours)
        started = self.logs_client.start_query(
            logGroupName=self.log_group,
            startTime=int(start_time.timestamp()),
            endTime=int(end_time.timestamp()),
            queryString=query,
        )
        query_id = started['queryId']

        deadline = time.monotonic() + timeout
        while True:
            response = self.logs_client.get_query_results(queryId=query_id)
            status = response['status']
            if status not in self._PENDING_STATUSES:
                break
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Query {query_id} still {status} after {timeout}s"
                )
            time.sleep(poll_interval)

        if status != 'Complete':
            raise RuntimeError(f"Query {query_id} ended with status {status}")

        # Each result row is a list of {'field': ..., 'value': ...} cells;
        # flatten it so the DataFrame has real columns instead of dict cells.
        rows = [
            {cell['field']: cell['value'] for cell in row}
            for row in response['results']
        ]
        return pd.DataFrame(rows)
PrivateLink and VPC Endpoints
Service Endpoints Configuration
# S3 VPC Endpoint.
# Gateway endpoint attached to the private route table. The endpoint policy
# allows every S3 action, but only on my-private-bucket and its objects.
resource "aws_vpc_endpoint" "s3" {
  vpc_endpoint_type = "Gateway"
  service_name      = "com.amazonaws.${var.region}.s3"

  vpc_id          = aws_vpc.main.id
  route_table_ids = [aws_route_table.private.id]

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Principal = "*"
        Action    = "s3:*"
        Resource = [
          "arn:aws:s3:::my-private-bucket",
          "arn:aws:s3:::my-private-bucket/*"
        ]
      }
    ]
  })
}
# Interface endpoints for AWS services.
# Map of short key => regional service name; iterated with for_each by the
# interface endpoint resource.
locals {
  interface_endpoints = {
    ec2            = "com.amazonaws.${var.region}.ec2"
    lambda         = "com.amazonaws.${var.region}.lambda"
    rds            = "com.amazonaws.${var.region}.rds"
    secretsmanager = "com.amazonaws.${var.region}.secretsmanager"
  }
}
# One Interface endpoint per entry in local.interface_endpoints, placed in
# every private subnet, with private DNS enabled.
resource "aws_vpc_endpoint" "interface" {
  for_each = local.interface_endpoints

  vpc_endpoint_type   = "Interface"
  service_name        = each.value
  private_dns_enabled = true

  vpc_id             = aws_vpc.main.id
  security_group_ids = [aws_security_group.vpc_endpoints.id]
  subnet_ids         = aws_subnet.private[*].id
}
Hybrid Connectivity
Site-to-Site VPN
# Customer Gateway.
# Describes the on-premises VPN device: its public IP comes from
# var.on_premise_gateway_ip and its BGP ASN is 65000.
resource "aws_customer_gateway" "main" {
  type       = "ipsec.1"
  ip_address = var.on_premise_gateway_ip
  bgp_asn    = 65000

  tags = {
    Name = "main-customer-gateway"
  }
}
# Virtual Private Gateway.
# AWS-side VPN endpoint, attached to the main VPC.
resource "aws_vpn_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name = "main-vpn-gateway"
  }
}
# VPN Connection.
# IPsec connection between the customer gateway and the virtual private
# gateway; static_routes_only is false.
resource "aws_vpn_connection" "main" {
  type               = "ipsec.1"
  static_routes_only = false

  vpn_gateway_id      = aws_vpn_gateway.main.id
  customer_gateway_id = aws_customer_gateway.main.id

  tags = {
    Name = "main-vpn-connection"
  }
}
Direct Connect Configuration
# direct_connect_monitor.py
from datetime import datetime, timedelta, timezone

import boto3


class DirectConnectMonitor:
    """Report the state and recent egress throughput of a Direct Connect link."""

    def __init__(self):
        """Create the Direct Connect and CloudWatch clients."""
        self.dx_client = boto3.client('directconnect')
        self.cw_client = boto3.client('cloudwatch')

    def check_connection_health(self, connection_id):
        """Monitor Direct Connect health.

        Args:
            connection_id: ID of the Direct Connect connection to inspect.

        Returns:
            dict with 'state', 'bandwidth' and 'vlan' from the connection
            description ('vlan' is None when the API omits it), plus
            'bandwidth_utilization' holding the raw get_metric_statistics
            response for ConnectionBpsEgress over the last hour.
        """
        response = self.dx_client.describe_connections(
            connectionId=connection_id
        )
        connection = response['connections'][0]
        health_status = {
            'state': connection['connectionState'],
            'bandwidth': connection['bandwidth'],
            'vlan': connection.get('vlan'),
        }

        # botocore treats naive datetimes as UTC, so a naive local now() would
        # shift the window by the local UTC offset. Use an aware UTC timestamp
        # and take it once so both ends of the window agree.
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(hours=1)

        # Get CloudWatch metrics
        metrics = self.cw_client.get_metric_statistics(
            Namespace='AWS/DX',
            MetricName='ConnectionBpsEgress',
            Dimensions=[
                {'Name': 'ConnectionId', 'Value': connection_id}
            ],
            StartTime=start_time,
            EndTime=end_time,
            Period=300,
            Statistics=['Average', 'Maximum'],
        )
        health_status['bandwidth_utilization'] = metrics
        return health_status
Cost Optimization
NAT Gateway Alternatives
# NAT Instance for cost savings.
# A t3.nano in the first public subnet. The source/destination check is
# turned off, and the boot script enables IPv4 forwarding and masquerades
# traffic leaving eth0.
resource "aws_instance" "nat" {
  ami           = data.aws_ami.nat_instance.id
  instance_type = "t3.nano"

  subnet_id              = aws_subnet.public[0].id
  vpc_security_group_ids = [aws_security_group.nat.id]
  source_dest_check      = false

  tags = {
    Name = "nat-instance"
  }

  user_data = <<-EOF
    #!/bin/bash
    sysctl -w net.ipv4.ip_forward=1
    iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE
    service iptables save
    EOF
}
# Route table for private subnets.
# Default route through the NAT instance. The route targets the instance's
# primary network interface: `instance_id` is no longer a settable argument
# of aws_route in current AWS provider versions (it is a read-only attribute).
resource "aws_route" "private_nat" {
  route_table_id         = aws_route_table.private.id
  destination_cidr_block = "0.0.0.0/0"
  network_interface_id   = aws_instance.nat.primary_network_interface_id
}
VPC Endpoint Cost Analysis
def calculate_endpoint_savings(monthly_data_transfer_gb, nat_gateway_count=1,
                               endpoint_count=1):
    """Calculate savings from using VPC endpoints instead of NAT Gateways.

    Both options are billed per hour and per GB processed. The estimate
    assumes all of ``monthly_data_transfer_gb`` moves from the NAT Gateways
    to the endpoints and that the NAT Gateways are removed.

    Args:
        monthly_data_transfer_gb: data processed per month, in GB.
        nat_gateway_count: number of NAT Gateways being replaced.
        endpoint_count: number of billed interface endpoints.

    Returns:
        dict with 'nat_gateway_cost', 'vpc_endpoint_cost', 'monthly_savings'
        and 'annual_savings', all in USD.

    Raises:
        ValueError: if any argument is negative.
    """
    if monthly_data_transfer_gb < 0 or nat_gateway_count < 0 or endpoint_count < 0:
        raise ValueError("data transfer and resource counts must be non-negative")

    # Example rates in USD - check current pricing for your region.
    hours_per_month = 730
    nat_gateway_hourly = 0.045      # per NAT Gateway hour
    nat_gateway_data_cost = 0.045   # per GB processed
    vpc_endpoint_hourly = 0.01      # per endpoint hour
    vpc_endpoint_data_cost = 0.01   # per GB processed

    # NAT Gateway costs
    nat_monthly_cost = (
        nat_gateway_hourly * hours_per_month * nat_gateway_count
        + monthly_data_transfer_gb * nat_gateway_data_cost
    )

    # VPC Endpoint costs. The per-GB processing charge is included; without
    # it the savings are overstated for any non-trivial traffic volume.
    endpoint_monthly_cost = (
        vpc_endpoint_hourly * hours_per_month * endpoint_count
        + monthly_data_transfer_gb * vpc_endpoint_data_cost
    )

    savings = nat_monthly_cost - endpoint_monthly_cost
    return {
        'nat_gateway_cost': nat_monthly_cost,
        'vpc_endpoint_cost': endpoint_monthly_cost,
        'monthly_savings': savings,
        'annual_savings': savings * 12,
    }
Network Performance Optimization
Enhanced Networking
# Instance for enhanced networking.
#
# `ena_support` and `sriov_net_support` are not aws_instance arguments (they
# are AMI attributes), so setting them here makes the plan fail with
# "Unsupported argument". Enhanced networking comes from pairing an
# ENA-capable instance type with an AMI registered with ENA support.
# NOTE(review): confirm data.aws_ami.amazon_linux_2 resolves to an
# ENA-enabled image.
resource "aws_instance" "high_performance" {
  ami           = data.aws_ami.amazon_linux_2.id
  instance_type = "m5n.large" # Nitro instance

  # Attach the pre-built ENI as the primary interface.
  network_interface {
    network_interface_id = aws_network_interface.high_performance.id
    device_index         = 0
  }
}
# ENI in the first private subnet carrying three fixed private addresses.
resource "aws_network_interface" "high_performance" {
  # Attach multiple IPs for load distribution
  private_ips = ["10.0.1.10", "10.0.1.11", "10.0.1.12"]

  subnet_id = aws_subnet.private[0].id
}
Placement Groups
# Placement group using the "cluster" strategy.
resource "aws_placement_group" "cluster" {
  strategy = "cluster"
  name     = "high-performance-cluster"
}
# Three c5n.18xlarge nodes launched into the cluster placement group, each
# using the matching pre-built ENI as its primary interface.
resource "aws_instance" "cluster_node" {
  count = 3

  instance_type   = "c5n.18xlarge"
  ami             = data.aws_ami.amazon_linux_2.id
  placement_group = aws_placement_group.cluster.id

  # 100 Gbps networking
  network_interface {
    network_interface_id = aws_network_interface.cluster[count.index].id
    device_index         = 0
  }
}
Disaster Recovery
Multi-Region Failover
# vpc_failover.py
import boto3


class VPCFailover:
    """Repoint a Route 53 alias record at the secondary region's load balancer."""

    def __init__(self, primary_region, secondary_region, secondary_lb_name=None):
        """Create the regional clients.

        Args:
            primary_region: AWS region currently serving traffic.
            secondary_region: AWS region to fail over to.
            secondary_lb_name: optional name of the load balancer in the
                secondary region; needed when that region has more than one.
        """
        self.primary_ec2 = boto3.client('ec2', region_name=primary_region)
        self.secondary_ec2 = boto3.client('ec2', region_name=secondary_region)
        self.secondary_elbv2 = boto3.client('elbv2', region_name=secondary_region)
        self.route53 = boto3.client('route53')
        self.secondary_lb_name = secondary_lb_name

    def get_secondary_elb(self):
        """Return the description of the secondary region's load balancer.

        Raises:
            LookupError: if the lookup does not resolve to exactly one load
                balancer, so a failover never points at an arbitrary target.
        """
        lookup = {}
        if self.secondary_lb_name:
            lookup['Names'] = [self.secondary_lb_name]
        found = self.secondary_elbv2.describe_load_balancers(**lookup)['LoadBalancers']
        if len(found) != 1:
            raise LookupError(
                f"Expected exactly one load balancer in the secondary region, "
                f"found {len(found)}; pass secondary_lb_name to disambiguate"
            )
        return found[0]

    def initiate_failover(self, hosted_zone_id, record_name):
        """Failover to secondary region.

        Upserts an alias A record named ``record_name`` in ``hosted_zone_id``
        that targets the secondary load balancer.

        Returns:
            The Route 53 change ID of the submitted change.
        """
        # Get secondary region ELB
        secondary_elb = self.get_secondary_elb()

        # Update Route53 to point to secondary region
        response = self.route53.change_resource_record_sets(
            HostedZoneId=hosted_zone_id,
            ChangeBatch={
                'Changes': [{
                    'Action': 'UPSERT',
                    'ResourceRecordSet': {
                        'Name': record_name,
                        'Type': 'A',
                        'AliasTarget': {
                            'HostedZoneId': secondary_elb['CanonicalHostedZoneId'],
                            'DNSName': secondary_elb['DNSName'],
                            'EvaluateTargetHealth': True,
                        },
                    },
                }]
            },
        )
        return response['ChangeInfo']['Id']
Monitoring and Troubleshooting
VPC Reachability Analyzer
def analyze_connectivity(source_eni_id, destination_eni_id, timeout=300,
                         poll_interval=5):
    """Use VPC Reachability Analyzer on TCP/443 between two ENIs.

    Creates a Network Insights path, starts an analysis on it, and polls
    until the analysis finishes.

    Args:
        source_eni_id: ID of the source network interface.
        destination_eni_id: ID of the destination network interface.
        timeout: maximum seconds to wait for the analysis.
        poll_interval: seconds to sleep between status checks.

    Returns:
        The analysis description dict once its status is 'succeeded'.

    Raises:
        RuntimeError: if the analysis status is 'failed'.
        TimeoutError: if the analysis has not finished within ``timeout``.
    """
    # Imported here so the function is self-contained.
    import time

    import boto3

    ec2 = boto3.client('ec2')
    response = ec2.create_network_insights_path(
        Source=source_eni_id,
        Destination=destination_eni_id,
        Protocol='tcp',
        DestinationPort=443,
    )
    path_id = response['NetworkInsightsPath']['NetworkInsightsPathId']

    # Start analysis
    analysis = ec2.start_network_insights_analysis(
        NetworkInsightsPathId=path_id
    )
    analysis_id = analysis['NetworkInsightsAnalysis']['NetworkInsightsAnalysisId']

    # Poll for results, bounded so a stuck analysis cannot hang the caller.
    deadline = time.monotonic() + timeout
    while True:
        result = ec2.describe_network_insights_analyses(
            NetworkInsightsAnalysisIds=[analysis_id]
        )
        current = result['NetworkInsightsAnalyses'][0]
        status = current['Status']
        if status == 'succeeded':
            return current
        if status == 'failed':
            detail = current.get('StatusMessage', 'no status message')
            raise RuntimeError(f"Analysis failed: {detail}")
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Analysis {analysis_id} still {status} after {timeout}s"
            )
        time.sleep(poll_interval)
Best Practices Checklist
Conclusion
A well-designed VPC is the foundation of secure, scalable AWS infrastructure. Start with clear IP planning, implement defense in depth, and use AWS native services for connectivity. Remember: the network you design today will support your applications for years to come, so invest time in getting it right from the start.