Deploy and manage production-grade Kubernetes clusters on AWS EKS with security, scalability, and cost optimization strategies.
Running Kubernetes on AWS EKS seems straightforward until you face your first production incident at 3 AM. After managing EKS clusters processing millions of requests daily, I've learned what separates hobby projects from enterprise-grade deployments. Here's your complete guide to EKS excellence.
EKS Architecture Design
Multi-AZ High Availability Setup
# eks-cluster.tf
#
# Production EKS cluster: private subnets across AZs, IRSA enabled, Kubernetes
# Secrets envelope-encrypted with a customer-managed KMS key, managed addons,
# and two managed node groups (general baseline + tainted compute pool).
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 19.0"

  cluster_name    = "production-cluster"
  cluster_version = "1.28"

  vpc_id     = module.vpc.vpc_id
  subnet_ids = module.vpc.private_subnets

  # IRSA: lets ServiceAccounts assume IAM roles via the cluster OIDC provider
  # (pod-level AWS permissions, no node-wide credentials).
  enable_irsa = true

  # Cluster endpoint access.
  # FIX(review): the original enabled public access but restricted it to
  # 10.0.0.0/8 — an RFC 1918 range that can never appear as a public source
  # address, so the public endpoint was unreachable in practice. Disabling it
  # makes the private-only intent explicit and removes the exposed endpoint.
  cluster_endpoint_public_access  = false
  cluster_endpoint_private_access = true

  # Envelope-encrypt Kubernetes Secrets with a customer-managed KMS key.
  cluster_encryption_config = {
    provider_key_arn = aws_kms_key.eks.arn
    resources        = ["secrets"]
  }

  # Managed addons. vpc-cni prefix delegation raises the max-pods-per-node
  # limit by assigning /28 prefixes instead of individual secondary IPs.
  cluster_addons = {
    coredns = {
      most_recent = true
    }
    kube-proxy = {
      most_recent = true
    }
    vpc-cni = {
      most_recent = true
      configuration_values = jsonencode({
        env = {
          ENABLE_PREFIX_DELEGATION = "true"
          WARM_PREFIX_TARGET       = "1"
        }
      })
    }
    aws-ebs-csi-driver = {
      most_recent              = true
      service_account_role_arn = module.ebs_csi_irsa.iam_role_arn
    }
  }

  # Node groups.
  eks_managed_node_groups = {
    # Baseline pool for untainted workloads.
    # NOTE(review): SPOT for the baseline pool is cheap but interruptible —
    # confirm these workloads tolerate reclaim, or use ON_DEMAND here.
    general = {
      desired_size = 3
      min_size     = 3
      max_size     = 10

      instance_types = ["t3a.large"]
      capacity_type  = "SPOT"

      labels = {
        Environment = "production"
        Type        = "general"
      }

      taints = []

      # Roll at most a third of the group at a time during upgrades.
      update_config = {
        max_unavailable_percentage = 33
      }
    }

    # Burst pool for CPU-heavy jobs; tainted so only pods that explicitly
    # tolerate "compute" are scheduled here. Scales to zero when idle.
    compute = {
      desired_size = 2
      min_size     = 0
      max_size     = 20

      instance_types = ["c5.2xlarge", "c5a.2xlarge"]
      capacity_type  = "SPOT"

      labels = {
        Environment = "production"
        Type        = "compute"
      }

      taints = [{
        key    = "compute"
        value  = "true"
        effect = "NO_SCHEDULE"
      }]
    }
  }
}
IRSA (IAM Roles for Service Accounts)
Pod-Level AWS Permissions
# irsa.tf
# irsa.tf
# IAM role assumable only by the production/s3-app ServiceAccount through the
# cluster's OIDC provider (IAM Roles for Service Accounts).
module "s3_access_irsa" {
  source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"

  role_name = "s3-access-irsa"

  # Attach the bucket-scoped policy defined below.
  role_policy_arns = {
    policy = aws_iam_policy.s3_access.arn
  }

  oidc_providers = {
    main = {
      provider_arn = module.eks.oidc_provider_arn
      # Trust is restricted to this exact namespace:serviceaccount pair;
      # no other pod in the cluster can assume the role.
      namespace_service_accounts = ["production:s3-app"]
    }
  }
}
# Object-level access to a single application bucket.
# NOTE(review): this grants object operations only. If the app also needs to
# enumerate keys, add "s3:ListBucket" against the bucket ARN itself
# ("arn:aws:s3:::my-app-bucket", no trailing /*) — confirm against the app's
# actual access pattern before widening.
resource "aws_iam_policy" "s3_access" {
  name = "s3-access-policy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "s3:GetObject",
          "s3:PutObject",
          "s3:DeleteObject"
        ]
        # /* scopes the grant to objects inside the bucket.
        Resource = "arn:aws:s3:::my-app-bucket/*"
      }
    ]
  })
}
ServiceAccount Configuration
# service-account.yaml
# ServiceAccount annotated with the IRSA role; pods that use it receive
# temporary AWS credentials for that role via the injected identity token.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: s3-app
  namespace: production
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/s3-access-irsa
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: s3-app
  # FIX(review): the Deployment must live in the same namespace as the
  # ServiceAccount it references; the original omitted the namespace.
  namespace: production
spec:
  # FIX(review): spec.selector is a required Deployment field and must match
  # the pod template labels — the API server rejects the manifest without
  # them; the original omitted both.
  selector:
    matchLabels:
      app: s3-app
  template:
    metadata:
      labels:
        app: s3-app
    spec:
      serviceAccountName: s3-app
      containers:
        - name: app
          # NOTE(review): pin a version tag or digest instead of :latest so
          # rollouts are reproducible.
          image: my-app:latest
          env:
            - name: AWS_REGION
              value: us-east-1
Auto-Scaling Strategies
Cluster Autoscaler
# cluster-autoscaler.yaml
# Cluster Autoscaler deployment: discovers ASGs by tag and scales node
# groups based on pending pods.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    app: cluster-autoscaler
spec:
  # FIX(review): spec.selector and matching template labels are required
  # Deployment fields; the original manifest omitted both and would be
  # rejected by the API server.
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        app: cluster-autoscaler
    spec:
      serviceAccountName: cluster-autoscaler
      containers:
        - name: cluster-autoscaler
          # FIX(review): k8s.gcr.io is frozen and redirected; images are
          # published on registry.k8s.io.
          image: registry.k8s.io/autoscaling/cluster-autoscaler:v1.28.0
          command:
            - ./cluster-autoscaler
            - --v=4
            - --stderrthreshold=info
            - --cloud-provider=aws
            # Don't block scale-down on emptyDir-only pods.
            - --skip-nodes-with-local-storage=false
            # Prefer the node group that wastes the least CPU/memory.
            - --expander=least-waste
            # Discover ASGs by these tags (set on the node groups).
            - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/production-cluster
            - --balance-similar-node-groups
            - --skip-nodes-with-system-pods=false
          env:
            - name: AWS_REGION
              value: us-east-1
Karpenter for Intelligent Scaling
# karpenter-provisioner.yaml
# Karpenter (v1alpha5 API): the Provisioner decides what to launch; the
# AWSNodeTemplate it references decides how to launch it in AWS.
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: default
spec:
  requirements:
    # Allow both spot and on-demand; Karpenter prefers spot when available.
    - key: karpenter.sh/capacity-type
      operator: In
      values: ["spot", "on-demand"]
    - key: node.kubernetes.io/instance-type
      operator: In
      values:
        - t3a.medium
        - t3a.large
        - t3a.xlarge
        - c5.large
        - c5.xlarge
  # Hard cap on aggregate capacity this Provisioner may create.
  limits:
    resources:
      cpu: 1000
      memory: 1000Gi
  providerRef:
    name: default
  # Reap empty nodes after 30s; recycle every node after 30 days so AMI and
  # kubelet versions stay fresh.
  ttlSecondsAfterEmpty: 30
  ttlSecondsUntilExpired: 2592000 # 30 days
---
# FIX(review): the provider CRD in karpenter.k8s.aws/v1alpha1 is
# AWSNodeTemplate — there is no "AWSNodeInstanceProfile" kind, and
# Provisioner.spec.providerRef resolves against an AWSNodeTemplate of the
# same name.
apiVersion: karpenter.k8s.aws/v1alpha1
kind: AWSNodeTemplate
metadata:
  name: default
spec:
  # Subnets/SGs are discovered by this tag (set by the cluster module).
  subnetSelector:
    karpenter.sh/discovery: production-cluster
  securityGroupSelector:
    karpenter.sh/discovery: production-cluster
  # NOTE(review): confirm instanceStorePolicy is supported by the deployed
  # Karpenter version — it was added relatively late in the v1alpha1 API.
  instanceStorePolicy: RAID0
  userData: |
    #!/bin/bash
    /etc/eks/bootstrap.sh production-cluster
    echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
    sysctl -p
Security Best Practices
Network Policies
# network-policies.yaml
# Lock down backend-tier pods: only frontend may reach them (TCP 8080), and
# they may only reach the database tier (TCP 3306) plus cluster DNS.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: backend-netpol
  namespace: production
spec:
  podSelector:
    matchLabels:
      tier: backend
  # Selecting both types means all other ingress AND egress is denied.
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Only frontend-tier pods in this namespace, on the app port.
    - from:
        - podSelector:
            matchLabels:
              tier: frontend
      ports:
        - protocol: TCP
          port: 8080
  egress:
    # MySQL to database-tier pods in this namespace.
    - to:
        - podSelector:
            matchLabels:
              tier: database
      ports:
        - protocol: TCP
          port: 3306
    # DNS to kube-dns in any namespace — without this rule name resolution
    # breaks the moment egress is restricted.
    # NOTE(review): only UDP 53 is allowed here; large responses fall back to
    # TCP 53 — confirm UDP-only is intended.
    - to:
        - namespaceSelector: {}
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - protocol: UDP
          port: 53
Pod Security Standards
# pod-security.yaml
# FIX(review): PodSecurityPolicy (policy/v1beta1) was removed in Kubernetes
# v1.25 and cannot be applied to a v1.28 cluster. The built-in replacement is
# Pod Security Admission: label the namespace with the "restricted" Pod
# Security Standard, which enforces the same core constraints the original
# PSP expressed — no privileged containers, no privilege escalation,
# must-run-as-non-root, all capabilities dropped, and a restricted volume
# type whitelist (configMap, emptyDir, projected, secret, downwardAPI, PVC).
#
# NOTE(review): readOnlyRootFilesystem is NOT part of the restricted
# standard — if that guarantee is required, enforce it with a policy engine
# such as Kyverno or Gatekeeper.
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # Reject pods that violate the restricted profile.
    pod-security.kubernetes.io/enforce: restricted
    # Pin the policy version so cluster upgrades don't silently change it.
    pod-security.kubernetes.io/enforce-version: v1.28
    # Surface violations in audit logs and kubectl warnings as well.
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/warn: restricted
Monitoring and Observability
AWS Container Insights
# container-insights.yaml
# CloudWatch Container Insights: one agent pod per node (DaemonSet),
# authenticated to CloudWatch via IRSA on its ServiceAccount.
apiVersion: v1
kind: Namespace
metadata:
  name: amazon-cloudwatch
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cloudwatch-agent
  namespace: amazon-cloudwatch
  annotations:
    # IRSA role granting the agent CloudWatch write permissions.
    eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/CloudWatchAgentServerRole
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cloudwatch-agent
  namespace: amazon-cloudwatch
spec:
  selector:
    matchLabels:
      name: cloudwatch-agent
  template:
    metadata:
      labels:
        name: cloudwatch-agent
    spec:
      serviceAccountName: cloudwatch-agent
      containers:
        - name: cloudwatch-agent
          # NOTE(review): pin a specific agent version instead of :latest so
          # node restarts don't silently pull a different build.
          image: amazon/cloudwatch-agent:latest
          resources:
            # FIX(review): the original set only limits — with no requests
            # the scheduler reserves nothing for the agent. Requests equal to
            # limits make the pod Guaranteed QoS.
            requests:
              cpu: 200m
              memory: 200Mi
            limits:
              cpu: 200m
              memory: 200Mi
          env:
            # Node/pod identity the agent tags metrics with, sourced from
            # the downward API.
            - name: HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            - name: HOST_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: K8S_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
Custom Metrics with CloudWatch
# custom_metrics.py
import boto3
from kubernetes import client, config
def publish_cluster_metrics(region_name='us-east-1', namespace='EKS/Custom'):
    """Publish node/pod health counts for the current cluster to CloudWatch.

    Emits four ``Count`` metrics: NodeCount, ReadyNodes, PodCount,
    RunningPods. Must run inside the cluster — it authenticates with the
    in-cluster service-account config.

    Args:
        region_name: AWS region to publish to. Defaults to ``us-east-1``
            (the value previously hard-coded), so existing callers are
            unaffected.
        namespace: CloudWatch metric namespace. Defaults to ``EKS/Custom``.
    """
    # Load kubeconfig from the mounted service-account token.
    config.load_incluster_config()
    v1 = client.CoreV1Api()

    cloudwatch = boto3.client('cloudwatch', region_name=region_name)

    nodes = v1.list_node()
    pods = v1.list_pod_for_all_namespaces()

    total_nodes = len(nodes.items)
    # A node is "ready" when its Ready condition reports the string 'True'.
    # FIX(review): status.conditions can be None on a freshly registered
    # node; the original iterated it unguarded and would raise TypeError.
    ready_nodes = sum(
        1 for node in nodes.items
        if any(c.type == 'Ready' and c.status == 'True'
               for c in (node.status.conditions or []))
    )

    total_pods = len(pods.items)
    running_pods = sum(1 for pod in pods.items
                       if pod.status.phase == 'Running')

    def _datum(name, value):
        # One CloudWatch metric datum; every metric here is a simple count.
        return {'MetricName': name, 'Value': value, 'Unit': 'Count'}

    cloudwatch.put_metric_data(
        Namespace=namespace,
        MetricData=[
            _datum('NodeCount', total_nodes),
            _datum('ReadyNodes', ready_nodes),
            _datum('PodCount', total_pods),
            _datum('RunningPods', running_pods),
        ],
    )
Cost Optimization
Spot Instance Configuration
# spot-node-pool.yaml
# eksctl nodegroup built entirely from Spot capacity, diversified across
# three instance pools and tagged for Cluster Autoscaler auto-discovery.
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: production-cluster
  region: us-east-1
nodeGroups:
  - name: spot-node-group
    instancesDistribution:
      # NOTE(review): $0.05/hr is below t3a.large's typical on-demand price
      # (~$0.075); a cap this tight can starve the group during spot price
      # spikes — confirm, or omit maxPrice to default to the on-demand price.
      maxPrice: 0.05
      # Diversified types reduce the chance of simultaneous reclaim.
      instanceTypes:
        - t3a.large
        - t3.large
        - t2.large
      # 100% Spot: no on-demand base capacity at all.
      onDemandBaseCapacity: 0
      onDemandPercentageAboveBaseCapacity: 0
      spotInstancePools: 3
    minSize: 2
    maxSize: 10
    desiredCapacity: 3
    volumeSize: 100
    volumeType: gp3
    labels:
      lifecycle: Ec2Spot
      capacity-type: spot
    # PreferNoSchedule: scheduler avoids this pool unless the pod tolerates
    # the taint or nothing else has room — a soft nudge, not a hard block.
    taints:
      - key: spotInstance
        value: "true"
        effect: PreferNoSchedule
    # Tags Cluster Autoscaler matches for node-group auto-discovery.
    tags:
      k8s.io/cluster-autoscaler/enabled: "true"
      k8s.io/cluster-autoscaler/production-cluster: "owned"
Resource Quotas
# resource-quotas.yaml
# Namespace-level guardrails: the ResourceQuota caps aggregate consumption
# in `production`, while the LimitRange supplies per-container defaults and
# hard bounds (required so pods without explicit requests still count
# against the quota).
apiVersion: v1
kind: ResourceQuota
metadata:
  name: compute-quota
  namespace: production
spec:
  hard:
    requests.cpu: "100"
    requests.memory: 200Gi
    limits.cpu: "200"
    limits.memory: 400Gi
    persistentvolumeclaims: "10"
    # Each type=LoadBalancer Service provisions a cloud LB — cap them.
    services.loadbalancers: "2"
---
apiVersion: v1
kind: LimitRange
metadata:
  name: cpu-mem-limit-range
  namespace: production
spec:
  limits:
    # Defaults applied to containers that omit their own limits/requests.
    - default:
        cpu: "1"
        memory: "1Gi"
      defaultRequest:
        cpu: "100m"
        memory: "128Mi"
      # Hard per-container bounds; pods outside them are rejected.
      max:
        cpu: "4"
        memory: "8Gi"
      min:
        cpu: "50m"
        memory: "64Mi"
      type: Container
Backup and Disaster Recovery
Velero for Cluster Backup
#!/bin/bash
# install-velero.sh
#
# Install Velero with the AWS plugin, then create a daily backup schedule
# and a one-off disaster-recovery backup.
#
# FIX(review): fail fast on any error — the original kept going and would
# create schedules against a broken/absent Velero deployment.
set -euo pipefail

# Install Velero; backup state lives in the eks-backups S3 bucket and EBS
# snapshots are taken in us-east-1.
velero install \
  --provider aws \
  --plugins velero/velero-plugin-for-aws:v1.7.0 \
  --bucket eks-backups \
  --backup-location-config region=us-east-1 \
  --snapshot-location-config region=us-east-1 \
  --secret-file ./credentials-velero

# Daily backup at 02:00, retained for 30 days (720h).
velero schedule create daily-backup \
  --schedule="0 2 * * *" \
  --ttl 720h \
  --include-namespaces production,staging

# One-off DR backup of production; --wait blocks until completion so a
# failure is visible in this script's exit status.
velero backup create dr-backup \
  --include-namespaces production \
  --wait
Cross-Region Replication
# dr_replication.py
import boto3
import subprocess
import json
def replicate_to_dr_region(source_region='us-east-1',
                           dr_region='us-west-2',
                           backup_path='cluster-backup.yaml'):
    """Prepare DR: dump cluster objects to YAML and mirror ECR to dr_region.

    Args:
        source_region: region of the live cluster / source ECR registry.
        dr_region: region to replicate repositories into.
        backup_path: file the kubectl resource dump is written to
            (defaults to the original hard-coded name).

    Raises:
        subprocess.CalledProcessError: if the kubectl export fails.
    """
    # FIX(review): the original passed '>' as a plain argv element, so
    # kubectl received it as a literal argument and no file was ever
    # written (shell redirection only happens with shell=True). Capture
    # stdout and write the file explicitly; check=True surfaces failures.
    result = subprocess.run(
        ['kubectl', 'get', 'all', '--all-namespaces', '-o', 'yaml'],
        check=True, capture_output=True, text=True)
    with open(backup_path, 'w') as f:
        f.write(result.stdout)

    ecr_source = boto3.client('ecr', region_name=source_region)
    ecr_dr = boto3.client('ecr', region_name=dr_region)

    # NOTE(review): describe_repositories is paginated (100 per page max);
    # add a paginator if the registry is larger than that.
    repos = ecr_source.describe_repositories()['repositories']

    # Pre-create each repository name in the DR region.
    for repo in repos:
        try:
            ecr_dr.create_repository(
                repositoryName=repo['repositoryName']
            )
        except ecr_dr.exceptions.RepositoryAlreadyExistsException:
            pass  # already mirrored on a previous run — fine

    # FIX(review): put_replication_configuration is registry-wide, not
    # per-repository — the original called it once per repo inside the
    # loop, rewriting the identical configuration each iteration. One call
    # after the loop is sufficient (registryId is the account registry,
    # the same for every repo).
    if repos:
        ecr_source.put_replication_configuration(
            replicationConfiguration={
                'rules': [{
                    'destinations': [{
                        'region': dr_region,
                        'registryId': repos[0]['registryId']
                    }]
                }]
            }
        )
Load Balancing and Ingress
AWS Load Balancer Controller
# alb-ingress.yaml
# Internet-facing ALB provisioned by the AWS Load Balancer Controller:
# TLS via ACM, HTTP->HTTPS redirect, WAFv2, and IP-mode targets (traffic
# goes straight to pod IPs rather than NodePorts).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: main-ingress
  namespace: production
  annotations:
    # NOTE(review): the kubernetes.io/ingress.class annotation is deprecated
    # in favor of spec.ingressClassName — confirm the controller version
    # before switching, since older releases only honor the annotation.
    kubernetes.io/ingress.class: alb
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/target-type: ip
    # Target-group health checks.
    alb.ingress.kubernetes.io/healthcheck-path: /health
    alb.ingress.kubernetes.io/healthcheck-interval-seconds: '15'
    alb.ingress.kubernetes.io/healthcheck-timeout-seconds: '5'
    alb.ingress.kubernetes.io/healthy-threshold-count: '2'
    alb.ingress.kubernetes.io/unhealthy-threshold-count: '2'
    # Listen on 80 and 443; ssl-redirect sends port-80 traffic to 443.
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    alb.ingress.kubernetes.io/ssl-redirect: '443'
    alb.ingress.kubernetes.io/certificate-arn: arn:aws:acm:us-east-1:123456789:certificate/abc
    # Attach a WAFv2 web ACL for L7 request filtering.
    alb.ingress.kubernetes.io/wafv2-acl-arn: arn:aws:wafv2:us-east-1:123456789:global/webacl/my-waf/abc
spec:
  rules:
    - host: api.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: api-service
                port:
                  number: 8080
Secrets Management
AWS Secrets Manager Integration
# external-secrets.yaml
# External Secrets Operator: the SecretStore authenticates to AWS Secrets
# Manager via IRSA, and the ExternalSecret materializes selected remote
# secrets as a native Kubernetes Secret that pods can mount normally.
apiVersion: external-secrets.io/v1beta1
kind: SecretStore
metadata:
  name: aws-secrets
  namespace: production
spec:
  provider:
    aws:
      service: SecretsManager
      region: us-east-1
      auth:
        # JWT auth = IRSA: the operator exchanges this ServiceAccount's
        # projected token for the annotated IAM role's credentials.
        jwt:
          serviceAccountRef:
            name: external-secrets-sa
---
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: app-secrets
  namespace: production
spec:
  # Re-sync hourly so rotated values in Secrets Manager propagate.
  refreshInterval: 1h
  secretStoreRef:
    name: aws-secrets
    kind: SecretStore
  target:
    # Name of the generated Kubernetes Secret. Owner: the operator manages
    # its full lifecycle (created/updated/deleted with this resource).
    name: app-secrets
    creationPolicy: Owner
  # Each entry maps one Secrets Manager key to one key in the K8s Secret.
  data:
    - secretKey: database-password
      remoteRef:
        key: production/rds/password
    - secretKey: api-key
      remoteRef:
        key: production/api/key
Logging with Fluent Bit
Centralized Logging to CloudWatch
# fluent-bit.yaml
# Fluent Bit pipeline: tail container logs, enrich with Kubernetes
# metadata, ship to CloudWatch Logs.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluent-bit-config
  namespace: amazon-cloudwatch
data:
  fluent-bit.conf: |
    [SERVICE]
        Flush             5
        Log_Level         info
        Daemon            off

    [INPUT]
        Name              tail
        Tag               kube.*
        Path              /var/log/containers/*.log
        # FIX(review): EKS >= 1.24 runs containerd, whose log lines are in
        # CRI format — the 'docker' JSON parser no longer matches and every
        # record would fail to parse on a 1.28 cluster. The stock fluent-bit
        # image ships a 'cri' parser in its bundled parsers.conf.
        Parser            cri
        # Position DB so restarts resume instead of re-reading files.
        DB                /var/log/flb_kube.db
        Mem_Buf_Limit     50MB
        Skip_Long_Lines   On

    [FILTER]
        Name              kubernetes
        Match             kube.*
        Kube_URL          https://kubernetes.default.svc:443
        Kube_CA_File      /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File   /var/run/secrets/kubernetes.io/serviceaccount/token
        Merge_Log         On
        K8S-Logging.Parser  On
        K8S-Logging.Exclude On

    [OUTPUT]
        Name              cloudwatch_logs
        Match             *
        region            us-east-1
        log_group_name    /eks/production-cluster
        # One stream per node.
        log_stream_prefix ${HOSTNAME}-
        auto_create_group true
Troubleshooting Guide
Common Issues and Solutions
# Troubleshooting cheat-sheet. Replace <node-name>/<pod-name>/<namespace>
# placeholders before running; commands are read-only except the last.

# Debug node issues
kubectl describe node <node-name>
kubectl get events --sort-by='.lastTimestamp'

# Check pod scheduling issues
kubectl describe pod <pod-name> -n <namespace>
kubectl get pods --all-namespaces -o wide | grep -E "Pending|Error|CrashLoop"

# Investigate resource constraints (requires metrics-server)
kubectl top nodes
kubectl top pods --all-namespaces

# Check cluster autoscaler logs
kubectl logs -n kube-system deployment/cluster-autoscaler

# Verify IRSA configuration
# ROLE_ARN / TOKEN_FILE must be set; TOKEN_FILE is the projected
# service-account token path inside the pod.
aws sts assume-role-with-web-identity \
  --role-arn $ROLE_ARN \
  --role-session-name test \
  --web-identity-token file://$TOKEN_FILE \
  --duration-seconds 3600

# Test network connectivity (--rm deletes the debug pod on shell exit)
kubectl run tmp-shell --rm -i --tty --image nicolaka/netshoot -- /bin/bash
Best Practices Checklist
Before calling a cluster production-ready, verify: private (or tightly scoped) API endpoint access; IRSA for all pod-level AWS access; secrets encrypted with a customer-managed KMS key; network policies and Pod Security enforcement in place; autoscaling tested under realistic load; resource quotas and limit ranges set per namespace; centralized logging and Container Insights enabled; Velero backups scheduled and restore-tested; and spot interruption handling validated.
Conclusion
Running EKS in production requires careful attention to security, scalability, and cost optimization. Start with a solid foundation using managed services where possible, implement comprehensive monitoring, and always plan for failure. With these practices, your EKS clusters will be resilient, secure, and cost-effective.