Deploy and manage production-grade Kubernetes clusters on AWS EKS with security, scalability, and cost optimization strategies.
Running Kubernetes on AWS EKS seems straightforward until you face your first production incident at 3 AM. After managing EKS clusters processing millions of requests daily, I've learned what separates hobby projects from enterprise-grade deployments. Here's your complete guide to EKS excellence.
EKS Architecture Design
Multi-AZ High Availability Setup
# eks-cluster.tf
#
# Production EKS cluster: private subnets across AZs, IRSA enabled, Kubernetes
# Secrets envelope-encrypted with a customer-managed KMS key, managed addons,
# and two managed node groups (general baseline + tainted compute pool).
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 19.0"

  cluster_name    = "production-cluster"
  cluster_version = "1.28"

  vpc_id     = module.vpc.vpc_id
  subnet_ids = module.vpc.private_subnets

  # IRSA: lets ServiceAccounts assume IAM roles via the cluster OIDC provider
  # (pod-level AWS permissions, no node-wide credentials).
  enable_irsa = true

  # Cluster endpoint access.
  # FIX(review): the original enabled public access but restricted it to
  # 10.0.0.0/8 — an RFC 1918 range that can never appear as a public source
  # address, so the public endpoint was unreachable in practice. Disabling it
  # makes the private-only intent explicit and removes the exposed endpoint.
  cluster_endpoint_public_access  = false
  cluster_endpoint_private_access = true

  # Envelope-encrypt Kubernetes Secrets with a customer-managed KMS key.
  cluster_encryption_config = {
    provider_key_arn = aws_kms_key.eks.arn
    resources        = ["secrets"]
  }

  # Managed addons. vpc-cni prefix delegation raises the max-pods-per-node
  # limit by assigning /28 prefixes instead of individual secondary IPs.
  cluster_addons = {
    coredns = {
      most_recent = true
    }
    kube-proxy = {
      most_recent = true
    }
    vpc-cni = {
      most_recent = true
      configuration_values = jsonencode({
        env = {
          ENABLE_PREFIX_DELEGATION = "true"
          WARM_PREFIX_TARGET       = "1"
        }
      })
    }
    aws-ebs-csi-driver = {
      most_recent              = true
      service_account_role_arn = module.ebs_csi_irsa.iam_role_arn
    }
  }

  # Node groups.
  eks_managed_node_groups = {
    # Baseline pool for untainted workloads.
    # NOTE(review): SPOT for the baseline pool is cheap but interruptible —
    # confirm these workloads tolerate reclaim, or use ON_DEMAND here.
    general = {
      desired_size = 3
      min_size     = 3
      max_size     = 10

      instance_types = ["t3a.large"]
      capacity_type  = "SPOT"

      labels = {
        Environment = "production"
        Type        = "general"
      }

      taints = []

      # Roll at most a third of the group at a time during upgrades.
      update_config = {
        max_unavailable_percentage = 33
      }
    }

    # Burst pool for CPU-heavy jobs; tainted so only pods that explicitly
    # tolerate "compute" are scheduled here. Scales to zero when idle.
    compute = {
      desired_size = 2
      min_size     = 0
      max_size     = 20

      instance_types = ["c5.2xlarge", "c5a.2xlarge"]
      capacity_type  = "SPOT"

      labels = {
        Environment = "production"
        Type        = "compute"
      }

      taints = [{
        key    = "compute"
        value  = "true"
        effect = "NO_SCHEDULE"
      }]
    }
  }
}
IRSA (IAM Roles for Service Accounts)
Pod-Level AWS Permissions
# irsa.tf
# irsa.tf
# IAM role assumable only by the production/s3-app ServiceAccount through the
# cluster's OIDC provider (IAM Roles for Service Accounts).
module "s3_access_irsa" {
  source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"

  role_name = "s3-access-irsa"

  # Attach the bucket-scoped policy defined below.
  role_policy_arns = {
    policy = aws_iam_policy.s3_access.arn
  }

  oidc_providers = {
    main = {
      provider_arn = module.eks.oidc_provider_arn
      # Trust is restricted to this exact namespace:serviceaccount pair;
      # no other pod in the cluster can assume the role.
      namespace_service_accounts = ["production:s3-app"]
    }
  }
}
# Object-level access to a single application bucket.
# NOTE(review): this grants object operations only. If the app also needs to
# enumerate keys, add "s3:ListBucket" against the bucket ARN itself
# ("arn:aws:s3:::my-app-bucket", no trailing /*) — confirm against the app's
# actual access pattern before widening.
resource "aws_iam_policy" "s3_access" {
  name = "s3-access-policy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "s3:GetObject",
          "s3:PutObject",
          "s3:DeleteObject"
        ]
        # /* scopes the grant to objects inside the bucket.
        Resource = "arn:aws:s3:::my-app-bucket/*"
      }
    ]
  })
}
ServiceAccount Configuration
# service-account.yaml
# ServiceAccount annotated with the IRSA role; pods that use it receive
# temporary AWS credentials for that role via the injected identity token.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: s3-app
  namespace: production
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/s3-access-irsa
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: s3-app
  # FIX(review): the Deployment must live in the same namespace as the
  # ServiceAccount it references; the original omitted the namespace.
  namespace: production
spec:
  # FIX(review): spec.selector is a required Deployment field and must match
  # the pod template labels — the API server rejects the manifest without
  # them; the original omitted both.
  selector:
    matchLabels:
      app: s3-app
  template:
    metadata:
      labels:
        app: s3-app
    spec:
      serviceAccountName: s3-app
      containers:
        - name: app
          # NOTE(review): pin a version tag or digest instead of :latest so
          # rollouts are reproducible.
          image: my-app:latest
          env:
            - name: AWS_REGION
              value: us-east-1
Auto-Scaling Strategies
Cluster Autoscaler
# cluster-autoscaler.yaml
# Cluster Autoscaler deployment: discovers ASGs by tag and scales node
# groups based on pending pods.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    app: cluster-autoscaler
spec:
  # FIX(review): spec.selector and matching template labels are required
  # Deployment fields; the original manifest omitted both and would be
  # rejected by the API server.
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        app: cluster-autoscaler
    spec:
      serviceAccountName: cluster-autoscaler
      containers:
        - name: cluster-autoscaler
          # FIX(review): k8s.gcr.io is frozen and redirected; images are
          # published on registry.k8s.io.
          image: registry.k8s.io/autoscaling/cluster-autoscaler:v1.28.0
          command:
            - ./cluster-autoscaler
            - --v=4
            - --stderrthreshold=info
            - --cloud-provider=aws
            # Don't block scale-down on emptyDir-only pods.
            - --skip-nodes-with-local-storage=false
            # Prefer the node group that wastes the least CPU/memory.
            - --expander=least-waste
            # Discover ASGs by these tags (set on the node groups).
            - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/production-cluster
            - --balance-similar-node-groups
            - --skip-nodes-with-system-pods=false
          env:
            - name: AWS_REGION
              value: us-east-1
Karpenter for Intelligent Scaling
# karpenter-provisioner.yaml
# Karpenter (v1alpha5 API): the Provisioner decides what to launch; the
# AWSNodeTemplate it references decides how to launch it in AWS.
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: default
spec:
  requirements:
    # Allow both spot and on-demand; Karpenter prefers spot when available.
    - key: karpenter.sh/capacity-type
      operator: In
      values: ["spot", "on-demand"]
    - key: node.kubernetes.io/instance-type
      operator: In
      values:
        - t3a.medium
        - t3a.large
        - t3a.xlarge
        - c5.large
        - c5.xlarge
  # Hard cap on aggregate capacity this Provisioner may create.
  limits:
    resources:
      cpu: 1000
      memory: 1000Gi
  providerRef:
    name: default
  # Reap empty nodes after 30s; recycle every node after 30 days so AMI and
  # kubelet versions stay fresh.
  ttlSecondsAfterEmpty: 30
  ttlSecondsUntilExpired: 2592000 # 30 days
---
# FIX(review): the provider CRD in karpenter.k8s.aws/v1alpha1 is
# AWSNodeTemplate — there is no "AWSNodeInstanceProfile" kind, and
# Provisioner.spec.providerRef resolves against an AWSNodeTemplate of the
# same name.
apiVersion: karpenter.k8s.aws/v1alpha1
kind: AWSNodeTemplate
metadata:
  name: default
spec:
  # Subnets/SGs are discovered by this tag (set by the cluster module).
  subnetSelector:
    karpenter.sh/discovery: production-cluster
  securityGroupSelector:
    karpenter.sh/discovery: production-cluster
  # NOTE(review): confirm instanceStorePolicy is supported by the deployed
  # Karpenter version — it was added relatively late in the v1alpha1 API.
  instanceStorePolicy: RAID0
  userData: |
    #!/bin/bash
    /etc/eks/bootstrap.sh production-cluster
    echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
    sysctl -p
Security Best Practices
Network Policies
# network-policies.yaml
# Lock down backend-tier pods: only frontend may reach them (TCP 8080), and
# they may only reach the database tier (TCP 3306) plus cluster DNS.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: backend-netpol
  namespace: production
spec:
  podSelector:
    matchLabels:
      tier: backend
  # Selecting both types means all other ingress AND egress is denied.
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Only frontend-tier pods in this namespace, on the app port.
    - from:
        - podSelector:
            matchLabels:
              tier: frontend
      ports:
        - protocol: TCP
          port: 8080
  egress:
    # MySQL to database-tier pods in this namespace.
    - to:
        - podSelector:
            matchLabels:
              tier: database
      ports:
        - protocol: TCP
          port: 3306
    # DNS to kube-dns in any namespace — without this rule name resolution
    # breaks the moment egress is restricted.
    # NOTE(review): only UDP 53 is allowed here; large responses fall back to
    # TCP 53 — confirm UDP-only is intended.
    - to:
        - namespaceSelector: {}
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - protocol: UDP
          port: 53
Pod Security Standards
# pod-security.yaml
# FIX(review): PodSecurityPolicy (policy/v1beta1) was removed in Kubernetes
# v1.25 and cannot be applied to a v1.28 cluster. The built-in replacement is
# Pod Security Admission: label the namespace with the "restricted" Pod
# Security Standard, which enforces the same core constraints the original
# PSP expressed — no privileged containers, no privilege escalation,
# must-run-as-non-root, all capabilities dropped, and a restricted volume
# type whitelist (configMap, emptyDir, projected, secret, downwardAPI, PVC).
#
# NOTE(review): readOnlyRootFilesystem is NOT part of the restricted
# standard — if that guarantee is required, enforce it with a policy engine
# such as Kyverno or Gatekeeper.
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # Reject pods that violate the restricted profile.
    pod-security.kubernetes.io/enforce: restricted
    # Pin the policy version so cluster upgrades don't silently change it.
    pod-security.kubernetes.io/enforce-version: v1.28
    # Surface violations in audit logs and kubectl warnings as well.
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/warn: restricted
Monitoring and Observability
AWS Container Insights
# container-insights.yaml
# CloudWatch Container Insights: one agent pod per node (DaemonSet),
# authenticated to CloudWatch via IRSA on its ServiceAccount.
apiVersion: v1
kind: Namespace
metadata:
  name: amazon-cloudwatch
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cloudwatch-agent
  namespace: amazon-cloudwatch
  annotations:
    # IRSA role granting the agent CloudWatch write permissions.
    eks.amazonaws.com/role-arn: arn:aws:iam::123456789:role/CloudWatchAgentServerRole
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cloudwatch-agent
  namespace: amazon-cloudwatch
spec:
  selector:
    matchLabels:
      name: cloudwatch-agent
  template:
    metadata:
      labels:
        name: cloudwatch-agent
    spec:
      serviceAccountName: cloudwatch-agent
      containers:
        - name: cloudwatch-agent
          # NOTE(review): pin a specific agent version instead of :latest so
          # node restarts don't silently pull a different build.
          image: amazon/cloudwatch-agent:latest
          resources:
            # FIX(review): the original set only limits — with no requests
            # the scheduler reserves nothing for the agent. Requests equal to
            # limits make the pod Guaranteed QoS.
            requests:
              cpu: 200m
              memory: 200Mi
            limits:
              cpu: 200m
              memory: 200Mi
          env:
            # Node/pod identity the agent tags metrics with, sourced from
            # the downward API.
            - name: HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            - name: HOST_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: K8S_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
Custom Metrics with CloudWatch
# custom_metrics.py
import boto3
from kubernetes import client, config
def publish_cluster_metrics(region_name='us-east-1', namespace='EKS/Custom'):
    """Publish node/pod health counts for the current cluster to CloudWatch.

    Emits four ``Count`` metrics: NodeCount, ReadyNodes, PodCount,
    RunningPods. Must run inside the cluster — it authenticates with the
    in-cluster service-account config.

    Args:
        region_name: AWS region to publish to. Defaults to ``us-east-1``
            (the value previously hard-coded), so existing callers are
            unaffected.
        namespace: CloudWatch metric namespace. Defaults to ``EKS/Custom``.
    """
    # Load kubeconfig from the mounted service-account token.
    config.load_incluster_config()
    v1 = client.CoreV1Api()

    cloudwatch = boto3.client('cloudwatch', region_name=region_name)

    nodes = v1.list_node()
    pods = v1.list_pod_for_all_namespaces()

    total_nodes = len(nodes.items)
    # A node is "ready" when its Ready condition reports the string 'True'.
    # FIX(review): status.conditions can be None on a freshly registered
    # node; the original iterated it unguarded and would raise TypeError.
    ready_nodes = sum(
        1 for node in nodes.items
        if any(c.type == 'Ready' and c.status == 'True'
               for c in (node.status.conditions or []))
    )

    total_pods = len(pods.items)
    running_pods = sum(1 for pod in pods.items
                       if pod.status.phase == 'Running')

    def _datum(name, value):
        # One CloudWatch metric datum; every metric here is a simple count.
        return {'MetricName': name, 'Value': value, 'Unit': 'Count'}

    cloudwatch.put_metric_data(
        Namespace=namespace,
        MetricData=[
            _datum('NodeCount', total_nodes),
            _datum('ReadyNodes', ready_nodes),
            _datum('PodCount', total_pods),
            _datum('RunningPods', running_pods),
        ],
    )
Cost Optimization
Spot Instance Configuration
# spot-node-pool.yaml
# eksctl nodegroup built entirely from Spot capacity, diversified across
# three instance pools and tagged for Cluster Autoscaler auto-discovery.
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: production-cluster
  region: us-east-1
nodeGroups:
  - name: spot-node-group
    instancesDistribution:
      # NOTE(review): $0.05/hr is below t3a.large's typical on-demand price
      # (~$0.075); a cap this tight can starve the group during spot price
      # spikes — confirm, or omit maxPrice to default to the on-demand price.
      maxPrice: 0.05
      # Diversified types reduce the chance of simultaneous reclaim.
      instanceTypes:
        - t3a.large
        - t3.large
        - t2.large
      # 100% Spot: no on-demand base capacity at all.
      onDemandBaseCapacity: 0
      onDemandPercentageAboveBaseCapacity: 0
      spotInstancePools: 3
    minSize: 2
    maxSize: 10
    desiredCapacity: 3
    volumeSize: 100
    volumeType: gp3
    labels:
      lifecycle: Ec2Spot
      capacity-type: spot
    # PreferNoSchedule: scheduler avoids this pool unless the pod tolerates
    # the taint or nothing else has room — a soft nudge, not a hard block.
    taints:
      - key: spotInstance
        value: "true"
        effect: PreferNoSchedule
    # Tags Cluster Autoscaler matches for node-group auto-discovery.
    tags:
      k8s.io/cluster-autoscaler/enabled: "true"
      k8s.io/cluster-autoscaler/production-cluster: "owned"
Resource Quotas
# resource-quotas.yaml
# Namespace-level guardrails: the ResourceQuota caps aggregate consumption
# in `production`, while the LimitRange supplies per-container defaults and
# hard bounds (required so pods without explicit requests still count
# against the quota).
apiVersion: v1
kind: ResourceQuota
metadata:
  name: compute-quota
  namespace: production
spec:
  hard:
    requests.cpu: "100"
    requests.memory: 200Gi
    limits.cpu: "200"
    limits.memory: 400Gi
    persistentvolumeclaims: "10"
    # Each type=LoadBalancer Service provisions a cloud LB — cap them.
    services.loadbalancers: "2"
---
apiVersion: v1
kind: LimitRange
metadata:
  name: cpu-mem-limit-range
  namespace: production
spec:
  limits:
    # Defaults applied to containers that omit their own limits/requests.
    - default:
        cpu: "1"
        memory: "1Gi"
      defaultRequest:
        cpu: "100m"
        memory: "128Mi"
      # Hard per-container bounds; pods outside them are rejected.
      max:
        cpu: "4"
        memory: "8Gi"
      min:
        cpu: "50m"
        memory: "64Mi"
      type: Container
Backup and Disaster Recovery
Velero for Cluster Backup
#!/bin/bash
# install-velero.sh
#
# Install Velero with the AWS plugin, then create a daily backup schedule
# and a one-off disaster-recovery backup.
#
# FIX(review): fail fast on any error — the original kept going and would
# create schedules against a broken/absent Velero deployment.
set -euo pipefail

# Install Velero; backup state lives in the eks-backups S3 bucket and EBS
# snapshots are taken in us-east-1.
velero install \
  --provider aws \
  --plugins velero/velero-plugin-for-aws:v1.7.0 \
  --bucket eks-backups \
  --backup-location-config region=us-east-1 \
  --snapshot-location-config region=us-east-1 \
  --secret-file ./credentials-velero

# Daily backup at 02:00, retained for 30 days (720h).
velero schedule create daily-backup \
  --schedule="0 2 * * *" \
  --ttl 720h \
  --include-namespaces production,staging

# One-off DR backup of production; --wait blocks until completion so a
# failure is visible in this script's exit status.
velero backup create dr-backup \
  --include-namespaces production \
  --wait
Cross-Region Replication
# dr_replication.py
import boto3
import subprocess
import json
def replicate_to_dr_region(source_region='us-east-1',
                           dr_region='us-west-2',
                           backup_path='cluster-backup.yaml'):
    """Prepare DR: dump cluster objects to YAML and mirror ECR to dr_region.

    Args:
        source_region: region of the live cluster / source ECR registry.
        dr_region: region to replicate repositories into.
        backup_path: file the kubectl resource dump is written to
            (defaults to the original hard-coded name).

    Raises:
        subprocess.CalledProcessError: if the kubectl export fails.
    """
    # FIX(review): the original passed '>' as a plain argv element, so
    # kubectl received it as a literal argument and no file was ever
    # written (shell redirection only happens with shell=True). Capture
    # stdout and write the file explicitly; check=True surfaces failures.
    result = subprocess.run(
        ['kubectl', 'get', 'all', '--all-namespaces', '-o', 'yaml'],
        check=True, capture_output=True, text=True)
    with open(backup_path, 'w') as f:
        f.write(result.stdout)

    ecr_source = boto3.client('ecr', region_name=source_region)
    ecr_dr = boto3.client('ecr', region_name=dr_region)

    # NOTE(review): describe_repositories is paginated (100 per page max);
    # add a paginator if the registry is larger than that.
    repos = ecr_source.describe_repositories()['repositories']

    # Pre-create each repository name in the DR region.
    for repo in repos:
        try:
            ecr_dr.create_repository(
                repositoryName=repo['repositoryName']
            )
        except ecr_dr.exceptions.RepositoryAlreadyExistsException:
            pass  # already mirrored on a previous run — fine

    # FIX(review): put_replication_configuration is registry-wide, not
    # per-repository — the original called it once per repo inside the
    # loop, rewriting the identical configuration each iteration. One call
    # after the loop is sufficient (registryId is the account registry,
    # the same for every repo).
    if repos:
        ecr_source.put_replication_configuration(
            replicationConfiguration={
                'rules': [{
                    'destinations': [{
                        'region': dr_region,
                        'registryId': repos[0]['registryId']
                    }]
                }]
            }
        )
Load Balancing and Ingress
AWS Load Balancer Controller
# alb-ingress.yaml
# Internet-facing ALB provisioned by the AWS Load Balancer Controller:
# TLS via ACM, HTTP->HTTPS redirect, WAFv2, and IP-mode targets (traffic
# goes straight to pod IPs rather than NodePorts).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: main-ingress
  namespace: production
  annotations:
    # NOTE(review): the kubernetes.io/ingress.class annotation is deprecated
    # in favor of spec.ingressClassName — confirm the controller version
    # before switching, since older releases only honor the annotation.
    kubernetes.io/ingress.class: alb
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/target-type: ip
    # Target-group health checks.
    alb.ingress.kubernetes.io/healthcheck-path: /health
    alb.ingress.kubernetes.io/healthcheck-interval-seconds: '15'
    alb.ingress.kubernetes.io/healthcheck-timeout-seconds: '5'
    alb.ingress.kubernetes.io/healthy-threshold-count: '2'
    alb.ingress.kubernetes.io/unhealthy-threshold-count: '2'
    # Listen on 80 and 443; ssl-redirect sends port-80 traffic to 443.
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
    alb.ingress.kubernetes.io/ssl-redirect: '443'
    alb.ingress.kubernetes.io/certificate-arn: arn:aws:acm:us-east-1:123456789:certificate/abc
    # Attach a WAFv2 web ACL for L7 request filtering.
    alb.ingress.kubernetes.io/wafv2-acl-arn: arn:aws:wafv2:us-east-1:123456789:global/webacl/my-waf/abc
spec:
  rules:
    - host: api.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: api-service
                port:
                  number: 8080
Secrets Management
AWS Secrets Manager Integration
# external-secrets.yaml
# External Secrets Operator: the SecretStore authenticates to AWS Secrets
# Manager via IRSA, and the ExternalSecret materializes selected remote
# secrets as a native Kubernetes Secret that pods can mount normally.
apiVersion: external-secrets.io/v1beta1
kind: SecretStore
metadata:
  name: aws-secrets
  namespace: production
spec:
  provider:
    aws:
      service: SecretsManager
      region: us-east-1
      auth:
        # JWT auth = IRSA: the operator exchanges this ServiceAccount's
        # projected token for the annotated IAM role's credentials.
        jwt:
          serviceAccountRef:
            name: external-secrets-sa
---
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: app-secrets
  namespace: production
spec:
  # Re-sync hourly so rotated values in Secrets Manager propagate.
  refreshInterval: 1h
  secretStoreRef:
    name: aws-secrets
    kind: SecretStore
  target:
    # Name of the generated Kubernetes Secret. Owner: the operator manages
    # its full lifecycle (created/updated/deleted with this resource).
    name: app-secrets
    creationPolicy: Owner
  # Each entry maps one Secrets Manager key to one key in the K8s Secret.
  data:
    - secretKey: database-password
      remoteRef:
        key: production/rds/password
    - secretKey: api-key
      remoteRef:
        key: production/api/key
Logging with Fluent Bit
Centralized Logging to CloudWatch
# fluent-bit.yaml
# Fluent Bit pipeline: tail container logs, enrich with Kubernetes
# metadata, ship to CloudWatch Logs.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluent-bit-config
  namespace: amazon-cloudwatch
data:
  fluent-bit.conf: |
    [SERVICE]
        Flush             5
        Log_Level         info
        Daemon            off

    [INPUT]
        Name              tail
        Tag               kube.*
        Path              /var/log/containers/*.log
        # FIX(review): EKS >= 1.24 runs containerd, whose log lines are in
        # CRI format — the 'docker' JSON parser no longer matches and every
        # record would fail to parse on a 1.28 cluster. The stock fluent-bit
        # image ships a 'cri' parser in its bundled parsers.conf.
        Parser            cri
        # Position DB so restarts resume instead of re-reading files.
        DB                /var/log/flb_kube.db
        Mem_Buf_Limit     50MB
        Skip_Long_Lines   On

    [FILTER]
        Name              kubernetes
        Match             kube.*
        Kube_URL          https://kubernetes.default.svc:443
        Kube_CA_File      /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File   /var/run/secrets/kubernetes.io/serviceaccount/token
        Merge_Log         On
        K8S-Logging.Parser  On
        K8S-Logging.Exclude On

    [OUTPUT]
        Name              cloudwatch_logs
        Match             *
        region            us-east-1
        log_group_name    /eks/production-cluster
        # One stream per node.
        log_stream_prefix ${HOSTNAME}-
        auto_create_group true
Troubleshooting Guide
Common Issues and Solutions
# Troubleshooting cheat-sheet. Replace <node-name>/<pod-name>/<namespace>
# placeholders before running; commands are read-only except the last.

# Debug node issues
kubectl describe node <node-name>
kubectl get events --sort-by='.lastTimestamp'

# Check pod scheduling issues
kubectl describe pod <pod-name> -n <namespace>
kubectl get pods --all-namespaces -o wide | grep -E "Pending|Error|CrashLoop"

# Investigate resource constraints (requires metrics-server)
kubectl top nodes
kubectl top pods --all-namespaces

# Check cluster autoscaler logs
kubectl logs -n kube-system deployment/cluster-autoscaler

# Verify IRSA configuration
# ROLE_ARN / TOKEN_FILE must be set; TOKEN_FILE is the projected
# service-account token path inside the pod.
aws sts assume-role-with-web-identity \
  --role-arn $ROLE_ARN \
  --role-session-name test \
  --web-identity-token file://$TOKEN_FILE \
  --duration-seconds 3600

# Test network connectivity (--rm deletes the debug pod on shell exit)
kubectl run tmp-shell --rm -i --tty --image nicolaka/netshoot -- /bin/bash
Best Practices Checklist
Before calling a cluster production-ready, verify: private (or tightly scoped) API endpoint access; IRSA for all pod-level AWS access; secrets encrypted with a customer-managed KMS key; network policies and Pod Security enforcement in place; autoscaling tested under realistic load; resource quotas and limit ranges set per namespace; centralized logging and Container Insights enabled; Velero backups scheduled and restore-tested; and spot interruption handling validated.
Conclusion
Running EKS in production requires careful attention to security, scalability, and cost optimization. Start with a solid foundation using managed services where possible, implement comprehensive monitoring, and always plan for failure. With these practices, your EKS clusters will be resilient, secure, and cost-effective.