Infrastructure as Code
Implement Infrastructure as Code (IaC) to automate infrastructure provisioning, ensure consistency, and enable version-controlled infrastructure management.
IaC Overview
Infrastructure as Code treats infrastructure configuration as software code, enabling automated provisioning, version control, and consistent environments.
Benefits of IaC
- Consistency: Eliminate configuration drift
- Repeatability: Deploy identical environments
- Version Control: Track infrastructure changes
- Automation: Reduce manual errors
- Documentation: Code serves as documentation
- Testing: Validate infrastructure before deployment (see the CI sketch below)
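These benefits depend on actually exercising the code before it ships. As a sketch of the testing workflow (assuming GitHub Actions and the terraform/ layout shown in the next section; names are illustrative), a CI job can check formatting and validity on every pull request:

# .github/workflows/terraform-ci.yml (illustrative)
name: terraform-ci
on: [pull_request]
jobs:
  validate:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: terraform/environments/dev
    steps:
      - uses: actions/checkout@v4
      - uses: hashicorp/setup-terraform@v3
      # Validate without touching the remote backend or real credentials
      - run: terraform fmt -check -recursive
      - run: terraform init -backend=false
      - run: terraform validate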
Terraform
Project Structure
terraform/
├── environments/
│   ├── dev/
│   │   ├── main.tf
│   │   ├── variables.tf
│   │   └── terraform.tfvars
│   ├── staging/
│   └── production/
├── modules/
│   ├── networking/
│   │   ├── main.tf
│   │   ├── variables.tf
│   │   └── outputs.tf
│   ├── compute/
│   ├── database/
│   └── security/
├── global/
│   └── state/
└── scripts/
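Each environment directory pins its own values for the shared modules; a dev tfvars file might look like this (all values are illustrative):

# environments/dev/terraform.tfvars (illustrative values)
aws_region           = "us-east-1"
environment          = "dev"
project_name         = "myapp"
vpc_cidr             = "10.0.0.0/16"
availability_zones   = ["us-east-1a", "us-east-1b"]
public_subnet_cidrs  = ["10.0.1.0/24", "10.0.2.0/24"]
private_subnet_cidrs = ["10.0.11.0/24", "10.0.12.0/24"]
db_instance_class    = "db.t3.micro"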
Complete AWS Infrastructure
# main.tf - Root module
terraform {
  required_version = ">= 1.5.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
  }

  backend "s3" {
    bucket         = "terraform-state-prod"
    key            = "infrastructure/terraform.tfstate"
    region         = "us-east-1"
    encrypt        = true
    dynamodb_table = "terraform-state-lock"
  }
}
provider "aws" {
  region = var.aws_region

  default_tags {
    tags = {
      Environment = var.environment
      ManagedBy   = "Terraform"
      Project     = var.project_name
    }
  }
}
# VPC Module
module "vpc" {
  source = "./modules/networking"

  vpc_cidr             = var.vpc_cidr
  availability_zones   = var.availability_zones
  private_subnet_cidrs = var.private_subnet_cidrs
  public_subnet_cidrs  = var.public_subnet_cidrs
  environment          = var.environment

  # Must match the EKS cluster name below so the kubernetes.io/cluster/<name>
  # subnet discovery tags line up with the actual cluster.
  cluster_name = "${var.project_name}-${var.environment}"
}
# EKS Cluster
module "eks" {
  source = "./modules/compute/eks"

  cluster_name    = "${var.project_name}-${var.environment}"
  cluster_version = "1.28"
  vpc_id          = module.vpc.vpc_id
  subnet_ids      = module.vpc.private_subnet_ids

  node_groups = {
    general = {
      desired_capacity = 3
      min_capacity     = 2
      max_capacity     = 10
      instance_types   = ["t3.medium"]
      labels = {
        role = "general"
      }
      taints = []
    }
    spot = {
      desired_capacity = 2
      min_capacity     = 0
      max_capacity     = 5
      instance_types   = ["t3.large", "t3a.large"]
      capacity_type    = "SPOT"
      labels = {
        role = "spot"
      }
      taints = [{
        key    = "spot"
        value  = "true"
        effect = "NO_SCHEDULE"
      }]
    }
  }
}
# RDS Database
module "database" {
  source = "./modules/database/rds"

  identifier     = "${var.project_name}-${var.environment}"
  engine         = "postgres"
  engine_version = "15.4"
  instance_class = var.db_instance_class

  allocated_storage     = 100
  max_allocated_storage = 500
  storage_encrypted     = true

  database_name = var.project_name
  username      = "dbadmin"

  vpc_id                  = module.vpc.vpc_id
  subnet_ids              = module.vpc.private_subnet_ids
  allowed_security_groups = [module.eks.worker_security_group_id]

  backup_retention_period = 30
  backup_window           = "03:00-04:00"
  maintenance_window      = "sun:04:00-sun:05:00"

  enabled_cloudwatch_logs_exports = ["postgresql"]

  multi_az            = var.environment == "production"
  deletion_protection = var.environment == "production"
  skip_final_snapshot = var.environment != "production"
}
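The root module references several input variables; a matching variables.tf might declare them as follows (types and defaults here are assumptions, not shown in the original):

# variables.tf (illustrative; declares the variables referenced above)
variable "aws_region" {
  type    = string
  default = "us-east-1"
}

variable "environment" {
  type = string
}

variable "project_name" {
  type = string
}

variable "vpc_cidr" {
  type    = string
  default = "10.0.0.0/16"
}

variable "availability_zones" {
  type = list(string)
}

variable "public_subnet_cidrs" {
  type = list(string)
}

variable "private_subnet_cidrs" {
  type = list(string)
}

variable "db_instance_class" {
  type    = string
  default = "db.t3.medium"
}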
Networking Module
# modules/networking/main.tf
resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_hostnames = true
  enable_dns_support   = true

  tags = {
    Name = "${var.environment}-vpc"
  }
}

# Internet Gateway
resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name = "${var.environment}-igw"
  }
}
# Public Subnets
resource "aws_subnet" "public" {
  count = length(var.public_subnet_cidrs)

  vpc_id                  = aws_vpc.main.id
  cidr_block              = var.public_subnet_cidrs[count.index]
  availability_zone       = var.availability_zones[count.index]
  map_public_ip_on_launch = true

  tags = {
    Name = "${var.environment}-public-${var.availability_zones[count.index]}"
    # Discovery tags must be keyed by the cluster name, not the environment
    "kubernetes.io/cluster/${var.cluster_name}" = "shared"
    "kubernetes.io/role/elb"                    = "1"
  }
}

# Private Subnets
resource "aws_subnet" "private" {
  count = length(var.private_subnet_cidrs)

  vpc_id            = aws_vpc.main.id
  cidr_block        = var.private_subnet_cidrs[count.index]
  availability_zone = var.availability_zones[count.index]

  tags = {
    Name = "${var.environment}-private-${var.availability_zones[count.index]}"
    "kubernetes.io/cluster/${var.cluster_name}" = "shared"
    "kubernetes.io/role/internal-elb"           = "1"
  }
}
# NAT Gateways
resource "aws_eip" "nat" {
  count  = length(var.availability_zones)
  domain = "vpc"

  tags = {
    Name = "${var.environment}-nat-${var.availability_zones[count.index]}"
  }
}

resource "aws_nat_gateway" "main" {
  count = length(var.availability_zones)

  subnet_id     = aws_subnet.public[count.index].id
  allocation_id = aws_eip.nat[count.index].id

  tags = {
    Name = "${var.environment}-nat-${var.availability_zones[count.index]}"
  }

  depends_on = [aws_internet_gateway.main]
}

# Route Tables
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }

  tags = {
    Name = "${var.environment}-public-rt"
  }
}

resource "aws_route_table" "private" {
  count  = length(var.availability_zones)
  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main[count.index].id
  }

  tags = {
    Name = "${var.environment}-private-rt-${var.availability_zones[count.index]}"
  }
}

# Route Table Associations
resource "aws_route_table_association" "public" {
  count = length(var.public_subnet_cidrs)

  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
}

resource "aws_route_table_association" "private" {
  count = length(var.private_subnet_cidrs)

  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private[count.index].id
}
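The root module consumes module.vpc.vpc_id and module.vpc.private_subnet_ids, so the networking module must export them; a minimal outputs.tf consistent with the resources above:

# modules/networking/outputs.tf (a sketch; the root module depends on these names)
output "vpc_id" {
  description = "ID of the VPC"
  value       = aws_vpc.main.id
}

output "public_subnet_ids" {
  description = "IDs of the public subnets"
  value       = aws_subnet.public[*].id
}

output "private_subnet_ids" {
  description = "IDs of the private subnets"
  value       = aws_subnet.private[*].id
}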
Ansible
Playbook Structure
ansible/
├── inventories/
│   ├── production/
│   │   ├── hosts.yml
│   │   └── group_vars/
│   └── staging/
├── roles/
│   ├── common/
│   ├── docker/
│   ├── kubernetes/
│   └── monitoring/
├── playbooks/
│   ├── site.yml
│   ├── deploy.yml
│   └── maintenance.yml
└── ansible.cfg
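The plays below target the all, kubernetes, and monitoring groups, so the inventory must define those group names; a minimal hosts.yml sketch (hostnames are placeholders):

# inventories/production/hosts.yml (hostnames are placeholders)
all:
  children:
    kubernetes:
      hosts:
        k8s-node-1.example.com:
        k8s-node-2.example.com:
    monitoring:
      hosts:
        monitor-1.example.com: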
Configuration Management Playbook
# playbooks/site.yml
---
- name: Configure all servers
  hosts: all
  become: yes
  gather_facts: yes

  pre_tasks:
    - name: Update apt cache
      apt:
        update_cache: yes
        cache_valid_time: 3600
      when: ansible_os_family == "Debian"

    - name: Ensure Python is installed
      raw: test -e /usr/bin/python3 || (apt -y update && apt install -y python3-minimal)
      changed_when: false

  roles:
    - common
    - security

- name: Configure Kubernetes nodes
  hosts: kubernetes
  become: yes
  roles:
    - docker
    - kubernetes
  tasks:
    - name: Configure kernel parameters
      sysctl:
        name: "{{ item.key }}"
        value: "{{ item.value }}"
        sysctl_set: yes
        state: present
        reload: yes
      loop:
        - { key: 'net.bridge.bridge-nf-call-iptables', value: '1' }
        - { key: 'net.bridge.bridge-nf-call-ip6tables', value: '1' }
        - { key: 'net.ipv4.ip_forward', value: '1' }

- name: Configure monitoring
  hosts: monitoring
  become: yes
  roles:
    - monitoring
  vars:
    prometheus_version: "2.45.0"
    grafana_version: "10.0.0"
    alertmanager_version: "0.25.0"
Security Hardening Role
# roles/security/tasks/main.yml
---
- name: Configure firewall
  include_tasks: firewall.yml

- name: SSH hardening
  include_tasks: ssh.yml

- name: Configure fail2ban
  include_tasks: fail2ban.yml

- name: System hardening
  include_tasks: system.yml

- name: Install and configure auditd
  include_tasks: auditd.yml
# roles/security/tasks/ssh.yml
---
- name: Configure SSH
  lineinfile:
    path: /etc/ssh/sshd_config
    regexp: "^{{ item.key }}"
    line: "{{ item.key }} {{ item.value }}"
    state: present
    # Reject the edit if the resulting config fails sshd's syntax check
    validate: '/usr/sbin/sshd -t -f %s'
  loop:
    - { key: 'PermitRootLogin', value: 'no' }
    - { key: 'PasswordAuthentication', value: 'no' }
    - { key: 'PubkeyAuthentication', value: 'yes' }
    - { key: 'X11Forwarding', value: 'no' }
    - { key: 'MaxAuthTries', value: '3' }
    - { key: 'ClientAliveInterval', value: '300' }
    - { key: 'ClientAliveCountMax', value: '2' }
  notify: restart sshd

- name: Set SSH host key algorithms
  lineinfile:
    path: /etc/ssh/sshd_config
    regexp: '^HostKeyAlgorithms'
    line: 'HostKeyAlgorithms ssh-ed25519,rsa-sha2-512,rsa-sha2-256'
    validate: '/usr/sbin/sshd -t -f %s'
  notify: restart sshd
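Both tasks notify a restart sshd handler that the role must also define; a minimal handlers file, assuming the service unit is named sshd (on Debian/Ubuntu it is typically ssh):

# roles/security/handlers/main.yml (service name may be ssh on Debian/Ubuntu)
---
- name: restart sshd
  service:
    name: sshd
    state: restarted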
Pulumi
TypeScript Infrastructure
// index.ts
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
import * as awsx from "@pulumi/awsx";
import * as eks from "@pulumi/eks";
import * as k8s from "@pulumi/kubernetes";

const config = new pulumi.Config();
const environment = pulumi.getStack();

// Create VPC
const vpc = new awsx.ec2.Vpc(`${environment}-vpc`, {
    numberOfAvailabilityZones: 3,
    natGateways: {
        strategy: "OnePerAz", // one NAT gateway per AZ for high availability
    },
    tags: {
        Environment: environment,
    },
});
// Create EKS cluster using the @pulumi/eks component, which manages the
// default node group and generates a kubeconfig
const cluster = new eks.Cluster(`${environment}-cluster`, {
    vpcId: vpc.vpcId,
    privateSubnetIds: vpc.privateSubnetIds,
    instanceType: "t3.medium",
    desiredCapacity: 3,
    minSize: 2,
    maxSize: 10,
    nodeAssociatePublicIpAddress: false,
});
// Create RDS instance
const dbSubnetGroup = new aws.rds.SubnetGroup(`${environment}-db-subnet`, {
    subnetIds: vpc.privateSubnetIds,
});

const db = new aws.rds.Instance(`${environment}-database`, {
    engine: "postgres",
    engineVersion: "15.4",
    instanceClass: aws.rds.InstanceType.T3_Micro,
    allocatedStorage: 20,
    dbName: "myapp", // initial database; matches the connection string below
    dbSubnetGroupName: dbSubnetGroup.name,
    vpcSecurityGroupIds: [cluster.nodeSecurityGroup.id],
    username: "dbadmin",
    password: config.requireSecret("dbPassword"),
    skipFinalSnapshot: environment !== "production",
});
// Deploy application to Kubernetes
const k8sProvider = new k8s.Provider("k8s", {
    // eks.Cluster exposes kubeconfig as an object; serialize it for the provider
    kubeconfig: cluster.kubeconfig.apply(JSON.stringify),
});

const appNamespace = new k8s.core.v1.Namespace("app", {
    metadata: {
        name: "myapp",
    },
}, { provider: k8sProvider });

const appDeployment = new k8s.apps.v1.Deployment("app", {
    metadata: {
        namespace: appNamespace.metadata.name,
    },
    spec: {
        replicas: 3,
        selector: {
            matchLabels: {
                app: "myapp",
            },
        },
        template: {
            metadata: {
                labels: {
                    app: "myapp",
                },
            },
            spec: {
                containers: [{
                    name: "app",
                    image: "myapp:latest",
                    env: [
                        {
                            name: "DATABASE_URL",
                            value: pulumi.interpolate`postgresql://dbadmin:${config.requireSecret("dbPassword")}@${db.endpoint}/myapp`,
                        },
                    ],
                }],
            },
        },
    },
}, { provider: k8sProvider });

// Export outputs
export const vpcId = vpc.vpcId;
export const clusterName = cluster.eksCluster.name;
export const kubeconfig = cluster.kubeconfig;
export const dbEndpoint = db.endpoint;
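The config.requireSecret("dbPassword") calls assume the secret already exists in the stack configuration; it can be set once per stack before deploying (stack name and value below are examples):

# set the secret before running `pulumi up`
pulumi stack select production
pulumi config set --secret dbPassword 'example-password'
pulumi up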
CloudFormation
Complete Stack Template
# infrastructure.yaml
AWSTemplateFormatVersion: '2010-09-09'
Description: 'Complete infrastructure stack'

Parameters:
  Environment:
    Type: String
    Default: development
    AllowedValues:
      - development
      - staging
      - production
  KeyName:
    Type: AWS::EC2::KeyPair::KeyName
    Description: EC2 Key Pair for SSH access
  LatestAmiId:
    # Referenced by the launch template below; resolved from SSM at deploy time
    Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
    Default: /aws/service/ami-amazon-linux-latest/al2023-ami-kernel-default-x86_64

Mappings:
  EnvironmentConfig:
    development:
      InstanceType: t3.micro
      MinSize: 1
      MaxSize: 3
    staging:
      InstanceType: t3.small
      MinSize: 2
      MaxSize: 5
    production:
      InstanceType: t3.medium
      MinSize: 3
      MaxSize: 10
Resources:
  VPC:
    Type: AWS::CloudFormation::Stack
    Properties:
      TemplateURL: https://s3.amazonaws.com/cloudformation-templates/vpc.yaml
      Parameters:
        EnvironmentName: !Ref Environment
        VpcCIDR: 10.0.0.0/16
        PublicSubnet1CIDR: 10.0.1.0/24
        PublicSubnet2CIDR: 10.0.2.0/24
        PrivateSubnet1CIDR: 10.0.11.0/24
        PrivateSubnet2CIDR: 10.0.12.0/24
  SecurityGroups:
    Type: AWS::CloudFormation::Stack
    Properties:
      TemplateURL: https://s3.amazonaws.com/cloudformation-templates/security-groups.yaml
      Parameters:
        EnvironmentName: !Ref Environment
        VPC: !GetAtt VPC.Outputs.VPC
  ApplicationLoadBalancer:
    Type: AWS::ElasticLoadBalancingV2::LoadBalancer
    Properties:
      Name: !Sub ${Environment}-alb
      Subnets:
        - !GetAtt VPC.Outputs.PublicSubnet1
        - !GetAtt VPC.Outputs.PublicSubnet2
      SecurityGroups:
        - !GetAtt SecurityGroups.Outputs.LoadBalancerSecurityGroup
  # Target group and listener are required by the references below
  TargetGroup:
    Type: AWS::ElasticLoadBalancingV2::TargetGroup
    Properties:
      VpcId: !GetAtt VPC.Outputs.VPC
      Port: 80
      Protocol: HTTP
      HealthCheckPath: /
  Listener:
    Type: AWS::ElasticLoadBalancingV2::Listener
    Properties:
      LoadBalancerArn: !Ref ApplicationLoadBalancer
      Port: 80
      Protocol: HTTP
      DefaultActions:
        - Type: forward
          TargetGroupArn: !Ref TargetGroup
  AutoScalingGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      VPCZoneIdentifier:
        - !GetAtt VPC.Outputs.PrivateSubnet1
        - !GetAtt VPC.Outputs.PrivateSubnet2
      LaunchTemplate:
        LaunchTemplateId: !Ref LaunchTemplate
        Version: !GetAtt LaunchTemplate.LatestVersionNumber
      MinSize: !FindInMap [EnvironmentConfig, !Ref Environment, MinSize]
      MaxSize: !FindInMap [EnvironmentConfig, !Ref Environment, MaxSize]
      TargetGroupARNs:
        - !Ref TargetGroup
      HealthCheckType: ELB
      HealthCheckGracePeriod: 300
      Tags:
        - Key: Name
          Value: !Sub ${Environment}-instance
          PropagateAtLaunch: true
  LaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Sub ${Environment}-launch-template
      LaunchTemplateData:
        ImageId: !Ref LatestAmiId
        InstanceType: !FindInMap [EnvironmentConfig, !Ref Environment, InstanceType]
        KeyName: !Ref KeyName
        SecurityGroupIds:
          - !GetAtt SecurityGroups.Outputs.InstanceSecurityGroup
        UserData:
          Fn::Base64: !Sub |
            #!/bin/bash
            yum update -y
            yum install -y docker
            systemctl start docker
            systemctl enable docker
            docker run -d -p 80:8080 myapp:latest
Outputs:
  LoadBalancerURL:
    Description: URL of the Application Load Balancer
    Value: !Sub http://${ApplicationLoadBalancer.DNSName}
  VPCId:
    Description: VPC ID
    Value: !GetAtt VPC.Outputs.VPC
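A template like this is typically deployed with the AWS CLI; the stack name and key pair below are placeholders:

aws cloudformation deploy \
  --template-file infrastructure.yaml \
  --stack-name myapp-production \
  --parameter-overrides Environment=production KeyName=my-keypair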
GitOps with ArgoCD
Application Manifests
# argocd/applications/production.yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: production-apps
  namespace: argocd
spec:
  project: production
  source:
    repoURL: https://github.com/myorg/infrastructure
    targetRevision: main
    path: k8s/production
  destination:
    server: https://kubernetes.default.svc
    namespace: production
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false
    syncOptions:
      - CreateNamespace=true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
---
apiVersion: argoproj.io/v1alpha1
kind: AppProject
metadata:
  name: production
  namespace: argocd
spec:
  description: Production applications
  sourceRepos:
    - https://github.com/myorg/*
  destinations:
    - namespace: production
      server: https://kubernetes.default.svc
  clusterResourceWhitelist:
    - group: ''
      kind: Namespace
  namespaceResourceWhitelist:
    - group: '*'
      kind: '*'
  roles:
    - name: admin
      policies:
        - p, proj:production:admin, applications, *, production/*, allow
      groups:
        - platform-team
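The AppProject must exist before the Application that references it can sync; both manifests can be applied directly to the cluster running Argo CD:

kubectl apply -n argocd -f argocd/applications/production.yaml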
Testing Infrastructure
Terraform Testing with Terratest
// test/vpc_test.go
package test

import (
	"testing"

	"github.com/gruntwork-io/terratest/modules/aws"
	"github.com/gruntwork-io/terratest/modules/terraform"
	"github.com/stretchr/testify/assert"
)

func TestVPCModule(t *testing.T) {
	t.Parallel()

	awsRegion := "us-east-1"

	terraformOptions := &terraform.Options{
		TerraformDir: "../modules/networking",
		Vars: map[string]interface{}{
			"vpc_cidr":             "10.0.0.0/16",
			"environment":          "test",
			"availability_zones":   []string{"us-east-1a", "us-east-1b"},
			"public_subnet_cidrs":  []string{"10.0.1.0/24", "10.0.2.0/24"},
			"private_subnet_cidrs": []string{"10.0.11.0/24", "10.0.12.0/24"},
		},
	}

	// Tear down everything the test created, even on failure
	defer terraform.Destroy(t, terraformOptions)
	terraform.InitAndApply(t, terraformOptions)

	vpcId := terraform.Output(t, terraformOptions, "vpc_id")
	assert.NotEmpty(t, vpcId)

	// Two public and two private subnets across the two AZs
	subnets := aws.GetSubnetsForVpc(t, vpcId, awsRegion)
	assert.Equal(t, 4, len(subnets))
}
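Note that Terratest provisions real AWS resources, so a run needs valid credentials and a generous timeout, and it incurs cost until the deferred destroy completes:

cd test
go test -v -timeout 30m -run TestVPCModule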
Infrastructure Policies
Open Policy Agent (OPA)
# policies/terraform.rego
package terraform.analysis

import input as tfplan

# Deny public S3 buckets
deny[msg] {
	resource := tfplan.resource_changes[_]
	resource.type == "aws_s3_bucket"
	resource.change.after.acl == "public-read"
	msg := sprintf("S3 bucket %v has public read access", [resource.address])
}

# Require encryption for RDS
deny[msg] {
	resource := tfplan.resource_changes[_]
	resource.type == "aws_db_instance"
	not resource.change.after.storage_encrypted
	msg := sprintf("RDS instance %v is not encrypted", [resource.address])
}

# Enforce tagging
deny[msg] {
	resource := tfplan.resource_changes[_]
	required_tags := {"Environment", "Owner", "Project"}
	missing_tags := required_tags - {tag | resource.change.after.tags[tag]}
	count(missing_tags) > 0
	msg := sprintf("Resource %v is missing required tags: %v", [resource.address, missing_tags])
}
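These rules expect a Terraform plan rendered as JSON on input; a typical evaluation flow (file names are examples):

terraform plan -out=tfplan.binary
terraform show -json tfplan.binary > tfplan.json
opa eval --data policies/terraform.rego --input tfplan.json 'data.terraform.analysis.deny'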
Best Practices
- Version Control: Store all IaC in Git repositories
- State Management: Use remote state with locking
- Modularization: Create reusable modules
- Environment Separation: Separate configs per environment
- Testing: Test infrastructure changes before applying
- Documentation: Document modules and variables
- Security: Never commit secrets, use secret management
- Validation: Use pre-commit hooks and CI checks (example configuration below)
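For the validation bullet, a pre-commit configuration along these lines is common (the hook repository is real; the pinned rev is an assumption to update to a current release):

# .pre-commit-config.yaml (rev is an assumed pin)
repos:
  - repo: https://github.com/antonbabenko/pre-commit-terraform
    rev: v1.86.0
    hooks:
      - id: terraform_fmt
      - id: terraform_validate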
Note: This documentation is provided for reference purposes only. It reflects general best practices and industry-aligned guidelines, and any examples, claims, or recommendations are intended as illustrative—not definitive or binding.