Nomad provides several volume types to cover different persistent-storage needs:
| Type | Use case | Persistent | Cross-node | Complexity |
|---|---|---|---|---|
| Host Volumes | Node-local storage, simple data persistence | ✓ | ✗ | Low |
| Docker Volumes | Docker tasks only, managed by Docker | ✓ | ✗ | Low |
| CSI Volumes | Enterprise-grade storage, cross-node access | ✓ | ✓ | High |
Host Volumes are the simplest option: a directory on the host is mounted into the task container.
First, define the host volume in the Nomad Client configuration file:
# /etc/nomad.d/client.hcl
client {
enabled = true
host_volume "mysql_data" {
path = "/opt/data/mysql"
read_only = false
}
host_volume "nginx_config" {
path = "/opt/config/nginx"
read_only = true
}
host_volume "shared_logs" {
path = "/var/log/apps"
read_only = false
}
}
Configuration notes:
A host_volume block defines a named host volume
path: absolute path on the host (must be created beforehand)
read_only: whether the volume is read-only (defaults to false)
Important notes:
# Create the directories manually and set permissions
sudo mkdir -p /opt/data/mysql
sudo chown -R 1000:1000 /opt/data/mysql
sudo chmod 755 /opt/data/mysql
# Restart the Nomad Client to apply the configuration
sudo systemctl restart nomad
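After the restart, it is worth confirming that the client actually registered the volumes; on recent Nomad versions the verbose node status lists them (a quick check, run on the client itself):
# Host volumes appear in the verbose node status output
nomad node status -self -verbose | grep -A 5 "Host Volumes"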
job "mysql-example" {
datacenters = ["dc1"]
type = "service"
group "db" {
count = 1
# Declare the volume this group needs
volume "mysql_data" {
type = "host"
source = "mysql_data" # 对应 client 配置中的名称
read_only = false
}
task "mysql" {
driver = "docker"
config {
image = "mysql:8.0"
ports = ["db"]
}
# Mount the volume inside the task
volume_mount {
volume = "mysql_data" # 对应 group 中声明的 volume
destination = "/var/lib/mysql"
read_only = false
}
env {
MYSQL_ROOT_PASSWORD = "secretpassword"
}
resources {
cpu = 500
memory = 512
}
}
network {
port "db" {
static = 3306
}
}
}
}
job "web-app" {
datacenters = ["dc1"]
group "web" {
count = 1
# Declare multiple volumes
volume "app_data" {
type = "host"
source = "shared_logs"
}
volume "config" {
type = "host"
source = "nginx_config"
read_only = true
}
task "nginx" {
driver = "docker"
config {
image = "nginx:latest"
ports = ["http"]
}
# Mount the config files (read-only)
volume_mount {
volume = "config"
destination = "/etc/nginx/conf.d"
read_only = true
}
# Mount the log directory (read-write)
volume_mount {
volume = "app_data"
destination = "/var/log/nginx"
read_only = false
}
resources {
cpu = 200
memory = 128
}
}
network {
port "http" {
static = 80
}
}
}
}
job "config-app" {
group "app" {
volume "config" {
type = "host"
source = "nginx_config"
}
task "web" {
driver = "docker"
# Generate the config file with a template
template {
data = <<EOH
server {
listen {{ env "NOMAD_PORT_http" }};
server_name {{ env "NOMAD_ALLOC_ID" }}.example.com;
location / {
root /usr/share/nginx/html;
}
}
EOH
destination = "local/nginx.conf"
}
volume_mount {
volume = "config"
destination = "/etc/nginx/conf.d"
}
config {
image = "nginx:latest"
ports = ["http"]
}
}
network {
port "http" {}
}
}
}
job "permission-example" {
group "app" {
volume "data" {
type = "host"
source = "mysql_data"
}
task "mysql" {
driver = "docker"
config {
image = "mysql:8.0"
# Run the container as a specific user
user = "1000:1000"
}
volume_mount {
volume = "data"
destination = "/var/lib/mysql"
read_only = false
}
env {
MYSQL_ROOT_PASSWORD = "password"
}
}
}
}
Limitations:
Node pinning: a host volume ties the task to specific nodes, reducing scheduling flexibility
No cross-node access: the data lives on a single node only
Manual management: directories, permissions, and backups are all handled by hand
Notes:
Make sure directory permissions match the container user
Back up important data regularly
Monitor disk-space usage
Use constraints so tasks are scheduled onto the right node
Node-constraint example:
job "stateful-app" {
group "db" {
volume "data" {
type = "host"
source = "mysql_data"
}
# Pin the task to the same node every time
constraint {
attribute = "${node.unique.name}"
value = "node-1"
}
task "mysql" {
# ... task config
}
}
}
Docker Volumes are managed directly by the Docker driver and offer more flexible storage options than Host Volumes.
There are three main kinds of Docker volumes.
Named volumes are persistent volumes that Docker creates and manages itself, stored under Docker's data directory.
job "postgres-named-volume" {
datacenters = ["dc1"]
type = "service"
group "db" {
count = 1
task "postgres" {
driver = "docker"
config {
image = "postgres:14"
ports = ["db"]
# Docker named-volume configuration
volumes = [
"postgres-data:/var/lib/postgresql/data"
]
}
env {
POSTGRES_PASSWORD = "mysecretpassword"
POSTGRES_DB = "myapp"
}
resources {
cpu = 500
memory = 512
}
}
network {
port "db" {
to = 5432
}
}
}
}
Characteristics:
Docker creates and manages the volume automatically
Volume name: postgres-data
Data persists under /var/lib/docker/volumes/postgres-data/_data
Data survives task restarts
Bind mounts map a host path directly into the container, similar to Host Volumes.
job "nginx-bind-mount" {
datacenters = ["dc1"]
group "web" {
task "nginx" {
driver = "docker"
config {
image = "nginx:latest"
ports = ["http"]
# Bind mount: host-path:container-path:options
volumes = [
"/opt/nginx/html:/usr/share/nginx/html:ro",
"/opt/nginx/logs:/var/log/nginx:rw"
]
}
resources {
cpu = 200
memory = 128
}
}
network {
port "http" {
static = 8080
}
}
}
}
Mount options:
:ro - read-only
:rw - read-write (the default)
:z - SELinux shared label (content can be shared between containers)
:Z - SELinux private label (content restricted to a single container)
tmpfs mounts create temporary in-memory storage; the data is lost when the task stops.
job "cache-app" {
group "cache" {
task "redis" {
driver = "docker"
config {
image = "redis:alpine"
# tmpfs mount configuration
mount {
type = "tmpfs"
target = "/data"
tmpfs_options {
size = 100000000 # 100 MB
mode = 1777 # permissions
}
}
}
}
}
}
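To confirm the mount really is tmpfs at runtime, you can exec into the running task (the allocation ID here is hypothetical):
# -task selects the task inside the allocation
nomad alloc exec -task redis <alloc-id> sh -c "mount | grep ' /data '"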
Docker also supports third-party volume drivers that provide advanced storage features.
job "local-driver-example" {
group "app" {
task "web" {
driver = "docker"
config {
image = "nginx:latest"
# Create a volume via the local driver
mount {
type = "volume"
source = "nginx-data"
target = "/usr/share/nginx/html"
volume_options {
driver_config {
name = "local"
options = {
type = "nfs"
o = "addr=192.168.1.100,rw"
device = ":/exports/nginx"
}
}
}
}
}
}
}
}
job "nfs-volume-app" {
datacenters = ["dc1"]
group "storage" {
task "app" {
driver = "docker"
config {
image = "ubuntu:22.04"
command = "tail"
args = ["-f", "/dev/null"]
mount {
type = "volume"
source = "nfs-share"
target = "/mnt/nfs"
volume_options {
driver_config {
name = "local"
options = {
type = "nfs"
o = "addr=nfs.example.com,nolock,soft,rw"
device = ":/exports/shared"
}
}
}
}
}
}
}
}
job "volume-lifecycle" {
group "app" {
task "setup" {
driver = "docker"
config {
image = "alpine:latest"
# Docker auto-creates named volumes that don't exist yet
volumes = [
"my-persistent-data:/data"
]
command = "sh"
args = [
"-c",
"echo 'Initial data' > /data/init.txt && sleep 3600"
]
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
task "main" {
driver = "docker"
config {
image = "alpine:latest"
# Reuse the same volume
volumes = [
"my-persistent-data:/data:ro"
]
command = "tail"
args = ["-f", "/data/init.txt"]
}
}
}
}
# List all Docker volumes
docker volume ls
# Inspect a volume
docker volume inspect postgres-data
# Create a volume manually
docker volume create --name myapp-data
# Remove unused volumes
docker volume prune
# Remove a specific volume (make sure no container is using it)
docker volume rm myapp-data
# Back up volume data
docker run --rm \
  -v postgres-data:/source:ro \
  -v /backup:/backup \
  alpine tar czf /backup/postgres-backup.tar.gz -C /source .
# Restore volume data
docker run --rm \
  -v postgres-data:/target \
  -v /backup:/backup \
  alpine tar xzf /backup/postgres-backup.tar.gz -C /target
job "shared-volume-app" {
datacenters = ["dc1"]
group "services" {
count = 1
task "producer" {
driver = "docker"
config {
image = "alpine:latest"
volumes = [
"shared-data:/data"
]
command = "sh"
args = [
"-c",
"while true; do date >> /data/log.txt; sleep 5; done"
]
}
resources {
cpu = 100
memory = 64
}
}
task "consumer" {
driver = "docker"
config {
image = "alpine:latest"
volumes = [
"shared-data:/data:ro" # 只读访问
]
command = "sh"
args = [
"-c",
"tail -f /data/log.txt"
]
}
resources {
cpu = 100
memory = 64
}
}
}
}
job "advanced-volume" {
group "db" {
task "mysql" {
driver = "docker"
config {
image = "mysql:8.0"
mount {
type = "volume"
source = "mysql-data"
target = "/var/lib/mysql"
volume_options {
# Copy existing image data into the volume on first use
# (set no_copy = true to disable the copy)
no_copy = false
# Attach labels
labels = {
app = "mysql"
env = "production"
version = "8.0"
}
driver_config {
name = "local"
options = {
# local driver options
type = "none"
o = "bind"
device = "/mnt/ssd/mysql"
}
}
}
}
}
env {
MYSQL_ROOT_PASSWORD = "secretpass"
}
}
}
}
job "backup-job" {
type = "batch"
periodic {
cron = "0 2 * * *" # 每天凌晨2点
prohibit_overlap = true
}
group "backup" {
task "mysql-backup" {
driver = "docker"
config {
image = "alpine:latest"
volumes = [
"mysql-data:/source:ro",
"/backup/mysql:/backup"
]
command = "sh"
args = [
"-c",
"tar czf /backup/mysql-$(date +%Y%m%d-%H%M%S).tar.gz -C /source ."
]
}
resources {
cpu = 200
memory = 256
}
}
}
}
| Feature | Docker Volume | Host Volume |
|---|---|---|
| Where configured | Job file | Client config file |
| Management | Managed by Docker | Manual |
| Path visibility | Abstracted (Docker-managed) | Explicit host path |
| Driver support | Many drivers | Local filesystem only |
| Backup | Requires Docker commands | Direct filesystem access |
| Portability | Harder (Docker-specific) | Easy (plain copy) |
| Performance | Slightly lower (abstraction layer) | Native filesystem performance |
# Recommended: descriptive names with an environment prefix
volumes = [
"prod-mysql-data:/var/lib/mysql",
"prod-nginx-logs:/var/log/nginx"
]
# Not recommended: vague names
volumes = [
"data:/var/lib/mysql",
"logs:/var/log/nginx"
]
job "secure-volume" {
group "app" {
task "web" {
driver = "docker"
config {
image = "nginx:latest"
# Mount the config read-only
volumes = [
"nginx-config:/etc/nginx:ro"
]
# Mount the logs read-write
mount {
type = "volume"
source = "nginx-logs"
target = "/var/log/nginx"
readonly = false
}
}
}
}
}
#!/bin/bash
# Monitoring script: check volume usage
docker volume ls -q | while read vol; do
echo "Volume: $vol"
docker volume inspect $vol | jq -r '.[0].Mountpoint' | xargs du -sh
done
# Periodically prune unused volumes (use with care)
docker volume prune -f --filter "label!=keep=true"
# Error message
Error: Permission denied
# Fix: check the container user and the volume permissions
docker volume inspect my-volume
# Adjust ownership
docker run --rm -v my-volume:/data alpine chown -R 1000:1000 /data
# Make sure you use named volumes, not anonymous ones
config {
# Wrong: anonymous volume (each restart creates a fresh volume)
volumes = ["/var/lib/mysql"]
# Right: named volume
volumes = ["mysql-data:/var/lib/mysql"]
}
# Test the NFS connection
mount -t nfs nfs.example.com:/exports /mnt/test
# Check the container logs
docker logs <container-id>
# Verify the NFS exports
showmount -e nfs.example.com
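If the manual mount succeeds but the task still fails, it is worth inspecting what Docker actually stored for the volume's driver options (volume name from the earlier example):
docker volume inspect nfs-share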
CSI (Container Storage Interface) is Nomad's enterprise-grade storage solution, providing cross-node, highly available persistent storage.
CSI consists of the following components:
┌─────────────────────────────────────────────┐
│               Nomad Scheduler               │
│ (manages volume creation, mounts, scheduling) │
└──────────────┬──────────────────────────────┘
               │
               ▼
┌─────────────────────────────────────────────┐
│           CSI Controller Plugin             │
│ (runs on a few nodes; creates/deletes volumes) │
└─────────────────────────────────────────────┘
               │
               ▼
┌─────────────────────────────────────────────┐
│              CSI Node Plugin                │
│ (runs on every client node; mounts volumes) │
└─────────────────────────────────────────────┘
               │
               ▼
┌─────────────────────────────────────────────┐
│  Storage backend (AWS EBS, Ceph, NFS, ...)  │
└─────────────────────────────────────────────┘
Core concepts:
Controller Plugin: manages the volume lifecycle (create, delete, snapshot)
Node Plugin: mounts volumes onto individual nodes
Volume: the persistent storage resource itself
Topology: physical placement constraints for a volume (e.g. availability zone)
Step 1: Create the CSI plugin job
# ebs-csi-plugin.nomad.hcl
job "aws-ebs-csi-plugin" {
datacenters = ["dc1"]
type = "system" # 在所有节点运行
group "controller" {
# Controller 只需要在部分节点运行
count = 2
task "plugin" {
driver = "docker"
config {
image = "amazon/aws-ebs-csi-driver:v1.20.0"
args = [
"controller",
"--endpoint=unix://csi/csi.sock",
"--logtostderr",
"--v=5"
]
privileged = true
}
csi_plugin {
id = "aws-ebs"
type = "controller"
mount_dir = "/csi"
}
resources {
cpu = 500
memory = 256
}
# AWS credentials
env {
AWS_ACCESS_KEY_ID = "YOUR_ACCESS_KEY"
AWS_SECRET_ACCESS_KEY = "YOUR_SECRET_KEY"
AWS_REGION = "us-east-1"
}
}
}
group "nodes" {
# The node plugin must run on every client node
task "plugin" {
driver = "docker"
config {
image = "amazon/aws-ebs-csi-driver:v1.20.0"
args = [
"node",
"--endpoint=unix://csi/csi.sock",
"--logtostderr",
"--v=5"
]
privileged = true
}
csi_plugin {
id = "aws-ebs"
type = "node"
mount_dir = "/csi"
}
resources {
cpu = 500
memory = 256
}
}
}
}
Step 2: Deploy the plugin
# Run the CSI plugin
nomad job run ebs-csi-plugin.nomad.hcl
# Verify plugin status
nomad plugin status aws-ebs
# Inspect plugin details
nomad plugin status -verbose aws-ebs
| Plugin | Storage backend | Use case |
|---|---|---|
| aws-ebs | AWS Elastic Block Store | AWS cloud environments |
| aws-efs | AWS Elastic File System | Shared file systems on AWS |
| ceph-csi | Ceph RBD/CephFS | Private/hybrid cloud |
| nfs-csi | NFS servers | General file sharing |
| hostpath | Local paths | Development/testing |
| portworx | Portworx storage | Enterprise container storage |
# volume.hcl
id = "mysql-volume-01"
name = "mysql-volume-01"
type = "csi"
plugin_id = "aws-ebs"
# Capacity
capacity_min = "10GB"
capacity_max = "20GB"
# Access capabilities
capability {
access_mode = "single-node-writer" # single-node read-write
attachment_mode = "file-system" # mount as a filesystem
}
# Physical placement constraints for the volume
topology_request {
required {
topology {
segments {
"topology.ebs.csi.aws.com/zone" = "us-east-1a"
}
}
}
}
# Mount options
mount_options {
fs_type = "ext4"
mount_flags = ["noatime", "nodiratime"]
}
# Plugin-specific parameters
parameters {
type = "gp3" # EBS volume type
iops = "3000"
throughput = "125"
encrypted = "true"
}
Creating the volume:
# Create the volume
nomad volume create volume.hcl
# List all volumes
nomad volume status
# Inspect a volume
nomad volume status mysql-volume-01
# Delete a volume (it must be detached first)
nomad volume delete mysql-volume-01
| Access mode | Description | Use case |
|---|---|---|
| single-node-reader-only | Single node, read-only | Config distribution |
| single-node-writer | Single node, read-write | Databases (MySQL, PostgreSQL) |
| multi-node-reader-only | Multiple nodes, read-only | Shared static assets |
| multi-node-single-writer | Multiple nodes, one writer | Shared log collection |
| multi-node-multi-writer | Multiple nodes, many writers | Distributed file systems |
| Attachment mode | Description |
|---|---|
| file-system | Mounted as a filesystem (formatted as ext4/xfs, etc.) |
| block-device | Attached as a raw block device, managed by the application |
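As a sketch of the block-device case (the volume ID and sizes here are illustrative), the spec differs from the earlier one only in its attachment mode; the application then receives a raw, unformatted device:
# raw-device-volume.hcl
id = "raw-device-01"
name = "raw-device-01"
type = "csi"
plugin_id = "aws-ebs"
capacity_min = "10GB"
capacity_max = "10GB"
capability {
access_mode = "single-node-writer"
attachment_mode = "block-device" # no filesystem; the app manages the device
}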
job "mysql-with-csi" {
datacenters = ["dc1"]
type = "service"
group "db" {
count = 1
# Declare the CSI volume
volume "mysql_data" {
type = "csi"
source = "mysql-volume-01" # 卷 ID
read_only = false
attachment_mode = "file-system"
access_mode = "single-node-writer"
}
task "mysql" {
driver = "docker"
config {
image = "mysql:8.0"
ports = ["db"]
}
# Mount the CSI volume
volume_mount {
volume = "mysql_data"
destination = "/var/lib/mysql"
read_only = false
}
env {
MYSQL_ROOT_PASSWORD = "password123"
MYSQL_DATABASE = "myapp"
}
resources {
cpu = 1000
memory = 1024
}
}
network {
port "db" {
to = 3306
}
}
# Health check
service {
name = "mysql"
port = "db"
check {
type = "tcp"
interval = "10s"
timeout = "2s"
}
}
}
}
job "complex-app" {
datacenters = ["dc1"]
group "app" {
# Declare multiple CSI volumes
volume "app_data" {
type = "csi"
source = "app-data-volume"
read_only = false
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
volume "app_logs" {
type = "csi"
source = "app-logs-volume"
read_only = false
access_mode = "multi-node-single-writer"
attachment_mode = "file-system"
}
volume "shared_config" {
type = "csi"
source = "config-volume"
read_only = true
access_mode = "multi-node-reader-only"
attachment_mode = "file-system"
}
task "web" {
driver = "docker"
config {
image = "myapp:latest"
}
# Mount the app data (read-write)
volume_mount {
volume = "app_data"
destination = "/var/lib/app"
read_only = false
}
# Mount the logs (read-write)
volume_mount {
volume = "app_logs"
destination = "/var/log/app"
read_only = false
}
# Mount the config (read-only)
volume_mount {
volume = "shared_config"
destination = "/etc/app/config"
read_only = true
}
}
}
}
# Create a snapshot: the CLI takes the source volume ID and a snapshot
# name directly (there is no snapshot spec file)
nomad volume snapshot create mysql-volume-01 mysql-backup-2024-01-15
# Restore a volume from a snapshot
cat > restore-volume.hcl <<EOF
id = "mysql-volume-02"
name = "mysql-restored"
type = "csi"
plugin_id = "aws-ebs"
snapshot_id = "mysql-snapshot-01"
capacity_min = "10GB"
capacity_max = "20GB"
capability {
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
EOF
nomad volume create restore-volume.hcl
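To find the snapshot ID to restore from, the plugin's snapshots can be listed:
# List snapshots known to the aws-ebs plugin
nomad volume snapshot list -plugin aws-ebs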
# Note: not every CSI plugin supports online expansion
job "db-with-expansion" {
group "db" {
volume "data" {
type = "csi"
source = "mysql-volume-01"
# per_alloc = true would create one volume per allocation;
# false shares a single volume (it is unrelated to expansion)
per_alloc = false
}
task "mysql" {
# ... task config
}
}
}
# Manually expand a volume (requires plugin support)
# 1. Update the volume spec
cat > expand-volume.hcl <<EOF
id = "mysql-volume-01"
plugin_id = "aws-ebs"
capacity_min = "20GB"
capacity_max = "30GB"
EOF
# 2. Some plugins require the volume to be detached first
nomad job stop mysql-with-csi
# 3. Expand (if the plugin supports it)
# AWS EBS supports online expansion
aws ec2 modify-volume --volume-id vol-xxx --size 30
# 4. Remount
nomad job run mysql-with-csi.nomad
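Expanding the backing device does not grow the filesystem on it. After step 3 the filesystem usually has to be resized on the node where the volume is attached; a sketch, assuming ext4 on /dev/xvdf (the device name will vary):
# ext4 grows online; for xfs, run xfs_growfs on the mount point instead
sudo resize2fs /dev/xvdf
df -h /var/lib/mysql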
job "topology-aware-app" {
datacenters = ["dc1"]
group "app" {
volume "data" {
type = "csi"
source = "regional-volume"
read_only = false
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
# Nomad schedules the task into the volume's availability zone;
# the constraint below makes that placement explicit
constraint {
attribute = "${meta.availability_zone}"
operator = "="
value = "us-east-1a"
}
task "app" {
driver = "docker"
config {
image = "myapp:latest"
}
volume_mount {
volume = "data"
destination = "/data"
}
}
}
}
job "ceph-csi-plugin" {
datacenters = ["dc1"]
type = "system"
group "controller" {
task "ceph-controller" {
driver = "docker"
config {
image = "quay.io/cephcsi/cephcsi:v3.9.0"
args = [
"--type=rbd",
"--controllerserver=true",
"--endpoint=unix://csi/csi.sock",
"--nodeid=${node.unique.name}",
"--instanceid=${node.unique.name}-controller"
]
privileged = true
}
csi_plugin {
id = "ceph-rbd"
type = "controller"
mount_dir = "/csi"
}
template {
data = <<EOH
[{
"clusterID": "cluster-id",
"monitors": [
"mon1.example.com:6789",
"mon2.example.com:6789",
"mon3.example.com:6789"
]
}]
EOH
destination = "local/ceph-config.json"
}
resources {
cpu = 500
memory = 256
}
}
}
group "nodes" {
task "ceph-node" {
driver = "docker"
config {
image = "quay.io/cephcsi/cephcsi:v3.9.0"
args = [
"--type=rbd",
"--nodeserver=true",
"--endpoint=unix://csi/csi.sock",
"--nodeid=${node.unique.name}"
]
privileged = true
# Mount host paths so the plugin can access devices
volumes = [
"/dev:/dev",
"/sys:/sys",
"/lib/modules:/lib/modules:ro"
]
}
csi_plugin {
id = "ceph-rbd"
type = "node"
mount_dir = "/csi"
# Health check
health_timeout = "90s"
}
resources {
cpu = 500
memory = 256
}
}
}
}
job "nfs-csi-plugin" {
datacenters = ["dc1"]
type = "system"
group "controller" {
task "nfs-controller" {
driver = "docker"
config {
image = "registry.k8s.io/sig-storage/nfsplugin:v4.3.0"
args = [
"-v=5",
"--nodeid=${node.unique.name}",
"--endpoint=unix://csi/csi.sock"
]
}
csi_plugin {
id = "nfs"
type = "controller"
mount_dir = "/csi"
}
resources {
cpu = 200
memory = 128
}
}
}
group "nodes" {
task "nfs-node" {
driver = "docker"
config {
image = "registry.k8s.io/sig-storage/nfsplugin:v4.3.0"
args = [
"-v=5",
"--nodeid=${node.unique.name}",
"--endpoint=unix://csi/csi.sock"
]
privileged = true
volumes = [
"/:/host:rshared"
]
}
csi_plugin {
id = "nfs"
type = "node"
mount_dir = "/csi"
}
resources {
cpu = 200
memory = 128
}
}
}
}
Using an NFS CSI volume:
# nfs-volume.hcl
id = "nfs-shared-01"
name = "nfs-shared"
type = "csi"
plugin_id = "nfs"
capability {
access_mode = "multi-node-multi-writer"
attachment_mode = "file-system"
}
mount_options {
fs_type = "nfs"
mount_flags = ["nolock", "vers=4.1"]
}
parameters {
server = "nfs.example.com"
share = "/exports/shared"
}
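With the plugin jobs above healthy, creating and checking the volume is the usual pair of commands:
nomad volume create nfs-volume.hcl
nomad volume status nfs-shared-01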
# List all plugins
nomad plugin status
# Inspect a specific plugin
nomad plugin status aws-ebs
# Detailed info (including controller/node health)
nomad plugin status -verbose aws-ebs
# Check CSI health on the local node
nomad node status -self | grep -A 20 "CSI"
Problem 1: a volume fails to mount
# Check plugin status
nomad plugin status <plugin-id>
# Inspect the allocation logs
nomad alloc logs <alloc-id>
# Check the CSI node plugin logs
nomad alloc logs -task plugin <node-plugin-alloc-id>
# Check CSI health on the node
nomad node status <node-id>
Problem 2: the volume is already in use
# Check volume status
nomad volume status mysql-volume-01
# Force-detach the volume (use with care)
nomad volume detach mysql-volume-01 <node-id>
# Retry once the volume is released
nomad job run mysql-job.nomad
Problem 3: the plugin fails to start
# Check whether the plugin configuration is correct
job "debug-plugin" {
group "controller" {
task "plugin" {
# Add debug logging
config {
args = [
"--v=5", # 增加日志级别
"--logtostderr"
]
}
# Increase the resource limits
resources {
cpu = 1000
memory = 512
}
}
}
}
job "ha-csi-plugin" {
group "controller" {
count = 3 # multiple controller replicas
update {
max_parallel = 1
health_check = "checks"
min_healthy_time = "30s"
healthy_deadline = "5m"
}
# Spread across different nodes
spread {
attribute = "${node.unique.name}"
weight = 100
}
task "controller" {
# ... plugin config
}
}
}
# Volume spec: cap the volume's size and IOPS at creation time
id = "limited-volume"
type = "csi"
plugin_id = "aws-ebs"
capacity_min = "10GB"
capacity_max = "100GB" # at most 100GB
parameters {
type = "gp3"
iops = "3000" # cap IOPS
}
job "volume-backup" {
type = "batch"
periodic {
cron = "0 3 * * *" # 每天凌晨3点
prohibit_overlap = true
}
group "backup" {
volume "source" {
type = "csi"
source = "mysql-volume-01"
read_only = true
access_mode = "single-node-reader-only"
attachment_mode = "file-system"
}
task "create-snapshot" {
driver = "docker"
config {
image = "amazon/aws-cli:latest"
command = "/bin/bash"
args = [
"-c",
"aws ec2 create-snapshot --volume-id $${VOLUME_ID} --description 'Daily backup'" # $$ escapes HCL interpolation so the shell sees $VOLUME_ID
]
}
env {
VOLUME_ID = "vol-xxxxx"
}
}
}
}
Need cross-node access?
├─ Yes → use CSI Volumes
│   ├─ Cloud (AWS/Azure/GCP) → the vendor's CSI (EBS/Azure Disk/GCE PD)
│   ├─ Private/hybrid cloud → Ceph CSI or Portworx
│   └─ Simple file sharing → NFS CSI
│
└─ No → node-local storage
    ├─ Docker driver → Docker Volumes
    │   ├─ Simple persistence → named volumes
    │   ├─ Specific paths needed → bind mounts
    │   └─ Ephemeral data → tmpfs
    │
    └─ Other drivers (exec/raw_exec) → Host Volumes
| Scenario | Recommended option | Rationale |
|---|---|---|
| Single-node database | Host Volume or Docker Volume | Best performance, simple setup |
| Distributed database | CSI Volume (single-node writer) | Supports failover |
| Shared file system | CSI Volume (multi-node read-write) | NFS/CephFS support |
| Log collection | Docker Volume | Lightweight, easy to manage |
| Cache data | tmpfs | Memory speed, no persistence needed |
| Static assets | Host Volume (read-only) | Fast, content never changes |
| Config files | Host Volume or Template | Flexible, version-controllable |
# CSI volume spec: filesystem tuning
id = "optimized-volume"
type = "csi"
plugin_id = "aws-ebs"
mount_options {
# XFS: large files, high concurrency
fs_type = "xfs"
mount_flags = [
"noatime", # skip access-time updates for better performance
"nodiratime", # skip directory access-time updates
"largeio", # optimize large-file I/O
"inode64" # 64-bit inodes, supports more files
]
}
# EXT4: general-purpose alternative
# fs_type = "ext4"
# mount_flags = ["noatime", "nodiratime", "data=ordered"]
parameters {
type = "io2" # high-performance SSD
iops = "10000" # high IOPS
# throughput = "500" # (applies to gp3 volumes only)
}
job "database-optimized" {
group "db" {
volume "data" {
type = "csi"
source = "high-perf-volume"
read_only = false
}
task "postgres" {
driver = "docker"
config {
image = "postgres:14"
# Raise the file-descriptor limit via ulimit
ulimit {
nofile = "65536"
}
# I/O weight (requires cgroup v1)
# blkio_weight = 1000
}
volume_mount {
volume = "data"
destination = "/var/lib/postgresql/data"
}
# PostgreSQL performance settings
env {
POSTGRES_INITDB_ARGS = "-E UTF8 --data-checksums"
}
# Tune the PostgreSQL configuration
template {
data = <<EOH
# Memory
shared_buffers = 4GB
effective_cache_size = 12GB
work_mem = 64MB
maintenance_work_mem = 1GB
# I/O
wal_buffers = 16MB
checkpoint_completion_target = 0.9
random_page_cost = 1.1 # SSD-friendly
# Concurrency
max_connections = 200
max_worker_processes = 8
EOH
destination = "local/postgresql.conf"
}
resources {
cpu = 4000
memory = 8192
}
}
}
}
# NFS volume spec: performance tuning
id = "nfs-optimized"
type = "csi"
plugin_id = "nfs"
mount_options {
fs_type = "nfs"
mount_flags = [
"vers=4.2", # latest NFS protocol version
"rsize=1048576", # 1 MB read buffer
"wsize=1048576", # 1 MB write buffer
"hard", # hard mount, for data consistency
"timeo=600", # 60-second timeout (unit is tenths of a second)
"retrans=2", # 2 retransmissions
"async", # async writes (mind the durability trade-off)
"noatime", # skip access-time updates
"nodiratime" # skip directory access-time updates
]
}
parameters {
server = "nfs.example.com"
share = "/exports/high-perf"
}
job "secure-database" {
group "db" {
# Use an encrypted volume
volume "encrypted_data" {
type = "csi"
source = "encrypted-volume"
read_only = false
}
task "mysql" {
driver = "docker"
config {
image = "mysql:8.0"
}
volume_mount {
volume = "encrypted_data"
destination = "/var/lib/mysql"
}
# Manage the password with Vault
template {
data = <<EOH
MYSQL_ROOT_PASSWORD={{ with secret "database/mysql/root" }}{{ .Data.data.password }}{{ end }}
EOH
destination = "secrets/mysql.env"
env = true
}
vault {
policies = ["database-read"]
}
}
}
}
Creating the encrypted volume:
# encrypted-volume.hcl
id = "encrypted-volume"
type = "csi"
plugin_id = "aws-ebs"
capability {
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
parameters {
type = "gp3"
encrypted = "true" # 启用 EBS 加密
kmsKeyId = "arn:aws:kms:..." # 使用 KMS 密钥
}
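After creation, the encryption setting can be verified against the backend; for EBS (the volume ID is illustrative):
# Should print "true" for an encrypted volume
aws ec2 describe-volumes --volume-ids vol-0123456789abcdef0 --query 'Volumes[0].Encrypted'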
job "restricted-app" {
group "app" {
volume "readonly_config" {
type = "host"
source = "app_config"
read_only = true # config mounted read-only
}
volume "writable_data" {
type = "host"
source = "app_data"
read_only = false
}
task "web" {
driver = "docker"
config {
image = "myapp:latest"
# Run as a non-root user
user = "1000:1000"
}
# Config: read-only
volume_mount {
volume = "readonly_config"
destination = "/etc/app"
read_only = true
}
# Data: writable
volume_mount {
volume = "writable_data"
destination = "/var/lib/app"
read_only = false
}
}
}
}
job "audit-logging" {
group "monitor" {
volume "audit_logs" {
type = "host"
source = "audit_logs"
}
task "log-collector" {
driver = "docker"
config {
image = "fluentd:latest"
}
volume_mount {
volume = "audit_logs"
destination = "/var/log/audit"
}
# Fluentd configuration
template {
data = <<EOH
<source>
@type tail
path /var/log/audit/*.log
pos_file /var/log/audit.pos
tag audit.*
<parse>
@type json
time_key timestamp
time_format %Y-%m-%dT%H:%M:%S.%L%z
</parse>
</source>
<match audit.**>
@type elasticsearch
host elasticsearch.service.consul
port 9200
index_name audit-logs
type_name audit
</match>
EOH
destination = "local/fluent.conf"
}
}
}
}
job "backup-strategy" {
type = "batch"
periodic {
cron = "0 2 * * *" # 每天 2 AM
prohibit_overlap = true
time_zone = "Asia/Shanghai"
}
group "backup" {
volume "source_data" {
type = "csi"
source = "production-db-volume"
read_only = true
access_mode = "single-node-reader-only"
attachment_mode = "file-system"
}
volume "backup_destination" {
type = "host"
source = "backup_storage"
}
task "create_backup" {
driver = "docker"
config {
image = "alpine:latest"
command = "/bin/sh"
args = [
"-c",
<<EOH
#!/bin/sh
set -e
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/backup/daily"
SOURCE_DIR="/data"
mkdir -p "$BACKUP_DIR"
# Create a full backup
tar czf "$BACKUP_DIR/backup_$TIMESTAMP.tar.gz" -C "$SOURCE_DIR" .
# Keep only the last 7 days of backups
find "$BACKUP_DIR" -name "backup_*.tar.gz" -mtime +7 -delete
# Verify the backup
tar tzf "$BACKUP_DIR/backup_$TIMESTAMP.tar.gz" > /dev/null
echo "Backup completed: backup_$TIMESTAMP.tar.gz"
EOH
]
}
volume_mount {
volume = "source_data"
destination = "/data"
read_only = true
}
volume_mount {
volume = "backup_destination"
destination = "/backup"
}
resources {
cpu = 500
memory = 512
}
}
# Backup notification
task "notify" {
driver = "docker"
lifecycle {
hook = "poststop"
}
config {
image = "curlimages/curl:latest"
command = "curl"
args = [
"-X", "POST",
"-H", "Content-Type: application/json",
"-d", "{"text":"Database backup completed"}",
"https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
]
}
resources {
cpu = 100
memory = 64
}
}
}
}
#!/bin/bash
# disaster-recovery.sh
set -e
BACKUP_FILE=$1
NODE_ID=$2 # node the volume is currently attached to
VOLUME_ID="mysql-volume-01"
PLUGIN_ID="aws-ebs"
if [ -z "$BACKUP_FILE" ] || [ -z "$NODE_ID" ]; then
echo "Usage: $0 <backup-file> <node-id>"
exit 1
fi
echo "=== 灾难恢复流程 ==="
# 1. 停止使用该卷的所有任务
echo "1. 停止应用..."
nomad job stop production-db
# 2. 等待卷释放
echo "2. 等待卷释放..."
while nomad volume status "$VOLUME_ID" | grep -q "In Use"; do
sleep 5
done
# 3. 分离卷
echo "3. 分离卷..."
nomad volume detach "$VOLUME_ID"
# 4. 创建临时恢复任务
echo "4. 恢复数据..."
cat > restore-job.nomad <<EOF
job "restore-data" {
type = "batch"
group "restore" {
volume "target" {
type = "csi"
source = "$VOLUME_ID"
read_only = false
}
volume "backup" {
type = "host"
source = "backup_storage"
}
task "restore" {
driver = "docker"
config {
image = "alpine:latest"
command = "/bin/sh"
args = [
"-c",
"rm -rf /target/* && tar xzf /backup/$BACKUP_FILE -C /target"
]
}
volume_mount {
volume = "target"
destination = "/target"
}
volume_mount {
volume = "backup"
destination = "/backup"
read_only = true
}
}
}
}
EOF
nomad job run restore-job.nomad
# 5. Wait for the restore job to finish (batch jobs report "dead" when done)
echo "5. Waiting for the restore to finish..."
until nomad job status restore-data | grep -q "dead"; do sleep 5; done
# 6. Restart the application
echo "6. Restarting the application..."
nomad job run production-db.nomad
echo "=== Restore complete ==="
job "cross-region-replication" {
type = "batch"
periodic {
cron = "0 */6 * * *" # 每 6 小时
prohibit_overlap = true
}
group "replicate" {
volume "source" {
type = "csi"
source = "primary-volume"
read_only = true
}
task "sync-to-dr" {
driver = "docker"
config {
image = "rclone/rclone:latest"
command = "rclone"
args = [
"sync",
"/source",
"s3-dr:backup-bucket/volume-replica/",
"--config", "/local/rclone.conf"
]
}
volume_mount {
volume = "source"
destination = "/source"
read_only = true
}
template {
data = <<EOH
[s3-dr]
type = s3
provider = AWS
region = us-west-2
access_key_id = {{ env "AWS_ACCESS_KEY_ID" }}
secret_access_key = {{ env "AWS_SECRET_ACCESS_KEY" }}
EOH
destination = "local/rclone.conf"
}
env {
AWS_ACCESS_KEY_ID = "YOUR_KEY"
AWS_SECRET_ACCESS_KEY = "YOUR_SECRET"
}
}
}
}
job "volume-monitoring" {
type = "service"
group "monitor" {
volume "data" {
type = "host"
source = "app_data"
}
task "disk-exporter" {
driver = "docker"
config {
image = "prom/node-exporter:latest"
args = [
"--path.rootfs=/host",
"--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
]
volumes = [
"/:/host:ro,rslave"
]
}
resources {
cpu = 100
memory = 128
}
}
task "alert-manager" {
driver = "docker"
config {
image = "alpine:latest"
command = "/bin/sh"
args = [
"-c",
<<EOH
while true; do
USAGE=$(df -h /data | awk 'NR==2 {print $5}' | sed 's/%//')
if [ "$USAGE" -gt 80 ]; then
echo "WARNING: Disk usage is $${USAGE}%"
# Send an alert ($$ escapes HCL interpolation; \ continues the line)
curl -X POST -H 'Content-Type: application/json' \
-d "{\"text\":\"Volume usage: $${USAGE}%\"}" \
https://hooks.slack.com/YOUR_WEBHOOK
fi
sleep 300
done
EOH
]
}
volume_mount {
volume = "data"
destination = "/data"
read_only = true
}
}
}
}
job "cleanup-old-data" {
type = "batch"
periodic {
cron = "0 3 * * 0" # 每周日 3 AM
prohibit_overlap = true
}
group "cleanup" {
volume "logs" {
type = "host"
source = "app_logs"
}
task "clean" {
driver = "docker"
config {
image = "alpine:latest"
command = "/bin/sh"
args = [
"-c",
<<EOH
#!/bin/sh
set -e
echo "Starting cleanup..."
# Delete logs older than 30 days
find /logs -name "*.log" -mtime +30 -delete
# Compress logs between 7 and 30 days old
find /logs -name "*.log" -mtime +7 -mtime -30 -exec gzip {} \;
# Clean up temporary files
find /logs/tmp -type f -mtime +1 -delete
echo "Cleanup completed"
df -h /logs
EOH
]
}
volume_mount {
volume = "logs"
destination = "/logs"
}
resources {
cpu = 200
memory = 128
}
}
}
}
#!/bin/bash
# migrate-to-csi.sh
set -e
JOB_NAME="myapp"
HOST_VOLUME_PATH="/opt/data/myapp"
CSI_VOLUME_ID="myapp-csi-volume"
CSI_PLUGIN_ID="aws-ebs"
echo "=== 迁移到 CSI Volume ==="
# 1. 创建 CSI 卷
echo "1. 创建 CSI 卷..."
cat > volume.hcl <<EOF
id = "$CSI_VOLUME_ID"
type = "csi"
plugin_id = "$CSI_PLUGIN_ID"
capability {
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
capacity_min = "50GB"
capacity_max = "100GB"
parameters {
type = "gp3"
}
EOF
nomad volume create volume.hcl
# 2. Stop the application
echo "2. Stopping the application..."
nomad job stop "$JOB_NAME"
# 3. Migrate the data
echo "3. Migrating data..."
cat > migrate-job.nomad <<EOF
job "data-migration" {
type = "batch"
group "migrate" {
volume "source" {
type = "host"
source = "myapp_data"
}
volume "target" {
type = "csi"
source = "$CSI_VOLUME_ID"
}
task "copy" {
driver = "docker"
config {
image = "alpine:latest"
command = "/bin/sh"
args = ["-c", "cp -a /source/. /target/"]
}
volume_mount {
volume = "source"
destination = "/source"
read_only = true
}
volume_mount {
volume = "target"
destination = "/target"
}
}
}
}
EOF
nomad job run migrate-job.nomad
# 4. Wait for the migration job to finish
echo "4. Waiting for the migration to finish..."
until nomad job status data-migration | grep -q "dead"; do sleep 5; done
# 5. Point the job at the CSI volume
echo "5. Updating the job file..."
sed -i 's/type.*=.*"host"/type = "csi"/' "$JOB_NAME.nomad"
sed -i "s|source.*=.*|source = \"$CSI_VOLUME_ID\"|" "$JOB_NAME.nomad"
# 6. Restart the application
echo "6. Restarting the application..."
nomad job run "$JOB_NAME.nomad"
echo "=== Migration complete ==="
job "blue-green-deployment" {
datacenters = ["dc1"]
group "blue" {
count = 1
volume "data_v1" {
type = "csi"
source = "app-volume-v1"
}
task "app" {
driver = "docker"
config {
image = "myapp:v1"
}
volume_mount {
volume = "data_v1"
destination = "/data"
}
}
}
group "green" {
count = 0 # not started initially
volume "data_v2" {
type = "csi"
source = "app-volume-v2" # 新卷
}
task "app" {
driver = "docker"
config {
image = "myapp:v2"
}
volume_mount {
volume = "data_v2"
destination = "/data"
}
}
}
update {
max_parallel = 1
health_check = "checks"
min_healthy_time = "30s"
healthy_deadline = "5m"
canary = 1
}
}
Q: What is the difference between a Host Volume and a Docker Volume?
A:
Host Volume: defined in the Nomad Client config, requires pre-created directories, works with every task driver
Docker Volume: defined in the job file, managed by Docker, Docker driver only
Recommendation: with the Docker driver, prefer Docker Volumes unless you need precise control over the host path
Q: Does a volume follow a task when it migrates to another node?
A: It depends on the access mode:
single-node-writer: the volume follows the task, but must be detached and re-attached (a brief outage is possible)
multi-node-multi-writer: simultaneous access from several nodes (requires backend support, e.g. NFS or CephFS)
Q: A volume is stuck in use and cannot be mounted. What now?
A:
# 1. Check the volume status
nomad volume status <volume-id>
# 2. See which allocation is using it
nomad volume status -verbose <volume-id>
# 3. Stop the job holding it
nomad job stop <job-name>
# 4. If the task no longer exists, detach manually
nomad volume detach <volume-id> <node-id>
Q: Will my data be lost when a task restarts?
A: No, as long as:
Host Volume: you use a named volume whose path is defined in the Client config
Docker Volume: you use a named volume (e.g. mydata:/path) rather than an anonymous one (/path)
CSI Volume: the volume exists independently of the task lifecycle
Q: How do I share a volume between multiple tasks?
A: Three ways:
# Option 1: tasks in the same group share the group's volume
group "app" {
volume "shared" {
type = "host"
source = "shared_data"
}
task "producer" {
volume_mount {
volume = "shared"
destination = "/data"
}
}
task "consumer" {
volume_mount {
volume = "shared"
destination = "/data"
read_only = true
}
}
}
# Option 2: different groups use the same Host/Docker volume
# (they must land on the same node)
# Option 3: a CSI volume with a multi-node access mode
volume "shared_csi" {
type = "csi"
source = "nfs-volume"
access_mode = "multi-node-multi-writer"
}
Q: How do I debug a CSI plugin that will not start?
A:
# 1. Check the plugin job status
nomad job status <csi-plugin-job>
# 2. Inspect the allocation logs
nomad alloc logs <alloc-id>
# 3. Check CSI status on the node
nomad node status <node-id> | grep CSI
# 4. Common causes:
# - Insufficient privileges: make sure privileged = true
# - Missing dependencies: check the node has the required tools (e.g. nfs-common)
# - Network issues: verify the storage backend is reachable
Q: How do I limit the size of a volume?
A:
# CSI volumes take a size range at creation time (volume spec file)
id = "limited"
type = "csi"
plugin_id = "aws-ebs"
capacity_min = "10GB"
capacity_max = "50GB" # at most 50GB
# Host/Docker volumes must be limited at the OS level,
# e.g. with LVM or filesystem quotas
Q: How do I monitor volume usage?
A:
# Option 1: Prometheus Node Exporter
task "monitor" {
driver = "docker"
config {
image = "prom/node-exporter:latest"
volumes = ["/:/host:ro,rslave"]
}
}
# Option 2: a custom monitoring script
task "disk-check" {
driver = "docker"
config {
image = "alpine:latest"
command = "/bin/sh"
args = ["-c", "while true; do df -h /data; iostat -x 5 1; sleep 60; done"]
}
}
Q: How do I safely delete a volume?
A:
# 1. Stop every job using the volume
nomad job stop <job-name>
# 2. Confirm the volume is no longer in use
nomad volume status <volume-id>
# 3. Back up the data if needed
# run a backup task...
# 4. Delete the volume
nomad volume delete <volume-id>
# 5. For Docker volumes, also clean up on the Docker side
docker volume rm <volume-name>
Q: Does Nomad support creating volumes dynamically?
A: Partially:
CSI Volumes: yes, via nomad volume create
Host Volumes: no, they must be defined in the Client config beforehand
Docker Volumes: yes, Docker auto-creates named volumes that do not exist
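As a concrete sketch of the CSI case (the plugin ID and sizes assume the aws-ebs setup from earlier):
cat > dyn-volume.hcl <<EOF
id = "dyn-volume-01"
type = "csi"
plugin_id = "aws-ebs"
capacity_min = "5GB"
capacity_max = "10GB"
capability {
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
EOF
nomad volume create dyn-volume.hcl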
# === Host Volumes ===
# Configured in /etc/nomad.d/client.hcl
# Restart the client: systemctl restart nomad
# === Docker Volumes ===
docker volume ls # list volumes
docker volume inspect <volume-name> # inspect a volume
docker volume rm <volume-name> # delete a volume
docker volume prune # prune unused volumes
# === CSI Volumes ===
nomad volume create <volume-spec.hcl> # create a volume
nomad volume status # list all volumes
nomad volume status <volume-id> # inspect a volume
nomad volume delete <volume-id> # delete a volume
nomad volume detach <volume-id> <node-id> # force-detach a volume
nomad plugin status # list all plugins
nomad plugin status <plugin-id> # inspect a plugin
nomad plugin status -verbose <plugin-id> # detailed info
# === Task debugging ===
nomad alloc logs <alloc-id> # task logs
nomad alloc exec -task <task> <alloc-id> sh # shell into the container
nomad node status <node-id> # node status
# Client configuration
client {
host_volume "name" {
path = "/absolute/path"
read_only = false
}
}
# Job configuration
volume "name" {
type = "host"
source = "name"
read_only = false
}
volume_mount {
volume = "name"
destination = "/container/path"
read_only = false
}
# Simple named volume
volumes = ["volume-name:/container/path"]
# Bind mount
volumes = ["/host/path:/container/path:ro"]
# mount syntax
mount {
type = "volume"
source = "volume-name"
target = "/container/path"
}
# Volume spec file
id = "volume-id"
type = "csi"
plugin_id = "plugin-name"
capability {
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
capacity_min = "10GB"
capacity_max = "100GB"
# Job configuration
volume "name" {
type = "csi"
source = "volume-id"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
This guide covered every volume type Nomad supports:
Host Volumes - simple scenarios, node-local storage
Docker Volumes - Docker-native, easy to manage
CSI Volumes - enterprise-grade, with cross-node access and advanced features