环境准备

  • Oracle Cloud、阿里云
  • Oracle Linux 8、CentOS 7.9
  • Ampere ARM 处理器、X86 处理器
  • 2 cores, 12g memory, bandwidth 2gbps

k8s 基础环境设置

设置主机名 & hosts

sudo hostnamectl set-hostname master1
vim /etc/hosts

# 公网ip和主机名
公网ip master1
公网ip node1
公网ip node2

设置基础环境

  • 新建脚本
vim k8s-pre-install-centos.sh
  • 脚本内容
#!/bin/bash
# k8s-pre-install-centos.sh
# Prepare a CentOS 7 / Oracle Linux 8 host for kubeadm:
# SELinux permissive, timezone, firewall off, swap off, bridge sysctls.
# Run as a user with sudo rights (sudo is applied consistently below).

set_base() {

  # 1. Disable SELinux so containers can read the host filesystem.
  #    setenforce handles the running system; the sed makes it permanent.
  sudo setenforce 0
  sudo sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config

  # 2. System time: Asia/Shanghai timezone, hardware clock kept in UTC,
  #    then restart time-dependent services.
  sudo timedatectl set-timezone Asia/Shanghai
  sudo timedatectl set-local-rtc 0
  sudo systemctl restart rsyslog
  sudo systemctl restart crond

  # 3. Stop the host firewall. NOTE(review): on cloud servers the provider's
  #    security-group/console firewall must also be opened.
  sudo systemctl stop firewalld
  sudo systemctl disable firewalld

  # 4. Disable swap — kubeadm requires it.
  sudo swapoff -a                                        # off for this boot
  sudo sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab     # off permanently

  # 5. Bridge/iptables kernel settings required by kube-proxy and most CNIs.
  cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF
  # Load the module now; modules-load.d only takes effect on the next boot,
  # and the bridge sysctls below fail if br_netfilter is not loaded.
  sudo modprobe br_netfilter

  cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF

  # Apply sysctl settings without a reboot.
  sudo sysctl --system

}

set_base
  • 执行脚本
chmod +x k8s-pre-install-centos.sh && ./k8s-pre-install-centos.sh

设置虚拟网卡

阿里云

cat > /etc/sysconfig/network-scripts/ifcfg-eth0:1 <<EOF
BOOTPROTO=static
DEVICE=eth0:1
IPADDR=公网IP
PREFIX=32
TYPE=Ethernet
USERCTL=no
ONBOOT=yes
EOF

oracle cloud

cat > /etc/sysconfig/network-scripts/ifcfg-enp0s6:1 <<EOF
BOOTPROTO=static
DEVICE=enp0s6:1
IPADDR=公网IP
PREFIX=32
TYPE=Ethernet
USERCTL=no
ONBOOT=yes
EOF

重启网络服务

systemctl restart network   # CentOS 7;Oracle Linux 8 使用 NetworkManager,请执行 systemctl restart NetworkManager

检查虚拟网卡是否已添加上

ifconfig

Docker 安装

一、阿里云

  • 一键安装
curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun
  • 设置开机启动
systemctl start docker && systemctl enable docker
配置 docker
  • 将 driver 由 cgroupfs 改为 systemd
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<-'EOF'
{
"registry-mirrors": ["https://0sfv2fhl.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
}
}
EOF
sudo systemctl daemon-reload
sudo systemctl restart docker
配置 containerd
  • 生成默认配置文件
containerd config default > /etc/containerd/config.toml
  • 将 sandbox_image 镜像源设置为阿里云 google_containers 镜像源
vim /etc/containerd/config.toml
45  [plugins."io.containerd.grpc.v1.cri"]
61     sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.6"
  • 配置镜像加速地址
#配置containerd镜像加速地址
vim /etc/containerd/config.toml
217  [plugins."io.containerd.tracing.processor.v1.otlp"]
218    endpoint = "https://0sfv2fhl.mirror.aliyuncs.com"
219    insecure = false
220    protocol = ""
#使用的是镜像加速器服务,阿里云镜像站开通
  • 配置 cgroup 驱动程序 systemd
vim /etc/containerd/config.toml
114 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
125   SystemdCgroup = true
  • 应用所有更改后,重新启动 containerd
systemctl restart containerd && systemctl enable containerd

netstat -anput | grep containerd

二、oracle cloud

  • 一键安装
sudo yum-config-manager \
    --add-repo \
    https://download.docker.com/linux/centos/docker-ce.repo

docker & containerd 安装

sudo yum install -y -q docker-ce docker-ce-cli containerd.io docker-compose-plugin docker-ce-rootless-extras docker-buildx-plugin
  • 设置开机启动
systemctl start docker && systemctl enable docker
配置 docker
  • 将 driver 由 cgroupfs 改为 systemd
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<-'EOF'
{
"registry-mirrors": ["https://0sfv2fhl.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
}
}
EOF
sudo systemctl daemon-reload
sudo systemctl restart docker
配置 containerd
  • 生成默认配置文件
containerd config default > /etc/containerd/config.toml
  • 配置 cgroup 驱动程序 systemd
vim /etc/containerd/config.toml
114 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
125   SystemdCgroup = true
  • 应用所有更改后, 重新启动 containerd
systemctl restart containerd && systemctl enable containerd

netstat -anput | grep containerd

安装 kubelet, kubectl, kubeadm

阿里云

添加阿里云 k8s repo

cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

oracle cloud

添加官方 k8s repo

cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-\$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kubelet kubeadm kubectl
EOF
sudo yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes

修改 kubelet 启动参数

每台主机都要添加并指定对应的公网 IP,然后才能使用公网 IP 进行集群间通信

vim /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf

在 KUBELET_KUBECONFIG_ARGS 后面追加 --node-ip=<public_ip>

ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS --node-ip=xxx.xxx.xxx.xxx

修改之后执行 daemon-reload 让修改生效

systemctl daemon-reload

启动 kubelet / 重启 kubelet

systemctl enable kubelet && systemctl start kubelet

kubeadm 初始化集群

阿里云

kubeadm init --kubernetes-version=1.28.5 \
  --apiserver-advertise-address=公网ip \
  --image-repository=registry.aliyuncs.com/google_containers \
  --pod-network-cidr=10.244.0.0/16 \
  --v=5

国内节点拉镜像及打 tag

crictl pull registry.aliyuncs.com/google_containers/kube-proxy:v1.28.5
ctr -n k8s.io i tag registry.aliyuncs.com/google_containers/kube-proxy:v1.28.5 registry.k8s.io/kube-proxy:v1.28.5

oracle cloud

kubeadm init --kubernetes-version=1.28.5 \
  --apiserver-advertise-address=公网ip \
  --pod-network-cidr=10.244.0.0/16 \
  --v=5

更新用户配置文件

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

集群健康检查

# 检查集群状态
kubectl cluster-info

# 健康检查
curl -k https://localhost:6443/healthz

配置默认的 endpoints

crictl config runtime-endpoint unix:///run/containerd/containerd.sock
crictl config image-endpoint unix:///run/containerd/containerd.sock

修改 kube-apiserver 配置

kube-apiserver 添加 --bind-address=0.0.0.0,确认 --advertise-address=<公网 IP>

vim /etc/kubernetes/manifests/kube-apiserver.yaml
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/kube-apiserver.advertise-address.endpoint: 10.0.20.8:6443
  creationTimestamp: null
  labels:
    component: kube-apiserver
    tier: control-plane
  name: kube-apiserver
  namespace: kube-system
spec:
  containers:
    - command:
        - kube-apiserver
        - --advertise-address=xx.xx.xx.xx # 修改为公网IP
        - --bind-address=0.0.0.0 # 新增参数
        - --allow-privileged=true
        - --authorization-mode=Node,RBAC
        - --client-ca-file=/etc/kubernetes/pki/ca.crt
        - --enable-admission-plugins=NodeRestriction
        ...

flannel 网络部署

wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

kubectl apply -f kube-flannel.yml

稍等片刻, 便大功告成! 😄

NAME     STATUS   ROLES           AGE     VERSION   INTERNAL-IP     EXTERNAL-IP   OS-IMAGE                KERNEL-VERSION                CONTAINER-RUNTIME
master   Ready    control-plane   4m27s   v1.24.2   172.18.54.227   <none>        CentOS Linux 7 (Core)   3.10.0-1160.66.1.el7.x86_64   containerd://1.6.6
  • 遇到“open /run/flannel/subnet.env: no such file or directory“解决办法
    新建/run/flannel/subnet.env文件, 写入内容
    FLANNEL_NETWORK=10.244.0.0/16
    FLANNEL_SUBNET=10.244.0.1/24
    FLANNEL_MTU=1450
    FLANNEL_IPMASQ=true
  • 遇到集群节点服务不互通的解决办法
    查看节点public_ip是不是绑定内网 IP 了
    kubectl describe nodes node1

    修改public_ip为公网 IP
    # 查找指定节点flannel pod名称
    kubectl -n kube-flannel get pods --field-selector spec.nodeName=node1 | grep flannel
    
    # 修改内网IP为外网IP
    kubectl annotate node node1 flannel.alpha.coreos.com/public-ip-overwrite=43.129.71.69 --overwrite
    kubectl -n kube-flannel get pod kube-flannel-ds-r7kps -o yaml | kubectl replace --force -f -
    
    # 查看是否更改成功
    kubectl describe nodes node1

设置 Master 节点可调度

kubectl taint nodes k8s-master node-role.kubernetes.io/control-plane:NoSchedule-
kubectl taint nodes --all node-role.kubernetes.io/master-

helm 安装

一键安装

curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash

使命令生效

vim ~/.bashrc

追加到末尾

export PATH=$PATH:/usr/local/bin

helm 安装 k8tz

统一设定 pod 时区为 Asia/Shanghai

  • 克隆修改好的 k8tz
git clone [email protected]:ddong8/k8tz.git && cd k8tz
  • 安装 k8tz
helm install k8tz ./

helm 安装 ingress nginx controller

  • 克隆修改好的 ingress-nginx
git clone [email protected]:ddong8/ingress-nginx.git
  • 给节点打上刚刚设置的标签 ingress=true
kubectl label node master1 ingress=true
  • 创建命名空间
kubectl create ns ingress-nginx
  • 安装
helm install ingress-nginx -n ingress-nginx .
  • 查看 ingress-nginx
kubectl get all -n ingress-nginx
kubectl get pods -n ingress-nginx -o wide

安装 cert-manager

自动签发证书


git clone [email protected]:ddong8/cert-manager.git && cd cert-manager

helm install cert-manager ./ --namespace cert-manager --create-namespace

还需要创建 ClusterIssuer vim Secret-cluster-issuer.yaml

apiVersion: v1
kind: Secret
metadata:
  name: cloudflare-api-token-secret
  namespace: cert-manager # 这里配置为安装cert-manager资源的命名空间
type: Opaque
stringData:
  api-token: 'xxxxxxxxxxxxxxx' # cloudflare token 这里的值为[配置API Token]一节中创建的API Token值

---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: cloudflare-acme-cluster-issuer
spec:
  acme:
    email: '[email protected]'
    # 配置证书目录
    server: https://acme-v02.api.letsencrypt.org/directory
    privateKeySecretRef:
      name: acme-issuer-account-key
    solvers:
    - dns01:
        cloudflare:
          apiTokenSecretRef:
            name: cloudflare-api-token-secret # 引用当前文档中创建的Secret名称
            key: api-token

提交到集群

kubectl apply -f Secret-cluster-issuer.yaml

安装 nfs

  • 安装 NFS 软件包
sudo yum install nfs-utils
  • 创建共享目录
mkdir -p /data/nfs
chmod 777 -R /data/nfs
  • 配置共享
vim /etc/exports

添加以下行

/data/nfs *(rw,sync,no_root_squash)
  • 启动和配置 NFS 服务
sudo systemctl start nfs-server
sudo systemctl enable nfs-server
  • 验证 NFS 配置
showmount -e localhost
  • 挂载 NFS 共享(在客户端)
sudo mkdir -p /data/nfs
chmod 777 -R /data/nfs
sudo mount server_ip:/data/nfs /data/nfs

helm 安装 nfs 自动分配插件

  • 添加 helm repo
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
helm repo update
  • 安装
kubectl create ns nfs

helm install -n nfs nfs-subdir-external-provisioner-master1 nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
    --set nfs.server=x.x.x.x \
    --set nfs.path=/data/nfs-master1 \
    --set storageClass.name=nfs-master1-client


helm install -n nfs nfs-subdir-external-provisioner-node1 nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
    --set nfs.server=x.x.x.x \
    --set nfs.path=/data/nfs-node1 \
    --set storageClass.name=nfs-node1-client


helm install -n nfs nfs-subdir-external-provisioner-node2 nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
    --set nfs.server=x.x.x.x \
    --set nfs.path=/data/nfs-node2 \
    --set storageClass.name=nfs-node2-client
  • 即可使用 Storage Classes 自动创建 pv pvc

kubernetes-dashboard 安装

  • 克隆修改好的 kubernetes-dashboard
git clone [email protected]:ddong8/kubernetes-dashboard.git
  • 创建命名空间
kubectl create ns kubernetes-dashboard
  • 创建 tls
sh tls.sh
  • helm 安装 kubernetes-dashboard
helm install kubernetes-dashboard ./ -n kubernetes-dashboard

创建一个服务帐户并使用以下 yaml 文件分配所需的权限

vim k8s-dashboard-account.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: admin-user
    namespace: kube-system
kubectl create -f k8s-dashboard-account.yaml

创建 admin-user token

kubectl -n kube-system create token admin-user --duration=31536000s   # token有效期365天

code-server 安装

  • 克隆修改好的 code-server
git clone [email protected]:ddong8/code-server.git
  • 创建 tls
sh tls.sh
  • helm 安装 code-server
helm install code-server ./

实用命令

1.为了提高跨云集群的稳定性,可以适当提高节点上报状态的间隔时间和重试次数

  • 将 node2 节点上报状态时长设置为每 30s(默认值为 10s)上报一次
kubectl annotate node node2 node-status-update-frequency=30s
  • 将 node2 状态上报失败重试次数设置为 20 次(默认为 5 次)
kubectl annotate node node2 node-status-update-retry=20
  • 更改 apiserver 参数配置

在 master 节点编辑 apiserver 配置文件 /etc/kubernetes/manifests/kube-apiserver.yaml 添加 --http2-max-streams-per-connection=-1

apiVersion: v1
kind: Pod
metadata:
  name: kube-apiserver
spec:
  containers:
  - name: kube-apiserver
    ...
    command:
    - kube-apiserver
    - ...
    - --http2-max-streams-per-connection=-1

设置心跳超时和间隔参数, 将periodSeconds设置为 30s, timeoutSeconds 设置为 60s

apiVersion: v1
kind: Pod
metadata:
  labels:
    component: kube-apiserver
    tier: control-plane
  name: kube-apiserver
  namespace: kube-system
spec:
  containers:
    - command:
        - kube-apiserver
        - --bind-address=0.0.0.0
        - --allow-privileged=true
        - --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
        - --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
        - --http2-max-streams-per-connection=-1
      image: registry.k8s.io/kube-apiserver:v1.28.2
      imagePullPolicy: IfNotPresent
      livenessProbe:
        failureThreshold: 8
        httpGet:
          host: xx.xx.xx.xx
          path: /livez
          port: 6443
          scheme: HTTPS
        initialDelaySeconds: 10
        periodSeconds: 30
        timeoutSeconds: 60
      name: kube-apiserver
      readinessProbe:
        failureThreshold: 3
        httpGet:
          host: xx.xx.xx.xx
          path: /readyz
          port: 6443
          scheme: HTTPS
        periodSeconds: 30
        timeoutSeconds: 60
      resources:
        requests:
          cpu: 250m
      startupProbe:
        failureThreshold: 24
        httpGet:
          host: xx.xx.xx.xx
          path: /livez
          port: 6443
          scheme: HTTPS
        initialDelaySeconds: 10
        periodSeconds: 30
        timeoutSeconds: 60