Eli's Blog

1. Preparation

1.1 Set hostnames

# master
hostnamectl set-hostname k8s-master

# node
hostnamectl set-hostname k8s-node01
hostnamectl set-hostname k8s-node02

1.2 Hostname resolution between nodes

vi /etc/hosts
192.168.31.40 k8s-master
192.168.31.41 k8s-node01
192.168.31.42 k8s-node02

1.3 Disable swap

swapoff -a && sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
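
A quick check that swap is really off (assuming the standard procps tools are installed):

free -m          # the Swap row should show 0 total
swapon --show    # prints nothing when no swap device is active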

1.4 Tune kernel parameters

cat > /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
net.ipv4.tcp_tw_recycle=0
# keep swap usage to a minimum; only swap when the system is close to OOM
vm.swappiness=0
# do not check whether enough physical memory is available before allocating
vm.overcommit_memory=1
fs.inotify.max_user_instances=8192
# do not panic on OOM; let the OOM killer handle it
vm.panic_on_oom=0
fs.inotify.max_user_watches=1048576
fs.file-max=52706963
fs.nr_open=52706963
net.ipv6.conf.all.disable_ipv6=1
net.netfilter.nf_conntrack_max=2310720
EOF

sysctl -p /etc/sysctl.d/kubernetes.conf

# if you hit: sysctl: cannot stat /proc/sys/net/bridge/bridge-nf-call-iptables: No such file or directory
modprobe br_netfilter
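
The modprobe above does not persist across reboots. One way to load the module at boot is the standard systemd modules-load.d mechanism (a minimal sketch; the file name is arbitrary):

cat > /etc/modules-load.d/k8s.conf <<EOF
br_netfilter
EOF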

1.5 Upgrade the kernel

uname -r
3.10.0-1127.el7.x86_64

# add the ELRepo kernel repository
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm

# install the new (long-term support) kernel
yum --enablerepo=elrepo-kernel install -y kernel-lt

# check that the new kernel has been added to the boot menu
cat /boot/grub2/grub.cfg | grep elrepo | grep menuentry
menuentry 'CentOS Linux (4.4.236-1.el7.elrepo.x86_64) 7 (Core)' --class centos --class gnu-linux --class gnu --class os --unrestricted $menuentry_id_option 'gnulinux-3.10.0-1127.el7.x86_64-advanced-3ec6d414-2b79-482d-9643-b7baeb42cb3d' {

# boot from the new kernel by default
grub2-set-default 'CentOS Linux (4.4.236-1.el7.elrepo.x86_64) 7 (Core)'

# reboot
reboot
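
After the reboot, confirm that the machine actually booted the new kernel:

uname -r
# expect something like 4.4.236-1.el7.elrepo.x86_64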

1.6 Enable IPVS (required by kube-proxy in ipvs mode)

modprobe br_netfilter

cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF

chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules

lsmod | grep -e ip_vs -e nf_conntrack_ipv4
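
Note: on kernels 4.19 and newer, nf_conntrack_ipv4 has been merged into nf_conntrack, so the last modprobe line would need adjusting there; with the 4.4 kernel installed above, the script works as written.

# only needed on kernel >= 4.19 (not the case here)
modprobe -- nf_conntrack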

2. Deploying with kubeadm

2.1 Install kubeadm

cat > /etc/yum.repos.d/kubernetes.repo <<EOF
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# list available versions
yum list kubelet --showduplicates

yum install -y kubeadm-1.18.6 kubelet-1.18.6 kubectl-1.18.6

systemctl enable kubelet
  • kubelet: runs on every node in the cluster; it starts and manages Pods and containers
  • kubeadm: the tool that bootstraps and initializes the cluster
  • kubectl: the command-line tool for talking to the cluster; used to deploy and manage applications, inspect resources, and create, delete, or update components (see the quick check below)
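
A quick sanity check that all three tools landed at the expected version (a minimal sketch):

kubeadm version -o short            # expect v1.18.6
kubelet --version                   # expect Kubernetes v1.18.6
kubectl version --client --short    # expect Client Version: v1.18.6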

2.2 Pull the Kubernetes images

The images are hosted on Google's registry by default; from inside China, pull them from the Aliyun mirror instead.

$ vi load_kube_images.sh
#!/bin/bash
url=registry.aliyuncs.com/google_containers
version=v1.18.6
images=(`kubeadm config images list --kubernetes-version=$version|awk -F '/' '{print $2}'`)
for imagename in ${images[@]} ; do
  docker pull $url/$imagename
  docker tag $url/$imagename k8s.gcr.io/$imagename
  docker rmi -f $url/$imagename
done

$ chmod u+x load_kube_images.sh && bash load_kube_images.sh
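
To confirm the script pulled and re-tagged everything, list the local images (the exact set depends on the kubeadm version):

docker images | grep k8s.gcr.io
# expect kube-apiserver, kube-controller-manager, kube-scheduler, kube-proxy,
# pause, etcd and coredns, all tagged as k8s.gcr.io/...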

2.3 Initialize the master node (k8s-master)

# dump the default configuration
$ kubeadm config print init-defaults > kubeadm-config.yaml

$ vi kubeadm-config.yaml
...
localAPIEndpoint:
  advertiseAddress: 192.168.31.40   # change to this node's IP
  bindPort: 6443
...
kubernetesVersion: v1.18.6          # must match the installed version
networking:
  dnsDomain: cluster.local
  podSubnet: 10.244.0.0/16          # add this line
  serviceSubnet: 10.96.0.0/12
scheduler: {}
# append the following to switch kube-proxy to ipvs mode
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
featureGates:
  SupportIPVSProxyMode: true
mode: ipvs

# run the initialization
$ kubeadm init --config=kubeadm-config.yaml --upload-certs | tee kubeadm-init.log
...
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.31.40:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:1c02156e6d3d6b85938e20f0473af2dffff7a22a6c67387aba97da38f0952386

# if something goes wrong, reset and run the init again
$ kubeadm reset

# follow the steps printed at the end of the init output
$ mkdir -p $HOME/.kube
$ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
$ sudo chown $(id -u):$(id -g) $HOME/.kube/config
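
# (optional; assumes you are operating as root) instead of copying admin.conf,
# you can point KUBECONFIG at it directly:
export KUBECONFIG=/etc/kubernetes/admin.conf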

# list the nodes
$ kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master NotReady master 3m31s v1.18.6

2.4 Deploy the pod network (k8s-master)

# work around DNS pollution of raw.githubusercontent.com (not recommended)
$ echo "199.232.68.133 raw.githubusercontent.com" >> /etc/hosts

# better: resolve the name with nslookup and use the address it returns
$ yum install -y bind-utils
$ nslookup raw.githubusercontent.com

$ wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

$ kubectl apply -f kube-flannel.yml
podsecuritypolicy.policy/psp.flannel.unprivileged created
clusterrole.rbac.authorization.k8s.io/flannel created
clusterrolebinding.rbac.authorization.k8s.io/flannel created
serviceaccount/flannel created
configmap/kube-flannel-cfg created
daemonset.apps/kube-flannel-ds created

$ kubectl get pod -n kube-system
kube-flannel-ds-4dlvt 1/1 Running 0 54s

$ kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-master Ready master 22m v1.18.6

2.5 Join the worker nodes to the cluster (k8s-node01, k8s-node02)

# the join command printed in the last lines of the kubeadm init log on k8s-master
$ kubeadm join 192.168.31.40:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:1c02156e6d3d6b85938e20f0473af2dffff7a22a6c67387aba97da38f0952386

If that command is no longer at hand, run the following on the master node (k8s-master) to retrieve a token and the CA cert hash:

$ kubeadm token list
TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
7114t2.imdu2ivf56cbjk38 1h 2020-09-22T22:31:42+08:00 <none> Proxy for managing TTL for the kubeadm-certs secret <none>
abcdef.0123456789abcdef 23h 2020-09-23T20:31:43+08:00 authentication,signing <none> system:bootstrappers:kubeadm:default-node-token

# if the token has expired, create a new one
$ kubeadm token create

# compute the discovery-token-ca-cert-hash from the cluster CA certificate
$ openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
0daca31a7b9820fc60eaa28cacc25ee3a16c7af5c6e8104d91bf709bf2e741bc
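
Alternatively, kubeadm can print a ready-to-use join command (token plus CA cert hash) in one step:

kubeadm token create --print-join-command
# kubeadm join 192.168.31.40:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>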

2.6 Check all nodes

# kubectl command completion
yum install -y bash-completion
source /usr/share/bash-completion/bash_completion

source <(kubectl completion bash)
echo "source <(kubectl completion bash)" >> ~/.bash_profile

kubectl get node

kubectl get pod -n kube-system

# detailed view of the system pods and the nodes they run on
kubectl get pod -n kube-system -o wide

2.7 Issue: controller-manager and scheduler report Unhealthy

$ kubectl get cs
NAME STATUS MESSAGE ERROR
scheduler Unhealthy Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused
controller-manager Unhealthy Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused
etcd-0 Healthy {"health":"true"}

$ netstat -an | grep -e 10251 -e 10252

Fix: check whether the kube-scheduler and kube-controller-manager static-pod manifests disable the insecure port (the --port=0 flag).

$ vi /etc/kubernetes/manifests/kube-scheduler.yaml
...
spec:
  containers:
  - command:
    - kube-scheduler
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
    # - --port=0                    # comment this line out
    image: k8s.gcr.io/kube-scheduler:v1.18.6

$ vi /etc/kubernetes/manifests/kube-controller-manager.yaml
...
spec:
  containers:
  - command:
    - kube-controller-manager
    - --node-cidr-mask-size=24
    # - --port=0                    # comment this line out
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt

# restart kubelet so the static pods are recreated
$ systemctl restart kubelet

# check the status again
$ kubectl get cs
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy {"health":"true"}

6. Test the cluster from the k8s master node

vi nginx.yml
# API version
apiVersion: apps/v1
# resource kind, e.g. Pod / ReplicationController / Deployment / Service / Ingress
kind: Deployment
metadata:
  # name of this Deployment
  name: nginx-app
spec:
  selector:
    matchLabels:
      # pod label; the Service selector must match this when the app is exposed
      app: nginx
  # number of replicas to deploy
  replicas: 2
  template:
    metadata:
      labels:
        app: nginx
    spec:
      # container list; more than one container can be configured
      containers:
      # container name
      - name: nginx
        # container image
        image: hub.elihe.io/library/nginx:v1
        # pull the image only when it is not already present locally
        imagePullPolicy: IfNotPresent
        ports:
        # container port
        - containerPort: 80

kubectl apply -f nginx.yml

kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-app-76b8bd9478-z74zq 1/1 Running 0 93s 10.244.1.2 k8s-node02 <none> <none>
nginx-app-76b8bd9478-z8zt6 1/1 Running 0 93s 10.244.2.5 k8s-node01 <none> <none>

kubectl delete pod nginx-app-76b8bd9478-z74zq

kubectl get pod -o wide

# change the replica count
kubectl scale --replicas=3 deployment/nginx-app

kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-app-76b8bd9478-6gkq6 1/1 Running 0 4m25s 10.244.1.3 k8s-node02 <none> <none>
nginx-app-76b8bd9478-d7p6r 1/1 Running 0 13s 10.244.1.4 k8s-node02 <none> <none>
nginx-app-76b8bd9478-z8zt6 1/1 Running 0 6m37s 10.244.2.5 k8s-node01 <none> <none>

kubectl get deployment
NAME READY UP-TO-DATE AVAILABLE AGE
nginx-app 3/3 3 3 12m

kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 16h

# expose the deployment as a Service (ClusterIP)
kubectl expose deployment nginx-app --port=3000 --target-port=80

kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 16h
nginx-app ClusterIP 10.104.89.46 <none> 3000/TCP 9s

# test the service and the ipvs forwarding rules
curl -i 10.104.89.46:3000
ipvsadm -Ln
TCP 10.104.89.46:3000 rr
-> 10.244.1.3:80 Masq 1 0 1
-> 10.244.1.4:80 Masq 1 0 1
-> 10.244.2.5:80 Masq 1 0 1

# expose nginx outside the cluster
kubectl edit svc nginx-app
  type: NodePort   # change from ClusterIP

kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 16h
nginx-app NodePort 10.104.89.46 <none> 3000:31057/TCP 8m44s

http://192.168.31.40:31057/
http://192.168.31.41:31057/
http://192.168.31.42:31057/
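
Editing the Service interactively works, but the same change can also be scripted. A non-interactive alternative using kubectl patch (a sketch; the merge patch simply flips the Service type):

kubectl patch svc nginx-app -p '{"spec":{"type":"NodePort"}}'
kubectl get svc nginx-app    # PORT(S) now shows 3000:<nodePort>/TCP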