Offline Deployment of a Kubernetes Cluster Based on KubeSphere

Node Role | Hostname | CPU (cores) | Memory (GB) | System Disk (GB) | Data Disk (TB) | IP | Remarks
k8s-master-1 | bigdata-dn021 | 2*8 | 256 | 2*600 | 10*3 | 10.4.11.22 | k8s-master-1
k8s-master-2 | bigdata-dn023 | 2*8 | 256 | 2*600 | 10*3 | 10.4.11.23 | k8s-master-2
k8s-master-3 | bigdata-dn031 | 2*8 | 256 | 2*600 | 10*3 | 10.4.11.31 | k8s-master-3
k8s-node-1 | bigdata-dn032 | 2*8 | 256 | 2*600 | 10*3 | 10.4.11.32 | k8s-node-1
k8s-node-2 | bigdata-dn033 | 2*8 | 256 | 2*600 | 10*3 | 10.4.11.33 | k8s-node-2
k8s-node-3 | bigdata-dn034 | 2*8 | 256 | 2*600 | 10*3 | 10.4.11.34 | k8s-node-3

Basic Configuration

# Disable firewalld and SELinux
systemctl stop firewalld && systemctl disable firewalld
# Permanently disable SELinux
vi /etc/selinux/config
# Change the following setting
SELINUX=disabled
# Temporarily disable SELinux (takes effect immediately)
setenforce 0
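If you prefer not to edit the file by hand, the same permanent change can be made non-interactively; a minimal sketch that simply rewrites the SELINUX= line in /etc/selinux/config:
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config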
# Set the hostname on every node
[root@localhost ~]# hostnamectl set-hostname k8s-master-1
[root@localhost ~]# su
[root@k8s-master-1 ~]#
[root@localhost ~]# hostnamectl set-hostname k8s-master-2
[root@localhost ~]# su
[root@k8s-master-2 ~]#
[root@localhost ~]# hostnamectl set-hostname k8s-master-3
[root@localhost ~]# su
[root@k8s-master-3 ~]#
[root@localhost ~]# hostnamectl set-hostname k8s-node-1
[root@localhost ~]# su
[root@k8s-node-1 ~]#
[root@localhost ~]# hostnamectl set-hostname k8s-node-2
[root@localhost ~]# su
[root@k8s-node-2 ~]#
[root@localhost ~]# hostnamectl set-hostname k8s-node-3
[root@localhost ~]# su
[root@k8s-node-3 ~]#
# Add hostname resolution entries (the example entries below come from a sample environment; replace them with your own nodes' IPs and hostnames)
vi /etc/hosts
192.168.1.180  k8s-master01.hectsi.cn k8s-master01
192.168.1.53  k8s-master02.hectsi.cn k8s-master02
192.168.1.110  k8s-master03.hectsi.cn k8s-master03
192.168.1.162  k8s-node01.hectsi.cn k8s-node01
192.168.1.210  k8s-node02.hectsi.cn k8s-node02
192.168.1.224  k8s-node03.hectsi.cn k8s-node03
192.168.1.73  k8s-node04.hectsi.cn k8s-node04
192.168.1.169  k8s-node05.hectsi.cn k8s-node05
192.168.1.193  k8s-node06.hectsi.cn k8s-node06
192.168.1.184  lb.hectsi.cn #load balancer
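Rather than editing /etc/hosts on every node by hand, the finished file can be copied out in a loop. A sketch, assuming root SSH access from the current node and using the node IPs from the table above:
for ip in 10.4.11.23 10.4.11.31 10.4.11.32 10.4.11.33 10.4.11.34; do
  scp /etc/hosts root@${ip}:/etc/hosts
done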
# Mount the data disk
[root@k8s-master01 kk]# lsblk   # Check the data disk
NAME   MAJ:MIN RM  SIZE RO TYPE MOUNTPOINT
vda    253:0    0   40G  0 disk
├─vda1 253:1    0    4G  0 part
└─vda2 253:2    0   36G  0 part /
vdb    253:16   0  500G  0 disk
[root@k8s-master01 kk]# fdisk /dev/vdb
n
p
(press Enter to accept the defaults for the remaining prompts)
....
w

[root@k8s-master01 kk]# mkfs.ext4 /dev/vdb1  # Format the filesystem
[root@k8s-master01 kk]# lsblk   # Verify the new partition
NAME   MAJ:MIN RM  SIZE RO TYPE MOUNTPOINT
vda    253:0    0   40G  0 disk
├─vda1 253:1    0    4G  0 part
└─vda2 253:2    0   36G  0 part /
vdb    253:16   0  500G  0 disk
└─vdb1 253:17   0  500G  0 part 
[root@k8s-master01 kk]# mkdir -p /data  # Create the mount point
[root@k8s-master01 kk]# vi /etc/fstab  # Mount permanently by adding the following line
/dev/vdb1       /data   ext4    defaults        0 0
[root@k8s-master01 kk]# mount -a # Mount everything in fstab
[root@k8s-master01 kk]# df -h  # Verify the mount
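Because every node carries a data disk, the interactive fdisk steps above can also be scripted. A sketch using parted, assuming /dev/vdb is the empty data disk and /data is the mount point as above:
parted -s /dev/vdb mklabel msdos
parted -s /dev/vdb mkpart primary ext4 1MiB 100%
mkfs.ext4 /dev/vdb1
mkdir -p /data
echo '/dev/vdb1  /data  ext4  defaults  0 0' >> /etc/fstab
mount -a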

Download the Required Offline Resources

Perform the following steps on a machine with Internet access.

  1. Download the image list file images-list.txt on a machine that can access the Internet:
curl -L -O https://github.com/kubesphere/ks-installer/releases/download/v3.2.1/images-list.txt
  2. Download offline-installation-tool.sh:
curl -L -O https://github.com/kubesphere/ks-installer/releases/download/v3.2.1/offline-installation-tool.sh
  3. Make the .sh file executable:
chmod +x offline-installation-tool.sh
  4. Download the Kubernetes binaries:
./offline-installation-tool.sh -b -v v1.21.5 

If you cannot access Google's object storage service, run the following command to set an environment variable that switches the download source.

export KKZONE=cn;./offline-installation-tool.sh -b -v v1.21.5 
  5. Pull the images with offline-installation-tool.sh:
./offline-installation-tool.sh -s -l images-list.txt -d ./kubesphere-images
  6. Download kk:
export KKZONE=cn
curl -sfL https://get-kk.kubesphere.io | VERSION=v1.2.1 sh -
chmod +x kk
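A quick way to confirm the binary downloaded correctly is to print its version, which should report v1.2.1:
./kk version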
  7. Download the required dependencies:
[root@k8s-master-1 rpm]# yum install --downloadonly --downloaddir=. socat conntrack -y
[root@k8s-master-1 rpm]# ls
conntrack-tools-1.4.4-7.el7.x86_64.rpm         libnetfilter_cttimeout-1.0.0-7.el7.x86_64.rpm  socat-1.7.3.2-2.el7.x86_64.rpm
libnetfilter_cthelper-1.0.0-11.el7.x86_64.rpm  libnetfilter_queue-1.0.2-2.el7_2.x86_64.rpm
[root@k8s-master-1 rpm]# cd ..
[root@k8s-master-1 ~]# tar zcvf rpm.tar.gz rpm
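All of the downloaded artifacts (kk, images-list.txt, offline-installation-tool.sh, the kubesphere-images directory, the Kubernetes binaries produced in step 4, and rpm.tar.gz) then need to be carried into the offline environment. A sketch using scp; the target directory /root/kubesphere is an assumption, so use whatever transfer method your environment allows:
scp -r kk images-list.txt offline-installation-tool.sh kubesphere-images rpm.tar.gz root@10.4.11.22:/root/kubesphere/
# also copy the directory of Kubernetes binaries produced in step 4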

Push Images to a Private Registry

  1. Transfer the packaged image files to your local machine and run the following command to push them to the registry. dockerhub.kubekey.local is the Harbor registry you created yourself; make sure it matches the privateRegistry value set later in config-sample.yaml.
./offline-installation-tool.sh -l images-list.txt -d ./kubesphere-images -r dockerhub.kubekey.local
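If the Harbor registry requires authentication, log in with Docker on the pushing machine before running the command above (the credentials are whatever you configured in Harbor):
docker login dockerhub.kubekey.local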
  2. Upload the offline dependency package and install the dependencies (on all machines):
[root@k8s-master-1 ~]# tar xf rpm.tar.gz
[root@k8s-master-1 ~]# cd rpm
[root@k8s-master-1 rpm]# yum localinstall ./*.rpm

Install and Configure the HAProxy Load Balancer

You must create a load balancer in your environment to listen on the key ports (on some cloud platforms this is also called a listener); its main purpose is to provide high availability across the three master nodes. Listening on the ports in the table below is recommended.

  • Make sure your load balancer listens at least on the apiserver port.
Service | Protocol | Port
apiserver | TCP | 6443
ks-console | TCP | 30880
http | TCP | 80
https | TCP | 443
# In an offline environment the HAProxy offline package can be uploaded in advance; here k8s-node-1 is used as the load balancer node
[root@k8s-node-1 ~]# yum install haproxy -y
[root@k8s-node-1 ~]# vi /etc/haproxy/haproxy.cfg
global
    log /dev/log    local0
    log /dev/log    local1 notice
    chroot /var/lib/haproxy
    stats socket /var/run/haproxy-admin.sock mode 660 level admin
    stats timeout 30s
    user haproxy
    group haproxy
    daemon
    nbproc 1

defaults
    log     global
    timeout connect 5000
    timeout client  10m
    timeout server  10m
# Proxy port 6443 of the cluster's three master nodes
listen kube-master
    bind 10.4.11.32:6443
    mode tcp
    option tcplog
    balance roundrobin
    server 10.4.11.22 10.4.11.22:6443 check inter 2000 fall 2 rise 2 weight 1
    server 10.4.11.23 10.4.11.23:6443 check inter 2000 fall 2 rise 2 weight 1
    server 10.4.11.31 10.4.11.31:6443 check inter 2000 fall 2 rise 2 weight 1
[root@k8s-node-1 ~]# systemctl start haproxy && systemctl enable haproxy
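A quick sanity check that HAProxy started and is listening on the frontend port:
[root@k8s-node-1 ~]# systemctl status haproxy
[root@k8s-node-1 ~]# ss -lntp | grep 6443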

Customize the Installation Configuration File

[root@k8s-master-1 kubesphere]# ./kk create config --with-kubesphere v3.2.1 --with-kubernetes v1.21.5
[root@k8s-master-1 kubesphere]# vi config-sample.yaml
apiVersion: kubekey.kubesphere.io/v1alpha1
kind: Cluster
metadata:
  name: sample
spec:
  hosts:
  - {name: k8s-master-1, address: 10.4.11.22, internalAddress: 10.4.11.22, user: root, password: 'JNBigdata@2020'}
  - {name: k8s-master-2, address: 10.4.11.23, internalAddress: 10.4.11.23, user: root, password: 'JNBigdata@2020'}
  - {name: k8s-master-3, address: 10.4.11.31, internalAddress: 10.4.11.31, user: root, password: 'JNBigdata@2020'}
  - {name: k8s-node-1, address: 10.4.11.32, internalAddress: 10.4.11.32, user: root, password: 'JNBigdata@2020'}
  - {name: k8s-node-2, address: 10.4.11.33, internalAddress: 10.4.11.33, user: root, password: 'JNBigdata@2020'}
  - {name: k8s-node-3, address: 10.4.11.34, internalAddress: 10.4.11.34, user: root, password: 'JNBigdata@2020'}
  roleGroups:
    etcd:
    - k8s-master-1
    - k8s-master-2
    - k8s-master-3
    master:
    - k8s-master-1
    - k8s-master-2
    - k8s-master-3

    worker:
    - k8s-node-1
    - k8s-node-2
    - k8s-node-3
  controlPlaneEndpoint:
    ##Internal loadbalancer for apiservers
    #internalLoadbalancer: haproxy

    domain: lb.kubesphere.local
    address: "10.4.11.32"
    port: 6443
  kubernetes:
    version: v1.21.5
    clusterName: cluster.local
  network:
    plugin: calico
    kubePodsCIDR: 10.233.64.0/18
    kubeServiceCIDR: 10.233.0.0/18
  registry:
    registryMirrors: []
    insecureRegistries: []
    privateRegistry: "dockerhub.dsj.com:18443"
  addons: []



---
apiVersion: installer.kubesphere.io/v1alpha1
kind: ClusterConfiguration
metadata:
  name: ks-installer
  namespace: kubesphere-system
  labels:
    version: v3.2.1
spec:
  persistence:
    storageClass: ""
  authentication:
    jwtSecret: ""
  local_registry: ""
  # dev_tag: ""
  etcd:
    monitoring: true
    endpointIps: localhost
    port: 2379
    tlsEnable: true
  common:
    core:
      console:
        enableMultiLogin: true
        port: 30880
        type: NodePort
    # apiserver:
    #  resources: {}
    # controllerManager:
    #  resources: {}
    redis:
      enabled: false
      volumeSize: 2Gi
    openldap:
      enabled: false
      volumeSize: 2Gi
    minio:
      volumeSize: 20Gi
    monitoring:
      # type: external
      endpoint: http://prometheus-operated.kubesphere-monitoring-system.svc:9090
      GPUMonitoring:
        enabled: false
    gpu:
      kinds:
      - resourceName: "nvidia.com/gpu"
        resourceType: "GPU"
        default: true
    es:
      # master:
      #   volumeSize: 4Gi
      #   replicas: 1
      #   resources: {}
      # data:
      #   volumeSize: 20Gi
      #   replicas: 1
      #   resources: {}
      logMaxAge: 7
      elkPrefix: logstash
      basicAuth:
        enabled: false
        username: ""
        password: ""
      externalElasticsearchHost: ""
      externalElasticsearchPort: ""
  alerting:
    enabled: false
    # thanosruler:
    #   replicas: 1
    #   resources: {}
  auditing:
    enabled: false
    # operator:
    #   resources: {}
    # webhook:
    #   resources: {}
  devops:
    enabled: true
    jenkinsMemoryLim: 2Gi
    jenkinsMemoryReq: 1500Mi
    jenkinsVolumeSize: 8Gi
    jenkinsJavaOpts_Xms: 512m
    jenkinsJavaOpts_Xmx: 512m
    jenkinsJavaOpts_MaxRAM: 2g
  events:
    enabled: false
    # operator:
    #   resources: {}
    # exporter:
    #   resources: {}
    # ruler:
    #   enabled: true
    #   replicas: 2
    #   resources: {}
  logging:
    enabled: false
    containerruntime: docker
    logsidecar:
      enabled: true
      replicas: 2
      # resources: {}
  metrics_server:
    enabled: false
  monitoring:
    storageClass: ""
    # kube_rbac_proxy:
    #   resources: {}
    # kube_state_metrics:
    #   resources: {}
    # prometheus:
    #   replicas: 1
    #   volumeSize: 20Gi
    #   resources: {}
    #   operator:
    #     resources: {}
    #   adapter:
    #     resources: {}
    # node_exporter:
    #   resources: {}
    # alertmanager:
    #   replicas: 1
    #   resources: {}
    # notification_manager:
    #   resources: {}
    #   operator:
    #     resources: {}
    #   proxy:
    #     resources: {}
    gpu:
      nvidia_dcgm_exporter:
        enabled: false
        # resources: {}
  multicluster:
    clusterRole: none
  network:
    networkpolicy:
      enabled: false
    ippool:
      type: none
    topology:
      type: none
  openpitrix:
    store:
      enabled: false
  servicemesh:
    enabled: false
  kubeedge:
    enabled: false
    cloudCore:
      nodeSelector: {"node-role.kubernetes.io/worker": ""}
      tolerations: []
      cloudhubPort: "10000"
      cloudhubQuicPort: "10001"
      cloudhubHttpsPort: "10002"
      cloudstreamPort: "10003"
      tunnelPort: "10004"
      cloudHub:
        advertiseAddress:
          - ""
        nodeLimit: "100"
      service:
        cloudhubNodePort: "30000"
        cloudhubQuicNodePort: "30001"
        cloudhubHttpsNodePort: "30002"
        cloudstreamNodePort: "30003"
        tunnelNodePort: "30004"
    edgeWatcher:
      nodeSelector: {"node-role.kubernetes.io/worker": ""}
      tolerations: []
      edgeWatcherAgent:
        nodeSelector: {"node-role.kubernetes.io/worker": ""}
        tolerations: []


Start the Installation

[root@k8s-master-1 kubesphere]# ./kk create cluster -f config-sample.yaml
# Watch the deployment logs
[root@k8s-master-1 kubesphere]# kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l app=ks-install -o jsonpath='{.items[0].metadata.name}') -f
# When you see the following output, your highly available cluster has been created successfully.

#####################################################
###              Welcome to KubeSphere!           ###
#####################################################

Console: http://192.168.0.3:30880
Account: admin
Password: P@88w0rd

NOTES:
  1. After you log into the console, please check the
     monitoring status of service components in
     the "Cluster Management". If any service is not
     ready, please wait patiently until all components
     are up and running.
  2. Please change the default password after login.

#####################################################
https://kubesphere.io             2020-xx-xx xx:xx:xx
#####################################################

Verify the Cluster

[root@k8s-master-1 kubesphere]# kubectl get pod -A
# All pods should be in the Running state
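Beyond the pod status, it is also worth confirming that every node is Ready and that the KubeSphere console answers on its NodePort. A sketch; the console address comes from the installer output, and any node IP works for the NodePort:
[root@k8s-master-1 kubesphere]# kubectl get node -o wide
[root@k8s-master-1 kubesphere]# curl -I http://10.4.11.22:30880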

Add Nodes

Before adding a node, make sure Docker, conntrack, socat, and the GlusterFS client are installed on it; a sketch of installing the offline dependencies is shown below.
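A sketch for installing those dependencies from the offline packages prepared earlier; this assumes rpm.tar.gz has been copied to the new node, and Docker plus the GlusterFS client need their own offline packages if they are not already present:
tar xf rpm.tar.gz
cd rpm && yum localinstall ./*.rpm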

# Edit the existing config-sample.yaml file; if it does not exist, generate one with ./kk create config --from-cluster
···
spec:
  hosts:
  - {name: master1, address: 192.168.0.3, internalAddress: 192.168.0.3, user: root, password: ***}
  - {name: node1, address: 192.168.0.4, internalAddress: 192.168.0.4, user: root, password: ***}
  - {name: node2, address: 192.168.0.5, internalAddress: 192.168.0.5, user: root, password: ***}
  roleGroups:
    etcd:
    - master1
    master:
    - master1
    worker:
    - node1
    - node2
···
# In the configuration file, put the new nodes' information under hosts and roleGroups. This example adds two new nodes (node1 and node2); master1 is an existing node.
./kk add nodes -f config-sample.yaml

Verification

# Run the following command on a master node to verify
kubectl get node

Delete Nodes

# Mark the node to be deleted as unschedulable
kubectl cordon <nodeName>
# Deleting a node requires the config-sample.yaml file; if it does not exist, generate one with ./kk create config --from-cluster
## Make sure the file contains complete information for all hosts; there is no need to change anything else. Then run the following command to delete the node
./kk delete node <nodeName> -f config-sample.yaml

Verification

# Run the following command on a master node to verify
kubectl get node

Day-to-Day Kubernetes Operations

1. pod

# List pods
kubectl get pod
# List pods in all namespaces
kubectl get pod -A
# List pods in a specific namespace
kubectl get pod -n namespaceName
# List pods with additional details (node, IP, etc.)
kubectl get pod -o wide
# Show detailed pod information
kubectl describe -n szxc pod/redis
# View pod logs; use --tail=n to show the last n lines
kubectl logs -f -n szxc pod/redis
# Delete a pod
kubectl delete -n szxc pod/redis
# Copy a file out of a pod
kubectl cp -n szxc redis:/aaa.bin ./aaa.bin
# Show pod resource usage
kubectl top pod

2. service

# List services
kubectl get svc
# List services in all namespaces
kubectl get svc -A
# List services in a specific namespace
kubectl get svc -n namespaceName
# List services with additional details
kubectl get svc -o wide
# Show detailed svc information and events
kubectl describe -n szxc svc/redis
# Delete a svc
kubectl delete -n szxc svc/redis

3. configmap

# List configmaps
kubectl get cm
# List configmaps in all namespaces
kubectl get cm -A
# List configmaps in a specific namespace
kubectl get cm -n namespaceName
# List configmaps with additional details
kubectl get cm -o wide
# Show detailed cm information
kubectl describe -n szxc cm/redis
# Delete a cm
kubectl delete -n szxc cm/redis

4. node

# List nodes
kubectl get node
# List nodes with additional details
kubectl get node -o wide
# Show detailed node information
kubectl describe node/node01
# Label a node
kubectl label node node1 env-role=prod
# Show node labels
kubectl get nodes  --show-labels
# Show node resource usage
kubectl top node

Maintaining System Components

a. kubelet

# Log in to the node to be investigated (kubelet runs on every Kubernetes node)
# Check kubelet status
systemctl status kubelet
# Start kubelet
systemctl start kubelet
# Stop kubelet
systemctl stop kubelet
# Restart kubelet
systemctl restart kubelet
# View kubelet logs
journalctl -u kubelet

The kubelet service unit file is typically /etc/systemd/system/kubelet.service, with drop-in configuration under /etc/systemd/system/kubelet.service.d/ (the exact paths depend on how the node was provisioned).
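To inspect the full unit file together with any drop-ins in one place, systemctl can print them directly:
systemctl cat kubelet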

b. apiserver and other components

System components such as Calico, CoreDNS, controller-manager, kube-proxy, and the scheduler are maintained in the same way as the apiserver.

# View the apiserver logs
kubectl logs --tail=100 -f -n kube-system pod/kube-apiserver-k8s-master01
# Restart the apiserver
kubectl delete -n kube-system pod/kube-apiserver-k8s-master01
# View the apiserver's event information
kubectl describe -n kube-system pod/kube-apiserver-k8s-master01
# Check the status and details of the system components (they all run in the kube-system namespace)
kubectl get pod -n kube-system -o wide
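The same pattern applies to the other components. For example, for CoreDNS (a sketch; k8s-app=kube-dns is the label CoreDNS pods normally carry):
# View CoreDNS logs
kubectl logs --tail=100 -n kube-system -l k8s-app=kube-dns
# Restart CoreDNS by rolling its deployment
kubectl -n kube-system rollout restart deployment/coredns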

c. etcd

# Log in to the node to be investigated (etcd runs on every Kubernetes master node)
# Check etcd status
systemctl status etcd
# Start etcd
systemctl start etcd
# Stop etcd
systemctl stop etcd
# Restart etcd
systemctl restart etcd
# View etcd logs
journalctl -u etcd
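Beyond the service status, etcd's own health can be queried with etcdctl. A sketch against the three master endpoints; the certificate paths below are assumptions, so check the etcd unit file (for example with systemctl cat etcd) for the exact paths used in your installation:
export ETCDCTL_API=3
etcdctl --endpoints=https://10.4.11.22:2379,https://10.4.11.23:2379,https://10.4.11.31:2379 \
  --cacert=/etc/ssl/etcd/ssl/ca.pem \
  --cert=/etc/ssl/etcd/ssl/admin-k8s-master-1.pem \
  --key=/etc/ssl/etcd/ssl/admin-k8s-master-1-key.pem \
  endpoint health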
