坑多,搞了好多次。
1. /etc/ansible/hosts 配置如下:
[OSEv3:children]
masters
nodes
etcd
nfs

[OSEv3:vars]
ansible_ssh_user=root
openshift_deployment_type=openshift-enterprise
openshift_release=v3.9
osm_use_cockpit=true
osm_cockpit_plugins=['cockpit-kubernetes']
openshift_cockpit_deployer_prefix='openshift3/'
openshift_cockpit_deployer_version='v3.9.43'
osm_cluster_network_cidr=10.128.0.0/14
openshift_portal_net=172.30.0.0/16
openshift_master_api_port=8443
openshift_master_console_port=8443
openshift_hosted_registry_storage_kind=nfs
openshift_hosted_registry_storage_access_modes=['ReadWriteMany']
openshift_hosted_registry_storage_nfs_directory=/exports
openshift_hosted_registry_storage_nfs_options='*(rw,root_squash)'
openshift_hosted_registry_storage_volume_name=registry
openshift_hosted_registry_storage_volume_size=10Gi
oreg_url=registry.example.com/openshift3/ose-\${component}:\${version}
openshift_docker_additional_registries=registry.example.com
openshift_docker_insecure_registries=registry.example.com
openshift_docker_blocked_registries=registry.access.redhat.com,docker.io
openshift_image_tag=v3.9.43
openshift_enable_service_catalog=true
openshift_service_catalog_image_prefix=registry.example.com/openshift3/ose-
openshift_service_catalog_image_version=v3.9.43
ansible_service_broker_image_prefix=registry.example.com/openshift3/ose-
ansible_service_broker_etcd_image_prefix=registry.example.com/rhel7/
ansible_service_broker_selector={ "region": "infra"}
openshift_template_service_broker_namespaces=['openshift']
template_service_broker_selector={ "region": "infra"}
template_service_broker_prefix=registry.example.com/openshift3/ose-
openshift_hosted_manage_registry=false
oreg_url=registry.example.com/openshift3/ose-${component}:${version}
openshift_examples_modify_imagestreams=true
openshift_clock_enabled=true
openshift_metrics_storage_kind=nfs
openshift_metrics_install_metrics=true
openshift_metrics_storage_access_modes=['ReadWriteOnce']
openshift_metrics_storage_host=nfs.example.com
openshift_metrics_storage_nfs_directory=/exports
openshift_metrics_storage_volume_name=metrics
openshift_metrics_storage_volume_size=10Gi
openshift_metrics_hawkular_hostname=hawkular-metrics.apps.example.com
#openshift_metrics_cassandra_storage_type=emptydir
openshift_metrics_image_prefix=registry.example.com/openshift3/
openshift_hosted_metrics_deploy=true
openshift_hosted_metrics_public_url=https://hawkular-metrics.apps.example.com/hawkular/metrics
openshift_metrics_image_version=v3.9.43
openshift_master_identity_providers=[{ 'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}]
# Default login account: admin / handhand
openshift_master_htpasswd_users={ 'admin': '$apr1$gfaL16Jf$c.5LAvg3xNDVQTkk6HpGB1'}
#openshift_repos_enable_testing=true
openshift_disable_check=docker_image_availability,disk_availability,memory_availability,docker_storage
docker_selinux_enabled=false
openshift_docker_options=" --selinux-enabled --insecure-registry 172.30.0.0/16 --log-driver json-file --log-opt max-size=50M --log-opt max-file=3 --insecure-registry registry.example.com --add-registry registry.example.com"
osm_etcd_image=rhel7/etcd
openshift_logging_image_prefix=registry.example.com/openshift3/
openshift_hosted_router_selector='region=infra,router=true'
openshift_master_default_subdomain=app.example.com
openshift_web_console_prefix=registry.example.com/openshift3/ose-
openshift_web_console_version=v3.9.43

# host group for masters
[masters]
master.example.com

# host group for etcd
[etcd]
master.example.com

# host group for nodes, includes region info
[nodes]
master.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node1.example.com openshift_node_labels="{'region': 'infra', 'router': 'true', 'zone': 'default'}" openshift_schedulable=true
node2.example.com openshift_node_labels="{'region': 'infra', 'zone': 'default', 'node': 'true'}" openshift_schedulable=true

[nfs]
nfs.example.com
2.有几个镜像需要retag
docker pull registry.example.com/openshift3/registry-console:v3.9.43
docker tag registry.example.com/openshift3/registry-console:v3.9.43 registry.example.com/openshift3/registry-console:v3.9
docker push registry.example.com/openshift3/registry-console:v3.9
docker pull registry.example.com/openshift3/ose-deployer:v3.9.43
docker tag registry.example.com/openshift3/ose-deployer:v3.9.43 registry.example.com/openshift3/ose-deployer:v3.9.51
docker push registry.example.com/openshift3/ose-deployer:v3.9.51
docker pull registry.example.com/openshift3/ose-pod:v3.9.43
docker tag registry.example.com/openshift3/ose-pod:v3.9.43 registry.example.com/openshift3/ose-pod:v3.9.51
docker push registry.example.com/openshift3/ose-pod:v3.9.51
更新主节点
ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_9/upgrade_control_plane.yml | tee /tmp/upgrade_control_plane_to_3_9.log;
完成后状态
TASK [openshift_master : Wait for master API to come back online] *******************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_master/tasks/restart.yml:6
ok: [master.example.com] => { "changed": false, "elapsed": 10, "failed": false, "path": null, "port": 8443, "search_regex": null, "state": "started"}

TASK [openshift_master : restart master controllers] ********************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_master/tasks/restart.yml:14
changed: [master.example.com] => { "attempts": 1, "changed": true, "cmd": ["systemctl", "restart", "atomic-openshift-master-controllers"], "delta": "0:00:00.738269", "end": "2018-11-24 21:47:24.938854", "failed": false, "rc": 0, "start": "2018-11-24 21:47:24.200585", "stderr": "", "stderr_lines": [], "stdout": "", "stdout_lines": []}
META: ran handlers

PLAY RECAP **************************************************************************************************************************************************************
localhost : ok=28 changed=0 unreachable=0 failed=0
master.example.com : ok=798 changed=197 unreachable=0 failed=0
nfs.example.com : ok=1 changed=0 unreachable=0 failed=0

[root@master ~]# oc get pods --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
default docker-registry-2-8kc4s 1/1 Running 0 16m
default docker-registry-2-qh9vq 1/1 Running 0 16m
default docker-registry-2-xdz55 1/1 Running 2 3h
default registry-console-2-qtj4j 1/1 Running 0 16m
default router-4-ctlwd 1/1 Running 0 7m
default router-4-kvbc6 1/1 Running 0 6m
kube-service-catalog apiserver-bp4j4 1/1 Running 0 3m
kube-service-catalog controller-manager-m82nr 0/1 CrashLoopBackOff 4 3m
openshift-ansible-service-broker asb-1-deploy 0/1 Error 0 2m
openshift-ansible-service-broker asb-etcd-1-deploy 0/1 Error 0 2m
openshift-infra hawkular-cassandra-1-6qmm9 1/1 Running 2 3h
openshift-infra hawkular-metrics-fmj5n 0/1 CrashLoopBackOff 38 3h
openshift-infra heapster-8cb76 0/1 Error 1 16m
openshift-template-service-broker apiserver-7gnvj 0/1 Error 3 2m
openshift-template-service-broker apiserver-kqqx7 1/1 Running 0 2m
openshift-template-service-broker apiserver-smzqn 0/1 Error 3 2m
openshift-web-console webconsole-55d596f44d-n6gf8 1/1 Running 0 9m
[root@master ~]# oc get node
NAME STATUS ROLES AGE VERSION
master.example.com Ready master 19h v1.9.1+a0ce1bc657
node1.example.com Ready 19h v1.7.6+a08f5eeb62
node2.example.com Ready 19h v1.7.6+a08f5eeb62
更新node节点
ansible-playbook -vv /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_9/upgrade_nodes.yml -e openshift_upgrade_nodes_serial=1 | tee /tmp/upgrade_node_to_3_9.log;
任务结束后输出
TASK [openshift_excluder : Enable openshift excluder] *******************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_excluder/tasks/exclude.yml:24
changed: [node1.example.com] => { "changed": true, "cmd": ["/sbin/atomic-openshift-excluder", "exclude"], "delta": "0:00:00.049623", "end": "2018-11-25 09:04:05.773310", "failed": false, "rc": 0, "start": "2018-11-25 09:04:05.723687", "stderr": "", "stderr_lines": [], "stdout": "", "stdout_lines": []}
changed: [node2.example.com] => { "changed": true, "cmd": ["/sbin/atomic-openshift-excluder", "exclude"], "delta": "0:00:00.051837", "end": "2018-11-25 09:04:05.158001", "failed": false, "rc": 0, "start": "2018-11-25 09:04:05.106164", "stderr": "", "stderr_lines": [], "stdout": "", "stdout_lines": []}
META: ran handlers
META: ran handlers

PLAY RECAP **************************************************************************************************************************************************************
localhost : ok=12 changed=0 unreachable=0 failed=0
master.example.com : ok=76 changed=4 unreachable=0 failed=0
nfs.example.com : ok=28 changed=2 unreachable=0 failed=0
node1.example.com : ok=158 changed=45 unreachable=0 failed=0
node2.example.com : ok=158 changed=46 unreachable=0 failed=0
[root@master ~]# oc get nodes
NAME STATUS ROLES AGE VERSION
master.example.com Ready master 12h v1.9.1+a0ce1bc657
node1.example.com Ready 12h v1.9.1+a0ce1bc657
node2.example.com Ready 12h v1.9.1+a0ce1bc657
node 节点更新完之后,heapster、metrics 相关的 pod 不见了,原因还需要再去查:
[root@master ~]# oc get pods --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
default router-4-kvbc6 1/1 Running 0 18m
kube-service-catalog apiserver-bp4j4 1/1 Running 0 15m
kube-service-catalog controller-manager-m82nr 0/1 CrashLoopBackOff 7 15m
openshift-ansible-service-broker asb-1-deploy 0/1 Error 0 14m
openshift-ansible-service-broker asb-etcd-1-deploy 0/1 Error 0 14m
openshift-template-service-broker apiserver-7gnvj 1/1 Running 7 14m
openshift-template-service-broker apiserver-kqqx7 1/1 Running 0 14m
openshift-template-service-broker apiserver-smzqn 1/1 Running 7 14m
openshift-web-console webconsole-55d596f44d-n6gf8 1/1 Running 0 21m
更新脚本不要反复执行,遇到的问题包括
- 导入模板失败
TASK [openshift_examples : Import RHEL streams] *************************************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_examples/tasks/main.yml:58
FAILED - RETRYING: Import RHEL streams (3 retries left).
FAILED - RETRYING: Import RHEL streams (2 retries left).
FAILED - RETRYING: Import RHEL streams (1 retries left).
导入模板失败,目前暂时没有处理。
- RETRYING: Poll for OpenShift pod deployment success
TASK [openshift_hosted : Poll for OpenShift pod deployment success] *****************************************************************************************************
task path: /usr/share/ansible/openshift-ansible/roles/openshift_hosted/tasks/wait_for_pod.yml:23
FAILED - RETRYING: Poll for OpenShift pod deployment success (60 retries left).
FAILED - RETRYING: Poll for OpenShift pod deployment success (59 retries left).
看了一下是在docker-registry部署完后的检查,修改hosts文件,加入
openshift_hosted_manage_registry=false
- 验证TSB是否运行
TASK [template_service_broker : Verify that TSB is running] ********************************************************************************
FAILED - RETRYING: Verify that TSB is running (120 retries left).
FAILED - RETRYING: Verify that TSB is running (119 retries left).
解决办法:修改 hosts 文件中的 template_service_broker_selector,让 template service broker(TSB)在 region=infra 的节点上运行(之前是在 node=true 的节点上):
template_service_broker_selector={ "region": "infra"}
- upgrade storage
脚本不能反复执行