MPP install
Greenplum install
Notes:
1. Hostnames must not contain uppercase letters.
2. If the cluster has a large number of segments and mirrors, raise the MaxStartups value in /etc/ssh/sshd_config (see section 1.6).
3. The kernel parameter kernel.sem = 500 1024000 200 8192 ############## was sized for 3 nodes with 6 disks each, running 6 segments and 6 mirrors per disk; if your segment count exceeds that ratio, adjust the sem parameters accordingly.
4. Install the software as the gpadmin user, and grant gpadmin root privileges via vim /etc/sudoers:
# User privilege specification
root ALL=(ALL:ALL) ALL
gpadmin ALL=(ALL:ALL) ALL
Installation steps
Part 1. System parameter tuning on the master, standby, and segment nodes
1.1 User setup
sudo groupadd -g 530 gpadmin
sudo useradd -g 530 -u 530 -m -d /home/gpadmin -s /bin/bash gpadmin
sudo chown -R gpadmin:gpadmin /home/gpadmin/
sudo passwd gpadmin
sudo vim /etc/hosts
# hosts for greenplum
192.168.0.200 mpp01
192.168.0.201 mpp02
192.168.0.202 mpp03
su gpadmin
# Passwordless SSH login
ssh-keygen -t rsa
cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
chmod 700 ~/.ssh
ssh-copy-id mpp02
ssh-copy-id mpp03
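A quick sanity check that passwordless login works (a sketch, assuming the three hostnames from /etc/hosts above):
for h in mpp01 mpp02 mpp03; do ssh "$h" hostname; done
# each command should print the remote hostname without prompting for a password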
##### Change the hostname on Ubuntu 18.04
Edit /etc/cloud/cloud.cfg (create the file if it does not exist) and set:
preserve_hostname: true
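With preserve_hostname set, the hostname itself can be changed with hostnamectl; a sketch, assuming this node should be mpp01:
sudo hostnamectl set-hostname mpp01
hostnamectl   # verify the static hostname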
1.2 Kernel parameter tuning
# Reference: https://blog.csdn.net/ctypyb2002/article/details/84107389
sudo vim /etc/sysctl.conf
# Note: xfs_mount_options = rw,noatime,inode64,allocsize=16m is not a sysctl key; these are the XFS mount options gpcheck expects (put them in /etc/fstab, see section 2.6) ### gpcheck reports a warning if they are not set
kernel.shmmax = 500000000
kernel.shmmni = 4096
kernel.shmall = 4000000000
#kernel.sem = 500 1024000 200 8192 # use this sem setting with 108 or more segments
kernel.sem = 500 1024000 200 4096 # use this sem setting with fewer than 108 segments
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 1265536
kernel.msgmax = 1265536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.ip_forward = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.ipv4.ip_local_port_range = 1025 65535
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
vm.overcommit_memory = 2
sudo sysctl -p
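To confirm the semaphore settings took effect (the four values are SEMMSL SEMMNS SEMOPM SEMMNI):
cat /proc/sys/kernel/sem
ipcs -ls   # the same limits, human-readable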
1.3 # Raise the open file and process limits
sudo vim /etc/security/limits.conf
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
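The limits only apply to new login sessions; after logging back in, a quick check:
ulimit -n   # expect 65536
ulimit -u   # expect 131072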
1.4 # Set the disk I/O scheduler (for SATA disks, choose deadline)
The Linux disk I/O scheduler supports several policies; the default is CFQ, and Greenplum recommends deadline. Check a drive's current policy with the command below (the output shown is the correct configuration):
# cat /sys/block/{devname}/queue/scheduler
noop anticipatory [deadline] cfq
Change the I/O scheduler for the data disks on our servers (a plain echo under sudo does not survive the redirect, so pipe through tee):
for d in sda sdb sdc sdd sde sdf; do echo deadline | sudo tee /sys/block/$d/queue/scheduler; done
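The scheduler setting is lost on reboot. One way to persist it is a udev rule; a minimal sketch (the file name 60-io-scheduler.rules is arbitrary):
sudo tee /etc/udev/rules.d/60-io-scheduler.rules <<'EOF'
# apply the deadline scheduler to sda..sdf as they appear
ACTION=="add|change", KERNEL=="sd[a-f]", ATTR{queue/scheduler}="deadline"
EOF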
1.5 # Set disk read-ahead
Every disk device should have its read-ahead (blockdev) value set to 65536.
The official documentation recommends 16384, but the translator argues 65536 is more reasonable: the value counts 512-byte sectors, so the read-ahead size in KB is half the value, and with Greenplum's default 32KB blocksize, 65536 (i.e. 32768KB/32KB) lines up exactly.
65536 or 16384? Undecided; I default to the official recommendation (16384).
# Check and set the read-ahead value for each data disk
for d in sda sdb sdc sdd sde sdf; do
  sudo /sbin/blockdev --getra /dev/$d
  sudo /sbin/blockdev --setra 16384 /dev/$d
done
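blockdev --setra is also not persistent across reboots; one option is to add the loop to the /etc/rc.local created below, before its final exit 0:
for d in sda sdb sdc sdd sde sdf; do /sbin/blockdev --setra 16384 /dev/$d; done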
1.6 # Adjust the sshd configuration
sudo vim /etc/ssh/sshd_config
UseDNS no
#PidFile /var/run/sshd.pid
MaxStartups 100:30:1000
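Restart sshd so the changes take effect (on Ubuntu the unit is named ssh):
sudo systemctl restart ssh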
# Add a boot-time service on Ubuntu 18.04 (which has no rc.local by default)
sudo vi /etc/systemd/system/rc-local.service
# Boot task reference: https://www.jianshu.com/p/79d24b4af4e5
[Unit]
Description=/etc/rc.local Compatibility
ConditionPathExists=/etc/rc.local
[Service]
Type=forking
ExecStart=/etc/rc.local start
TimeoutSec=0
StandardOutput=tty
RemainAfterExit=yes
SysVStartPriority=99
[Install]
WantedBy=multi-user.target
sudo vi /etc/rc.local
#!/bin/sh -e
#
# rc.local
#
# This script is executed at the end of each multiuser runlevel.
# Make sure that the script will "exit 0" on success or any other
# value on error.
#
# In order to enable or disable this script just change the execution
# bits.
#
# By default this script does nothing.
netplan apply
exit 0
sudo chmod +x /etc/rc.local
sudo systemctl enable rc-local
sudo systemctl start rc-local.service
sudo systemctl status rc-local.service
Part 2. Install Greenplum
################### Master, standby, and segment nodes (run on every node in the MPP cluster)
2.1 Install gp-xerces
git clone https://github.com/greenplum-db/gp-xerces.git
cd gp-xerces
mkdir build
cd build
../configure --prefix=/usr/local && make -j64
sudo make -j64 install
# Install re2c
tar -zxvf re2c-1.1.1.tar.gz
cd re2c-1.1.1
./configure
make -j4
sudo make -j4 install
# Install ninja
tar -zxvf ninja-1.9.0.tar.gz
cd ninja-1.9.0
./configure.py --bootstrap
sudo cp -rp ninja /usr/bin
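A quick check that the build tools are installed and on PATH:
re2c --version    # expect re2c 1.1.1
ninja --version   # expect 1.9.0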
2.2 Install gporca
git clone https://github.com/greenplum-db/gporca.git
cd gporca    # or unpack a release tarball such as gporca-3.39.0 and cd into that directory
sudo apt install cmake
cmake -GNinja -H. -Bbuild
sudo ninja install -C build
2.3 Install gpdb on the master node (run on the MPP master)
git clone https://github.com/greenplum-db/gpdb.git #### pick a suitable gpdb release from https://github.com/greenplum-db/gpdb/releases/ ; I chose https://github.com/greenplum-db/gpdb/releases/tag/6.0.0-beta.3
cd gpdb
###### Install the required dependencies
sudo ./README.ubuntu.bash
# Copy this script to the other MPP nodes to install the base packages there as well
scp ./README.ubuntu.bash gpadmin@mpp02:/home/gpadmin
scp ./README.ubuntu.bash gpadmin@mpp03:/home/gpadmin
The dependency script installs the libxerces-c-dev package by default; remove it so the build links against gp-xerces correctly.
sudo apt remove libxerces-c-dev ### this library conflicts with gp-xerces
./configure --enable-orca --with-perl --with-python --with-libxml --prefix=/usr/local/gpdb
make -j64 && sudo make -j64 install
#################
sudo ldconfig ################# be sure to reload the shared library cache, otherwise the build reports missing library files from the two plugins just installed (e.g. libgpos.so)
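To confirm the dynamic linker now sees the optimizer libraries:
ldconfig -p | grep -E 'libgpos|libgpopt|libxerces'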
2.4 Install Greenplum on the standby and segment nodes
cd /home/gpadmin
###### Install the required dependencies
sudo ./README.ubuntu.bash
Archive the master's Greenplum install directory, copy the archive to the same location on the standby and segment nodes, then unpack it there:
tar -C /usr/local -czvf gpdb.tar.gz gpdb
scp -rp gpdb.tar.gz gpadmin@mpp02:/home/gpadmin
scp -rp gpdb.tar.gz gpadmin@mpp03:/home/gpadmin
ssh -t gpadmin@mpp02 "cd /home/gpadmin && tar -zxvf gpdb.tar.gz && sudo mv gpdb /usr/local && sudo ldconfig"
ssh -t gpadmin@mpp03 "cd /home/gpadmin && tar -zxvf gpdb.tar.gz && sudo mv gpdb /usr/local && sudo ldconfig"
2.5 Set the gpadmin environment variables on all nodes
# master and standby nodes
sudo mkdir -p /data1/master
sudo chown -R gpadmin:gpadmin /data1/master
vim /home/gpadmin/.profile
source /usr/local/gpdb/greenplum_path.sh
export PGPORT=5432
export PGDATABASE=gp_sydb
TZ='Asia/Shanghai'; export TZ
export MASTER_DATA_DIRECTORY=/data1/master/gpseg-1/ # data directory of the master node
# Then run on every node
sudo ldconfig && source /home/gpadmin/.profile && source /usr/local/gpdb/greenplum_path.sh
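A quick check that the environment is in place on each node:
which gpstart gpssh           # should resolve under /usr/local/gpdb/bin
echo $MASTER_DATA_DIRECTORY   # should print /data1/master/gpseg-1/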
Rescan the disks without rebooting
ls /sys/class/scsi_host/
Write "- - -" to each host's scan file to trigger a SCSI bus rescan (a plain echo under sudo does not survive the redirect, so pipe through tee):
for h in /sys/class/scsi_host/host*; do echo "- - -" | sudo tee "$h/scan"; done
#########################################
2.6 Preparing for initialization (segments and mirrors)
Segments store the data.
A mirror is the replica of a segment.
################# Format and mount the disks on every node
sudo mkfs.xfs /dev/sda
sudo mkfs.xfs /dev/sdb
sudo mkfs.xfs /dev/sdc
sudo mkfs.xfs /dev/sdd
sudo mkfs.xfs /dev/sde
sudo mkfs.xfs /dev/sdf
# Mount automatically at boot
sudo vim /etc/fstab
/dev/sda /data1 xfs defaults 0 0
/dev/sdb /data2 xfs defaults 0 0
/dev/sdc /data3 xfs defaults 0 0
/dev/sdd /data4 xfs defaults 0 0
/dev/sde /data5 xfs defaults 0 0
/dev/sdf /data6 xfs defaults 0 0
sudo mkdir -p /data{1..6} && sudo mount -a
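As noted in section 1.2, gpcheck expects the XFS data volumes to be mounted with the recommended options; a sketch of the corresponding fstab entry (repeat for sdb..sdf):
/dev/sda /data1 xfs rw,noatime,inode64,allocsize=16m 0 0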
##################
sudo mkdir -p /data{1..6}/primary/s{1..6}
sudo mkdir -p /data{1..6}/mirror/s{1..6}
sudo mkdir /data1/master
sudo chown -R gpadmin:gpadmin /data{1..6}
######################### 3 disks, 3 segments (example shown for a single disk; repeat per disk)
sudo mkfs.xfs /dev/sdb
sudo vim /etc/fstab
/dev/sdb /data1 xfs defaults 0 0
sudo mkdir /data1 && sudo mount -a
sudo mkdir -p /data1/primary/s1 ### create the segment data directory
sudo mkdir -p /data1/mirror/s1 ### create the mirror data directory
sudo mkdir /data1/master
sudo chown -R gpadmin:gpadmin /data1
2.7 Initialize the Greenplum cluster
cd /usr/local/gpdb/docs/cli_help/gpconfigs
mkdir -p /home/gpadmin/conf && cp gpinitsystem_config /home/gpadmin/conf/gp_conf
cd /home/gpadmin/conf && vim gp_conf
ARRAY_NAME="gp_sydb"
SEG_PREFIX=gpseg
PORT_BASE=6000
declare -a DATA_DIRECTORY=(/data1/primary/s1 /data1/primary/s2) ###### each directory is one segment; the cluster's total segment count = segment directories per node × number of MPP nodes
MASTER_HOSTNAME=mpp01
MASTER_DIRECTORY=/data1/master
MASTER_DATA_DIRECTORY=/data1/master/gpseg-1
DATABASE_NAME=gp_sydb
# add the seg_hosts file
MACHINE_LIST_FILE=/home/gpadmin/conf/seg_hosts
MASTER_PORT=5432
TRUSTED_SHELL=ssh
CHECK_POINT_SEGMENTS=8
ENCODING=UNICODE
#mirror config
MIRROR_PORT_BASE=43000
REPLICATION_PORT_BASE=34000
MIRROR_REPLICATION_PORT_BASE=44000
declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror/s1 /data1/mirror/s2) ###### each directory is one mirror; the cluster's total mirror count = mirror directories per node × number of MPP nodes
##### Confirm the MPP node hostnames
cat /home/gpadmin/conf/seg_hosts
mpp01
mpp02
mpp03
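Worked example: with 2 entries in DATA_DIRECTORY, 2 in MIRROR_DATA_DIRECTORY, and the 3 hosts above in seg_hosts, gpinitsystem creates 2 × 3 = 6 primary segments and 6 mirrors.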
##### Initialize Greenplum
Run on every node:
mkdir -p /data1/primary/s1 /data1/primary/s2 && mkdir -p /data1/mirror/s1 /data1/mirror/s2
######## declare -a DATA_DIRECTORY=(/data1/primary/s1 /data1/primary/s2) ###### each directory is one segment; total segments = segment directories per node × number of MPP nodes
######## declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror/s1 /data1/mirror/s2) ###### each directory is one mirror; total mirrors = mirror directories per node × number of MPP nodes
rm -rf /data1/master/gpseg-1
rm -rf /data1/primary/s{1..4}/*
Before initializing:
# Run on each node to verify that gpdb is installed correctly and to self-check the relevant configuration
gpssh-exkeys -f seg_hosts    # master-to-node communication (tests passwordless login)
gpcheck --local              # check the state of each node
gpinitsystem -c gp_conf -h seg_hosts
# Errors encountered
1. selecting default max_connections ... /usr/local/gpdb/bin/postgres: error while loading shared libraries: libgpopt.so.3: cannot open shared object file: No such file or directory
no data was returned by command ""/usr/local/gpdb/bin/postgres" -V"
The program "postgres" is needed by initdb but was either not found in the same directory as "/usr/local/gpdb/bin/initdb" or failed unexpectedly.
Check your installation; "postgres -V" may have more information.
Fix: reinstall the gporca and gp-xerces plugins on each node, then reload the library cache with sudo ldconfig.
# After adding the mirror configuration
2. mirror error
/usr/local/gpdb/bin/gpinitsystem: line 1000: ((: MIRROR_REPLICATION_PORT_OFFSET=-: syntax error: operand expected (error token is "-")
20190517:17:31:29:006271 gpinitsystem:mpp01:gpadmin-[INFO]:-Building group mirror array type , please wait...
./usr/local/gpdb/bin/lib/gp_bash_functions.sh: line 585: 0+: syntax error: operand expected (error token is "+")
20190517:17:31:29:006271 gpinitsystem:mpp01:gpadmin-[INFO]:-Building the Master instance database, please wait…
Fix: make sure the mirror port parameters are set in gp_conf:
MIRROR_PORT_BASE=43000
REPLICATION_PORT_BASE=34000
MIRROR_REPLICATION_PORT_BASE=44000
##################### Add all the disks as segments and mirrors. The segment and mirror counts must match; if they do not, initialization aborts partway through.
mkdir -p /data{1..6}/primary/s{1..6}
mkdir -p /data{1..6}/mirror/s{1..6}
If initialization fails, delete all data under the mirror and primary directories:
rm -rf /data{1..6}/primary/s{1..6}/*
rm -rf /data{1..6}/mirror/s{1..6}/*
and delete everything under /data1/master/:
rm -rf /data1/master/gpseg-1
######## Check for files that escaped deletion: any leftovers also make initialization fail, and each initialization run spends a long time checking the nodes, so check carefully!
ls -alt /data{1..6}/mirror/s{1..6} && ls -alt /data{1..6}/primary/s{1..6}/
ls -alt /data1/master/
# Then rerun the Greenplum initialization
gpinitsystem -c gp_conf -h seg_hosts
Notes on common Greenplum commands:
Reference: https://www.cnblogs.com/pl-boke/p/9852383.html
########################
Adding a standby
# gpinitstandby -r -a removes a failed standby node
gpinitstandby -r -a mpp02
##########
20190523:11:00:55:010715 gpstate:mpp01:gpadmin-[WARNING]:-Standby PID = 0 <<<<<<<<
20190523:11:00:55:010715 gpstate:mpp01:gpadmin-[WARNING]:-Standby status = Standby process not running <<<<<<<<
# gpinitstandby -a -s adds a standby node
gpinitstandby -a -s mpp02
# Before adding, prepare /data1/master/gpseg-1 on mpp02; adding the standby master copies the master node's configuration over to it
# Check the cluster status: gpstate -f
gpadmin@mpp01:~$ gpstate -f
20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-Starting gpstate with args: -f
20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 6.0.0-beta.1 build dev'
20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-master Greenplum Version: 'PostgreSQL 9.4.20 (Greenplum Database 6.0.0-beta.1 build dev) on x86_64-unknown-linux-gnu, compiled by gcc (Ubuntu 7.4.0-1ubuntu1~18.04) 7.4.0, 64-bit compiled on May 21 2019 14:00:08'
20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-Obtaining Segment details from master...
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:-Standby master details
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:-----------------------
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby address = mpp02
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby data directory = /data1/master/gpseg-1
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby port = 5432
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby PID = 23755
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby status = Standby host passive
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--------------------------------------------------------------
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--pg_stat_replication
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--------------------------------------------------------------
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--WAL Sender State: streaming
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Sync state: sync
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Sent Location: 0/140000A0
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Flush Location: 0/140000A0
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Replay Location: 0/140000A0
20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--------------------------------------------------------------
###############################
Simulating a failover
gpstop -a -m # stop the master's postgres process
Promote mpp02 to acting master:
gpadmin@mpp02:/data1/master/gpseg-1$ gpactivatestandby -a -d /data1/master/gpseg-1
sh: 0: getcwd() failed: No such file or directory
20190523:11:27:44:025254 gpactivatestandby:mpp02:gpadmin-[CRITICAL]:-PGPORT environment variable not set.
Solution: check that /home/gpadmin/.profile sets MASTER_DATA_DIRECTORY=/data1/master/gpseg-1/
gpactivatestandby -a -d /data1/master/gpseg-1
# After a failover the data needs to be recovered
TIP: if the cluster has ever switched roles between primary and mirror segments, it is best to also run the following command:
gprecoverseg -r    # reason: if a primary goes down and its mirror takes over, some nodes end up carrying a double load; -r rebalances the segments
gpadmin@mpp02:~$ gprecoverseg -r
20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-Starting gprecoverseg with args:
20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 6.0.0-beta.1 build dev'
20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-master Greenplum Version: 'PostgreSQL 9.4.20 (Greenplum Database 6.0.0-beta.1 build dev) on x86_64-unknown-linux-gnu, compiled by gcc (Ubuntu 7.4.0-1ubuntu1~18.04) 7.4.0, 64-bit compiled on May 21 2019 14:00:08'
20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-Obtaining Segment details from master...
LOG: failed to acquire resources on one or more segments
DETAIL: FATAL: no pg_hba.conf entry for host "192.168.22.122", user "gpadmin", database "template1"
(seg18 192.168.22.122:43018)
LOG: query plan with multiple segworker groups is not supported
HINT: likely caused by a function that reads or modifies data in a distributed table
20190523:13:37:17:010532 gprecoverseg:mpp02:gpadmin-[CRITICAL]:-gprecoverseg failed. (Reason='FATAL: DTM initialization: failure during startup recovery, retry failed, check segment status (cdbtm.c:1328)
') exiting...
Edit /data1/master/gpseg-1/pg_hba.conf and add the client IPs:
host all gpadmin 172.16.1.114/32 trust
host all gpadmin 172.16.1.115/32 trust
host all gpadmin 172.16.1.116/32 trust
host all gpadmin 192.168.22.111/32 trust
host all gpadmin 192.168.22.122/32 trust
host all gpadmin 192.168.22.133/32 trust
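pg_hba.conf changes only need a configuration reload, not a restart; with the cluster up:
gpstop -u    # reload pg_hba.conf / postgresql.conf across the cluster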
# After the master/standby switch, gpstate warns:
[WARNING]:-Total primary segment failures (at master)
[WARNING]:-Total number mirror segments acting as primary segments
# Recover the segments and mirrors
gprecoverseg
#### gprecoverseg recovery plan output (monitor progress with gpstate -m)
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Recovery 72 of 72
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:----------------------------------------------------------
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Synchronization mode = Incremental
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance host = mpp03
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance address = mpp03
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance directory = /data6/primary/s6/gpseg107
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance port = 6035
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance host = mpp01
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance address = mpp01
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance directory = /data6/mirror/s6/gpseg107
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance port = 43035
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Target = in-place
20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:----------------------------------------------------------
Continue with segment recovery procedure Yy|Nn (default=N): enter Y
Repair the cluster
gprecoverseg -a
20190523:13:53:49:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Ensuring that shared memory is cleaned up for stopped segments
20190523:13:53:50:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Updating configuration with new mirrors
20190523:13:53:50:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Updating mirrors
20190523:13:53:50:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Running pg_rewind on required mirrors
Check the cluster's recovery status
gpstate -m
########
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data5/mirror/s6/gpseg65 43029 Passive Synchronized
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s1/gpseg66 43030 Passive Synchronized
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s2/gpseg67 43031 Passive Synchronized
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s3/gpseg68 43032 Passive Synchronized
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s4/gpseg69 43033 Passive Synchronized
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s5/gpseg70 43034 Passive Synchronized
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s6/gpseg71 43035 Passive Synchronized
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s1/gpseg72 43000 Acting as Primary Not In Sync
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s2/gpseg73 43001 Acting as Primary Not In Sync
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s3/gpseg74 43002 Acting as Primary Not In Sync
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s4/gpseg75 43003 Acting as Primary Not In Sync
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s5/gpseg76 43004 Acting as Primary Not In Sync
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s6/gpseg77 43005 Acting as Primary Not In Sync
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data2/mirror/s1/gpseg78 43006 Acting as Primary Not In Sync
20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data2/mirror/s2/gpseg79 43007 Acting as Primary Not In Sync
#####
Recovery succeeded
20190523:14:00:32:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-For segments updated successfully, streaming will continue in the background.
20190523:14:00:32:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Use gpstate -s to check the streaming progress.
20190523:14:00:32:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-******************************************************************
Recovering the primary and mirror segments
gpstate -m          # check the status of the Greenplum mirrors
gpstop -a -M fast   # shut down the entire Greenplum cluster
Start the database in restricted mode:
gpstart -a -R
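To leave restricted mode afterwards, simply restart without -R:
gpstop -a -M fast
gpstart -a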