Hadoop 高可用
NameNode 高可用
- 都能实现热备
- 都是一个active NN 和一个 standby NN
- 都使用 Zookeeper 和 ZKFC 来实现自动失效恢复
- 失效切换都使用 fencing 配置的方法来隔离之前的 active NN
- NFS 数据共享方案把数据存储在共享存储里面,我们还需要考虑 NFS 本身的高可用设计
- QJM 不需要共享存储,但需要让每一个 DN 都知道两个 NN 的位置,并把块信息和心跳包 发送给active和 standby这两个 NN
vim /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
    <property>
        <!-- Logical HDFS nameservice ID; must match dfs.nameservices in hdfs-site.xml -->
        <name>fs.defaultFS</name>
        <value>hdfs://nsdcluster</value>
    </property>
    <property>
        <!-- Base directory for Hadoop temporary data -->
        <name>hadoop.tmp.dir</name>
        <value>/var/hadoop</value>
    </property>
    <property>
        <!-- ZooKeeper quorum used by ZKFC for automatic failover -->
        <name>ha.zookeeper.quorum</name>
        <value>node1:2181,node2:2181,node3:2181</value>
    </property>
    <property>
        <!-- Allow proxy user nsd1804 to impersonate members of any group -->
        <name>hadoop.proxyuser.nsd1804.groups</name>
        <value>*</value>
    </property>
    <property>
        <!-- Allow proxy user nsd1804 to connect from any host -->
        <name>hadoop.proxyuser.nsd1804.hosts</name>
        <value>*</value>
    </property>
</configuration>
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <!-- Number of block replicas -->
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <!-- HDFS nameservice ID; must match fs.defaultFS in core-site.xml -->
        <name>dfs.nameservices</name>
        <value>nsdcluster</value>
    </property>
    <property>
        <!-- The two NameNodes under nsdcluster: nn1 and nn2 -->
        <name>dfs.ha.namenodes.nsdcluster</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <!-- RPC address of nn1 -->
        <name>dfs.namenode.rpc-address.nsdcluster.nn1</name>
        <value>nn01:8020</value>
    </property>
    <property>
        <!-- RPC address of nn2 -->
        <name>dfs.namenode.rpc-address.nsdcluster.nn2</name>
        <value>nn02:8020</value>
    </property>
    <property>
        <!-- HTTP address of nn1 -->
        <name>dfs.namenode.http-address.nsdcluster.nn1</name>
        <value>nn01:50070</value>
    </property>
    <property>
        <!-- HTTP address of nn2 (original comment wrongly said nn1) -->
        <name>dfs.namenode.http-address.nsdcluster.nn2</name>
        <value>nn02:50070</value>
    </property>
    <property>
        <!-- JournalNode quorum that stores the shared NameNode edit log -->
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://node1:8485;node2:8485;node3:8485/nsdcluster</value>
    </property>
    <property>
        <!-- Local directory where each JournalNode keeps its edits -->
        <name>dfs.journalnode.edits.dir</name>
        <value>/var/hadoop/journal</value>
    </property>
    <property>
        <!-- Client-side proxy provider used to locate the active NameNode -->
        <name>dfs.client.failover.proxy.provider.nsdcluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <!-- Fencing method; plain "sshfence" assumes sshd on default port 22 -->
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <!-- Private key used by sshfence for passwordless SSH -->
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <property>
        <!-- Enable automatic failover via ZKFC -->
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <!-- Enable WebHDFS (REST API) on NameNodes and DataNodes; optional -->
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
</configuration>
vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
<property>
<!-- Run MapReduce jobs on the YARN framework -->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
vim /usr/local/hadoop/etc/hadoop/yarn-site.xml
<configuration>
<!-- Auxiliary service so NodeManagers can serve MapReduce shuffle data -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Enable ResourceManager high availability -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Logical IDs of the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Recover running applications after a ResourceManager restart/failover -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Persist ResourceManager state in ZooKeeper -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- ZooKeeper ensemble used for RM HA and state storage -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
<!-- Cluster ID shared by both ResourceManagers -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-ha</value>
</property>
<!-- Host running ResourceManager rm1 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>nn01</value>
</property>
<!-- Host running ResourceManager rm2 -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>nn02</value>
</property>
</configuration>
#—————————————————–#
初始化启动集群
ALL: 所有机器
nodeX: node1 node2 node3
NN1: nn01
NN2: nn02
#—————————————————–#
ALL: 同步配置文件到所有集群机器
把上面所有修改的文件同步到其他集群服务器上
NN1: 初始化ZK集群 ./bin/hdfs zkfc -formatZK
nodeX: 启动 journalnode 服务
cd /usr/local/hadoop/
./sbin/hadoop-daemon.sh start journalnode
NN1: 格式化
./bin/hdfs namenode -format
NN2: 数据同步到本地 /var/hadoop/dfs
把NN1上 /var/hadoop/dfs 同步到 NN2上
./bin/hdfs namenode -initializeSharedEdits
./sbin/hadoop-daemon.sh stop journalnode
./sbin/start-all.sh
./sbin/yarn-daemon.sh start resourcemanager
./bin/hdfs haadmin -getServiceState nn1
./bin/hdfs haadmin -getServiceState nn2
./bin/yarn rmadmin -getServiceState rm1
./bin/yarn rmadmin -getServiceState rm2
./bin/hdfs dfsadmin -report
./bin/yarn node -list
./bin/hadoop fs -ls /
./bin/hadoop fs -mkdir hdfs://nsdcluster/input
./sbin/hadoop-daemon.sh stop namenode
./sbin/yarn-daemon.sh stop resourcemanager
./sbin/hadoop-daemon.sh start namenode
./sbin/yarn-daemon.sh start resourcemanager