1. Environment Overview

Download link for the software used: https://pan.baidu.com/s/1hpcXUSJe85EsU9ara48MsQ

Servers: CentOS 6.8, with 2 namenode servers and 3 datanode servers

ZooKeeper cluster: 192.168.67.11:2181,192.168.67.12:2181

JDK: jdk-8u191-linux-x64.tar.gz

Hadoop: hadoop-3.1.1.tar.gz

Node roles (role assignments follow from the configuration below):

  Node        IP               namenode   datanode   resourcemanager   journalnode
  namenode1   192.168.67.101   yes                   yes               yes
  namenode2   192.168.67.102   yes                   yes               yes
  datanode1   192.168.67.103              yes                          yes
  datanode2   192.168.67.104              yes                          yes
  datanode3   192.168.67.105              yes                          yes

2. Configure passwordless SSH login

2.1 Run ssh-keygen -t rsa on every machine.

2.2 Collect the public key (~/.ssh/id_rsa.pub) from every machine into a single authorized_keys file and distribute that file to every machine.

2.3 Set the permissions: chmod 600 ~/.ssh/authorized_keys
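
A minimal sketch of steps 2.1-2.3, run as root (the hostnames are only configured in section 3, so IP addresses are used here; adjust to your environment):

  # step 2.1: run on every node
  ssh-keygen -t rsa

  # step 2.2: on namenode1 only, collect each node's public key into authorized_keys
  # (you will be prompted for each node's password)
  for ip in 192.168.67.101 192.168.67.102 192.168.67.103 192.168.67.104 192.168.67.105; do
      ssh root@${ip} "cat ~/.ssh/id_rsa.pub" >> ~/.ssh/authorized_keys
  done

  # distribute the combined file to the other nodes
  for ip in 192.168.67.102 192.168.67.103 192.168.67.104 192.168.67.105; do
      scp ~/.ssh/authorized_keys root@${ip}:~/.ssh/authorized_keys
  done

  # step 2.3: set the permissions on every node
  chmod 600 ~/.ssh/authorized_keys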

3. Configure hosts

  vim /etc/hosts

  # add the following entries
  192.168.67.101 namenode1
  192.168.67.102 namenode2
  192.168.67.103 datanode1
  192.168.67.104 datanode2
  192.168.67.105 datanode3

  # distribute the hosts file to the other machines
  scp /etc/hosts namenode2:/etc/hosts
  scp /etc/hosts datanode1:/etc/hosts
  scp /etc/hosts datanode2:/etc/hosts
  scp /etc/hosts datanode3:/etc/hosts
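
A quick sanity check that every hostname now resolves (a sketch, run on any node):

  for host in namenode1 namenode2 datanode1 datanode2 datanode3; do
      ping -c 1 ${host}
  done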

4. Disable the firewall

  service iptables stop
  chkconfig iptables off

5. Install the JDK

  tar -zxvf /usr/local/soft/jdk-8u191-linux-x64.tar.gz -C /usr/local/

  vim /etc/profile

  # add the JDK environment variables
  export JAVA_HOME=/usr/local/jdk1.8.0_191
  export JRE_HOME=${JAVA_HOME}/jre
  export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
  export PATH=${JAVA_HOME}/bin:$PATH

  # apply the changes
  source /etc/profile
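
To confirm the JDK is picked up from the new environment variables, a quick check:

  java -version    # should report java version "1.8.0_191"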

6. Install Hadoop

  tar -zxvf /usr/local/soft/hadoop-3.1.1.tar.gz -C /usr/local/

  vim /etc/profile

  # add the Hadoop environment variables
  export HADOOP_HOME=/usr/local/hadoop-3.1.1
  export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/lib

  # apply the changes
  source /etc/profile
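
Likewise, a quick check that the Hadoop binaries are on the PATH:

  hadoop version   # should report Hadoop 3.1.1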
  # edit start-dfs.sh and stop-dfs.sh and add the following settings
  vim /usr/local/hadoop-3.1.1/sbin/start-dfs.sh
  vim /usr/local/hadoop-3.1.1/sbin/stop-dfs.sh

  # users the HDFS daemons run as
  HDFS_DATANODE_USER=root
  HDFS_DATANODE_SECURE_USER=root
  HDFS_NAMENODE_USER=root
  HDFS_SECONDARYNAMENODE_USER=root
  HDFS_JOURNALNODE_USER=root
  HDFS_ZKFC_USER=root
 
  # edit start-yarn.sh and stop-yarn.sh and add the following settings
  vim /usr/local/hadoop-3.1.1/sbin/start-yarn.sh
  vim /usr/local/hadoop-3.1.1/sbin/stop-yarn.sh

  # users the YARN daemons run as
  YARN_RESOURCEMANAGER_USER=root
  HDFS_DATANODE_SECURE_USER=root
  YARN_NODEMANAGER_USER=root
  vim /usr/local/hadoop-3.1.1/etc/hadoop/hadoop-env.sh

  # add the following
  export JAVA_HOME=/usr/local/jdk1.8.0_191
  export HADOOP_HOME=/usr/local/hadoop-3.1.1
  # edit the workers file
  vim /usr/local/hadoop-3.1.1/etc/hadoop/workers

  # replace its contents with
  datanode1
  datanode2
  datanode3
 
  vim /usr/local/hadoop-3.1.1/etc/hadoop/core-site.xml

  # change the configuration to the following
  <configuration>
      <!-- HDFS nameservice name (must match dfs.nameservices in hdfs-site.xml) -->
      <property>
          <name>fs.defaultFS</name>
          <value>hdfs://mycluster/</value>
      </property>

      <!-- Hadoop temporary directory -->
      <property>
          <name>hadoop.tmp.dir</name>
          <value>file:/usr/local/hadoop-3.1.1/hdfs/temp</value>
      </property>

      <!-- ZooKeeper quorum address -->
      <property>
          <name>ha.zookeeper.quorum</name>
          <value>192.168.67.11:2181,192.168.67.12:2181</value>
      </property>
  </configuration>
 
  vim /usr/local/hadoop-3.1.1/etc/hadoop/hdfs-site.xml

  # change the configuration to the following
  <configuration>
      <property>
          <name>dfs.namenode.name.dir</name>
          <value>file:/usr/local/hadoop-3.1.1/hdfs/name</value>
      </property>
      <property>
          <name>dfs.datanode.data.dir</name>
          <value>file:/usr/local/hadoop-3.1.1/hdfs/data</value>
      </property>
      <property>
          <name>dfs.nameservices</name>
          <value>mycluster</value>
      </property>
      <property>
          <name>dfs.ha.namenodes.mycluster</name>
          <value>nn1,nn2</value>
      </property>
      <property>
          <name>dfs.namenode.rpc-address.mycluster.nn1</name>
          <value>namenode1:9000</value>
      </property>
      <property>
          <name>dfs.namenode.rpc-address.mycluster.nn2</name>
          <value>namenode2:9000</value>
      </property>
      <property>
          <name>dfs.namenode.http-address.mycluster.nn1</name>
          <value>namenode1:50070</value>
      </property>
      <property>
          <name>dfs.namenode.http-address.mycluster.nn2</name>
          <value>namenode2:50070</value>
      </property>
      <!-- enable automatic HA failover -->
      <property>
          <name>dfs.ha.automatic-failover.enabled</name>
          <value>true</value>
      </property>
      <!-- journalnode configuration -->
      <property>
          <name>dfs.namenode.shared.edits.dir</name>
          <value>qjournal://namenode1:8485;namenode2:8485;datanode1:8485;datanode2:8485;datanode3:8485/mycluster</value>
      </property>
      <property>
          <name>dfs.client.failover.proxy.provider.mycluster</name>
          <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
      </property>
      <!-- During a failover the standby node must fence the old active NameNode ("fencing").
           sshfence would ssh into the old active node and use fuser to find and kill its NameNode process;
           here shell(/bin/true) is used instead, so fencing always reports success. -->
      <property>
          <name>dfs.ha.fencing.methods</name>
          <value>shell(/bin/true)</value>
      </property>
      <!-- SSH private key used for fencing -->
      <property>
          <name>dfs.ha.fencing.ssh.private-key-files</name>
          <value>/root/.ssh/id_rsa</value>
      </property>
      <!-- SSH connect timeout -->
      <property>
          <name>dfs.ha.fencing.ssh.connect-timeout</name>
          <value>30000</value>
      </property>
      <!-- journalnode edits storage directory -->
      <property>
          <name>dfs.journalnode.edits.dir</name>
          <value>/usr/local/hadoop-3.1.1/hdfs/journaldata</value>
      </property>
      <property>
          <name>dfs.qjournal.write-txns.timeout.ms</name>
          <value>60000</value>
      </property>
  </configuration>
  vim /usr/local/hadoop-3.1.1/etc/hadoop/mapred-site.xml

  # change the configuration to the following
  <configuration>
      <!-- run MapReduce on YARN -->
      <property>
          <name>mapreduce.framework.name</name>
          <value>yarn</value>
      </property>
  </configuration>
  vim /usr/local/hadoop-3.1.1/etc/hadoop/yarn-site.xml

  # change the configuration to the following
  <configuration>
  <!-- Site specific YARN configuration properties -->
      <!-- enable ResourceManager HA -->
      <property>
          <name>yarn.resourcemanager.ha.enabled</name>
          <value>true</value>
      </property>

      <!-- RM cluster id -->
      <property>
          <name>yarn.resourcemanager.cluster-id</name>
          <value>yrc</value>
      </property>

      <!-- logical names of the ResourceManagers -->
      <property>
          <name>yarn.resourcemanager.ha.rm-ids</name>
          <value>rm1,rm2</value>
      </property>

      <!-- hostnames of the two ResourceManagers -->
      <property>
          <name>yarn.resourcemanager.hostname.rm1</name>
          <value>namenode1</value>
      </property>

      <property>
          <name>yarn.resourcemanager.hostname.rm2</name>
          <value>namenode2</value>
      </property>

      <!-- ZooKeeper cluster address -->
      <property>
          <name>yarn.resourcemanager.zk-address</name>
          <value>192.168.67.11:2181,192.168.67.12:2181</value>
      </property>

      <property>
          <name>yarn.nodemanager.aux-services</name>
          <value>mapreduce_shuffle</value>
      </property>
  </configuration>
  # distribute these modified files to the other 4 servers; a distribution sketch follows this list
  /usr/local/hadoop-3.1.1/sbin/start-dfs.sh
  /usr/local/hadoop-3.1.1/sbin/stop-dfs.sh
  /usr/local/hadoop-3.1.1/sbin/start-yarn.sh
  /usr/local/hadoop-3.1.1/sbin/stop-yarn.sh
  /usr/local/hadoop-3.1.1/etc/hadoop/hadoop-env.sh
  /usr/local/hadoop-3.1.1/etc/hadoop/workers
  /usr/local/hadoop-3.1.1/etc/hadoop/core-site.xml
  /usr/local/hadoop-3.1.1/etc/hadoop/hdfs-site.xml
  /usr/local/hadoop-3.1.1/etc/hadoop/mapred-site.xml
  /usr/local/hadoop-3.1.1/etc/hadoop/yarn-site.xml
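
A minimal distribution sketch, assuming Hadoop is already unpacked at the same path on every node and the passwordless SSH from section 2 works:

  for host in namenode2 datanode1 datanode2 datanode3; do
      scp /usr/local/hadoop-3.1.1/sbin/{start-dfs.sh,stop-dfs.sh,start-yarn.sh,stop-yarn.sh} \
          ${host}:/usr/local/hadoop-3.1.1/sbin/
      scp /usr/local/hadoop-3.1.1/etc/hadoop/{hadoop-env.sh,workers,core-site.xml,hdfs-site.xml,mapred-site.xml,yarn-site.xml} \
          ${host}:/usr/local/hadoop-3.1.1/etc/hadoop/
  done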
 
  First startup sequence:
  1. Make sure the ZooKeeper servers configured above are already running.
  2. On every journalnode machine, start the journalnode: hdfs --daemon start journalnode
  3. On namenode1, format ZKFC: hdfs zkfc -formatZK
  4. On namenode1, format the primary NameNode: hdfs namenode -format
  5. On namenode1, start the primary NameNode: hdfs --daemon start namenode
  6. On namenode2, bootstrap the standby NameNode from the formatted primary: hdfs namenode -bootstrapStandby
  7. Start the full cluster: start-all.sh
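
After start-all.sh finishes, a few sanity checks (a sketch; nn1/nn2 and rm1/rm2 are the service IDs defined in hdfs-site.xml and yarn-site.xml above):

  jps                                   # each node should show its expected daemons (NameNode, DataNode, JournalNode, DFSZKFailoverController, ResourceManager, NodeManager)
  hdfs haadmin -getServiceState nn1     # one NameNode should report active ...
  hdfs haadmin -getServiceState nn2     # ... and the other standby
  yarn rmadmin -getServiceState rm1
  yarn rmadmin -getServiceState rm2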
 

7. Verification

7.1 Web UIs:

http://192.168.67.101:50070/

http://192.168.67.102:50070/

http://192.168.67.101:8088/

http://192.168.67.102:8088/

7.2 Shut down the server hosting the active namenode and check that the other namenode's state changes from standby to active.
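
One way to exercise the failover in 7.2 without powering off the whole server is to stop just the active NameNode process (a sketch):

  hdfs haadmin -getServiceState nn1     # find out which NameNode is currently active
  hdfs --daemon stop namenode           # run this on the server hosting the active NameNode
  hdfs haadmin -getServiceState nn2     # after ZKFC fails over, this should report "active"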

 

Copyright notice: this is an original article by liuys635, released under the CC 4.0 BY-SA license. Please include the original source link and this notice when reposting.
Original link: https://www.cnblogs.com/liuys635/p/11341523.html