1. Standalone mode
DHCP requests are sent from source address 0.0.0.0 to destination address 255.255.255.255 on UDP ports 67 and 68, one for sending and one for receiving: the client broadcasts its configuration request to the server's port 67 (bootps), and the server broadcasts the reply back to the client's port 68 (bootpc).
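To watch this exchange on the wire, a quick check (assuming the interface is eth0, which is only an example here) is:
tcpdump -i eth0 -n udp port 67 or udp port 68 # show DHCP/BOOTP traffic in both directions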
Default format location: /tmp/hadoop-hadoop
The SecondaryNameNode acts as a checkpoint helper: it periodically merges the NameNode's edit log into the fsimage; it is not a hot standby.
bin/hdfs dfs -ls # list the contents of the file system
bin/hdfs dfs -put input # upload into the distributed file system; the uploaded data can be seen in the web UI
[root@server1 ~]# useradd -u 800 hadoop
[root@server1 ~]# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: it is based on a dictionary word
BAD PASSWORD: is too simple
Retype new password:
passwd: all authentication tokens updated successfully.
[root@server1 ~]# su - hadoop
[hadoop@server1 ~]$ ls
hadoop-2.7.3.tar.gz jdk-7u79-linux-x64.tar.gz
[hadoop@server1 ~]$ tar zxf hadoop-2.7.3.tar.gz
[hadoop@server1 ~]$ tar zxf jdk-7u79-linux-x64.tar.gz
[hadoop@server1 ~]$ ln -s jdk1.7.0_79/ java
[hadoop@server1 ~]$ ls
hadoop-2.7.3 java jdk-7u79-linux-x64.tar.gz
hadoop-2.7.3.tar.gz jdk1.7.0_79
[hadoop@server1 ~]$ cd hadoop-2.7.3
[hadoop@server1 hadoop-2.7.3]$ ls
bin include libexec NOTICE.txt sbin
etc lib LICENSE.txt README.txt share
[hadoop@server1 hadoop-2.7.3]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ vim hadoop-env.sh
25 export JAVA_HOME=/home/hadoop/java
[hadoop@server1 hadoop]$ cd
[hadoop@server1 ~]$ vim .bash_profile
[hadoop@server1 ~]$ source .bash_profile
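The .bash_profile change itself is not shown; a minimal sketch, assuming the jdk was unpacked under /home/hadoop and linked as java (as above), is to put its bin directory on PATH so that jps and the other java tools resolve:
export JAVA_HOME=$HOME/java
export PATH=$PATH:$JAVA_HOME/bin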
[hadoop@server1 ~]$ jps
1774 Jps
[hadoop@server1 ~]$ cd hadoop-2.7.3
[hadoop@server1 hadoop-2.7.3]$ mkdir input
[hadoop@server1 hadoop-2.7.3]$ cp etc/hadoop/*.xml input
[hadoop@server1 hadoop-2.7.3]$ ls input/
capacity-scheduler.xml hdfs-site.xml kms-site.xml
core-site.xml httpfs-site.xml yarn-site.xml
hadoop-policy.xml kms-acls.xml
[hadoop@server1 hadoop-2.7.3]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
[hadoop@server1 hadoop-2.7.3]$ ls
bin include lib LICENSE.txt output sbin
etc input libexec NOTICE.txt README.txt share
[hadoop@server1 hadoop-2.7.3]$ ls output/
part-r-00000 _SUCCESS
[hadoop@server1 hadoop-2.7.3]$ cd output/
[hadoop@server1 output]$ ls
part-r-00000 _SUCCESS
[hadoop@server1 output]$ cat part-r-00000
1 dfsadmin
[hadoop@server1 output]$ cd
[hadoop@server1 ~]$ ln -s hadoop-2.7.3 hadoop
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ vim core-site.xml
[hadoop@server1 hadoop]$ vim hdfs-site.xml
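The contents of these two files are not shown. A typical pseudo-distributed sketch, assuming the NameNode listens on 172.25.44.1:9000 (the same address and port used later in the HA rpc settings) and a single replica, would be:
# core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://172.25.44.1:9000</value>
</property>
</configuration>
# hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>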
[hadoop@server1 hadoop]$ cat slaves
localhost
[hadoop@server1 hadoop]$ vim slaves
[hadoop@server1 hadoop]$ cat slaves
172.25.44.1
[hadoop@server1 hadoop]$ ssh-keygen # generate a key pair for passwordless ssh
[hadoop@server1 hadoop]$ cd
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ ls
bin include lib LICENSE.txt output sbin
etc input libexec NOTICE.txt README.txt share
[hadoop@server1 hadoop]$ bin/hdfs namenode -format
[hadoop@server1 hadoop]$ cd
[hadoop@server1 ~]$ cd .ssh/
[hadoop@server1 .ssh]$ ls
id_rsa id_rsa.pub
[hadoop@server1 .ssh]$ cp id_rsa.pub authorized_keys
[hadoop@server1 .ssh]$ ssh 172.25.44.1
[hadoop@server1 ~]$ logout
Connection to 172.25.44.1 closed.
[hadoop@server1 .ssh]$ ssh localhost
Last login: Sun Aug 26 16:12:41 2018 from server1
[hadoop@server1 ~]$ logout
Connection to localhost closed.
[hadoop@server1 .ssh]$ cd
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ bin/hdfs namenode -format
[hadoop@server1 hadoop]$ cd /tmp
[hadoop@server1 tmp]$ ls
hadoop-hadoop hsperfdata_hadoop yum.log
[hadoop@server1 tmp]$ cd
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
[hadoop@server1 hadoop]$ jps
2538 Jps
2429 SecondaryNameNode
2246 DataNode
2153 NameNode
[hadoop@server1 hadoop]$ bin/hadoop dfs -ls
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.
ls: `.': No such file or directory
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user/hadoop
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls
[hadoop@server1 hadoop]$ bin/hdfs dfs -put input/
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2018-08-26 16:23 input
# 2. Fully distributed
[hadoop@server1 hadoop]$ sbin/stop-dfs.sh
[root@server1 ~]# rpm -q nfs-utils
nfs-utils-1.2.3-39.el6.x86_64
[root@server1 ~]# rpm -q rpcbind # if this package is missing, install it yourself
rpcbind-0.2.0-11.el6.x86_64
[root@server1 ~]# vim /etc/exports
1 /home/hadoop *(rw,anonuid=800,anongid=800)
[root@server1 ~]# /etc/init.d/rpcbind start
Starting rpcbind: [ OK ]
[root@server1 ~]# /etc/init.d/nfs start
[root@server1 ~]# showmount -e
[root@server2 ~]# yum install -y nfs-utils
[root@server2 ~]# /etc/init.d/rpcbind start
[root@server2 ~]# useradd -u 800 hadoop # the same user with the same uid must exist on every node so the NFS-shared home directory stays consistent
[root@server2 ~]# id hadoop
uid=800(hadoop) gid=800(hadoop) groups=800(hadoop)
[root@server2 ~]# mount 172.25.44.1:/home/hadoop/ /home/hadoop/
[root@server2 ~]# ll -d /home/hadoop/
drwx------ 5 hadoop hadoop 4096 8月 26 16:07 /home/hadoop/
[root@server2 ~]# su - hadoop
[hadoop@server2 ~]$ ls
hadoop hadoop-2.7.3.tar.gz jdk1.7.0_79
hadoop-2.7.3 java jdk-7u79-linux-x64.tar.gz
[hadoop@server2 ~]$ jps
1328 Jps
[root@server3 ~]# yum install -y nfs-utils
[root@server3 ~]# /etc/init.d/rpcbind start
[root@server3 ~]# useradd -u 800 hadoop
[root@server3 ~]# mount 172.25.44.1:/home/hadoop/ /home/hadoop/
[root@server3 ~]# ll -d /home/hadoop/
drwx------ 5 hadoop hadoop 4096 8月 26 16:07 /home/hadoop/
[root@server3 ~]# su - hadoop
[hadoop@server3 ~]$ ls
hadoop hadoop-2.7.3.tar.gz jdk1.7.0_79
hadoop-2.7.3 java jdk-7u79-linux-x64.tar.gz
[hadoop@server3 ~]$ jps
1674 Jps
[hadoop@server1 ~]$ ssh 172.25.44.2
[hadoop@server2 ~]$ logout
Connection to 172.25.44.2 closed.
[hadoop@server1 ~]$ ssh server2
[hadoop@server2 ~]$ logout
Connection to server2 closed.
[hadoop@server3 ~]$ logout
Connection to server3 closed.
[hadoop@server1 ~]$ ssh server3
[hadoop@server3 ~]$ logout
Connection to server3 closed.
[hadoop@server1 ~]$ cd hadoop/etc/hadoop/
[hadoop@server1 hadoop]$ vim hdfs-site.xml
[hadoop@server1 hadoop]$ vim slaves
172.25.44.2
172.25.44.3
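The hdfs-site.xml edit above is not shown; presumably it sets the replication factor to 2 to match the two DataNodes, which is consistent with the replication column of 2 in the dfs -ls output further below (a sketch, not the author's exact lines):
# hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>2</value>
</property>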
[hadoop@server1 hadoop]$ cd /tmp/
[hadoop@server1 tmp]$ rm -fr *
[hadoop@server1 tmp]$ ls
[hadoop@server1 tmp]$ pwd
/tmp
[hadoop@server1 tmp]$ cd
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ bin/hdfs namenode -format
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
[hadoop@server1 hadoop]$ jps
3883 Jps
3774 SecondaryNameNode
3586 NameNode
[hadoop@server1 hadoop]$ ls
bin include lib LICENSE.txt NOTICE.txt README.txt share
etc input libexec logs output sbin
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user/hadoop
[hadoop@server1 hadoop]$ bin/hdfs dfs -put input
[hadoop@server1 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount input output
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls output
Found 2 items
-rw-r--r-- 2 hadoop supergroup 0 2018-08-26 17:14 output/_SUCCESS
-rw-r--r-- 2 hadoop supergroup 9984 2018-08-26 17:14 output/part-r-00000
[hadoop@server1 hadoop]$ rm -fr output/ # removes only the local copy; the output directory stored in HDFS is unaffected, as the next listing shows
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls output
Found 2 items
-rw-r--r-- 2 hadoop supergroup 0 2018-08-26 17:14 output/_SUCCESS
-rw-r--r-- 2 hadoop supergroup 9984 2018-08-26 17:14 output/part-r-00000
[hadoop@server1 hadoop]$ bin/hdfs dfs -get output
[hadoop@server1 hadoop]$ ls
bin include lib LICENSE.txt NOTICE.txt README.txt share
etc input libexec logs output sbin
# 3. Adding and removing nodes online
# Add a node
[root@server4 ~]# yum install -y nfs-utils
[root@server4 ~]# /etc/init.d/rpcbind status
rpcbind is stopped
[root@server4 ~]# /etc/init.d/rpcbind start
Starting rpcbind: [ OK ]
[root@server4 ~]# mount 172.25.44.1:/home/hadoop/ /home/hadoop/
[root@server4 ~]# su - hadoop
[hadoop@server4 ~]$ cd hadoop/etc/hadoop/
[hadoop@server4 hadoop]$ vim slaves
1 172.25.44.2
2 172.25.44.3
3 172.25.44.4
[hadoop@server4 hadoop]$ cd ..
[hadoop@server4 etc]$ cd ..
[hadoop@server4 hadoop]$ sbin/hadoop-daemon.sh start datanode
[hadoop@server4 hadoop]$ jps
1666 DataNode
1739 Jps
[hadoop@server1 hadoop]$ dd if=/dev/zero of=bigfile bs=1M count=300
300+0 records in
300+0 records out
314572800 bytes (315 MB) copied, 4.00155 s, 78.6 MB/s
[hadoop@server1 hadoop]$ bin/hdfs dfs -put bigfile
[hadoop@server1 hadoop]$ bin/hdfs dfsadmin -report
# Remove a node
[hadoop@server1 hadoop]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ vim slaves
1 172.25.44.2
2 172.25.44.4
[hadoop@server1 hadoop]$ vim hosts-exclude
1 172.25.44.3
[hadoop@server1 hadoop]$ vim hdfs-site.xml
[hadoop@server1 hadoop]$ ll /home/hadoop/hadoop/etc/hadoop/hosts-exclude
-rw-rw-r-- 1 hadoop hadoop 12 Aug 26 18:06 /home/hadoop/hadoop/etc/hadoop/hosts-exclude
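The hdfs-site.xml edit is not shown; the usual way to wire in the exclude file is the dfs.hosts.exclude property pointing at the hosts-exclude file created above (a sketch, not the author's exact lines):
# hdfs-site.xml
<property>
<name>dfs.hosts.exclude</name>
<value>/home/hadoop/hadoop/etc/hadoop/hosts-exclude</value>
</property>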
[hadoop@server1 hadoop]$ cd ..
[hadoop@server1 etc]$ cd ..
[hadoop@server1 hadoop]$ bin/hdfs dfsadmin -refreshNodes
Refresh nodes successful
[hadoop@server1 hadoop]$ bin/hdfs dfsadmin -report
# 4. High availability
Environment: RHEL 6.5
server1 master
server2 chunkserver
server3 chunkserver
server4 chunkserver
server5 master
zoo.cfg # tells the three nodes which ports to use for inter-node communication
F (follower) and L (leader)
/tmp/zookeeper # data directory
sbin/hadoop-daemon.sh # starts a single daemon on one node
[root@server1 ~]# /etc/init.d/rpcbind start
[root@server1 ~]# /etc/init.d/nfs start
Starting NFS services: [ OK ]
Starting NFS quotas: [ OK ]
Starting NFS mountd: [ OK ]
Starting NFS daemon: [ OK ]
Starting RPC idmapd: [ OK ]
[root@server1 ~]# su - hadoop
[hadoop@server1 ~]$ ls
hadoop hadoop-2.7.3.tar.gz jdk1.7.0_79
hadoop-2.7.3 java jdk-7u79-linux-x64.tar.gz
[hadoop@server1 ~]$ rm -fr /tmp/*
[root@server5 ~]# yum install -y nfs-utils
[root@server5 ~]# /etc/init.d/rpcbind start
[root@server5 ~]# useradd -u 800 hadoop
[root@server5 ~]# mount 172.25.44.1:/home/hadoop/ /home/hadoop/
[root@server5 ~]# su - hadoop
[hadoop@server5 ~]$ ls
hadoop hadoop-2.7.3.tar.gz jdk1.7.0_79
hadoop-2.7.3 java jdk-7u79-linux-x64.tar.gz
# Deploy the three-node ZooKeeper cluster
[root@server2 ~]# mount 172.25.44.1:/home/hadoop/ /home/hadoop/
[root@server2 ~]# su - hadoop
[hadoop@server2 ~]$ ls
hadoop hadoop-2.7.3.tar.gz jdk1.7.0_79
hadoop-2.7.3 java jdk-7u79-linux-x64.tar.gz
[hadoop@server2 ~]$ rm -fr /tmp/* # remove the data from the previous experiments so it does not interfere with the ones that follow
[root@server3 ~]# mount 172.25.44.1:/home/hadoop/ /home/hadoop/
[root@server3 ~]# su - hadoop
[hadoop@server3 ~]$ rm -fr /tmp/*
[root@server4 ~]# mount 172.25.44.1:/home/hadoop/ /home/hadoop/
[root@server4 ~]# su - hadoop
[hadoop@server4 ~]$ rm -fr /tmp/*
[hadoop@server2 ~]$ tar zxf zookeeper-3.4.9.tar.gz
[hadoop@server2 ~]$ cd zookeeper-3.4.9
[hadoop@server2 zookeeper-3.4.9]$ cd conf
[hadoop@server2 conf]$ ls
configuration.xsl log4j.properties zoo_sample.cfg
[hadoop@server2 conf]$ cp zoo_sample.cfg zoo.cfg
[hadoop@server2 conf]$ vim zoo.cfg # edit the zoo.cfg file
30 server.1=172.25.44.2:2888:3888
31 server.2=172.25.44.3:2888:3888
32 server.3=172.25.44.4:2888:3888
[hadoop@server2 conf]$ mkdir /tmp/zookeeper
[hadoop@server2 conf]$ cd /tmp/zookeeper
[hadoop@server2 zookeeper]$ ls
[hadoop@server2 zookeeper]$ echo 1 > myid
[hadoop@server2 zookeeper]$ cd
[hadoop@server2 ~]$ cd zookeeper-3.4.9
[hadoop@server2 zookeeper-3.4.9]$ cd bin/
[hadoop@server2 bin]$ ./zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.9/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[hadoop@server2 bin]$ ./zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.9/bin/../conf/zoo.cfg
Mode: follower
[hadoop@server2 bin]$ pwd
/home/hadoop/zookeeper-3.4.9/bin
[hadoop@server2 bin]$ ./zkCli.sh
[zk: localhost:2181(CONNECTED) 0] ls
[zk: localhost:2181(CONNECTED) 1] ls /
[zookeeper]
[zk: localhost:2181(CONNECTED) 2] ls /zookeeper
[quota]
[zk: localhost:2181(CONNECTED) 3] get /zookeeper/quota
cZxid = 0x0
ctime = Thu Jan 01 08:00:00 CST 1970
mZxid = 0x0
mtime = Thu Jan 01 08:00:00 CST 1970
pZxid = 0x0
cversion = 0
dataVersion = 0
aclVersion = 0
ephemeralOwner = 0x0
dataLength = 0
numChildren = 0
[zk: localhost:2181(CONNECTED) 4]
[hadoop@server3 ~]$ mkdir /tmp/zookeeper
[hadoop@server3 ~]$ cd /tmp/zookeeper
[hadoop@server3 zookeeper]$ ls
[hadoop@server3 zookeeper]$ echo 2 > myid # the myid value must be unique and must match the id defined in the configuration file (server.2=172.25.44.3:2888:3888); the other nodes follow the same pattern
[hadoop@server3 zookeeper]$ ls
myid
[hadoop@server3 zookeeper]$ cd
[hadoop@server3 ~]$ cd zookeeper-3.4.9
[hadoop@server3 zookeeper-3.4.9]$ bin/zkServer.sh start # start the service
ZooKeeper JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.9/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[hadoop@server3 zookeeper-3.4.9]$ bin/zkServer.sh status # check the status; this node is the leader
ZooKeeper JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.9/bin/../conf/zoo.cfg
Mode: leader
[hadoop@server4 ~]$ mkdir /tmp/zookeeper
[hadoop@server4 ~]$ cd /tmp/zookeeper
[hadoop@server4 zookeeper]$ ls
[hadoop@server4 zookeeper]$ echo 3 > myid
[hadoop@server4 zookeeper]$ ls
myid
[hadoop@server4 zookeeper]$ cd
[hadoop@server4 ~]$ cd zookeeper-3.4.9
[hadoop@server4 zookeeper-3.4.9]$ bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.9/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[hadoop@server4 zookeeper-3.4.9]$ bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.9/bin/../conf/zoo.cfg
Mode: follower
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ ls
bigfile etc input libexec logs output sbin
bin include lib LICENSE.txt NOTICE.txt README.txt share
[hadoop@server1 hadoop]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ vim core-site.xml
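The core-site.xml content is not shown; for this HA setup it needs to point the default filesystem at the masters nameservice and list the zookeeper quorum used by the failover controllers. A sketch matching the addresses used below:
# core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://masters</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>172.25.44.2:2181,172.25.44.3:2181,172.25.44.4:2181</value>
</property>
</configuration>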
[hadoop@server1 hadoop]$ vim hdfs-site.xml
19 <configuration>
20 <property>
21 <name>dfs.replication</name>
22 <value>3</value>
23 </property>
24
# set the hdfs nameservice to masters; this must match the setting in core-site.xml
25 <property>
26 <name>dfs.nameservices</name>
27 <value>masters</value>
28 </property>
29
# the masters nameservice contains two namenodes, h1 and h2 (the names can be customized)
30 <property>
31 <name>dfs.ha.namenodes.masters</name>
32 <value>h1,h2</value>
33 </property>
34
# rpc address of namenode h1
35 <property>
36 <name>dfs.namenode.rpc-address.masters.h1</name>
37 <value>172.25.44.1:9000</value>
38 </property>
39
# http address of namenode h1
40 <property>
41 <name>dfs.namenode.http-address.masters.h1</name>
42 <value>172.25.44.1:50070</value>
43 </property>
44
# rpc address of namenode h2
45 <property>
46 <name>dfs.namenode.rpc-address.masters.h2</name>
47 <value>172.25.44.5:9000</value>
48 </property>
49
# http address of namenode h2
50 <property>
51 <name>dfs.namenode.http-address.masters.h2</name>
52 <value>172.25.44.5:50070</value>
53 </property>
54
# where the NameNode shared edits (metadata) are stored on the JournalNodes
55 <property>
56 <name>dfs.namenode.shared.edits.dir</name>
57 <value>qjournal://172.25.44.2:8485;172.25.44.3:8485;172.25.44.4:8485/masters</value>
58 </property>
59
# where each JournalNode stores its data on local disk
60 <property>
61 <name>dfs.journalnode.edits.dir</name>
62 <value>/tmp/journaldata</value>
63 </property>
64
# enable automatic NameNode failover
65 <property>
66 <name>dfs.ha.automatic-failover.enabled</name>
67 <value>true</value>
68 </property>
69
# configure how clients locate the active NameNode during failover
70 <property>
71 <name>dfs.client.failover.proxy.provider.masters</name>
72 <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
73 </property>
74
# fencing methods; each mechanism goes on its own line
75 <property>
76 <name>dfs.ha.fencing.methods</name>
77 <value>sshfence
78 shell(/bin/true)
79 </value>
80 </property>
81
# the sshfence mechanism requires passwordless ssh
82 <property>
83 <name>dfs.ha.fencing.ssh.private-key-files</name>
84 <value>/home/hadoop/.ssh/id_rsa</value>
85 </property>
86
# sshfence connection timeout (in milliseconds)
87 <property>
88 <name>dfs.ha.fencing.ssh.connect-timeout</name>
89 <value>30000</value>
90 </property>
91
92 </configuration>
[hadoop@server1 hadoop]$ vim slaves
1 172.25.44.2
2 172.25.44.3
3 172.25.44.4
[hadoop@server1 hadoop]$ pwd
/home/hadoop/hadoop/etc/hadoop
# Start the zookeeper cluster and then a journalnode on each of the three DNs in turn (for the first HDFS startup the journalnodes must be started first)
[hadoop@server2 hadoop]$ bin/zkServer.sh start
[hadoop@server2 hadoop]$ jps
1820 QuorumPeerMain
2345 Jps
1603 DataNode
[hadoop@server2 hadoop]$ sbin/hadoop-daemon.sh start journalnode
[hadoop@server2 hadoop]$ jps
2615 Jps
2402 QuorumPeerMain
1603 DataNode
2551 JournalNode
[hadoop@server3 hadoop]$ jps
2224 Jps
1721 QuorumPeerMain
[hadoop@server3 hadoop]$ sbin/hadoop-daemon.sh start journalnode
[hadoop@server3 hadoop]$ jps
2399 JournalNode
2448 Jps
1721 QuorumPeerMain
[hadoop@server4 hadoop]$ bin/zkServer.sh start
[hadoop@server4 hadoop]$ jps
1823 Jps
1692 QuorumPeerMain
[hadoop@server4 hadoop]$ sbin/hadoop-daemon.sh start journalnode
[hadoop@server4 hadoop]$ jps
2166 JournalNode
1692 QuorumPeerMain
2215 Jps
[hadoop@server1 hadoop]$ cd ..
[hadoop@server1 etc]$ cd ..
[hadoop@server1 hadoop]$ ls
bigfile etc input libexec logs output sbin
bin include lib LICENSE.txt NOTICE.txt README.txt share
[hadoop@server1 hadoop]$ bin/hdfs namenode -format # format the HDFS cluster
[hadoop@server1 hadoop]$ scp -r /tmp/hadoop-hadoop 172.25.44.5:/tmp # the NameNode metadata lives under /tmp by default and must be copied to h2 (server5)
[hadoop@server1 hadoop]$ bin/hdfs zkfc -formatZK # initialize the HA state in zookeeper (run on h1 only; note the capitalization of -formatZK)
[hadoop@server1 hadoop]$ sbin/start-dfs.sh # start the HDFS cluster; this must run without ssh stopping to ask for "yes". If it does prompt, first accept the host keys with the ssh logins below, then start again
[hadoop@server1 hadoop]$ ssh 172.25.44.2
Last login: Sun Aug 26 17:02:50 2018 from server1
[hadoop@server2 ~]$ logout
Connection to 172.25.44.2 closed.
[hadoop@server1 hadoop]$ ssh 172.25.44.3
Last login: Sun Aug 26 17:02:58 2018 from server1
[hadoop@server3 ~]$ logout
Connection to 172.25.44.3 closed.
[hadoop@server1 hadoop]$ ssh 172.25.44.4
[hadoop@server4 ~]$ logout
Connection to 172.25.44.4 closed.
[hadoop@server1 hadoop]$ ssh 172.25.44.5
[hadoop@server5 ~]$ logout
Connection to 172.25.44.5 closed.
[hadoop@server1 hadoop]$ ssh server2
[hadoop@server2 ~]$ logout
Connection to server2 closed.
[hadoop@server1 hadoop]$ ssh server3
[hadoop@server3 ~]$ logout
Connection to server3 closed.
[hadoop@server1 hadoop]$ ssh server4
[hadoop@server4 ~]$ logout
Connection to server4 closed.
[hadoop@server1 hadoop]$ ssh server5
Last login: Tue Aug 28 11:03:56 2018 from server1
[hadoop@server5 ~]$ logout
Connection to server5 closed.
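Accepting all the host keys one by one, as above, can also be scripted; a minimal sketch assuming the same hosts and hostnames:
for h in 172.25.44.2 172.25.44.3 172.25.44.4 172.25.44.5 server2 server3 server4 server5; do
    ssh -o StrictHostKeyChecking=no "$h" true # accept the host key without an interactive prompt
done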
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
[hadoop@server2 hadoop]$ cd
[hadoop@server2 ~]$ cd zookeeper-3.4.9
[hadoop@server2 zookeeper-3.4.9]$ bin/zkCli.sh
[zk: localhost:2181(CONNECTED) 2] ls /hadoop-ha
[masters]
[zk: localhost:2181(CONNECTED) 3] ls /hadoop-ha/masters
[]
[zk: localhost:2181(CONNECTED) 4] ls /hadoop-ha/masters
[ActiveBreadCrumb, ActiveStandbyElectorLock]
[zk: localhost:2181(CONNECTED) 5] ls /hadoop-ha/masters/ActiveBreadCrumb
[]
[zk: localhost:2181(CONNECTED) 6] get /hadoop-ha/masters/ActiveBreadCrumb # shows that the node currently providing service (the active namenode) is server1
mastersh1server1 �F(�>
cZxid = 0x10000000b
ctime = Tue Aug 28 11:04:52 CST 2018
mZxid = 0x10000000b
mtime = Tue Aug 28 11:04:52 CST 2018
pZxid = 0x10000000b
cversion = 0
dataVersion = 0
aclVersion = 0
ephemeralOwner = 0x0
dataLength = 28
numChildren = 0
[hadoop@server5 ~]$ jps
1730 DFSZKFailoverController
1633 NameNode
1979 Jps
# h2 (server5) is in standby state
# Test automatic failover
[hadoop@server1 hadoop]$ pwd
/home/hadoop/hadoop
[hadoop@server1 hadoop]$ jps
1566 NameNode
2222 DFSZKFailoverController
2343 Jps
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user/hadoop
[hadoop@server1 hadoop]$ pwd
/home/hadoop/hadoop
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls
[hadoop@server1 hadoop]$ bin/hdfs dfs -put etc/hadoop/ input
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2018-08-28 11:29 input
[hadoop@server1 hadoop]$ jps
1566 NameNode
2697 Jps
2222 DFSZKFailoverController
[hadoop@server1 hadoop]$ kill -9 1566
# server5 now provides the service (its NameNode becomes active)
# server1 is down and can no longer be reached
[hadoop@server1 hadoop]$ sbin/hadoop-daemon.sh start namenode # restart the NameNode; the roles have switched and server1 is now in standby state
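To confirm which NameNode is active after the switch, hdfs haadmin can be queried against the service ids h1 and h2 defined in hdfs-site.xml; each command prints active or standby:
bin/hdfs haadmin -getServiceState h1
bin/hdfs haadmin -getServiceState h2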
# 2. YARN high availability
[hadoop@server1 hadoop]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ cp mapred-site.xml.template mapred-site.xml
[hadoop@server1 hadoop]$ vim mapred-site.xml # set yarn as the MapReduce framework
19 <configuration>
20 <property>
21 <name>mapreduce.framework.name</name>
22 <value>yarn</value>
23 </property>
24 </configuration>
[hadoop@server1 hadoop]$ vim yarn-site.xml
15 <configuration>
# allow mapreduce programs to run on the nodemanagers
16 <property>
17 <name>yarn.nodemanager.aux-services</name>
18 <value>mapreduce_shuffle</value>
19 </property>
20
# enable RM high availability
21 <property>
22 <name>yarn.resourcemanager.ha.enabled</name>
23 <value>true</value>
24 </property>
25
# cluster id of the RM pair
26 <property>
27 <name>yarn.resourcemanager.cluster-id</name>
28 <value>RM_CLUSTER</value>
29 </property>
30
# define the RM nodes
31 <property>
32 <name>yarn.resourcemanager.ha.rm-ids</name>
33 <value>rm1,rm2</value>
34 </property>
35
# address of RM1
36 <property>
37 <name>yarn.resourcemanager.hostname.rm1</name>
38 <value>172.25.44.1</value>
39 </property>
40
# address of RM2
41 <property>
42 <name>yarn.resourcemanager.hostname.rm2</name>
43 <value>172.25.44.5</value>
44 </property>
45
# enable automatic RM recovery
46 <property>
47 <name>yarn.resourcemanager.recovery.enabled</name>
48 <value>true</value>
49 </property>
50
# how the RM state is stored; the options are MemStore and ZKStore (zookeeper)
51 <property>
52 <name>yarn.resourcemanager.store.class</name>
53 <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
54 </property>
55
# when zookeeper storage is used, the address list of the zookeeper cluster
56 <property>
57 <name>yarn.resourcemanager.zk-address</name>
58 <value>172.25.44.2:2181,172.25.44.3:2181,172.25.44.4:2181</value>
59 </property>
60
61 </configuration>
[hadoop@server1 hadoop]$ sbin/start-yarn.sh # start the YARN services
[hadoop@server1 hadoop]$ jps
4082 ResourceManager
3695 DFSZKFailoverController
4357 Jps
3833 NameNode
[hadoop@server3 hadoop]$ jps
1958 NodeManager
1201 QuorumPeerMain
1851 JournalNode
2062 Jps
1758 DataNode
[hadoop@server4 hadoop]$ jps
1217 QuorumPeerMain
1751 DataNode
2055 Jps
1844 JournalNode
1951 NodeManager
[hadoop@server5 ~]$ jps
2610 Jps
2445 DFSZKFailoverController
2346 NameNode
[hadoop@server5 ~]$ cd hadoop
[hadoop@server5 hadoop]$ sbin/yarn-daemon.sh start resourcemanager # the ResourceManager on RM2 (server5) must be started manually
# Check in the zookeeper CLI on server2
[zk: localhost:2181(CONNECTED) 10] ls
[zk: localhost:2181(CONNECTED) 11] ls /yarn-leader-election
[RM_CLUSTER]
[zk: localhost:2181(CONNECTED) 12] ls /yarn-leader-election/RM_CLUSTER
[ActiveBreadCrumb, ActiveStandbyElectorLock]
[zk: localhost:2181(CONNECTED) 13] ls /yarn-leader-election/RM_CLUSTER/ActiveBreadCrumb
[]
[zk: localhost:2181(CONNECTED) 14] get /yarn-leader-election/RM_CLUSTER/ActiveBreadCrumb
RM_CLUSTERrm1
cZxid = 0x10000001f
ctime = Tue Aug 28 12:13:33 CST 2018
mZxid = 0x10000001f
mtime = Tue Aug 28 12:13:33 CST 2018
pZxid = 0x10000001f
cversion = 0
dataVersion = 0
aclVersion = 0
ephemeralOwner = 0x0
dataLength = 17
numChildren = 0
# Test YARN failover
[hadoop@server1 hadoop]$ jps
4082 ResourceManager
3695 DFSZKFailoverController
4357 Jps
3833 NameNode
[hadoop@server1 hadoop]$ kill -9 4082
[hadoop@server1 hadoop]$ sbin/yarn-daemon.sh start resourcemanager
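Analogously, the ResourceManager state can be checked with yarn rmadmin against the rm ids rm1 and rm2 defined in yarn-site.xml; each command prints active or standby:
bin/yarn rmadmin -getServiceState rm1
bin/yarn rmadmin -getServiceState rm2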
3. HBase distributed deployment
(The ResourceManager above is the resource scheduler: it allocates cluster resources, and without it no jobs can be submitted.)
[hadoop@server1 hadoop]$ cd
[hadoop@server1 ~]$ tar zxf hbase-1.2.4-bin.tar.gz
[hadoop@server1 ~]$ cd hbase-1.2.4
[hadoop@server1 hbase-1.2.4]$ cd conf/
[hadoop@server1 conf]$ ls
hadoop-metrics2-hbase.properties hbase-policy.xml regionservers
hbase-env.cmd hbase-site.xml
hbase-env.sh log4j.properties
[hadoop@server1 conf]$ vim hbase-env.sh
27 export JAVA_HOME=/home/hadoop/java # point to the jdk
128 export HBASE_MANAGES_ZK=false # the default is true, which makes hbase start its own zookeeper; set it to false when maintaining a separate zookeeper cluster
129 export HADOOP_HOME=/home/hadoop/hadoop # point to the hadoop directory, otherwise hbase cannot find the hdfs cluster configuration
[hadoop@server1 conf]$ vim hbase-site.xml
23 <configuration>
# shared directory the region servers use to persist HBase data; the HDFS address (IP or hostname and port) given here must match fs.defaultFS in core-site.xml
24 <property>
25 <name>hbase.rootdir</name>
26 <value>hdfs://masters/hbase</value>
27 </property>
28
# enable hbase distributed mode
29 <property>
30 <name>hbase.cluster.distributed</name>
31 <value>true</value>
32 </property>
33
# comma-separated address list of the zookeeper cluster; the default is localhost, which only suits pseudo-distributed mode, so it must be changed for a fully distributed deployment
34 <property>
35 <name>hbase.zookeeper.quorum</name>
36 <value>172.25.44.2,172.25.44.3,172.25.44.4</value>
37 </property>
38
# specify the hbase master
39 <property>
40 <name>hbase.master</name>
41 <value>h1</value>
42 </property>
43
44 </configuration>
[hadoop@server1 conf]$ vim regionservers
1 172.25.44.2
2 172.25.44.3
3 172.25.44.4
[hadoop@server1 conf]$ cd ..
[hadoop@server1 hbase-1.2.4]$ bin/start-hbase.sh # start hbase; run this on the master node
[hadoop@server1 hbase-1.2.4]$ jps
3182 DFSZKFailoverController
3703 Jps
3351 NameNode
3587 HMaster
# Check on server2: the master znode exists
[zk: localhost:2181(CONNECTED) 3] get /hbase/master
�master:16000��;��PBUF
server1�}�ڶ��,�}
cZxid = 0x300000014
ctime = Tue Aug 28 13:42:14 CST 2018
mZxid = 0x300000014
mtime = Tue Aug 28 13:42:14 CST 2018
pZxid = 0x300000014
cversion = 0
dataVersion = 0
aclVersion = 0
ephemeralOwner = 0x1657f0852c60002
dataLength = 55
numChildren = 0
[hadoop@server5 ~]$ cd hbase-1.2.4
[hadoop@server5 hbase-1.2.4]$ ls
bin conf hbase-webapps lib logs README.txt
CHANGES.txt docs LEGAL LICENSE.txt NOTICE.txt
[hadoop@server5 hbase-1.2.4]$ cd bin/
[hadoop@server5 bin]$ cd ..
[hadoop@server5 hbase-1.2.4]$ bin/hbase-daemon.sh start master # run on the backup master node
[hadoop@server5 hbase-1.2.4]$ jps
1501 HMaster
1179 NameNode
1639 Jps
The HBase Master listens on port 16000 by default and serves a web UI on port 16010 of the Master; RegionServers bind port 16020 by default and expose an information page on port 16030.
[hadoop@server1 hbase-1.2.4]$ bin/hbase shell
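The shell session itself is not recorded; a short smoke test inside the shell could look like the following, where the table name test and column family cf are arbitrary choices:
create 'test', 'cf' # create a table with one column family
put 'test', 'row1', 'cf:a', 'value1' # write one cell
scan 'test' # read it back
list # list all tables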