一文解决伪分布式hadoop集群搭建
python爬虫人工智能大数据
共 3356字,需浏览 7分钟
·
2020-08-19 22:48
◆ ◆ ◆ ◆ ◆
1.关闭防火墙及开机自启动
/*普通用户:切换为root用户*/
$ su - root
/*root用户:关闭防火墙及开机自启动*/
# systemctl stop firewalld.service
# systemctl disable firewalld.service
# vim /etc/hostname
master
# vim /etc/hosts
192.168.XXX.XXX master
# su - yan
$ cd
$ ssh-keygen -t rsa
$ ssh master
$ ssh-copy-id master
$ ssh master    # 不需要再输入密码即表示免密登录配置成功
/*普通用户:切换到家目录,创建文件目录*/
cd
mkdir hadoop
mkdir hadoopdata
cd hadoop
/*需要上传的介质*/
hadoop-2.7.7.tar.gz
jdk-8u144-linux-x64.tar.gz
tar -zxvf hadoop-2.7.7.tar.gz
tar -zxvf jdk-8u144-linux-x64.tar.gz
ll
total 394764
drwxr-xr-x. 10 yan yan 161 Feb 12 16:36 hadoop-2.7.7
-rw-r--r--. 1 yan yan 218720521 Dec 17 2018 hadoop-2.7.7.tar.gz
drwxr-xr-x. 8 yan yan 255 Jul 22 2017 jdk1.8.0_144
-rw-r--r--. 1 yan yan 185515842 Oct 17 2017 jdk-8u144-linux-x64.tar.gz
cd
vim .bash_profile
export JAVA_HOME=/home/yan/hadoop/jdk1.8.0_144
export HADOOP_HOME=/home/yan/hadoop/hadoop-2.7.7
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$PATH
source .bash_profile
/*查看是否生效*/
java -version
java version "1.8.0_144"
Java(TM) SE Runtime Environment (build 1.8.0_144-b01)
Java HotSpot(TM) 64-Bit Server VM (build 25.144-b01, mixed mode)
$ hadoop version
Hadoop 2.7.7
Subversion Unknown -r c1aad84bd27cd79c3d1a7dd58202a8c3ee1ed3ac
Compiled by stevel on 2018-07-18T22:47Z
Compiled with protoc 2.5.0
From source with checksum 792e15d20b12c74bd6f19a1fb886490
This command was run using /home/yan/hadoop/hadoop-2.7.7/share/hadoop/common/hadoop-common-2.7.7.jar
/*切换到hadoop配置文件目录*/
cd /home/yan/hadoop/hadoop-2.7.7/etc/hadoop
$ vim hadoop-env.sh
export JAVA_HOME=/home/yan/hadoop/jdk1.8.0_144
$ vim mapred-env.sh
export JAVA_HOME=/home/yan/hadoop/jdk1.8.0_144
$ vim yarn-env.sh
export JAVA_HOME=/home/yan/hadoop/jdk1.8.0_144
$ vim core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/yan/hadoopdata</value>
  </property>
</configuration>
$ vim hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
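$ vim yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master:18040</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:18030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:18025</value>
  </property>
</configuration>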
/*创建一个副本*/
$ cp mapred-site.xml.template mapred-site.xml
$ vim mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
$ vim slaves
master
$ cd
/*格式化文件系统*/
$ hdfs namenode -format
/*启动集群*/
$ start-all.sh
方法一:jps查看进程(出现6个,缺一不可)
jps
9713 DataNode
10071 ResourceManager
10505 Jps
9915 SecondaryNameNode
9596 NameNode
10175 NodeManager
方法二:web端查看(观察界面是否出现)
http://master:50070/
http://master:8088/
cd /home/yan/hadoop/hadoop-2.7.7/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-2.7.7.jar pi 5 5
cd
vim word.txt
Hello Yan
Wuhan Win
I love U
hadoop fs -mkdir /test
hadoop fs -put word.txt /test
cd /home/yan/hadoop/hadoop-2.7.7/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-2.7.7.jar wordcount /test/word.txt /output
hadoop fs -cat /output/part-r-00000
———— 下次见 ————
python爬虫人工智能大数据公众号
评论