1. HDFS Pros and Cons
Pros:
- High fault tolerance: data is stored as multiple replicas, and a lost replica can be re-created automatically from the others.
- Suited to big data: files can scale to GB, TB, even PB, and a cluster can hold millions of files across many nodes.
- Can be built on cheap commodity machines, relying on replication for reliability.
Cons:
- Not suited to low-latency access, e.g. millisecond-level reads.
- Inefficient for large numbers of small files: their metadata exhausts NameNode memory, and seek time can exceed read time.
- No concurrent writers and no random modification: a file has one writer at a time, and writes are append-only.
2. HDFS Architecture
- NameNode
  - Manages the HDFS namespace
  - Configures the replication policy
  - Manages the block mapping information
  - Handles client requests
- DataNode
  - Stores the actual data blocks
  - Performs block reads and writes
- Client
  - Splits files into blocks on upload, obtains block locations from the NameNode, and reads/writes the blocks on DataNodes
- Secondary NameNode
  - Assists the NameNode, e.g. by periodically merging the fsimage and the edit log
  - In an emergency, can help recover the NameNode
Files in HDFS are stored in blocks. In Hadoop 2.x the default block size is 128 MB, configurable via dfs.blocksize. The block size is driven by the disk transfer rate: it is chosen large enough that transfer time dominates seek time.
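A quick way to confirm the effective block size is to read it from a Java client. A minimal sketch, assuming the hdfs://192.168.1.21:9000 address and root user used in the client examples later in this post:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // dfs.blocksize can also be overridden on the client side; 134217728 bytes = 128 MB
        conf.set("dfs.blocksize", "134217728");
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.21:9000"), conf, "root");
        // Default block size that would be used for new files under this path
        System.out.println(fs.getDefaultBlockSize(new Path("/")));
        fs.close();
    }
}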
3. HDFS Shell Operations
- Start Hadoop
sbin/start-dfs.sh
sbin/start-yarn.sh
- -help: show help for a command
hadoop fs -help rm
- -ls: list directory contents
hadoop fs -ls /
- -mkdir: create a directory
Add -p to create parent directories as needed:
hadoop fs -mkdir /study
hadoop fs -mkdir -p /study/mmc
- -moveFromLocal: move a local file into HDFS
hadoop fs -moveFromLocal test.txt /usr/mmc
- -appendToFile: append a local file to a file already in HDFS
hadoop fs -appendToFile test2.txt /usr/mmc/test.txt
- -cat: print the contents of a file
hadoop fs -cat /usr/mmc/test.txt
- -copyToLocal: copy a file from HDFS to the local file system
hadoop fs -copyToLocal /usr/mmc/test.txt ./test.txt
- -cp: copy a file from one HDFS path to another
hadoop fs -cp /usr/mmc/test.txt /study/mmc
- -get: download a file from HDFS (same as -copyToLocal)
hadoop fs -get /usr/mmc/test.txt
- -getmerge: merge and download multiple files
hadoop fs -getmerge /usr/mmc/* merge.txt
- -tail: show the end of a file
hadoop fs -tail /usr/mmc/test.txt
- -rm: delete a file or directory
hadoop fs -rm /study/mmc/test.txt
- -du: show space usage
hadoop fs -du -h /
- -setrep: set the replication factor of an HDFS file, e.g.:
hadoop fs -setrep 3 /usr/mmc/test.txt
The number set here is only recorded in the NameNode's metadata; the actual number of replicas never exceeds the number of DataNodes.
4. HDFS Client Operations
- Start the Hadoop cluster deployed on the virtual machine
- Set local environment variables on Windows (optional)
  - Create a new system variable (typically HADOOP_HOME) pointing to the local Hadoop directory that contains winutils
  - Append %HADOOP_HOME%\bin to the PATH system variable
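If you would rather not change system settings, the same thing can be done per JVM before the first FileSystem call. A one-line sketch; the directory is a placeholder for wherever your local Hadoop/winutils copy lives:

// Equivalent to setting the HADOOP_HOME environment variable (path is a placeholder)
System.setProperty("hadoop.home.dir", "D:/hadoop-2.10.0");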
- Operate HDFS from a Java client
  - Add the dependencies to the pom file (the Hadoop artifact versions should match the cluster; 2.10.0 here):
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.8.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.10.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.10.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.10.0</version>
    </dependency>
</dependencies>
- Demo

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSClient {
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        Configuration configuration = new Configuration();
        // Get a client connection to the NameNode
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.21:9000"), configuration, "root");
        // Create a directory
        fs.mkdirs(new Path("/mmc"));
        // Release resources
        fs.close();
        System.out.println("over");
    }
}
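After the demo runs, the new directory should show up with hadoop fs -ls /.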
- API operations
The API calls are straightforward; the JUnit tests below cover the common ones.
public class HDFSClient {

    FileSystem fs;

    @Before
    public void before() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // Get a client connection
        fs = FileSystem.get(new URI("hdfs://192.168.1.21:9000"), configuration, "root");
    }

    @After
    public void after() throws IOException {
        // Release the connection after each test
        fs.close();
    }

    @Test
    public void testmkdir() throws IOException {
        // Create a directory
        fs.mkdirs(new Path("/mmc"));
        System.out.println("over");
    }

    /**
     * Upload a file
     * @throws IOException
     */
    @Test
    public void testcopyFromLocalFile() throws IOException {
        fs.copyFromLocalFile(new Path("d://a.txt"), new Path("/mmc"));
    }

    /**
     * Delete a file or directory (true = recursive)
     * @throws IOException
     */
    @Test
    public void testDelete() throws IOException {
        fs.delete(new Path("/mmc"), true);
    }

    /**
     * Show file details
     * @throws IOException
     */
    @Test
    public void testListFiles() throws IOException {
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(new Path("/usr"), true);
        while (iterator.hasNext()) {
            LocatedFileStatus fileStatus = iterator.next();
            if (fileStatus.isFile()) {
                System.out.println(fileStatus.getPath().getName());
                System.out.println(fileStatus.getPermission());
                System.out.println(fileStatus.getLen());
            }
        }
    }
}
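The listFiles iterator above only returns files. Its sibling FileSystem.listStatus also returns directories; a sketch of a test method that could be added to the class above (the /usr path is just the sample path used throughout):

    /**
     * Distinguish files from directories under a path
     */
    @Test
    public void testListStatus() throws IOException {
        FileStatus[] statuses = fs.listStatus(new Path("/usr"));
        for (FileStatus status : statuses) {
            // isFile() tells the two kinds of entries apart
            System.out.println((status.isFile() ? "file: " : "dir: ") + status.getPath().getName());
        }
    }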
- Operating on files with IO streams
/**
 * @description: Operating HDFS with IO streams
 * @author: mmc
 * @create: 2020-05-28 23:01
 **/
public class HDFSIO {

    /**
     * Upload a file
     * @throws URISyntaxException
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void putFiletoHDFS() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // Get a client connection
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.21:9000"), configuration, "root");
        // Input stream from the local file
        FileInputStream fis = new FileInputStream("d://a.txt");
        // Output stream to HDFS
        FSDataOutputStream outputStream = fs.create(new Path("/usr/mmc/a.txt"));
        // Copy between the streams (IOUtils is org.apache.hadoop.io.IOUtils)
        IOUtils.copyBytes(fis, outputStream, configuration);
        IOUtils.closeStream(fis);
        IOUtils.closeStream(outputStream);
        fs.close();
    }

    /**
     * Download a file
     */
    @Test
    public void getFileFromHDFS() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // Get a client connection
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.21:9000"), configuration, "root");
        // Input stream from HDFS
        FSDataInputStream inputStream = fs.open(new Path("/usr/mmc/a.txt"));
        FileOutputStream outputStream = new FileOutputStream("e://b.txt");
        IOUtils.copyBytes(inputStream, outputStream, configuration);
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(inputStream);
        fs.close();
    }

    /**
     * Download only the first part (128 MB) of a file
     * @throws URISyntaxException
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void readFileSeek1() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // Get a client connection
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.21:9000"), configuration, "root");
        // Input stream from HDFS
        FSDataInputStream inputStream = fs.open(new Path("/usr/mmc/a.txt"));
        FileOutputStream outputStream = new FileOutputStream("e://b.txt.part1");
        // Copy only the first 128 MB of the stream; write exactly as many bytes as were read
        byte[] buf = new byte[1024];
        for (int i = 0; i < 1024 * 128; i++) {
            int len = inputStream.read(buf);
            if (len < 0) {
                break;
            }
            outputStream.write(buf, 0, len);
        }
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(inputStream);
        fs.close();
    }

    /**
     * Download the remaining part of the file
     * @throws URISyntaxException
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void readFileSeek2() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // Get a client connection
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.21:9000"), configuration, "root");
        // Input stream from HDFS
        FSDataInputStream inputStream = fs.open(new Path("/usr/mmc/a.txt"));
        // Seek to the 128 MB offset and read from there
        inputStream.seek(1024 * 1024 * 128);
        FileOutputStream outputStream = new FileOutputStream("e://b.txt.part2");
        IOUtils.copyBytes(inputStream, outputStream, configuration);
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(inputStream);
        fs.close();
    }
}
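Once readFileSeek1 and readFileSeek2 have both run, the two local parts can be stitched back together to verify the split worked. A minimal sketch, assuming the e:/b.txt.part1 and e:/b.txt.part2 files produced above:

import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

public class MergeParts {
    public static void main(String[] args) throws Exception {
        // Append part2 onto part1; part1 then holds the complete file again
        try (OutputStream out = Files.newOutputStream(Paths.get("e:/b.txt.part1"),
                StandardOpenOption.APPEND)) {
            Files.copy(Paths.get("e:/b.txt.part2"), out);
        }
    }
}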
- HDFS read/write data flow
Writing data: the client asks the NameNode to create the file; the NameNode checks that it can be created and returns a pipeline of DataNodes for each block; the client streams the block to the first DataNode in packets, each DataNode forwards the packets to the next one in the pipeline, and acknowledgements flow back to the client.
Reading data: the client asks the NameNode for the file's block locations, reads each block from the nearest DataNode holding a replica, verifies checksums, and assembles the blocks into the file.