Installed Hadoop 3.1.3 in a virtual machine and started HDFS.
Reading files through the Java API from the host machine fails, while the same code runs fine inside the VM.
Code
package com.ggkkdd.hdfs.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;

public class MergeFile {
    Path inputPath = null;
    Path outputPath = null;

    public MergeFile(Path inputPath, Path outputPath) {
        this.inputPath = inputPath;
        this.outputPath = outputPath;
    }

    public MergeFile(String inputPath, String outputPath) {
        this.inputPath = new Path(inputPath);
        this.outputPath = new Path(outputPath);
    }

    public void doMerge() throws IOException {
        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.set("fs.defaultFS", "hdfs://hadoop-master:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fsSource = FileSystem.get(URI.create(inputPath.toString()), conf);
        FileSystem fsDst = FileSystem.get(URI.create(outputPath.toString()), conf);
        // List the input files, excluding anything matching .*\.abc
        FileStatus[] sourceStatus = fsSource.listStatus(inputPath, new MyPathFilter(".*\\.abc"));
        System.out.println(sourceStatus.length);
        FSDataOutputStream fsDos = fsDst.create(outputPath);
        PrintStream ps = new PrintStream(System.out);
        for (FileStatus sta : sourceStatus) {
            System.out.println("Path: " + sta.getPath());
            System.out.println("Size: " + sta.getLen());
            System.out.println("Permissions: " + sta.getPermission());
            System.out.println("Content:");
            FSDataInputStream fsDis = fsSource.open(sta.getPath());
            byte[] data = new byte[1024];
            int read;
            while ((read = fsDis.read(data)) > 0) {
                System.out.println(read);
                ps.write(data, 0, read);
                fsDos.write(data, 0, read);
            }
            fsDis.close();
        }
        // Close the output stream so the merged file is finalized on HDFS.
        fsDos.close();
    }

    public static void main(String[] args) throws IOException {
        MergeFile merge = new MergeFile("hdfs://hadoop-master:9000/user/hadoop/input",
                "hdfs://hadoop-master:9000/user/hadoop/output/merge.txt");
        // MergeFile merge = new MergeFile("/user/hadoop/input",
        //         "/user/hadoop/output/merge.txt");
        merge.doMerge();
    }
}

class MyPathFilter implements PathFilter {
    String reg = null;

    public MyPathFilter(String reg) {
        this.reg = reg;
    }

    @Override
    public boolean accept(Path path) {
        // Accept paths that do NOT match the pattern, i.e. filter out *.abc files.
        return !path.toString().matches(reg);
    }
}
Running it from the host produces:

... Connected to the target VM, address: '127.0.0.1:50938', transport: 'socket'
log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
9
Path: hdfs://hadoop-master:9000/user/hadoop/input/capacity-scheduler.xml
Size: 8260
Permissions: rw-r--r--
Content:
Exception in thread "main" org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-1745224033-127.0.1.1-1639451530131:blk_1073741825_1001 file=/user/hadoop/input/capacity-scheduler.xml
    at org.apache.hadoop.hdfs.DFSInputStream.refetchLocations(DFSInputStream.java:875)
    at org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:858)
    at org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:837)
    at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:566)
    at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:756)
    at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:825)
    at java.io.DataInputStream.read(DataInputStream.java:100)
    at com.ggkkdd.hdfs.example.MergeFile.doMerge(MergeFile.java:53)
    at com.ggkkdd.hdfs.example.MergeFile.main(MergeFile.java:68)
Disconnected from the target VM, address: '127.0.0.1:50938', transport: 'socket'
Process finished with exit code 1
Because the client is accessing the remote HDFS from outside the VM, it first contacts the NameNode, which returns the file metadata normally. But when it goes to read the actual blocks, the DataNode address the NameNode hands back is 127.0.0.1, which is unreachable from the host machine.
The fix is to make the client use the DataNode's hostname instead. Either enable it in code:
conf.set("dfs.client.use.datanode.hostname", "true");
or configure it in the client-side hdfs-site.xml:
<configuration>
    ...
    <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>true</value>
    </property>
</configuration>
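For context, here is how the one-line fix slots into the client setup from doMerge() above. This is a minimal sketch reusing the same hostname and port; the HdfsClientFix class name is just for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.net.URI;

// Minimal sketch: the client configuration from doMerge() with the fix applied.
public class HdfsClientFix {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop-master:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // The fix: the NameNode returns DataNode hostnames instead of IPs,
        // and the client resolves them through the local hosts file.
        conf.set("dfs.client.use.datanode.hostname", "true");
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop-master:9000"), conf)) {
            System.out.println("Connected: " + fs.getUri());
        }
    }
}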
Finally, map the DataNode's hostname to its IP in the local hosts file:
echo "x.x.x.x xxxhostname" | sudo tee -a /etc/hosts
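To confirm the mapping took effect, you can do a quick resolution check from the host. ResolveCheck is a throwaway helper, and the placeholder hostname mirrors the command above; substitute your DataNode's actual hostname:

import java.net.InetAddress;

// Hypothetical helper: verifies that the DataNode hostname added to
// /etc/hosts now resolves from this machine.
public class ResolveCheck {
    public static void main(String[] args) throws Exception {
        String host = args.length > 0 ? args[0] : "xxxhostname"; // your DataNode hostname
        InetAddress addr = InetAddress.getByName(host);
        System.out.println(host + " -> " + addr.getHostAddress());
    }
}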
You can check the hostname by running
hostname
on the server.
If you don't know the hostname and haven't configured a hosts entry, reads will fail with an unresolved-address error.
In that case, run the client in debug mode, set a breakpoint, and inspect the hostname it is trying to resolve.
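Alternatively, instead of stepping through with the debugger, you can print the block locations the NameNode reports, using the public FileSystem API. A sketch using the file from the stack trace above (ShowBlockLocations is an illustrative name):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.util.Arrays;

// Diagnostic sketch: prints, for each block of a file, the DataNode hostnames
// and addresses the NameNode reports. Before the fix the addresses show
// 127.0.0.1; the hosts column reveals the hostname to put in /etc/hosts.
public class ShowBlockLocations {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop-master:9000");
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/user/hadoop/input/capacity-scheduler.xml");
        FileStatus status = fs.getFileStatus(file);
        for (BlockLocation loc : fs.getFileBlockLocations(status, 0, status.getLen())) {
            System.out.println("hosts: " + Arrays.toString(loc.getHosts())
                    + "  addresses: " + Arrays.toString(loc.getNames()));
        }
    }
}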
Local hosts configuration (both names resolve to the same VM, since the NameNode and DataNode run on one machine here):
10.211.55.3 hadoop-master
10.211.55.3 ubuntu-linux-20-04-desktop
And the Java API call:
conf.set("dfs.client.use.datanode.hostname", "true");
I spent ages searching Google for a solution with no luck; back on Baidu I finally found a similar question, followed its advice, and it solved the problem perfectly.