cd /usr/local/hadoop
./bin/hdfs dfs -mkdir -p /user/hadoop
./bin/hdfs dfs -put test.txt /user/hadoop/test
./bin/hdfs dfs -appendToFile local.txt /user/hadoop/test/test.txt
./bin/hdfs dfs -cat /user/hadoop/test/test.txt
# If test.txt already exists locally, save the downloaded copy as test2.txt instead
if $(hadoop fs -test -e file:///usr/local/hadoop/test.txt);
then $(hadoop fs -copyToLocal /user/hadoop/test/test.txt ./test2.txt);
else $(hadoop fs -copyToLocal /user/hadoop/test/test.txt ./test.txt);
fi
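The same download-with-rename logic can be written against the HDFS Java API. Below is a minimal sketch, not the exact code used in this lab: the class name DownloadWithRename and the local working directory are illustrative, and fs.defaultFS is assumed to be hdfs://localhost:9000 as in the Java examples later in this post.

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: download /user/hadoop/test/test.txt, renaming the local copy if test.txt already exists.
public class DownloadWithRename {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed NameNode address
        FileSystem fs = FileSystem.get(conf);

        Path remote = new Path("/user/hadoop/test/test.txt");
        // Choose the local name depending on whether test.txt already exists in the working directory
        String localName = new File("test.txt").exists() ? "test2.txt" : "test.txt";
        fs.copyToLocalFile(remote, new Path(localName));
        fs.close();
    }
}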
hadoop fs -cat /user/hadoop/test/test.txt
hadoop fs -ls -h /user/hadoop/test/test.txt
hadoop fs -ls -h /user/hadoop/test
# If the directory does not exist, create it; if it already exists, create an empty file inside it
# (the file name test1.txt below is only an example)
if $(hadoop fs -test -d /user/hadoop/test/test1);
then $(hadoop fs -touchz /user/hadoop/test/test1/test1.txt);
else $(hadoop fs -mkdir -p /user/hadoop/test/test1);
fi
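A Java equivalent of the check-then-create step above, sketched with FileSystem.exists() and mkdirs(); the class name EnsureDirectory is illustrative and the NameNode address is assumed to be hdfs://localhost:9000.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: create /user/hadoop/test/test1 only if it does not exist yet.
public class EnsureDirectory {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed NameNode address
        FileSystem fs = FileSystem.get(conf);

        Path dir = new Path("/user/hadoop/test/test1");
        if (!fs.exists(dir)) {
            fs.mkdirs(dir); // creates the directory and any missing parents
        } else {
            System.out.println(dir + " already exists");
        }
        fs.close();
    }
}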
# list the directory to verify the result
hadoop fs -ls -h /user/hadoop/test
hadoop fs -rm -r /user/hadoop/test/test1
Append to the end of the file:
hadoop fs -appendToFile local.txt /user/hadoop/test/test.txt
hadoop fs -cat /user/hadoop/test/test.txt
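The append can also be done programmatically with FileSystem.append(), which returns an output stream positioned at the end of the existing file (append support must be enabled on the cluster; it is by default in Hadoop 2+). A minimal sketch with the same paths as above; the class name AppendToEnd is illustrative.

import java.io.FileInputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Sketch: append the contents of the local file local.txt to the end of an HDFS file.
public class AppendToEnd {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed NameNode address
        FileSystem fs = FileSystem.get(conf);

        FileInputStream in = new FileInputStream("local.txt");
        FSDataOutputStream out = fs.append(new Path("/user/hadoop/test/test.txt"));
        IOUtils.copyBytes(in, out, 4096, true); // true: close both streams when done
        fs.close();
    }
}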
Append to the beginning of the file:
HDFS has no command for appending to the beginning of an existing file, so this cannot be done with a single command. Instead, copy the file to the local filesystem, do the editing there, and then upload it back to overwrite the original (to modify test.txt, append the contents of test.txt to the end of local.txt, then upload local.txt):
hadoop fs -get /user/hadoop/test/test.txt
cat test.txt >> local.txt
hadoop fs -copyFromLocal -f local.txt /user/hadoop/test/test.txt
hadoop fs -cat /user/hadoop/test/test.txt
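The same prepend workaround can be done entirely through the Java API without a temporary local copy of the HDFS file: read the remote content into memory, then rewrite the file with the new content first. A minimal sketch; it buffers the whole file in memory, so it only suits small files, and the class name PrependToFile is illustrative.

import java.io.ByteArrayOutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Sketch: prepend the contents of the local file local.txt to an HDFS file
// by reading the old content, then rewriting the file with the new content first.
public class PrependToFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed NameNode address
        FileSystem fs = FileSystem.get(conf);
        Path remote = new Path("/user/hadoop/test/test.txt");

        // 1. Read the current HDFS file into memory
        ByteArrayOutputStream oldContent = new ByteArrayOutputStream();
        FSDataInputStream in = fs.open(remote);
        IOUtils.copyBytes(in, oldContent, 4096, true);

        // 2. Overwrite the file: new local content first, old content after it
        byte[] localBytes = Files.readAllBytes(Paths.get("local.txt"));
        FSDataOutputStream out = fs.create(remote, true); // true: overwrite if it exists
        out.write(localBytes);
        out.write(oldContent.toByteArray());
        out.close();
        fs.close();
    }
}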
hadoop fs -rm /user/hadoop/test/test.txt
hadoop fs -ls /user/hadoop/test
# If the path is a directory, delete it recursively; otherwise delete it as a regular file
if $(hadoop fs -test -d /user/hadoop/test/test1);
then $(hadoop fs -rm -r /user/hadoop/test/test1);
else $(hadoop fs -rm /user/hadoop/test/test1);
fi
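In Java the same delete step is a single call to FileSystem.delete(), whose second argument enables recursive deletion; a minimal sketch, with the illustrative class name DeletePath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: delete /user/hadoop/test/test1 if it exists; delete recursively only when it is a directory.
public class DeletePath {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed NameNode address
        FileSystem fs = FileSystem.get(conf);

        Path target = new Path("/user/hadoop/test/test1");
        if (fs.exists(target)) {
            boolean recursive = fs.getFileStatus(target).isDirectory();
            fs.delete(target, recursive);
        }
        fs.close();
    }
}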
Move a file from a source path to a destination path within HDFS:
hadoop fs -mv /user/hadoop/test/local.txt /user/hadoop/test2
hadoop fs -ls /user/hadoop/test2
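Moving within HDFS maps to FileSystem.rename(); a minimal sketch with the same source file as the shell command, where the destination path is spelled out explicitly and the class name MoveFile is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: move an HDFS file by renaming it to a path under the destination directory.
public class MoveFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed NameNode address
        FileSystem fs = FileSystem.get(conf);

        Path src = new Path("/user/hadoop/test/local.txt");
        Path dst = new Path("/user/hadoop/test2/local.txt"); // destination given as an explicit file path
        boolean ok = fs.rename(src, dst);
        System.out.println(ok ? "moved" : "move failed");
        fs.close();
    }
}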
package hadoop1;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ShowTheContent extends FSDataInputStream {

    public ShowTheContent(InputStream in) {
        super(in);
    }

    /**
     * Read one line: characters are read one at a time until "\n" is met,
     * and the line read so far is returned.
     */
    public static String readline(BufferedReader br) throws IOException {
        char[] data = new char[1024];
        int read = -1;
        int off = 0;
        // Each call continues reading from where the previous read of br stopped,
        // so off starts from 0 on every call to this method.
        while ((read = br.read(data, off, 1)) != -1) {
            if (String.valueOf(data[off]).equals("\n")) {
                off += 1;
                break;
            }
            off += 1;
        }
        if (off > 0) {
            return String.valueOf(data, 0, off);
        } else {
            return null;
        }
    }

    /**
     * Print the content of an HDFS file line by line.
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String line = null;
        while ((line = ShowTheContent.readline(br)) != null) {
            System.out.println(line);
        }
        br.close();
        in.close();
        fs.close();
    }

    /**
     * Main entry point.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        String remoteFilePath = "/user/hadoop/test2/local.txt"; // HDFS path
        try {
            ShowTheContent.cat(conf, remoteFilePath);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
hadoop jar ./jar/2.jar
package hadoop2;

import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class MyFSDataInputStream {

    static {
        // Let java.net.URL recognize the hdfs:// scheme
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Main entry point.
     */
    public static void main(String[] args) throws Exception {
        // HDFS file; with no authority in the URL, the default filesystem comes from
        // the cluster configuration on the classpath when run with "hadoop jar"
        String remoteFilePath = "hdfs:///user/hadoop/test2/local.txt";
        InputStream in = null;
        try {
            // Open a stream through the URL object and read from it
            in = new URL(remoteFilePath).openStream();
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
hadoop jar ./jar/1.jar
File name | File content |
---|---|
file1.txt | this is file1.txt |
file2.txt | this is file2.txt |
file3.txt | this is file3.txt |
file4.abc | this is file4.abc |
file5.abc | this is file5.abc |
Upload these files to the HDFS directory "/user/hadoop". Referring to Section 7.3, "Common HDFS Java APIs and Application Cases", in Chapter 3, "Distributed File System HDFS", of the lecture notes, write a Java application that selects all files in that directory whose extension is not ".abc", reads the selected files, and merges their contents into the file "/user/hadoop/merge.txt".
package hadoop3;

import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

/**
 * Path filter that accepts only paths matching the given regular expression.
 */
class MyPathFilter implements PathFilter {
    String reg = null;

    MyPathFilter(String reg) {
        this.reg = reg;
    }

    public boolean accept(Path path) {
        // ① keep only the paths that match the regex; ".*\.txt" selects the .txt files,
        // which for this data set is the same as excluding the ".abc" files
        if (path.toString().matches(reg)) {
            return true;
        }
        return false;
    }
}

public class Merge {
    Path inputPath = null;   // directory whose files will be merged
    Path outputPath = null;  // target file for the merged content

    public Merge(String input, String output) {
        this.inputPath = new Path(input);
        this.outputPath = new Path(output);
    }

    public void doMerge() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fsSource = FileSystem.get(URI.create(inputPath.toString()), conf);
        FileSystem fsDst = FileSystem.get(URI.create(outputPath.toString()), conf);

        // List the files in the input directory, keeping only those accepted by the filter
        FileStatus[] sourceStatus = fsSource.listStatus(inputPath, new MyPathFilter(".*\\.txt"));
        FSDataOutputStream fsdos = fsDst.create(outputPath);
        PrintStream ps = new PrintStream(System.out);

        for (FileStatus sta : sourceStatus) {
            System.out.println("path : " + sta.getPath() + " file size : " + sta.getLen()
                    + " auth: " + sta.getPermission());
            System.out.println("next");
            // Copy the content of each selected file to both stdout and merge.txt
            FSDataInputStream fsdis = fsSource.open(sta.getPath());
            byte[] data = new byte[1024];
            int read = -1;
            while ((read = fsdis.read(data)) > 0) {
                ps.write(data, 0, read);
                fsdos.write(data, 0, read);
            }
            fsdis.close();
        }
        ps.close();
        fsdos.close();
    }

    public static void main(String[] args) throws IOException {
        Merge merge = new Merge(
                "hdfs://localhost:9000/user/hadoop",
                "hdfs://localhost:9000/user/hadoop/merge.txt");
        merge.doMerge();
    }
}
hadoop@ztb-virtual-machine:/usr/local/hadoop$ hadoop jar ./jar/3.jar
path : hdfs://localhost:9000/user/hadoop/file1.txt file size : 18 auth: rw-r--r--
next
2021-09-28 21:15:47,808 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
this is file1.txt
path : hdfs://localhost:9000/user/hadoop/file2.txt file size : 18 auth: rw-r--r--
next
this is file2.txt
path : hdfs://localhost:9000/user/hadoop/file3.txt file size : 18 auth: rw-r--r--
next
this is file3.txt
2021-09-28 21:15:48,248 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
For more details, see: https://blog.csdn.net/qq_50596778