hadoop fs -put test.txt /test/
hadoop fs -appendToFile local.txt /test/test.txt
hadoop fs -cat /test/test.txt
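The upload and append steps can also be done through the HDFS Java API. The following is only a minimal sketch using the namenode address that appears later in this post (hdfs://47.113.222.36:9000); the class name UploadAndAppend and the local path ./local.txt are example choices here, and appending requires a cluster that permits appends.

package hadoop1;

import java.io.FileInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class UploadAndAppend {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://47.113.222.36:9000");
        FileSystem fs = FileSystem.get(conf);

        // equivalent of: hadoop fs -put test.txt /test/
        fs.copyFromLocalFile(new Path("./test.txt"), new Path("/test/test.txt"));

        // equivalent of: hadoop fs -appendToFile local.txt /test/test.txt
        FSDataOutputStream out = fs.append(new Path("/test/test.txt"));
        FileInputStream in = new FileInputStream("./local.txt"); // example local file
        IOUtils.copyBytes(in, out, 4096, true); // true closes both streams when done

        fs.close();
    }
}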
# download /test/test.txt; if a local file with the same name already exists, save it as test2.txt instead
if $(hadoop fs -test -e file:///usr/local/hadoop/test.txt);
then $(hadoop fs -copyToLocal /test/test.txt ./test2.txt);
else $(hadoop fs -copyToLocal /test/test.txt ./test.txt);
fi
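A Java counterpart of this download-with-rename logic could look like the sketch below; the class name DownloadWithRename is illustrative, and java.io.File is used only to check whether the local copy already exists.

package hadoop1;

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DownloadWithRename {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://47.113.222.36:9000");
        FileSystem fs = FileSystem.get(conf);

        String local = "/usr/local/hadoop/test.txt";
        // if a file with the same name already exists locally, save the download as test2.txt instead
        if (new File(local).exists()) {
            local = "/usr/local/hadoop/test2.txt";
        }
        fs.copyToLocalFile(new Path("/test/test.txt"), new Path(local));
        fs.close();
    }
}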
hadoop fs -cat /test/test.txt
hadoop fs -ls -h /test/test.txt
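The same metadata (permission, size, modification time, path) can be read through the Java API with FileSystem.getFileStatus. Below is a minimal sketch; the class name ShowFileInfo is an example choice.

package hadoop1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ShowFileInfo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://47.113.222.36:9000");
        FileSystem fs = FileSystem.get(conf);
        // getFileStatus returns permission, size, modification time and path of one file
        FileStatus status = fs.getFileStatus(new Path("/test/test.txt"));
        System.out.println("permission: " + status.getPermission());
        System.out.println("size      : " + status.getLen() + " bytes");
        System.out.println("modified  : " + new java.sql.Timestamp(status.getModificationTime()));
        System.out.println("path      : " + status.getPath());
        fs.close();
    }
}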
#!/bin/bash
# create: create the file /test/test1/file.txt (file.txt stands for whatever file name you want),
# creating the directory first if it does not exist yet
if $(hadoop fs -test -d /test/test1);
then $(hadoop fs -touchz /test/test1/file.txt);
else $(hadoop fs -mkdir -p /test/test1 && hadoop fs -touchz /test/test1/file.txt);
fi
# delete: remove the file created above if it exists
if $(hadoop fs -test -e /test/test1/file.txt);
then $(hadoop fs -rm /test/test1/file.txt);
fi
hadoop fs -appendToFile local.txt /test/test.txt
hadoop fs -rm /test/test.txt
# delete the directory /test/test1 recursively, whether or not it is empty
if $(hadoop fs -test -d /test/test1);
then $(hadoop fs -rm -r /test/test1);
fi
hadoop fs -mv /test/test.txt /test2
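The directory creation, file creation, deletion and move commands above all have Java API counterparts (mkdirs, createNewFile, delete, rename). The sketch below is only an illustration; the class name BasicFileOps and the exact paths are example choices, and rename requires the target directory to exist already.

package hadoop1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BasicFileOps {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://47.113.222.36:9000");
        FileSystem fs = FileSystem.get(conf);

        // mkdir -p: create a directory, including any missing parents
        fs.mkdirs(new Path("/test/test1"));

        // touchz: create an empty file (returns false if it already exists)
        fs.createNewFile(new Path("/test/test1/file.txt"));

        // rm / rm -r: the second argument selects recursive deletion
        fs.delete(new Path("/test/test1/file.txt"), false);
        fs.delete(new Path("/test/test1"), true);

        // mv: rename moves a file or directory within HDFS
        fs.rename(new Path("/test/test.txt"), new Path("/test2/test.txt"));

        fs.close();
    }
}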
package hadoop1;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ShowTheContent extends FSDataInputStream {

    public ShowTheContent(InputStream in) {
        super(in);
    }

    /**
     * Read one line: read one character at a time, stop at "\n",
     * and return the content of that line.
     */
    public static String readline(BufferedReader br) throws IOException {
        char[] data = new char[1024];
        int read = -1;
        int off = 0;
        // Across calls br continues from where the previous read stopped,
        // so off is reset to 0 each time this method is entered.
        while ((read = br.read(data, off, 1)) != -1) {
            if (String.valueOf(data[off]).equals("\n")) {
                off += 1;
                break;
            }
            off += 1;
        }
        if (off > 0) {
            return String.valueOf(data, 0, off);
        } else {
            return null;
        }
    }

    /**
     * Print the content of an HDFS file line by line.
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String line = null;
        while ((line = ShowTheContent.readline(br)) != null) {
            System.out.println(line);
        }
        br.close();
        in.close();
        fs.close();
    }

    /**
     * Main entry point.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://47.113.222.36:9000");
        String remoteFilePath = "/user/root/test/file1.txt"; // HDFS path
        try {
            ShowTheContent.cat(conf, remoteFilePath);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package hadoop2;

import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class MyFSDataInputStream {

    static {
        // Let java.net.URL understand hdfs:// URLs
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Main entry point.
     */
    public static void main(String[] args) throws Exception {
        String remoteFilePath = "hdfs:///user/root/test/file2.txt"; // HDFS file
        InputStream in = null;
        try {
            // Open an input stream through the URL object and copy it to stdout
            in = new URL(remoteFilePath).openStream();
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
File name | File content |
---|---|
file1.txt | this is file1.txt |
file2.txt | this is file2.txt |
file3.txt | this is file3.txt |
file4.abc | this is file4.abc |
file5.abc | this is file5.abc |
Upload these files to the "/user/hadoop" directory in HDFS. Referring to Section 7.3 "Common HDFS Java APIs and application examples" of Chapter 3 "Distributed File System HDFS" in the lecture notes, write a Java application that selects, from that directory, all files whose suffix is not ".abc", reads the selected files, and merges their contents into the file "/user/hadoop/merge.txt".
package hadoop;

import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

/**
 * Path filter that accepts only paths matching the given regular expression.
 */
class MyPathFilter implements PathFilter {
    String reg = null;

    MyPathFilter(String reg) {
        this.reg = reg;
    }

    public boolean accept(Path path) { // ①
        if (path.toString().matches(reg)) {
            return true;
        }
        return false;
    }
}

public class Merge {
    Path inputPath = null;
    Path outputPath = null;

    public Merge(String input, String output) {
        this.inputPath = new Path(input);
        this.outputPath = new Path(output);
    }

    public void doMerge() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://47.113.222.36:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fsSource = FileSystem.get(URI.create(inputPath.toString()), conf);
        FileSystem fsDst = FileSystem.get(URI.create(outputPath.toString()), conf);
        // keep only the ".txt" files in the input directory, i.e. drop the ".abc" ones
        FileStatus[] sourceStatus = fsSource.listStatus(inputPath, new MyPathFilter(".*\\.txt"));
        FSDataOutputStream fsdos = fsDst.create(outputPath);
        PrintStream ps = new PrintStream(System.out);
        for (FileStatus sta : sourceStatus) {
            System.out.println("path : " + sta.getPath() + " file size : " + sta.getLen()
                    + " auth: " + sta.getPermission());
            System.out.println("next");
            FSDataInputStream fsdis = fsSource.open(sta.getPath());
            byte[] data = new byte[1024];
            int read = -1;
            // copy each selected file both to stdout and into the merged output file
            while ((read = fsdis.read(data)) > 0) {
                ps.write(data, 0, read);
                fsdos.write(data, 0, read);
            }
            fsdis.close();
        }
        ps.close();
        fsdos.close();
    }

    public static void main(String[] args) throws IOException {
        Merge merge = new Merge(
                "hdfs://47.113.222.36:9000/user/root/test",
                "hdfs://47.113.222.36:9000/user/root/merge.txt");
        merge.doMerge();
    }
}
hadoop jar ./myapp/hadoop.jar