Requirement: compute workload from the working days in each month, taking national statutory holidays into account ("you must implement this, I don't care what method you use" — the client's exact words).
Hive's built-in functions don't cover this, so we have to roll our own. When it's impossible, make it possible; after all, the customer is God.
Hive supports user-defined functions (UDFs), so let's implement one according to its conventions.
The pom.xml is as follows:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.example</groupId>
  <artifactId>hive-udf</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>hive-udf</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.1.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-exec -->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>3.1.1</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
First, create a new class that extends UDF; it contains one or more evaluate methods, as follows.
The WorkDay class
package org.example;

import org.apache.hadoop.hive.ql.exec.UDF;
import java.text.SimpleDateFormat;
import java.util.*;

public final class WorkDay extends UDF {

    // Return the working days of a month, excluding weekends only
    public static List<Date> getWorkDays(int year, int month) {
        // Collects the working days of the month
        List<Date> dates = new ArrayList<Date>();
        Calendar cal = Calendar.getInstance();
        // Set the year
        cal.set(Calendar.YEAR, year);
        // Set the month (Calendar.MONTH is 0-based)
        cal.set(Calendar.MONTH, month - 1);
        // Start from the first day of the month
        cal.set(Calendar.DATE, 1);
        // Iterate while still inside the target month; the year check
        // handles December, where the month rolls over into the next year
        while (cal.get(Calendar.YEAR) == year && cal.get(Calendar.MONTH) < month) {
            // Which day of the week is the current day?
            int day = cal.get(Calendar.DAY_OF_WEEK);
            // Keep the date if it is neither Saturday nor Sunday
            if (!(day == Calendar.SUNDAY || day == Calendar.SATURDAY)) {
                dates.add((Date) cal.getTime().clone());
            }
            // Advance by one day
            cal.add(Calendar.DATE, 1);
        }
        // Working days of the month, weekends excluded
        return dates;
    }

    /**
     * @param dateList dates to format
     * @return the dates as yyyy-MM-dd strings
     */
    public static List<String> getDateString(List<Date> dateList) {
        List<String> dateString = new ArrayList<>();
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        for (Date date : dateList) {
            dateString.add(simpleDateFormat.format(date));
        }
        return dateString;
    }

    public static String evaluate(int year, int month) {
        // Working days with weekends excluded
        List<Date> workDays = getWorkDays(year, month);
        // The same days as strings
        List<String> dateString = getDateString(workDays);
        // Statutory holiday adjustments
        List<String> holidays = getHolidays();
        // Apply the adjustments: '+' lines are make-up working days
        // (weekend days that become workdays), '-' lines are holidays to remove
        for (String item : holidays) {
            if (item.indexOf('+') != -1) {
                // e.g. "+2020-12-26": chars 6-7 hold the month
                if (!dateString.contains(item.substring(1))
                        && Integer.valueOf(item.substring(6, 8)) == month) {
                    dateString.add(item.substring(1));
                }
            } else {
                // Removing a date that isn't in the list is a harmless no-op
                dateString.remove(item.substring(1));
            }
        }
        System.out.println("Work days: " + dateString.size());
        return String.valueOf(dateString.size());
    }

    public static void main(String[] args) {
        System.out.println(evaluate(2020, 5));
    }

    /**
     * @return the statutory holiday adjustment lines
     */
    public static List<String> getHolidays() {
        return ReadHdfsTxt.readTxtFile("/lib/Holidays.txt");
        // For local testing:
        // return ReadTxt.readTxtFile("d:\\Holidays.txt");
    }
}
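Before wiring in the holiday file, getWorkDays can be sanity-checked on its own. A minimal local check (the WorkDayCheck class name is mine, purely for illustration): May 2020 has 31 days, 10 of which fall on weekends, so 21 dates are expected before any holiday adjustments.

package org.example;

import java.util.Date;
import java.util.List;

// Quick local sanity check for getWorkDays, no HDFS involved
public class WorkDayCheck {
    public static void main(String[] args) {
        // May 2020: 31 days, 5 Saturdays + 5 Sundays on weekends
        List<Date> days = WorkDay.getWorkDays(2020, 5);
        System.out.println(days.size()); // prints 21
    }
}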
The Timeiter class (a date-range helper; not called by the UDF itself)
package org.example;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;

public class Timeiter {

    /**
     * Get every date between two dates, both endpoints included.
     *
     * @param startTime start date, yyyy-MM-dd
     * @param endTime   end date, yyyy-MM-dd
     * @return the dates as strings
     */
    public static List<String> getDays(String startTime, String endTime) {
        // The dates to return
        List<String> days = new ArrayList<String>();
        DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        try {
            Date start = dateFormat.parse(startTime);
            Date end = dateFormat.parse(endTime);

            Calendar tempStart = Calendar.getInstance();
            tempStart.setTime(start);

            Calendar tempEnd = Calendar.getInstance();
            tempEnd.setTime(end);
            tempEnd.add(Calendar.DATE, +1); // add one day so the end date is included

            while (tempStart.before(tempEnd)) {
                days.add(dateFormat.format(tempStart.getTime()));
                tempStart.add(Calendar.DAY_OF_YEAR, 1);
            }
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return days;
    }
}
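A small demo of getDays (the TimeiterDemo class name is mine); note that both endpoints appear in the output because tempEnd is advanced by one day before the loop:

package org.example;

// Illustrative use of Timeiter.getDays
public class TimeiterDemo {
    public static void main(String[] args) {
        // Prints [2020-12-30, 2020-12-31, 2021-01-01]
        System.out.println(Timeiter.getDays("2020-12-30", "2021-01-01"));
    }
}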
The ReadHdfsTxt class
package org.example;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads the holiday file from HDFS.
 *
 * @author jizhuang.wang
 * @version V1.0
 * @date 20201218 14:44
 */
public class ReadHdfsTxt {

    /**
     * Read the file contents line by line.
     */
    public static List<String> readTxtFile(String remoteFilePath) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://node1.bankbigdata.cn:8020");
        if (StringUtils.isEmpty(remoteFilePath)) {
            remoteFilePath = "/lib/Holidays.txt"; // default HDFS path
        }
        List<String> holidays = new ArrayList<>();
        try {
            FileSystem fs = FileSystem.get(conf);
            Path remotePath = new Path(remoteFilePath);
            FSDataInputStream in = fs.open(remotePath);
            BufferedReader d = new BufferedReader(new InputStreamReader(in));
            String lineTxt;
            while ((lineTxt = d.readLine()) != null) {
                holidays.add(lineTxt);
            }
            d.close();
            in.close();
            fs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return holidays;
    }

    /**
     * Main method for a quick standalone test.
     */
    public static void main(String[] args) {
        String remoteFilePath = "/lib/Holidays.txt"; // HDFS path
        try {
            System.out.println("Reading file: " + remoteFilePath);
            System.out.println(ReadHdfsTxt.readTxtFile(remoteFilePath));
            System.out.println("\nDone reading");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
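One caveat: fs.default.name has long been deprecated in favor of fs.defaultFS, and closing the streams manually leaks them if readLine throws mid-file. A sketch of the same read with the newer key and try-with-resources (same NameNode address as the rest of this post; the ReadHdfsTxt2 class name is mine):

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

// Same HDFS read as ReadHdfsTxt, restated with try-with-resources
public class ReadHdfsTxt2 {
    public static List<String> readTxtFile(String remoteFilePath) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://node1.bankbigdata.cn:8020"); // non-deprecated key
        List<String> lines = new ArrayList<>();
        // Both resources are closed automatically, even on exception
        try (FileSystem fs = FileSystem.get(conf);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(fs.open(new Path(remoteFilePath))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        return lines;
    }
}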
The ReadTxt class
package org.example;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads a local text file, for testing without HDFS.
 *
 * @author jizhuang.wang
 * @version V1.0
 * @date 20201218 12:28
 */
public class ReadTxt {

    /**
     * Read a txt file line by line:
     * 1. open the file,
     * 2. wrap it in a reader with the right encoding,
     * 3. read line by line with readLine(),
     * handling error cases along the way.
     *
     * @param filePath path of the local file
     */
    public static List<String> readTxtFile(String filePath) {
        List<String> holidays = new ArrayList<>();
        try {
            String encoding = "GBK";
            File file = new File(filePath);
            if (file.isFile() && file.exists()) { // make sure the file exists
                InputStreamReader read = new InputStreamReader(
                        new FileInputStream(file), encoding); // mind the encoding
                BufferedReader bufferedReader = new BufferedReader(read);
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    holidays.add(lineTxt);
                }
                read.close();
            } else {
                System.out.println("File not found");
            }
        } catch (Exception e) {
            System.out.println("Failed to read file");
            e.printStackTrace();
        }
        return holidays;
    }
}
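For local testing, getHolidays in WorkDay can be pointed at this class instead of the HDFS reader (the commented-out line in WorkDay shows the switch). A tiny demo (the ReadTxtDemo class name and the d:\ path are illustrative):

package org.example;

// Illustrative local test of the holiday file reader
public class ReadTxtDemo {
    public static void main(String[] args) {
        // Prints each '+'/'-' adjustment line from a local copy of Holidays.txt
        for (String line : ReadTxt.readTxtFile("d:\\Holidays.txt")) {
            System.out.println(line);
        }
    }
}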
maven->clean->install
The built jar appears under target/ (xxxx.jar).
Upload the jar to HDFS so it can be referenced for distributed execution.
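The jar, and the Holidays.txt it reads, can be pushed to HDFS with hdfs dfs -put; the /lib path here matches what the code and the ADD JAR statement below assume:

hdfs dfs -mkdir -p /lib
hdfs dfs -put target/hive-udf-1.0-SNAPSHOT.jar /lib/
hdfs dfs -put Holidays.txt /lib/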
Contents of Holidays.txt:
+2020-01-26
-2020-01-02
-2020-01-03
-2020-03-05
+2020-12-19
+2020-12-26
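Each line is an adjustment to the plain weekday calendar: a leading '+' marks a make-up working day (a weekend day that becomes a workday), a leading '-' marks a statutory holiday to drop. The substring indices in evaluate rely on this fixed "+yyyy-MM-dd" layout, as this standalone demo shows (the HolidayLineDemo class name is mine):

package org.example;

// How evaluate interprets one adjustment line
public class HolidayLineDemo {
    public static void main(String[] args) {
        String item = "+2020-12-26";
        String date = item.substring(1);                    // "2020-12-26"
        int month = Integer.parseInt(item.substring(6, 8)); // 12: chars 6-7 of "+yyyy-MM-dd"
        boolean makeUp = item.charAt(0) == '+';             // '+' = make-up workday, '-' = holiday
        System.out.println(date + " month=" + month + " makeUpWorkday=" + makeUp);
    }
}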
Open DBeaver (a GUI client for Hive) and register the function:
add jar hdfs://node1.bankbigdata.cn:8020/lib/hive-udf-1.0-SNAPSHOT.jar;
add file hdfs://node1.bankbigdata.cn:8020/lib/Holidays.txt; -- optional: the UDF reads this file straight from HDFS anyway
CREATE TEMPORARY FUNCTION work_hdfs_days AS 'org.example.WorkDay';
SELECT work_hdfs_days(2020, 12);
The query returns the number of working days in December 2020, with the make-up days and holidays from Holidays.txt applied. Done!