赞
踩
cast(substr(‘2020-03-16 18:48:09’, 1, 10) as date) as d
结果:2020-03-16
date_parse(‘2020-03-16 18:48:09’, ‘%Y-%m-%d %T’) as d
结果:1584355711000,毫秒多三个000
date_diff(‘day’,cast(‘2018-09-05’ as date),cast(‘2018-09-07’ as date))
date_diff(‘day’, cast(date_format(date_parse(dt, ‘%Y%m%d’), ‘%Y-%m-%d’) as date), cast(json_extract_scalar(p_detail, ‘$.flightDate’) as date)) as ndo
结果:2 后面减去前面
1、hive取得当前日期时间(带 - 不带 - ):
select current_date()
结果:2020-04-09
date_format(current_date(),‘yyyyMMdd’)
结果:20200409
2、取得当前日期时间(带毫秒,以及不带毫秒):
select current_timestamp()
结果:2020-04-09 16:35:47.084
select date_format(current_timestamp(),‘yyyy-MM-dd HH:mm:ss’)
结果:2020-04-09 16:35:47
3、hive取得当前时间戳:
select unix_timestamp();
结果:1586421405
4、时间戳转日期:
select from_unixtime(1517725479, ‘yyyy-MM-dd HH:dd:ss’);
结果:2018-02-04 14:04:39
5、日期转unix时间戳:
select unix_timestamp(‘2017-01-01 12:12:12’,‘yyyy-MM-dd HH:dd:ss’);
结果:1484193612
6、hive取得当前时间(年月日时分秒):
select from_unixtime(unix_timestamp(),‘yyyy-MM-dd HH:dd:ss’);
结果:2020-04-09 16:09:11
7、hive取得当前时间(年月日):
from_unixtime(unix_timestamp(),‘yyyy-MM-dd’)
结果:2020-04-09
8、取得昨天日期:
select date_add(from_unixtime(unix_timestamp(),‘yyyy-MM-dd’), -1);
结果:2020-04-08
9、取得明天日期:
select date_add(from_unixtime(unix_timestamp(),‘yyyy-MM-dd’), 1);
结果:2020-04-10
10、hive取得两个日期之间差值(差值为天数)必须为 ‘yyyy-MM-dd’ 形式,前面减去后面:
select datediff(‘2020-04-09’, ‘2020-04-01’)
结果:8
11、时间改格式
from_unixtime(unix_timestamp(‘20200406’, ‘yyyyMMdd’),‘yyyy-MM-dd’)
2020-04-06
12、获取当前小时
hour(from_unixtime(unix_timestamp(),‘yyyy-MM-dd HH:dd:ss’))
结果:16
13、where dt = 昨天
datediff(from_unixtime(unix_timestamp(dt, ‘yyyymmdd’), ‘yyyy-mm-dd’), from_unixtime(unix_timestamp(),‘yyyy-MM-dd’)) = -1
字符串转 date 使用 strp ,并相减
# 这两个是准的,days 如果小于 0 则是 -1,大于 0 则是0,1....
# 但是如果用 seconds 是不行的,因为二者相减 = days * 86400 + seconds = total_seconds()
(datetime.datetime.strptime('2020-12-08 12:30:46', "%Y-%m-%d %H:%M:%S") - datetime.datetime.strptime('2020-12-08 15:42:07', "%Y-%m-%d %H:%M:%S")).days
(datetime.datetime.strptime('2020-12-08 12:30:46', "%Y-%m-%d %H:%M:%S") - datetime.datetime.strptime('2020-12-08 15:42:07', "%Y-%m-%d %H:%M:%S")).seconds
(datetime.datetime.strptime('2020-12-08 12:30:46', "%Y-%m-%d %H:%M:%S") - datetime.datetime.strptime('2020-12-08 15:42:07', "%Y-%m-%d %H:%M:%S")).total_seconds()
df['diff_day'] = df.apply(lambda row: (row['create_time'] - row['p_sendtime']).total_seconds() if row['create_time'] == row['create_time'] else 99999, axis=1)
字符串转 date & 时间戳转 date:
import pytz import datetime from datetime import timezone,timedelta # 对的时间,和对的时间戳 datetime.datetime.now(pytz.timezone('Asia/Shanghai')) datetime.datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d %H:%M:%S") datetime.datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() p_createtime = time.mktime(datetime.datetime.now().timetuple()) p_createtime = time.mktime(datetime.datetime.strptime(create_time, "%Y-%m-%d %H:%M:%S").timetuple()) # time.struct_time(tm_year=2020, tm_mon=3, tm_mday=3, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=1, tm_yday=63, tm_isdst=0) time_local = time.localtime(1583164800) # '2020-03-03 00:00:00' format_time = time.strftime("%Y-%m-%d %H:%M:%S", time_local) # 获取当前时间,转 str datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') # 获取星期几 d = datetime.datetime.now() d.weekday() # 获取前一天 yesterday = datetime.today() + timedelta(-1) label_1day_before = datetime.datetime.strftime((datetime.datetime.now() - datetime.timedelta(1)), "%Y%m%d") # 获取前一天 today = datetime.datetime.strftime((datetime.datetime.now() - datetime.timedelta(0)), "%Y%m%d") today = datetime.datetime.strptime(today, '%Y%m%d') last_day = today - datetime.timedelta(1)
# 获取当前日期,并字符串格式化 df = df.withColumn("today", F.date_format(F.current_date(), "yyyyMMdd")) # 字符串转 date,并计算时间差,不用 time_fun 自定义 udf 函数 df = df.withColumn('diff_day', F.datediff(F.date_format(F.from_unixtime(F.unix_timestamp('today', 'yyyyMMdd')), "yyyy-MM-dd"), F.date_format(F.from_unixtime(F.unix_timestamp('dt', 'yyyyMMdd')), "yyyy-MM-dd"))) # time 是 毫秒级的 str server_df = server_df.withColumn("time", F.col("time").cast(DoubleType())) server_df = server_df.withColumn('timestamp', F.col("time") / 1000.0) server_df = (server_df.select('username', 'time', 'dt', 'timestamp').withColumn("time", F.from_unixtime("timestamp"))) server_df = server_df.withColumn("hour", F.substring(F.col("time"), 12, 2).cast(IntegerType())) # 日期相加减 portrait_df = portrait_df.withColumnRenamed("dt", "real_dt").withColumnRenamed("user_id", "uid")\ .withColumn("dt", F.date_add(F.date_format(F.from_unixtime(F.unix_timestamp('real_dt', 'yyyyMMdd')), "yyyy-MM-dd"), 1)) # 时间加减,求分钟、小时、等 df.withColumn("first_fly_time", (F.unix_timestamp("p_first_flight_arrival_time", "yyyy-MM-dd HH:mm") - F.unix_timestamp("p_first_flight_departure_time", "yyyy-MM-dd HH:mm")) / 60)\
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。