赞
踩
日期函数
- from pyspark.sql.functions import current_date
-
- spark.range(3).withColumn('date',current_date()).show()
- # +---+----------+
- # | id| date|
- # +---+----------+
- # | 0|2018-03-23|
- # | 1|2018-03-23|
- # | 2|2018-03-23|
- # +---+----------+
- from pyspark.sql.functions import current_timestamp
-
- spark.range(3).withColumn('date',current_timestamp()).show()
- # +---+--------------------+
- # | id| date|
- # +---+--------------------+
- # | 0|2018-03-23 17:40:...|
- # | 1|2018-03-23 17:40:...|
- # | 2|2018-03-23 17:40:...|
- # +---+--------------------+
- from pyspark.sql.functions import date_format
-
- df = spark.createDataFrame([('2015-04-08',)], ['a'])
-
- df.select(date_format('a', 'MM/dd/yyyy').alias('date')).show()
- from pyspark.sql.functions import to_date, to_timestamp
-
- # 1.转日期
- df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
- df.select(to_date(df.t).alias('date')).show()
- # collect() 的结果: [Row(date=datetime.date(1997, 2, 28))]
-
- # 2.带时间的日期
- df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
- df.select(to_timestamp(df.t).alias('dt')).show()
- # collect() 的结果: [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
-
- # 还可以指定日期格式
- df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
- df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).show()
- # collect() 的结果: [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
- from pyspark.sql.functions import year, month, dayofmonth
-
- df = spark.createDataFrame([('2015-04-08',)], ['a'])
- df.select(year('a').alias('year'),
- month('a').alias('month'),
- dayofmonth('a').alias('day')
- ).show()
- from pyspark.sql.functions import hour, minute, second
- df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
- df.select(hour('a').alias('hour'),
- minute('a').alias('minute'),
- second('a').alias('second')
- ).show()
- from pyspark.sql.functions import quarter
-
- df = spark.createDataFrame([('2015-04-08',)], ['a'])
- df.select(quarter('a').alias('quarter')).show()
- from pyspark.sql.functions import date_add, date_sub
- df = spark.createDataFrame([('2015-04-08',)], ['d'])
- df.select(date_add(df.d, 1).alias('d-add'),
- date_sub(df.d, 1).alias('d-sub')
- ).show()
- from pyspark.sql.functions import add_months
- df = spark.createDataFrame([('2015-04-08',)], ['d'])
-
- df.select(add_months(df.d, 1).alias('d')).show()
- from pyspark.sql.functions import datediff, months_between
-
- # 1.日期差
- df = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2'])
- df.select(datediff(df.d2, df.d1).alias('diff')).show()
-
- # 2.月份差
- df = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['t', 'd'])
- df.select(months_between(df.t, df.d).alias('months')).show()
next_day:计算给定日期之后的下一个指定星期几(周一至周日)对应的具体日期,属于实用函数
- from pyspark.sql.functions import next_day
-
- # "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
- df = spark.createDataFrame([('2015-07-27',)], ['d'])
- df.select(next_day(df.d, 'Sun').alias('date')).show()
- from pyspark.sql.functions import last_day
-
- df = spark.createDataFrame([('1997-02-10',)], ['d'])
- df.select(last_day(df.d).alias('date')).show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。