赞
踩
异常消息如下
用户类引发异常:作业因阶段失败而中止:阶段1.0中的任务0失败4次,最近失败:阶段1.0中丢失任务0.3(TID 11,10.215.155.82):org.joda中的java.lang.NullPointerException .time.tz.CachedDateTimeZone.getInfo(CachedDateTimeZone.java:143)org.joda.time.ff.DachedTimeZone.getOffset(CachedDateTimeZone.java:103)org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java) :org.joda.time.base的org.joda.time.format.DateTimeFormatter.printTo(DateTimeFormatter.java:521)org.joda.time.base上的org.joda.time.format.DateTimeFormatter.print(DateTimeFormatter.java:625) . AbstractDateTime.toString(AbstractDateTime.java:328)at com.xxx.ieg.face.demo.DateTimeNullReferenceReappear
我的代码如下:
import org.apache.hadoop.conf.Configuration
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.{ SparkConf, SparkContext }
import org.joda.time.DateTime
import org.joda.time.format.{ DateTimeFormat, DateTimeFormatter }
object DateTimeNullReferenceReappear extends App {
case class Record(uin: String = "", date: DateTime = null, value: Double = 0.0)
val cfg = new Configuration
val sparkConf = new SparkConf()
sparkConf.setAppName("bourne_exception_reappear")
val sc = new SparkContext(sparkConf)
val data = TDWSparkContext.tdwTable( // this function just read data from an data warehouse
sc,
tdwuser = FaceConf.TDW_USER,
tdwpasswd = FaceConf.TDW_PASSWORD,
dbName = "my_db",
tblName = "my_table",
parts = Array("p_20150323", "p_20150324", "p_20150325", "p_20150326", "p_20150327", "p_20150328", "p_20150329"))
.map(row => {
Record(uin = row(2),
date = DateTimeFormat.forPattern("yyyyMMdd").parseDateTime(row(0)),
value = row(4).toDouble)
}).map(x => (x.uin, (x.date, x.value)))
.groupByKey
.map(x => {
x._2.groupBy(_._1.toString("yyyyMMdd")).mapValues(_.map(_._2).sum) // throw exception here
})
// val data = TDWSparkContext.tdwTable( // It works, as I don't user datetime toString in the groupBy
// sc,
// tdwuser = FaceConf.TDW_USER,
// tdwpasswd = FaceConf.TDW_PASSWORD,
// dbName = "hy",
// tblName = "t_dw_cf_oss_tblogin",
// parts = Array("p_20150323", "p_20150324", "p_20150325", "p_20150326", "p_20150327", "p_20150328", "p_20150329"))
// .map(row => {
// Record(uin = row(2),
// date = DateTimeFormat.forPattern("yyyyMMdd").parseDateTime(row(0)),
// value = row(4).toDouble)
// }).map(x => (x.uin, (x.date.toString("yyyyMMdd"), x.value)))
// .groupByKey
// .map(x => {
// x._2.groupBy(_._1).mapValues(_.map(_._2).sum)
// })
data.take(10).map(println)
}
所以,似乎在 groupBy 中调用 toString 会导致异常,所以有人可以解释一下吗?
谢谢
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。