赞
踩
FeatureHasher:利用哈希技巧(hashing trick)将任意数量、不同类型的列(如数值型、布尔型、字符串型等)映射为一个固定维度的特征向量。数值型列按其实际数值参与计算,字符串和布尔型列则作为类别特征处理。
/**
 * Demonstrates Spark ML's `FeatureHasher` transformer.
 *
 * Builds a small four-row DataFrame with a double, a boolean and two string
 * columns, hashes all four columns into a single vector column named
 * `features`, and prints the resulting DataFrame to stdout.
 *
 * Side effects: obtains (or creates) a local SparkSession with two worker
 * threads and writes the transformed table to the console. The session is
 * not stopped here because `getOrCreate()` may return a session shared with
 * other code.
 */
def FeatureHasher(): Unit = {
  import org.apache.spark.ml.feature.FeatureHasher
  import org.apache.spark.sql.SparkSession

  val spark: SparkSession = SparkSession.builder()
    .appName("implicits")
    .master("local[2]")
    .getOrCreate()

  // Sample rows mixing numeric, boolean and string values.
  val dataset = spark.createDataFrame(Seq(
    (2.2, true, "1", "foo"),
    (3.3, false, "2", "bar"),
    (4.4, false, "3", "baz"),
    (5.5, false, "4", "foo")
  )).toDF("real", "bool", "stringNum", "string")

  val hasher = new FeatureHasher()
    // Input columns to be hashed.
    .setInputCols("real", "bool", "stringNum", "string")
    // Output column that receives the hashed feature vector.
    .setOutputCol("features")

  val featurized = hasher.transform(dataset)

  // Print the feature vectors; `false` disables truncation of wide cells.
  featurized.show(false)
}
运行结果:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。