赞
踩
实际工程开发中,算法同学用Python训练好模型后,需要提供给Java后端调用,但一般用joblib.dump
保存的模型Java后端无法直接使用,因此需要借助专门的Python模型保存库(sklearn2pmml)和对应的jar包(pmml-evaluator)来进行模型的保存和读取。
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.ensemble import RandomForestClassifier
# Hyper-parameters of the random forest, gathered in one place.
forest_params = dict(
    n_estimators=195,
    max_depth=14,
    max_features=11,
    oob_score=True,
    random_state=123,
    n_jobs=-1,
    verbose=0,
)
model = RandomForestClassifier(**forest_params)

# Wrap the estimator in a single-step PMML pipeline and fit it.
# train_x / train_y are expected to be defined earlier in the script.
pipeline_steps = [("random_forest", model)]
pipeline_model = PMMLPipeline(pipeline_steps)
pipeline_model.fit(train_x, train_y)

# Export the fitted pipeline to a PMML file that the Java side can load.
sklearn2pmml(
    pipeline_model,
    "./model/random_forest.pmml",
    with_repr=True,
)
需要用到的依赖
<dependency>
<groupId>org.jpmml</groupId>
<artifactId>pmml-evaluator</artifactId>
<version>1.6.4</version>
</dependency>
具体实现调用代码
import org.dmg.pmml.Field; import org.jpmml.model.*; import org.jpmml.evaluator.*; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.*; public class load_model { public Evaluator load_pmml (String pmml_model_path) throws Exception{ Evaluator evaluator = new LoadingModelEvaluatorBuilder().load(new File(pmml_model_path)).build(); return evaluator; } public static Float accuracy(int tp, int tn, int fp, int fn){ float tp1 = (float) tp; float tn1 = (float) tn; float fp1 = (float) fp; float fn1 = (float) fn; return (tp1 + tn1) / (tp1 + tn1 + fp1 + fn1); } public static Float metric_precision(int tp, int tn, int fp, int fn){ float tp1 = (float) tp; float tn1 = (float) tn; float fp1 = (float) fp; float fn1 = (float) fn; return (tp1) / (tp1 + fp1); } public static Float metric_recall(int tp, int tn, int fp, int fn){ float tp1 = (float) tp; float tn1 = (float) tn; float fp1 = (float) fp; float fn1 = (float) fn; return (tp1) / (tp1 + fn1); } public boolean check_input(Evaluator evaluator, Map<String, Float> input_sample){ List<InputField> inputFields = evaluator.getInputFields(); List<String> input_name = new ArrayList<String>(); int index = 0; for (InputField inputField : inputFields){ input_name.add(inputField.getName().toString()); boolean flag = input_sample.containsKey(inputField.getName().toString()); if (!flag){ index ++; System.out.println( index + " 输入数据缺少该特征:" + inputField); } } int redundant = 0; for (String i : input_sample.keySet()){ if (!input_name.contains(i)){ redundant ++; System.out.println(redundant + " 输入特征冗余: " + i ); } } if (index != 0 || redundant != 0){ return false; } return true; } public List<Map<String, Float>> load_csv_file(String csv_path, int limit){ BufferedReader reader = null; ArrayList<Map<String, Float>> data = new ArrayList(); try{ reader = new BufferedReader(new FileReader(csv_path)); String[] title = reader.readLine().split(","); int count = 1; String line = null; 
while ((count <= limit) & ((line = reader.readLine()) != null)){ String[] item = line.split(","); Map<String, Float> temp = new HashMap<String, Float>(); for (int i = 0; i < item.length; i++){ // System.out.println(title[i] + item[i]); if ((!title[i].equals("d")) & (!title[i].equals("ip"))){ temp.put(title[i], Float.parseFloat(item[i])); } } data.add(temp); count += 1; } } catch (IOException e){ e.printStackTrace(); } return data; } public void predict(List<Map<String, Float>> data, Evaluator evaluator){ Long timestamp1 = System.currentTimeMillis(); List<Float> label = new ArrayList(); List<Float> pre = new ArrayList(); List<Object> result = new ArrayList(); int tp = 0; int tn = 0; int fp = 0; int fn = 0; for (Map<String, Float> sample : data){ Map<String, ?> res = evaluator.evaluate(sample); res = EvaluatorUtil.decodeAll(res); Float temp_label = Float.parseFloat(sample.get("label").toString()); Float temp = Float.parseFloat(res.get("label").toString()); label.add(temp_label); result.add(temp); if ((temp == (float) 1) & (temp_label == (float) 0)){ fp += 1; } if ((temp == (float) 1) & (temp_label == (float) 1)){ tp += 1; } if ((temp == (float) 0) & (temp_label == (float) 0)){ tn += 1; } if ((temp == (float) 0) & (temp_label == (float) 1)){ fn += 1; } } long timestamp2 = System.currentTimeMillis(); Float acc = accuracy(tp, tn, fp, fn); Float precison = metric_precision(tp, tn, fp, fn); Float recall = metric_recall(tp, tn, fp, fn); Float error = error_ratio(tp, tn, fp, fn); System.out.printf("time : %s, total samples: %d, right: %d\n", (timestamp2 - timestamp1) / (float) 1000, result.size(), right); System.out.printf("accuracy: %f, precision: %f, recall: %f, error: %f\n", acc, precison, recall, error); } public static void main(String[] args) throws Exception { load_model test = new load_model(); // 直接从pmml文件读取模型 String pmml_path = "random_forest_offline_train.pmml"; Evaluator evaluator = test.load_pmml(pmml_path); // 模型的输入特征 List<InputField> inputFields = 
evaluator.getInputFields(); int index = 1; for (InputField inputField : inputFields){ System.out.println("index: " + index + " " + inputField.toString()); index ++; } // 模型输出类型 // label List<TargetField> targetFields = evaluator.getTargetFields(); // System.out.println(targetFields.toString()); // 类别的概率 List<OutputField> outputFields = evaluator.getOutputFields(); // System.out.println(outputFields.get(0) + "\n" + outputFields.get(1).toString()); // 输入特征与模型中特征不匹配时,输出为null Map<String, ?> res = evaluator.evaluate(fea); res = EvaluatorUtil.decodeAll(res); System.out.println("label: " + res.get("label")); System.out.println("label = 0 的概率: " + res.get("probability(0)")); System.out.println("label = 1 的概率: " + res.get("probability(1)")); // csv文件批量数据测试 String csv_path = "data.csv"; int limit = 10000; // 只取部分数据测试 List<Map<String, Float>> data = test.load_csv_file(csv_path, limit); // 测试输入特征与模型特征是否一致 System.out.println("输入数据与模型特征是否一致: " + test.check_input(evaluator, data.get(0))); test.predict(data, evaluator); // Making the model evaluator eligible for garbage collection evaluator = null; } }
注意:上述代码基于pmml-evaluator 1.6.4版本。如果使用低版本的jar包,模型的读取方式以及数据的读取方式都会有所不同;而且如果sklearn2pmml版本较高而jar包版本较低,Java将无法成功读取模型。例如:sklearn2pmml保存的模型版本为4.4(保存的模型可以直接以文本形式打开查看,前几行中会有版本信息),而jar包版本为1.4.3,则会出现以下错误:PMML namespace URI http://www.dmg.org/PMML-4_4 is not supported
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。