赞
踩
8.7 下表由雇员数据库的训练数据组成。数据已泛化。例如,age“31-35”表示年龄在31~35之间。对于给定的行,count表示department、status、age和salary在该行具有给定值的元组数。
Department Status Age Salary Count
Sales Senior 31-35 46K-50K 30
Sales junior 26-30 26K-30K 40
Sales junior 31-35 31K-35K 40
systems junior 21-25 46K-50K 20
systems Senior 31-35 66K-70K 5
systems junior 26-30 46K-50K 3
systems Senior 41-45 66K-70K 3
marketing Senior 36-40 46K-50K 10
marketing junior 31-35 41K-45K 4
secretary Senior 46-50 36K-40K 4
secretary junior 26-30 26K-30K 6
————————————————
给定一个数据元组,属性为“systems”, “26. . . 30”, 和 “46–50K”,该元组status的朴素贝叶斯分类是什么?
实现:
JavaBean.java
/**
 * One training record of the naive Bayes example: an employee group described by
 * age, department, status and salary, together with the number of tuples
 * ({@code count}) that share those values.
 *
 * <p>{@code count} is stored as an {@code int} because callers add it to integer
 * tallies. String-based overloads are kept so code that still supplies the count
 * as text continues to compile.
 */
public class JavaBean {
    int age;
    String department;
    String status;
    String salary;
    // Numeric weight of this record; 0 for a bean built with the no-arg constructor.
    int count;

    /** Creates an empty bean; fields keep their Java default values. */
    public JavaBean() {
    }

    /**
     * Creates a fully populated record.
     *
     * @param age        age value of the group
     * @param department department name
     * @param status     class label of the group
     * @param salary     salary value of the group
     * @param count      number of tuples this record stands for
     */
    public JavaBean(int age, String department, String status, String salary, int count) {
        this.age = age;
        this.department = department;
        this.status = status;
        this.salary = salary;
        this.count = count;
    }

    /**
     * Compatibility constructor taking the count as text.
     *
     * @param count decimal integer text; {@code null} is treated as 0
     * @throws NumberFormatException if {@code count} is not a decimal integer
     */
    public JavaBean(int age, String department, String status, String salary, String count) {
        this(age, department, status, salary, parseCount(count));
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public String getDepartment() {
        return department;
    }

    public void setDepartment(String department) {
        this.department = department;
    }

    public String getStatus() {
        return status;
    }

    public void setStatus(String status) {
        this.status = status;
    }

    public String getSalary() {
        return salary;
    }

    public void setSalary(String salary) {
        this.salary = salary;
    }

    /** @return the number of tuples this record stands for */
    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    /**
     * Compatibility setter taking the count as text.
     *
     * @param count decimal integer text; {@code null} is treated as 0
     * @throws NumberFormatException if {@code count} is not a decimal integer
     */
    public void setCount(String count) {
        this.count = parseCount(count);
    }

    // Converts textual counts for the compatibility overloads; null maps to 0.
    private static int parseCount(String text) {
        if (text == null) {
            return 0;
        }
        return Integer.parseInt(text.trim());
    }

    @Override
    public String toString() {
        return "JavaBean [age=" + age + ", department=" + department + ", status="
                + status + ", salary=" + salary + ", count="
                + count + "]";
    }
}
TestNB.java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
public class TestNB {
/**
 * Shared state of the demo (the original note here reads "the idea of the algorithm").
 * list collects the training records appended by splitt.
 */
public static ArrayList<JavaBean> list = new ArrayList<JavaBean>();;
// Incremented by txt2String once per line it reads from the training file.
static int data_length=0;
/**
 * Entry point: loads the training records, then classifies the sample tuple.
 *
 * @param args optional; when args[0] is present it is used as the path of the
 *             training file, otherwise the original default D://test.txt is read
 */
public static void main(String[] args) {
    // 1. Read the data and put it into the list container
    String path = (args != null && args.length > 0) ? args[0] : "D://test.txt";
    File file = new File(path);
    txt2String(file);
    // Sample tuple to classify
    testData(26,"systems","46K");
}
/**
 * Reads the training file line by line and hands every non-blank line to splitt.
 *
 * Blank lines are skipped so that an empty or trailing line no longer aborts the
 * load. The reader is closed in a finally block, so it is released even when
 * reading or parsing fails. As before, failures are reported with
 * printStackTrace and the method returns normally.
 *
 * @param file the training data file
 */
public static void txt2String(File file) {
    BufferedReader br = null;
    try {
        br = new BufferedReader(new FileReader(file));
        String s;
        while ((s = br.readLine()) != null) { // one line per iteration
            if (s.trim().length() == 0) {
                continue; // nothing to parse on a blank line
            }
            data_length++;
            splitt(s);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }
}
/**
 * Parses one whitespace-separated line and appends the resulting record to list.
 *
 * Column 0 is parsed as the integer age and column 4 as the integer count;
 * columns 1 to 3 are passed to the JavaBean constructor as department, status
 * and salary.
 *
 * @param str one line of the training file
 */
public static void splitt(String str){
    String[] fields = str.trim().split("[\\p{Space}]+");
    int parsedAge = Integer.parseInt(fields[0]);
    int parsedCount = Integer.parseInt(fields[4]);
    list.add(new JavaBean(parsedAge, fields[1], fields[2], fields[3], parsedCount));
}
/**
 * Classifies one tuple as senior or junior with naive Bayes over the records in
 * list, printing the tallies, both class scores and the verdict to standard output.
 *
 * Every record is weighted by its count. A class score is
 * P(class) * P(age|class) * P(department|class) * P(salary|class); the prior
 * P(class) = classTotal / data_count was missing before, which left data_count
 * unused. A record counts as senior when its status equals "senior" ignoring
 * case, so the "Senior" spelling is recognised; any other status is tallied as
 * junior. A class without any weight scores 0 instead of NaN.
 *
 * @param age        age value, compared with == against each record's age
 * @param department department value, compared with equals
 * @param salary     salary value, compared with equals
 */
public static void testData(int age,String department,String salary){
    // Index 0 holds the senior tallies, index 1 the junior tallies.
    final int senior = 0;
    final int junior = 1;
    int[] classTotal = new int[2];
    int[] ageHits = new int[2];
    int[] departmentHits = new int[2];
    int[] salaryHits = new int[2];
    int data_count = 0;

    // Walk the list once, accumulating the weighted counts per class
    for (JavaBean bb : list) {
        int weight = bb.getCount();
        data_count += weight;
        int cls = "senior".equalsIgnoreCase(bb.getStatus()) ? senior : junior;
        classTotal[cls] += weight;
        if (bb.getDepartment().equals(department)) {
            departmentHits[cls] += weight;
        }
        if (bb.getSalary().equals(salary)) {
            salaryHits[cls] += weight;
        }
        if (bb.getAge() == age) {
            ageHits[cls] += weight;
        }
    }

    System.out.println("number_senior:"+classTotal[senior]);
    System.out.println("number_junior:"+classTotal[junior]);
    System.out.println("num_age_senior:"+ageHits[senior]);
    System.out.println("num_age_junior:"+ageHits[junior]);
    System.out.println("num_department_senior:"+departmentHits[senior]);
    System.out.println("num_department_junior:"+departmentHits[junior]);
    System.out.println("num_salary_senior:"+salaryHits[senior]);
    System.out.println("num_salary_junior:"+salaryHits[junior]);

    // Decide
    double nb_senior = classScore(classTotal[senior], data_count,
            ageHits[senior], departmentHits[senior], salaryHits[senior]);
    double nb_junior = classScore(classTotal[junior], data_count,
            ageHits[junior], departmentHits[junior], salaryHits[junior]);
    System.out.println("该数据元组的senior的概率:"+nb_senior);
    System.out.println("该数据元组的junior的概率:"+nb_junior);
    // A tie (including two zero scores) reports junior, as the original comparison did.
    if(nb_senior>nb_junior){
        System.out.println("senior的概率大");
    }else {
        System.out.println("junior的概率大");
    }
}

// Unnormalised naive Bayes posterior of one class: prior times the three conditionals.
private static double classScore(int classTotal, int total,
        int ageHits, int departmentHits, int salaryHits) {
    if (classTotal == 0 || total == 0) {
        return 0.0; // no evidence for this class; avoids 0/0 = NaN
    }
    double prior = (double) classTotal / total;
    return prior
            * ((double) ageHits / classTotal)
            * ((double) departmentHits / classTotal)
            * ((double) salaryHits / classTotal);
}
}
样本:
结果:
小结:
朴素贝叶斯中的朴素一词的来源就是假设各特征之间相互独立。这一假设使得朴素贝叶斯算法变得简单,但有时会牺牲一定的分类准确率。
首先给出贝叶斯公式:
$$P(A \mid B) = \frac{P(B \mid A)\,P(A)}{P(B)}$$
换成分类任务的表达式:
$$P(\text{类别} \mid \text{特征}) = \frac{P(\text{特征} \mid \text{类别})\,P(\text{类别})}{P(\text{特征})}$$
我们最终求的p(类别|特征)即可!就相当于完成了我们的任务。
则,朴素贝叶斯公式为:
$$P(C \mid x_1, \dots, x_n) \propto P(C) \prod_{i=1}^{n} P(x_i \mid C)$$
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。