赞
踩
import pandas as pd
# Load the employee roster; 'Start Date' is parsed into datetime values.
employees = pd.read_csv('employees.csv', parse_dates=['Start Date'])

# Columns are assigned with bracket syntax throughout: attribute-style
# assignment (employees.Mgmt = ...) only works when the column already exists
# and otherwise silently creates a plain Python attribute instead of a column.
# NOTE(review): astype(bool) turns missing values into True -- confirm that
# is acceptable for the 'Mgmt' column.
employees['Mgmt'] = employees['Mgmt'].astype(bool)
# Missing salaries are replaced by 0 so the column can be stored as integers.
employees['Salary'] = employees['Salary'].fillna(0).astype(int)
# Low-cardinality text columns are stored as categoricals.
employees['Gender'] = employees['Gender'].astype('category')
employees['Team'] = employees['Team'].astype('category')
# Both forms select the same rows. The second stores the condition in a
# variable first, which reads more clearly.
employees.loc[employees['First Name'] == 'Thomas']
find_Thomas = employees['First Name'] == 'Thomas'
employees.loc[find_Thomas]
# Method-style comparisons: within each pair below, the two lines are equivalent.
# test = employees['Salary'] > 120000
# test = employees['Salary'].gt(120000)
#
# test = employees['Salary'] >= 120000
# test = employees['Salary'].ge(120000)
#
# test = employees['Salary'] < 120000
# test = employees['Salary'].lt(120000)
#
# test = employees['Salary'] <= 120000
# test = employees['Salary'].le(120000)
#
# test = employees['First Name'] == 'Thomas'
# test = employees['First Name'].eq('Thomas')
#
# test = employees['First Name'] != 'Thomas'
# test = employees['First Name'].ne('Thomas')
~ 条件反转
# The ~ operator inverts a boolean mask.
test = employees['Salary'].gt(120000)
# Inverted condition: rows whose salary is less than or equal to 120000.
employees.loc[~test]
1.AND 条件
# Women whose salary is at least 100000 (.ge is inclusive, i.e. >=).
# Both masks must be True for a row to be kept, hence the & operator.
is_female = employees['Gender'] == 'Female'
salary_ge100000 = employees['Salary'].ge(100000)
employees[is_female & salary_ge100000]
2.OR 条件
# Rows whose first name is either Thomas or Julie: a row is kept when at
# least one of the two masks is True, hence the | operator.
is_Thomas = employees['First Name'] == 'Thomas'
is_Julie = employees['First Name'] == 'Julie'
employees.loc[is_Thomas | is_Julie]
3. isin():可以简化多个 OR 条件的写法
# isin() replaces a chain of OR-ed equality tests with a single membership test.
# The duplicate 'Phillip' entry was dropped; it has no effect on the result.
names = ['Thomas', 'Dennis', 'Phillip', 'Jeremy']
# The mask must be built from the list defined above (the original referenced
# an undefined name, which raised NameError).
test = employees['First Name'].isin(names)
# Count how many matching rows there are for each of the selected names.
employees[test]['First Name'].value_counts()
4. between() 方法:应用于数值、日期,包含上下限
# Numeric range: salaries from 61933 to 62096, with between()'s default bounds.
test = employees['Salary'].between(left=61933, right=62096)
employees.loc[test]

# Date range: the string bounds are compared against the parsed 'Start Date'.
test = employees['Start Date'].between(left='1995-06-04', right='1995-07-14')
employees.loc[test]
between() 方法应用到字符串时同样包含上下限;但以 'B' 开头的名字(如 'Bob')按字典序大于 'B',因此不会被选中
# Selects first names that sort between 'A' and 'B', i.e. the names that start
# with 'A'. Names starting with 'B' sort after the bare string 'B' and so fall
# outside the range.
test = employees['First Name'].between(left='A', right='B')
employees.loc[test]
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。