赞
踩
本次练习使用 鸢尾属植物数据集.\iris.txt
,在这个数据集中,包括了三类不同的鸢尾属植物:Iris Setosa,Iris Versicolour,Iris Virginica。每类收集了50个样本,因此这个数据集一共包含了150个样本。
代码:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,skiprows=1,delimiter=',')
print(iris[:10]) //打印前10行
x = np.loadtxt(file, delimiter=',', skiprows=1, usecols=0)
print(x[:10])
print('萼片长度中位数:',np.median(x))
print('萼片长度平均数数:',np.mean(x))
print('萼片长度的标准差:',np.std(x))
aMax=np.amax(x)
aMin=np.amin(x)
y = (x - aMin)/np.ptp(x)//用极差进行标准化
print('标准化后:',y[:10])
print('萼片长度5分位数和95分位数:',np.percentile(x, [5, 95]))
结果:
代码:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,skiprows=1,delimiter=',')
i,j = np.shape(iris)
iris[np.random.choice(i,20),np.random.choice(j,20)] = np.nan//随机选取索引
print(iris[:10])
结果:
代码:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,skiprows=1,delimiter=',')
i,j = np.shape(iris) //i:行数,j:列数
iris[np.random.choice(i,20),np.random.choice(j,20)] = np.nan//随机选取索引
print(iris[:10])
iris_sepallength = iris[:,0].astype('float')
print(iris_sepallength[:10])
sepallength_nan = np.isnan(iris_sepallength)
print('缺失值个数:',np.sum(sepallength_nan))
print('缺失值位置:',np.where(sepallength_nan == True))
结果:
代码:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,skiprows=1,delimiter=',')
iris_sepallength = iris[:,0].astype('float')
iris_peltalength = iris[:,2].astype('float')
#如果where只给出了条件,返回的是索引
index = np.where(np.logical_and(iris_sepallength < 5.0,iris_peltalength > 1.5))
print(iris[index])
结果:
代码:
import numpy as np
file = r'iris.txt'
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
i,j = np.shape(iris_data)
iris_data[np.random.choice(i,20),np.random.choice(j,20)] = np.nan
deled_iris_data = iris_data[np.sum(np.isnan(iris_data),axis=1)== 0]
print(deled_iris_data[:10])
结果:
相关系数:
代码:
import numpy as np
file = r'iris.txt'
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
sepallength = iris_data[:,0]
petallength = iris_data[:,2]
m1 = np.mean(sepallength)
m2 = np.mean(petallength)
x = np.mean((sepallength - m1)*(petallength - m2))
y = np.std(sepallength)*np.std(petallength)
print('相关系数:',x/y)
x=np.cov(sepallength,petallength,ddof=False)
print('协方差矩阵:',x)
y=np.std(sepallength)*np.std(petallength)
print('相关系数:',x[0,1]/y)
结果:
代码:
import numpy as np
file = r'iris.txt'
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
i,j = np.shape(iris_data)
iris_data[np.random.choice(i,20),np.random.choice(j,20)] = np.nan
iris_data[np.where(np.isnan(iris_data))] = 0 //找到为nan的位置并赋值为0
print(iris_data[:])
结果:
代码:
import numpy as np
file = r'iris.txt'
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
x,p=np.unique(iris_data,return_counts=True)
print('每一个值和相应数量:')
print(x,'\n',p)
结果:
代码:
import numpy as np
file = r'iris.txt'
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
sepallength = iris_data[:,0]
petallength = iris_data[:,2]
petal_length_bin=np.digitize(iris_data[:,2],[0,3,5,10])//直方图(区间0-3,3-5,5-10)
print(petal_length_bin)
#映射字典
label_map={1:'small',2:'medium',3:'larger',4:np.nan}
petal_length_cat=[label_map[x] for x in petal_length_bin]
print('前10个转换数据:',petal_length_cat[:])
结果:
(pi x petallength x sepallength ^ 2)/ 3
**代码:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,delimiter=',',skiprows=1)
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
sepallength = iris_data[:,0]
petallength = iris_data[:,2]
volumn = (np.pi * petallength * sepallength**2) / 3
volumn = volumn[:,np.newaxis] //增加维度
iris_data = np.concatenate([iris,volumn],axis=1) //拼接
#iris_data = np.hstack((iris,volumn))
print(iris_data)
结果:
代码:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,delimiter=',',skiprows=1)
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
species=np.array(['Iris-setosa','Iris-versicolor','Iris-virginica'])
species_out=np.random.choice(species,10000,p=[0.5,0.25,0.25])
print('随机抽取情况:',np.unique(species_out,return_counts=True))
结果:
代码:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,delimiter=',',skiprows=1)
iris_data = np.loadtxt(file ,dtype=float,delimiter=',',skiprows=1,usecols=(0,1,2,3))
petallength = iris_data[:,2]
print(np.unique(petallength,return_counts=True))
petlen,counts = np.unique(petallength,return_counts=True)
print('最长见花瓣的长度:',petlen[np.argmax(counts)])
print('出现的最大次数:',np.max(counts))
结果:
import numpy as np
file = r'iris.txt'
iris = np.loadtxt(file,dtype=object,skiprows=1,delimiter=',')//读取文件
print(iris[:10])
iris_petalwidth = iris[:,3].astype('float')//取出第四列
print(iris_petalwidth)
index = np.where(iris_petalwidth > 1.0)//返回一个元组,包含着numpy数组
print(index)
print('第一次大于1.0的位置',index[0][0])//从0开始计数
结果:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。