赞
踩
使用Numpy或Pandas导入数据。
>>> import pandas as pd
>>> import numpy as np
>>> np.info(np.ndarray.dtype)
>>> help(pd.read_csv)
>>> filename = 'huck_finn.txt'
>>> # 打开文件进行读取
>>> file = open(filename, mode='r')
>>> # 读取文件内容
>>> text = file.read()
>>> # 检查文件是否关闭
>>> print(file.closed)
>>> # 关闭文件
>>> file.close()
>>> print(text)
使用with(上下文管理器)打开纯文本文件
>>> with open('huck_finn.txt', 'r') as file:
        print(file.readline()) # 读取一行数据
        print(file.readline())
        print(file.readline())
具有一种数据类型的文件
>>> filename = 'mnist.txt'
>>> data = np.loadtxt(filename,
delimiter=',', # 分隔符
skiprows=2, # 跳过前两行
usecols=[0, 2], # 读取第一和第三列
dtype=str) # 类型
混合数据类型的文件
>>> filename = 'titanic.csv'
>>> data = np.genfromtxt(filename, delimiter=',', names=True, dtype=None)
>>> data_array = np.recfromcsv(filename)
np.recfromcsv()函数的默认dtype为None
>>> filename = 'winequality-red.csv'
>>> data = pd.read_csv(filename,
nrows=5, # 读取行数
header=None, # 编号用作col名称
sep='\t', # 分隔符
comment='#', # 用于分割注释的字符
na_values=[""]) # 要识别为Na的字符串
>>> file = 'urbanpop.xlsx'
>>> data = pd.ExcelFile(file)
>>> df_sheet2 = data.parse('1960-1966',
skiprows=[0],
names=['Country', 'AAM: War(2002)'])
>>> df_sheet1 = data.parse(0,
usecols=[0],
skiprows=[0],
names=['Country'])
要访问表名,使用sheet_names属性
>>> data.sheet_names
>>> from sas7bdat import SAS7BDAT
>>> with SAS7BDAT('urbanpop.sas7bdat') as file:
        df_sas = file.to_data_frame()
>>> data = pd.read_stata('urbanpop.dta')
>>> from sqlalchemy import create_engine
>>> engine = create_engine('sqlite:///Northwind.sqlite')
使用table_names()方法获取表名列表
>>> table_names = engine.table_names()
>>> con = engine.connect()
>>> rs = con.execute("SELECT * FROM Orders")
>>> df = pd.DataFrame(rs.fetchall())
>>> df.columns = rs.keys()
>>> con.close()
>>> with engine.connect() as con:
        rs = con.execute("SELECT OrderID FROM Orders")
        df = pd.DataFrame(rs.fetchmany(size=5))
        df.columns = rs.keys()
>>> df = pd.read_sql_query("SELECT * FROM Orders", engine)
>>> data_array.dtype # 数组元素的数据类型
>>> data_array.shape # 数组大小
>>> len(data_array) # 数组的长度
>>> df.head() # 返回DataFrame的前几行
>>> df.tail() # 返回DataFrame的最后几行
>>> df.index # 索引
>>> df.columns # DataFrame列
>>> df.info() # DataFrame的信息
>>> data_array = data.values # 转换一个数据帧到一个NumPy数组
>>> import pickle
>>> with open('pickled_fruit.pkl', 'rb') as file:
        pickled_data = pickle.load(file)
>>> import h5py
>>> filename = 'H-H1_LOSC_4_v1-815411200-4096.hdf5'
>>> data = h5py.File(filename, 'r')
>>> import scipy.io
>>> filename = 'workspace.mat'
>>> mat = scipy.io.loadmat(filename)
>>> print(mat.keys()) # 字典键
>>> for key in data.keys():
        print(key)
meta
quality
strain
>>> pickled_data.values() # 返回字典值
>>> print(mat.items()) # 返回(键,值)元组对的格式
>>> for key in data['meta'].keys(): # 探索HDF5结构
        print(key)
Description
DescriptionURL
Detector
Duration
GPSstart
Observatory
Type
UTCstart
>>> print(data['meta']['Description'][()]) # 检索键的值
命令
!ls # 列出文件和目录的目录内容
%cd .. # 更改当前工作目录
%pwd # 返回当前工作目录路径
os library
>>> import os
>>> path = "/usr/tmp"
>>> wd = os.getcwd() # 将当前目录的名称存储在字符串中
>>> os.listdir(wd) # 以列表形式输出目录的内容
>>> os.chdir(path) # 更改当前工作目录
>>> os.rename("test1.txt", # 重命名一个文件
"test2.txt")
>>> os.remove("test1.txt") # 删除现有文件
>>> os.mkdir("newdir") # 创建一个新目录
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。