# 1. 使用to_excel创建Excel文件 import pandas as pd df = pd.DataFrame({'id':[1,2,3],'name':['zs','ls','ww']}) # 默认会有索引,将ID列设置成索引,会返回一个新的df,如果想要在原来的df上修改需要添加参数inplace=True df = df.set_index('id') df.to_excel('./output.xlsx') print('end')
# 2. 使用pandas读取文件 import pandas as pd # 此处需要安装依赖库xlrd people = pd.read_excel('~/Desktop/People.xlsx') print('获取文件中的行和列:',people.shape) print("-"*20) print('获取文件中的列名:',people.columns) print("-"*20) # 默认取前五行 print('获取文件中的前几行数据信息:',people.head()) print("-"*20) print('获取文件中的后几行数据信息:',people.tail()) print("-"*20) # 注意常见问题: # 1. 读取的时候,默认会将第一行作为列名,我们可以修改 people = pd.read_excel('~/Desktop/People.xlsx',header = 1) print(people.columns) 输出: 获取文件中的行和列: (19972, 6) -------------------- 获取文件中的列名: Index(['ID', 'Type', 'Title', 'FirstName', 'MiddleName', 'LastName'], dtype='object') -------------------- 获取文件中的前几行数据信息: ID Type Title FirstName MiddleName LastName 0 1 Employee NaN Ken J Sánchez 1 2 Employee NaN Terri Lee Duffy 2 3 Employee NaN Roberto NaN Tamburello 3 4 Employee NaN Rob NaN Walters 4 5 Employee Ms. Gail A Erickson -------------------- 获取文件中的后几行数据信息: ID Type Title FirstName MiddleName LastName 19967 20773 Individual Customer NaN Crystal NaN Guo 19968 20774 Individual Customer NaN Isabella F Richardson 19969 20775 Individual Customer NaN Crystal S He 19970 20776 Individual Customer NaN Crystal NaN Zheng 19971 20777 Individual Customer NaN Crystal NaN Hu -------------------- Index([1, 'Employee', 'NULL', 'Ken', 'J', 'Sánchez'], dtype='object')
# 2. 使用pandas读取文件 import pandas as pd #2. 如果第一行或者其他行不满足我们的需求时,我们可以自定义 # 第一种: 设置header为None,会使用默认的01234 people = pd.read_excel('~/Desktop/People.xlsx',header = None) print(people.columns) print("-"*20) print(people.head()) print("-"*20) # 第二种: 认为的设置默认值 people.columns = ['ID1','Type1','Title1','FirstName1','MiddleName1','LastName1'] print(people.columns) print("-"*20) print(people.head()) print("-"*20) # 重新存储 people.set_index('ID1',inplace = True) print(people.head()) print("-"*20) people.to_excel('./People1.xlsx') print('end') print("-"*20) # 注意读取数据的时候,会将ID1右作为一列输出出来,所以可以在读取的时候用参数指定一下 people1 = pd.read_excel('./People1.xlsx',index_col = "ID1") print(people1.head()) 输出: Int64Index([0, 1, 2, 3, 4, 5], dtype='int64') -------------------- 0 1 2 3 4 5 0 ID Type Title FirstName MiddleName LastName 1 1 Employee NaN Ken J Sánchez 2 2 Employee NaN Terri Lee Duffy 3 3 Employee NaN Roberto NaN Tamburello 4 4 Employee NaN Rob NaN Walters -------------------- Index(['ID1', 'Type1', 'Title1', 'FirstName1', 'MiddleName1', 'LastName1'], dtype='object') -------------------- ID1 Type1 Title1 FirstName1 MiddleName1 LastName1 0 ID Type Title FirstName MiddleName LastName 1 1 Employee NaN Ken J Sánchez 2 2 Employee NaN Terri Lee Duffy 3 3 Employee NaN Roberto NaN Tamburello 4 4 Employee NaN Rob NaN Walters -------------------- Type1 Title1 FirstName1 MiddleName1 LastName1 ID1 ID Type Title FirstName MiddleName LastName 1 Employee NaN Ken J Sánchez 2 Employee NaN Terri Lee Duffy 3 Employee NaN Roberto NaN Tamburello 4 Employee NaN Rob NaN Walters -------------------- end -------------------- Type1 Title1 FirstName1 MiddleName1 LastName1 ID1 ID Type Title FirstName MiddleName LastName 1 Employee NaN Ken J Sánchez 2 Employee NaN Terri Lee Duffy 3 Employee NaN Roberto NaN Tamburello 4 Employee NaN Rob NaN Walters
import pandas as pd # 指定读哪个表 sheet = pd.read_excel('~/Desktop/sheet.xlsx',sheet_name='sheet2') print(sheet.head()) print("-"*20) # 3. 如果数据在表格中没有顶格写时 # skiprows : 跳过几行 # usecols: 使用那几列(C,指的就是Excel上的ABCD....) book = pd.read_excel('~/Desktop/Books.xlsx',skiprows=3,usecols ="C:F") print(book.head()) 输出: ID age 0 0 18 1 1 19 -------------------- ID Name InStore 0 NaN Book_001 NaN 1 NaN Book_002 NaN 2 NaN Book_003 NaN 3 NaN Book_004 NaN 4 NaN Book_005 NaN
import pandas as pd book = pd.read_excel('~/Desktop/Books.xlsx',skiprows=3,usecols ="C:F") print(book['ID']) print("-"*20) # 修改ID 的值 book["ID"].at[0] = 1 print(book['ID']) print("-"*20) ''' at和iat函数是只能选择某个位置的值,iat是按照行索引和列索引的位置来选取数据的。而at是按照行索引和列索引来选 取数据; loc和iloc函数的功能包含at和iat函数的功能 ''' # 使用for循环给ID列添加内容 for i in book.index: book["ID"].at[i] = i+1 # # 注意: 添加后ID由于默认是float类型,修改一下类型 book["ID"]= book["ID"].astype(int) print(book['ID']) print("-"*20) # # 给inStore添加交替值 book["InStore"] = book["InStore"].astype(str) for i in book.index: book["InStore"].at[i] = 'Yes' if i%2==0 else "No" book.set_index('ID',inplace=True) book.to_excel('./books1.xlsx') 输出: 0 NaN 1 NaN 2 NaN 3 NaN 4 NaN 5 NaN 6 NaN 7 NaN 8 NaN 9 NaN 10 NaN 11 NaN 12 NaN 13 NaN 14 NaN 15 NaN 16 NaN 17 NaN 18 NaN 19 NaN Name: ID, dtype: float64 -------------------- 0 1.0 1 NaN 2 NaN 3 NaN 4 NaN 5 NaN 6 NaN 7 NaN 8 NaN 9 NaN 10 NaN 11 NaN 12 NaN 13 NaN 14 NaN 15 NaN 16 NaN 17 NaN 18 NaN 19 NaN Name: ID, dtype: float64 -------------------- 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 Name: ID, dtype: int64
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。