目录
2.1
numpy
import numpy as np
# Build a two-row, three-column integer matrix from a nested Python list.
array = np.array([
    [1, 2, 3],
    [2, 3, 4],
])
print(array)

# Pull out the structural attributes first, then report them one by one.
dim_count = array.ndim      # number of axes
dims = array.shape          # length along each axis
element_total = array.size  # total number of elements
print('number of dim:', dim_count)
print('shape:', dims)
print('size:', element_total)
pandas
1,pandas 基本介绍
# Imported here because this snippet uses `pd` before the file's first
# `import pandas as pd` line; run in file order it would otherwise fail
# with NameError.
import pandas as pd

# Build a DataFrame from a dict: every key becomes a column. The scalar
# entries ('A', 'B', 'F') are repeated for each row, while the array-like
# entries ('C', 'D', 'E') each supply four values and fix the row count.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})
# Bare expression: shows the frame in an interactive session or notebook
# cell; it has no visible effect when run as a plain script.
df2
2,pandas 选择数据
3,pandas 设置数值
import pandas as pd
import numpy as np
# Six consecutive daily timestamps starting at 2013-01-01; used as the row index.
dates = pd.date_range('20130101', periods=6)

# The integers 0..23 laid out as a 6x4 grid, one row per date, columns A-D.
grid = np.arange(24).reshape(6, 4)
df = pd.DataFrame(grid, index=dates, columns=list('ABCD'))

# Add column E from a Series carrying the same date index, so each value
# lines up with its row by label.
df['E'] = pd.Series(range(1, 7), index=dates)
4,pandas 处理丢失数据
# Positional selection: rows 1-3 and columns 1-2, printed while the frame
# still has no missing values.
print(df.iloc[[1, 2, 3], 1:3])

# Columns 1 and 2 are about to receive NaN. Cast them to float explicitly
# instead of relying on implicit upcasting during assignment, which newer
# pandas versions deprecate for integer columns.
nan_columns = df.columns[[1, 2]]
df[nan_columns] = df[nan_columns].astype('float64')

# Inject two missing values so the calls below have something to act on.
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan

# Replace every NaN with 0. fillna returns a new frame; `df` keeps its NaNs.
print(df.fillna(value=0))

# dropna is a DataFrame method (pandas has no module-level `pd.dropna`).
# axis=0 drops rows; how='any' drops a row when at least one value is NaN,
# whereas how='all' would drop it only when every value is NaN.
print(df.dropna(axis=0, how='any'))  # how = {'any', 'all'}

# axis=1 drops the columns that contain NaN instead of the rows.
print(df.dropna(axis=1))