1:方括号里写数组,是对行进行操作,方括号里写字符串,是对列进行操作
df=df.sort_values(by='Count_AnimalName',ascending=False)
#print(df.head(5))
print(df[:20])
print(df['Row_Labels'])
print(type(b))
2:t3.loc(定位取值)
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
#print(t3)
a=t3.loc['a','z']
print(a)
#取第a行,列数不管它
#coding=utf-8
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
#print(t3)
b=t3.loc['a',]
print(b)
#取列,不管行。
#coding=utf-8
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
#print(t3)
c=t3.loc[:,'w']
print(c)
#取多行多列
#coding=utf-8
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
#print(t3)
d=t3.loc[['a','c'],]
print(d)
f=t3.loc[['a','c'],[w,z]]
3:t3.iloc获取位置
#coding=utf-8
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
#print(t3)
e=t3.iloc[1]
print(e)
#取第二列
3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
f=t3.iloc[:,[2,1]]
print(f)
#取连续的行和列
#coding=utf-8
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
g=t3.iloc[0:,:2]
#赋值
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
t3.iloc[0:,:2]=30
print(t3)
#赋值为nan,可以直接赋值
#coding=utf-8
import pandas as pd
import numpy as np
t3=pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('wxyz'))
t3.iloc[0:,:2]=np.nan
print(t3)
4:布尔值大于800
df=df.sort_values(by='Count_AnimalName',ascending=False)
#print(df.head(5))
# print(df[:20])
# print(df['Row_Labels'])
t=df[df['Count_AnimalName']>800]
print(t)
#大于800小于1000,需要用&号表示‘且’,用|号表示‘或’
不同条件之间要使用() 括起来。
#使用str.len()的方法
df=df.sort_values(by='Count_AnimalName',ascending=False)
#print(df.head(5))
# print(df[:20])
# print(df['Row_Labels'])
t=df[(800<df['Count_AnimalName'])&(df['Count_AnimalName']<1000)]
print(t)
df=df.sort_values(by='Count_AnimalName',ascending=False)
#print(df.head(5))
# print(df[:20])
# print(df['Row_Labels'])
t=df[(800<df['Count_AnimalName'])&(df['Count_AnimalName']<1000)]
#print(t)
h=df[(df['Row_Labels'].str.len()>3)&(df['Count_AnimalName']>700)]
print(h)
5:str的方法
df['info'].str.split('/')
6.tolist的方法(把Series转换成列表)
7:Panda中缺失数据的处理
#pd有2种方法,一种判断为NAN,一种判断不为NAN,isnull,notnull
t3.loc['a','z']=np.nan
#print(a)
#print(t3)
t3=t3[pd.notnull(t3['z'])]
print(t3)
#删除。
t3.loc[('a','b'),('z','y')]=np.nan
#print(a)
#print(t3)
# t3=t3[pd.notnull(t3['z'])]
#print(t3)
b=t3.dropna(axis=0,how='all',inplace=False)
print(b)
#填充数据
b=t3.dropna(axis=0,how='all',inplace=False)
#print(b)
c=t3.fillna(0)
print(c)
#填充均值
b=t3.dropna(axis=0,how='all',inplace=False)
#print(b)
c=t3.fillna(0)
#print(c)
#print(t3)
print(t3.mean())
c=t3.fillna(t3.mean())
print(c)
#填充一个列里的均值
b=t3.dropna(axis=0,how='all',inplace=False)
print(t3)
print('*'*100)
t3['y']=t3['y'].fillna(t3['y'].mean())
print(t3)
8:Panda处理NAN比较有人性化,不会把NAN算进去算均值等
#处理为0的数据:t[t==0]=np.nan