1.创建多层次索引
1.1 隐式构造
- 最常见的方法是给DataFrame构造函数的index参数传递两个或更多的数组
# Import numpy and pandas
import numpy as np
import pandas as pd
# Random integer scores in [0, 100): one row per student, one column per exam subject.
data = np.random.randint(0, 100, size=(6, 6))

# Row labels: two parallel lists (class, student). Handing a list of lists to
# the DataFrame constructor builds the two-level row index implicitly.
class_labels = ["1班"] * 3 + ["2班"] * 3
student_names = ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"]
index = [class_labels, student_names]

# Column labels: two parallel lists (exam, subject), built the same way.
exam_labels = ["期中"] * 3 + ["期末"] * 3
subject_labels = ["语文", "数学", "英语"] * 2
columns = [exam_labels, subject_labels]

df = pd.DataFrame(data, index=index, columns=columns)
# Bare expression: displays the frame when run as a notebook cell.
df
| | | 期中 | | | 期末 | | |
|---|---|---|---|---|---|---|---|
| | | 语文 | 数学 | 英语 | 语文 | 数学 | 英语 |
| 1班 | 张三 | 40 | 51 | 30 | 16 | 57 | 45 |
| | 李四 | 74 | 45 | 70 | 48 | 75 | 1 |
| | 王五 | 57 | 45 | 35 | 25 | 22 | 76 |
| 2班 | 鲁班 | 93 | 80 | 69 | 31 | 17 | 29 |
| | 张三丰 | 90 | 38 | 36 | 77 | 56 | 30 |
| | 张无忌 | 35 | 50 | 79 | 45 | 38 | 76 |
- Series也可以创建多层索引
# Six random integer scores in [0, 100), one per student.
data = np.random.randint(0, 100, size=6)

# Two parallel label lists (class, student); a Series accepts a list of lists
# as its index just like a DataFrame does, giving a two-level index.
index = [
    ["1班"] * 3 + ["2班"] * 3,
    ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"],
]

s = pd.Series(data, index=index)
# Bare expression: displays the Series when run as a notebook cell.
s
1班  张三      7
     李四      9
     王五     57
2班  鲁班     88
     张三丰   36
     张无忌    5
dtype: int32
1.2 显式构造pd.MultiIndex
- 使用数组
# Random integer scores in [0, 100) for a 6 x 6 grid.
data = np.random.randint(0, 100, size=(6, 6))

# Row index built explicitly: from_arrays takes one array per level, and the
# arrays are aligned position by position (class[i] pairs with student[i]).
row_level_arrays = [
    ["1班"] * 3 + ["2班"] * 3,
    ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"],
]
index = pd.MultiIndex.from_arrays(row_level_arrays)

# Column index is still given as plain nested lists (implicit construction).
columns = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]

df = pd.DataFrame(data, index=index, columns=columns)
# Bare expression: displays the frame when run as a notebook cell.
df
| | | 期中 | | | 期末 | | |
|---|---|---|---|---|---|---|---|
| | | 语文 | 数学 | 英语 | 语文 | 数学 | 英语 |
| 1班 | 张三 | 56 | 0 | 70 | 16 | 54 | 65 |
| | 李四 | 11 | 99 | 94 | 66 | 82 | 51 |
| | 王五 | 37 | 16 | 71 | 4 | 82 | 72 |
| 2班 | 鲁班 | 39 | 33 | 65 | 69 | 77 | 68 |
| | 张三丰 | 53 | 15 | 23 | 99 | 79 | 7 |
| | 张无忌 | 53 | 30 | 18 | 95 | 7 | 36 |
- 使用tuple
# Random integer scores in [0, 100) for a 6 x 6 grid.
data = np.random.randint(0, 100, size=(6, 6))

# Row index built explicitly: from_tuples takes one (class, student) tuple
# per row, so each row's full label is spelled out in one place.
row_label_pairs = [
    ("1班", "张三"),
    ("1班", "李四"),
    ("1班", "王五"),
    ("2班", "鲁班"),
    ("2班", "张三丰"),
    ("2班", "张无忌"),
]
index = pd.MultiIndex.from_tuples(row_label_pairs)

# Column index is still given as plain nested lists (implicit construction).
columns = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]

df = pd.DataFrame(data, index=index, columns=columns)
# Bare expression: displays the frame when run as a notebook cell.
df
| | | 期中 | | | 期末 | | |
|---|---|---|---|---|---|---|---|
| | | 语文 | 数学 | 英语 | 语文 | 数学 | 英语 |
| 1班 | 张三 | 27 | 89 | 20 | 7 | 5 | 1 |
| | 李四 | 75 | 60 | 58 | 79 | 50 | 15 |
| | 王五 | 37 | 47 | 56 | 43 | 59 | 55 |
| 2班 | 鲁班 | 41 | 25 | 43 | 71 | 46 | 37 |
| | 张三丰 | 66 | 53 | 52 | 21 | 53 | 91 |
| | 张无忌 | 29 | 26 | 22 | 49 | 56 | 24 |
- 使用product
  笛卡尔积:{a,b} × {c,d} ==> (a,c),(a,d),(b,c),(b,d)
# Random integer scores in [0, 100) for a 6 x 6 grid.
data = np.random.randint(0, 100, size=(6, 6))

# Row index built explicitly: from_product pairs every class with every
# student name (2 classes x 3 names = 6 rows), outer level varying slowest.
class_names = ["1班", "2班"]
student_names = ["张三", "李四", "王五"]
index = pd.MultiIndex.from_product([class_names, student_names])

# Column index is still given as plain nested lists (implicit construction).
columns = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]

df = pd.DataFrame(data, index=index, columns=columns)
# Bare expression: displays the frame when run as a notebook cell.
df
| | | 期中 | | | 期末 | | |
|---|---|---|---|---|---|---|---|
| | | 语文 | 数学 | 英语 | 语文 | 数学 | 英语 |
| 1班 | 张三 | 75 | 54 | 83 | 5 | 27 | 56 |
| | 李四 | 52 | 63 | 1 | 10 | 63 | 84 |
| | 王五 | 67 | 6 | 5 | 96 | 45 | 16 |
| 2班 | 张三 | 77 | 10 | 10 | 94 | 41 | 73 |
| | 李四 | 86 | 34 | 51 | 50 | 18 | 87 |
| | 王五 | 9 | 73 | 91 | 43 | 38 | 45 |
2.多层列索引
除了行索引index,列索引columns也能用同样的方法创建多层索引
- 使用数组
# Random integer scores in [0, 100) for a 6 x 6 grid.
data = np.random.randint(0, 100, size=(6, 6))

# Row index: one array per level (class, student), aligned by position.
row_level_arrays = [
    ["1班"] * 3 + ["2班"] * 3,
    ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"],
]
index = pd.MultiIndex.from_arrays(row_level_arrays)

# Column index: the same explicit constructor, with levels (exam, subject).
column_level_arrays = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]
columns = pd.MultiIndex.from_arrays(column_level_arrays)

df = pd.DataFrame(data, index=index, columns=columns)
# Bare expression: displays the frame when run as a notebook cell.
df
| | | 期中 | | | 期末 | | |
|---|---|---|---|---|---|---|---|
| | | 语文 | 数学 | 英语 | 语文 | 数学 | 英语 |
| 1班 | 张三 | 20 | 58 | 72 | 66 | 62 | 71 |
| | 李四 | 67 | 22 | 63 | 46 | 16 | 21 |
| | 王五 | 34 | 92 | 15 | 1 | 74 | 30 |
| 2班 | 鲁班 | 18 | 97 | 24 | 5 | 50 | 86 |
| | 张三丰 | 58 | 24 | 17 | 32 | 49 | 52 |
| | 张无忌 | 50 | 33 | 26 | 38 | 41 | 82 |
- 使用tuple
# Random integer scores in [0, 100) for a 6 x 6 grid.
data = np.random.randint(0, 100, size=(6, 6))

# Row index: one (class, student) tuple per row.
row_label_pairs = [
    ("1班", "张三"),
    ("1班", "李四"),
    ("1班", "王五"),
    ("2班", "鲁班"),
    ("2班", "张三丰"),
    ("2班", "张无忌"),
]
index = pd.MultiIndex.from_tuples(row_label_pairs)

# Column index: one (exam, subject) tuple per column.
column_label_pairs = [
    ("期中", "语文"),
    ("期中", "数学"),
    ("期中", "英语"),
    ("期末", "语文"),
    ("期末", "数学"),
    ("期末", "英语"),
]
columns = pd.MultiIndex.from_tuples(column_label_pairs)

df = pd.DataFrame(data, index=index, columns=columns)
# Bare expression: displays the frame when run as a notebook cell.
df
| | | 期中 | | | 期末 | | |
|---|---|---|---|---|---|---|---|
| | | 语文 | 数学 | 英语 | 语文 | 数学 | 英语 |
| 1班 | 张三 | 55 | 37 | 66 | 70 | 6 | 4 |
| | 李四 | 38 | 32 | 79 | 79 | 78 | 2 |
| | 王五 | 80 | 49 | 56 | 51 | 32 | 19 |
| 2班 | 鲁班 | 36 | 68 | 81 | 13 | 35 | 73 |
| | 张三丰 | 94 | 56 | 94 | 45 | 15 | 34 |
| | 张无忌 | 78 | 5 | 93 | 57 | 24 | 65 |
- 使用product
# Random integer scores in [0, 100) for a 6 x 6 grid.
data = np.random.randint(0, 100, size=(6, 6))

# Row index: every class paired with every student name (2 x 3 = 6 rows).
class_names = ["1班", "2班"]
student_names = ["张三", "李四", "王五"]
index = pd.MultiIndex.from_product([class_names, student_names])

# Column index: every exam paired with every subject (2 x 3 = 6 columns).
exam_names = ["期中", "期末"]
subject_names = ["语文", "数学", "英语"]
columns = pd.MultiIndex.from_product([exam_names, subject_names])

df = pd.DataFrame(data, index=index, columns=columns)
# Bare expression: displays the frame when run as a notebook cell.
df
| | | 期中 | | | 期末 | | |
|---|---|---|---|---|---|---|---|
| | | 语文 | 数学 | 英语 | 语文 | 数学 | 英语 |
| 1班 | 张三 | 1 | 18 | 17 | 4 | 94 | 72 |
| | 李四 | 41 | 33 | 22 | 75 | 36 | 77 |
| | 王五 | 42 | 82 | 28 | 21 | 84 | 57 |
| 2班 | 张三 | 18 | 64 | 9 | 0 | 41 | 80 |
| | 李四 | 24 | 99 | 66 | 92 | 34 | 25 |
| | 王五 | 28 | 41 | 16 | 2 | 85 | 36 |