# 和往常一样,做数据处理先导入pandas
import pandas as pd

# df1 与 df2 内容完全相同
df1 = pd.DataFrame({'一班':[90,80,66,75,99,55,76,78,98,None,90],
                    '二班':[75,98,100,None,77,45,None,66,56,80,57],
                    '三班':[45,89,77,67,65,100,None,75,64,88,99]})
df2 = pd.DataFrame({'一班':[90,80,66,75,99,55,76,78,98,None,90],
                    '二班':[75,98,100,None,77,45,None,66,56,80,57],
                    '三班':[45,89,77,67,65,100,None,75,64,88,99]})

 

1数据堆叠

  数据堆叠分为以下两种:

    • 行堆叠
    • 列堆叠

  pd.concat(objs, axis=0)

  • objs:参与合并的多个DataFrame。无默认
  • axis:表示轴向,axis=0表示行合并,axis=1表示列合并
# df3 在上文中没有定义;这里假设它与 df1、df2 内容相同,例如 df3 = df1.copy()
pd.concat([df1, df2, df3], axis=1)
  一班 三班 二班 一班 三班 二班 一班 三班 二班
0 90.0 45.0 75.0 90.0 45.0 75.0 90.0 45.0 75.0
1 80.0 89.0 98.0 80.0 89.0 98.0 80.0 89.0 98.0
2 66.0 77.0 100.0 66.0 77.0 100.0 66.0 77.0 100.0
3 75.0 67.0 NaN 75.0 67.0 NaN 75.0 67.0 NaN
4 99.0 65.0 77.0 99.0 65.0 77.0 99.0 65.0 77.0
5 55.0 100.0 45.0 55.0 100.0 45.0 55.0 100.0 45.0
6 76.0 NaN NaN 76.0 NaN NaN 76.0 NaN NaN
7 78.0 75.0 66.0 78.0 75.0 66.0 78.0 75.0 66.0
8 98.0 64.0 56.0 98.0 64.0 56.0 98.0 64.0 56.0
9 NaN 88.0 80.0 NaN 88.0 80.0 NaN 88.0 80.0
10 90.0 99.0 57.0 90.0 99.0 57.0 90.0 99.0 57.0

 

  当然,如果axis=0(行堆叠)时,也可以使用append函数(注意:DataFrame.append 自 pandas 1.4 起已弃用,并在 pandas 2.0 中移除;新版本请改用 pd.concat([df1, df2]))

# append 直接在末尾追加,注意特征数目相同,并且数据类型相同
df1.append(df2)
  一班 三班 二班
0 90.0 45.0 75.0
1 80.0 89.0 98.0
2 66.0 77.0 100.0
3 75.0 67.0 NaN
4 99.0 65.0 77.0
5 55.0 100.0 45.0
6 76.0 NaN NaN
7 78.0 75.0 66.0
8 98.0 64.0 56.0
9 NaN 88.0 80.0
10 90.0 99.0 57.0
0 90.0 45.0 75.0
1 80.0 89.0 98.0
2 66.0 77.0 100.0
3 75.0 67.0 NaN
4 99.0 65.0 77.0
5 55.0 100.0 45.0
6 76.0 NaN NaN
7 78.0 75.0 66.0
8 98.0 64.0 56.0
9 NaN 88.0 80.0
10 90.0 99.0 57.0

 

2主键合并

  主键合并大概是应用最广的合并方式了,也是我最喜欢的方式。

 

pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None, suffixes=('_x', '_y'))

  • left:表示进行合并的左边的DataFrame。无默认。
  • right:表示进行合并的右边的DataFrame。无默认。
  • how:表示合并的方法。默认为'inner'。可取'left'(左连接),'right'(右连接),'inner'(内连接),'outer'(外连接)。
  • on:表示合并的主键。默认为空。
  • left_on:表示左边的合并主键。默认为空。
  • right_on:表示右边的合并主键。默认为空。
  • suffixes:表示列名相同的时候的后缀。默认为('_x', '_y')

 

# 合并数据
pd.merge(df1, df2, on='一班')
  一班 三班_x 二班_x 三班_y 二班_y
0 90.0 45.0 75.0 45.0 75.0
1 90.0 45.0 75.0 99.0 57.0
2 90.0 99.0 57.0 45.0 75.0
3 90.0 99.0 57.0 99.0 57.0
4 80.0 89.0 98.0 89.0 98.0
5 66.0 77.0 100.0 77.0 100.0
6 75.0 67.0 NaN 67.0 NaN
7 99.0 65.0 77.0 65.0 77.0
8 55.0 100.0 45.0 100.0 45.0
9 76.0 NaN NaN NaN NaN
10 78.0 75.0 66.0 75.0 66.0
11 98.0 64.0 56.0 64.0 56.0
12 NaN 88.0 80.0 88.0 80.0

 

pd.merge(df1, df2, left_on='一班', right_on='二班', suffixes=('_1', '_2'))
  一班_1 三班_1 二班_1 一班_2 三班_2 二班_2
0 80.0 89.0 98.0 NaN 88.0 80.0
1 66.0 77.0 100.0 78.0 75.0 66.0
2 75.0 67.0 NaN 90.0 45.0 75.0
3 98.0 64.0 56.0 80.0 89.0 98.0
4 NaN 88.0 80.0 75.0 67.0 NaN
5 NaN 88.0 80.0 76.0 NaN NaN

 

3重叠合并

  不是特别建议,毕竟重叠合并没什么依据,而且浪费数据资源。

 

  DataFrame.combine_first(other) 重叠合并:当两者在同一位置都有值时,以前者为准;前者为空时,则用后者对应位置的值补上。

df1['一班'].combine_first(df1['二班'])
0     90.0
1     80.0
2     66.0
3     75.0
4     99.0
5     55.0
6     76.0
7     78.0
8     98.0
9     80.0
10    90.0
Name: 一班, dtype: float64

版权声明:本文为WoLykos原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/WoLykos/p/9382201.html