Pandas 连接合并追加操作
concat
concat(objs, axis=0, join='outer',
ignore_index: bool = False, keys = None, levels = None, names = None, verify_integrity: bool = False, sort: bool = False, copy: bool = True)
连接 2 个
Series
s1 = pd.DataFrame([1,2,3]) s2 = pd.DataFrame([4,6,5]) s1 0 0 1 1 2 2 3 s2 0 0 4 1 6 2 5 pd.concat([s1, s2]) 0 0 1 1 2 2 3 0 4 1 6 2 5 # 忽略索引 pd.concat([s1, s2], ignore_index=True) 0 0 1 1 2 2 3 3 4 4 6 5 5
连接 2个
DataFrame
# 普通连接(行) df1 = pd.DataFrame([['a', 1], ['b', 2]], columns=['letter', 'number']) df2 = pd.DataFrame([['c', 3], ['d', 4]], columns=['letter', 'number']) df1 letter number 0 a 1 1 b 2 df2 letter number 0 c 3 1 d 4 pd.concat([df1, df2]) letter number 0 a 1 1 b 2 0 c 3 1 d 4 # 普通连接(列) pd.concat([df1, df2], axis=1) letter number letter number 0 a 1 c 3 1 b 2 d 4 # 如果字段不相同,填充 `Nan` df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], columns=['letter', 'number', 'animal']) df3 letter number animal 0 c 3 cat 1 d 4 dog pd.concat([df1, df3], sort=False) letter number animal 0 a 1 NaN 1 b 2 NaN 0 c 3 cat 1 d 4 dog # 内连接(只连接相同字段) pd.concat([df1, df3], join='inner') letter number 0 a 1 1 b 2 0 c 3 1 d 4 # 排序后,列拼接 pd.concat([df1, df2], axis=1) letter number letter number 0 a 1 c 3 1 b 2 d 4 df2.sort_values('number', ascending=False, inplace=True) # 这一步至关重要 df2.reset_index(drop=True, inplace=True) pd.concat([df1, df2], axis=1) letter number letter number 0 a 1 d 4 1 b 2 c 3
merge
merge(left, right, how: str = 'inner', on = None, left_on = None, right_on = None, left_index: bool = False,
right_index: bool = False, sort: bool = False, suffixes = ('_x', '_y'), copy: bool = True,
indicator: bool = False, validate = None)
普通合并
df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]}) df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]}) df1 lkey value 0 foo 1 1 bar 2 2 baz 3 3 foo 5 df2 rkey value 0 foo 5 1 bar 6 2 baz 7 3 foo 8 df1.merge(df2, left_on='lkey', right_on='rkey') lkey value_x rkey value_y 0 foo 1 foo 5 1 foo 1 foo 8 2 foo 5 foo 5 3 foo 5 foo 8 4 bar 2 bar 6 5 baz 3 baz 7 df1.merge(df2, left_on='lkey', right_on='rkey',suffixes=('_left', '_right')) lkey value_left rkey value_right 0 foo 1 foo 5 1 foo 1 foo 8 2 foo 5 foo 5 3 foo 5 foo 8 4 bar 2 bar 6 5 baz 3 baz 7
内、左、右连接
df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]}) df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) df1 a b 0 foo 1 1 bar 2 df2 a c 0 foo 3 1 baz 4 df1.merge(df2, how='inner', on='a') a b c 0 foo 1 3 df1.merge(df2, how='left', on='a') a b c 0 foo 1 3.0 1 bar 2 NaN df1.merge(df2, how='right', on='a') a b c 0 foo 1.0 3 1 baz NaN 4 # 相同值,但是不同的字段名,左连接 df3 = pd.DataFrame({'d': ['foo', 'baz'], 'c': [3, 4]}) df3 d c 0 foo 3 1 baz 4 df1.merge(df3, how='left', left_on='a', right_on='d') a b d c 0 foo 1 foo 3.0 1 bar 2 NaN NaN
笛卡尔积
df1 = pd.DataFrame({'left': ['foo', 'bar']}) df2 = pd.DataFrame({'right': [7, 8]}) df1 left 0 foo 1 bar df2 right 0 7 1 8 df1.merge(df2, how='cross') left right 0 foo 7 1 foo 8 2 bar 7 3 bar 8
append
append(other, ignore_index=False, verify_integrity=False, sort=False)
普通追加
df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB')) df A B 0 1 2 1 3 4 df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) df2 A B 0 5 6 1 7 8 df.append(df2) A B 0 1 2 1 3 4 0 5 6 1 7 8 df.append(df2, ignore_index=True) A B 0 1 2 1 3 4 2 5 6 3 7 8 # 通过 for 循环追加 df = pd.DataFrame(columns=['A']) for i in range(5): df = df.append({'A': i}, ignore_index=True) df A 0 0 1 1 2 2 3 3 4 4 # 等价于 concat 连接,如下 pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)],ignore_index=True) A 0 0 1 1 2 2 3 3 4 4