In [11]:
import numpy as np
import pandas as pd
In [12]:
left = pd.DataFrame({'p': [2, 5, 8], 'left_val': ['x', 'y', 'z']})
left
Out[12]:
p left_val
0 2 x
1 5 y
2 8 z
In [13]:
right = pd.DataFrame({'p': [1, 2, 4, 6, 7],
                      'right_val': [1, 2, 4, 6, 7]})
right
Out[13]:
p right_val
0 1 1
1 2 2
2 4 4
3 6 6
4 7 7
In [14]:
pd.merge_asof(left, right, on='p')
Out[14]:
p left_val right_val
0 2 x 2
1 5 y 4
2 8 z 7
In [15]:
pd.merge_asof(left, right, on='p', allow_exact_matches=False)
Out[15]:
p left_val right_val
0 2 x 1
1 5 y 4
2 8 z 7
In [16]:
pd.merge_asof(left, right, on='p', direction='forward')
Out[16]:
p left_val right_val
0 2 x 2.0
1 5 y 6.0
2 8 z NaN
In [17]:
pd.merge_asof(left, right, on='p', direction='nearest')
Out[17]:
p left_val right_val
0 2 x 2
1 5 y 4
2 8 z 7

You can use indexed DataFrames as well.

In [18]:
left = pd.DataFrame({'left_val': ['x', 'y', 'z']}, index=[2, 5, 8])
left
Out[18]:
left_val
2 x
5 y
8 z
In [19]:
right = pd.DataFrame({'right_val': [1, 2, 4, 6, 7]},
                     index=[1, 2, 4, 6, 7])
right
Out[19]:
right_val
1 1
2 2
4 4
6 6
7 7
In [20]:
pd.merge_asof(left, right, left_index=True, right_index=True)
Out[20]:
left_val right_val
2 x 2
5 y 4
8 z 7

Here is a real-world times-series example

In [23]:
quotes = pd.DataFrame({
    'time': pd.to_datetime(['20190220 13:30:00.023',
                            '20190220 13:30:00.023',
                            '20190220 13:30:00.030',
                            '20190220 13:30:00.041',
                            '20190220 13:30:00.048',
                            '20190220 13:30:00.049']),
    'ticker': ['GOOG', 'MSFT', 'MSFT','MSFT', 'GOOG', 'AAPL'],
    'bid': [720.50, 51.95, 51.97, 51.99,720.50, 97.99],
    'ask': [720.93, 51.96, 51.98, 52.00,720.93, 98.01]},
    columns=['time', 'ticker', 'bid', 'ask'])
In [24]:
quotes
Out[24]:
time ticker bid ask
0 2019-02-20 13:30:00.023 GOOG 720.50 720.93
1 2019-02-20 13:30:00.023 MSFT 51.95 51.96
2 2019-02-20 13:30:00.030 MSFT 51.97 51.98
3 2019-02-20 13:30:00.041 MSFT 51.99 52.00
4 2019-02-20 13:30:00.048 GOOG 720.50 720.93
5 2019-02-20 13:30:00.049 AAPL 97.99 98.01
In [25]:
trades = pd.DataFrame({
    'time': pd.to_datetime(['20190220 13:30:00.023',
                            '20190220 13:30:00.038',
                            '20190220 13:30:00.048',
                            '20190220 13:30:00.048',
                            '20190220 13:30:00.048']),
    'ticker': ['MSFT', 'MSFT','GOOG', 'GOOG', 'AAPL'],
    'price': [51.95, 51.95,720.77, 720.92, 98.00],
    'quantity': [75, 155,100, 100, 100]},
    columns=['time', 'ticker', 'price', 'quantity'])
In [26]:
trades
Out[26]:
time ticker price quantity
0 2019-02-20 13:30:00.023 MSFT 51.95 75
1 2019-02-20 13:30:00.038 MSFT 51.95 155
2 2019-02-20 13:30:00.048 GOOG 720.77 100
3 2019-02-20 13:30:00.048 GOOG 720.92 100
4 2019-02-20 13:30:00.048 AAPL 98.00 100

By default we are taking the asof of the quotes

In [27]:
pd.merge_asof(trades, quotes,
                      on='time',
                      by='ticker')
Out[27]:
time ticker price quantity bid ask
0 2019-02-20 13:30:00.023 MSFT 51.95 75 51.95 51.96
1 2019-02-20 13:30:00.038 MSFT 51.95 155 51.97 51.98
2 2019-02-20 13:30:00.048 GOOG 720.77 100 720.50 720.93
3 2019-02-20 13:30:00.048 GOOG 720.92 100 720.50 720.93
4 2019-02-20 13:30:00.048 AAPL 98.00 100 NaN NaN

We only asof within 2ms between the quote time and the trade time

In [28]:
pd.merge_asof(trades, quotes,
                       on='time',
                       by='ticker',
                       tolerance=pd.Timedelta('2ms'))
Out[28]:
time ticker price quantity bid ask
0 2019-02-20 13:30:00.023 MSFT 51.95 75 51.95 51.96
1 2019-02-20 13:30:00.038 MSFT 51.95 155 NaN NaN
2 2019-02-20 13:30:00.048 GOOG 720.77 100 720.50 720.93
3 2019-02-20 13:30:00.048 GOOG 720.92 100 720.50 720.93
4 2019-02-20 13:30:00.048 AAPL 98.00 100 NaN NaN

We only asof within 10ms between the quote time and the trade time and we exclude exact matches on time.
However prior data will propagate forward.

In [29]:
pd.merge_asof(trades, quotes,
                      on='time',
                      by='ticker',
                      tolerance=pd.Timedelta('10ms'),
                      allow_exact_matches=False)
Out[29]:
time ticker price quantity bid ask
0 2019-02-20 13:30:00.023 MSFT 51.95 75 NaN NaN
1 2019-02-20 13:30:00.038 MSFT 51.95 155 51.97 51.98
2 2019-02-20 13:30:00.048 GOOG 720.77 100 NaN NaN
3 2019-02-20 13:30:00.048 GOOG 720.92 100 NaN NaN
4 2019-02-20 13:30:00.048 AAPL 98.00 100 NaN NaN