import numpy as np
import pandas as pd
left = pd.DataFrame({'p': [2, 5, 8], 'left_val': ['x', 'y', 'z']})
left
right = pd.DataFrame({'p': [1, 2, 4, 6, 7],
'right_val': [1, 2, 4, 6, 7]})
right
pd.merge_asof(left, right, on='p')
pd.merge_asof(left, right, on='p', allow_exact_matches=False)
pd.merge_asof(left, right, on='p', direction='forward')
pd.merge_asof(left, right, on='p', direction='nearest')
You can use indexed DataFrames as well.
left = pd.DataFrame({'left_val': ['x', 'y', 'z']}, index=[2, 5, 8])
left
right = pd.DataFrame({'right_val': [1, 2, 4, 6, 7]},
index=[1, 2, 4, 6, 7])
right
pd.merge_asof(left, right, left_index=True, right_index=True)
Here is a real-world times-series example
quotes = pd.DataFrame({
'time': pd.to_datetime(['20190220 13:30:00.023',
'20190220 13:30:00.023',
'20190220 13:30:00.030',
'20190220 13:30:00.041',
'20190220 13:30:00.048',
'20190220 13:30:00.049']),
'ticker': ['GOOG', 'MSFT', 'MSFT','MSFT', 'GOOG', 'AAPL'],
'bid': [720.50, 51.95, 51.97, 51.99,720.50, 97.99],
'ask': [720.93, 51.96, 51.98, 52.00,720.93, 98.01]},
columns=['time', 'ticker', 'bid', 'ask'])
quotes
trades = pd.DataFrame({
'time': pd.to_datetime(['20190220 13:30:00.023',
'20190220 13:30:00.038',
'20190220 13:30:00.048',
'20190220 13:30:00.048',
'20190220 13:30:00.048']),
'ticker': ['MSFT', 'MSFT','GOOG', 'GOOG', 'AAPL'],
'price': [51.95, 51.95,720.77, 720.92, 98.00],
'quantity': [75, 155,100, 100, 100]},
columns=['time', 'ticker', 'price', 'quantity'])
trades
By default we are taking the asof of the quotes
pd.merge_asof(trades, quotes,
on='time',
by='ticker')
We only asof within 2ms between the quote time and the trade time
pd.merge_asof(trades, quotes,
on='time',
by='ticker',
tolerance=pd.Timedelta('2ms'))
We only asof within 10ms between the quote time and the trade time and we exclude exact matches on time.
However prior data will propagate forward.
pd.merge_asof(trades, quotes,
on='time',
by='ticker',
tolerance=pd.Timedelta('10ms'),
allow_exact_matches=False)