0

I have a tables that looks like this:

create_table "cboe_raws", force: :cascade do |t|
t.date "quote_date", null: false
t.string "underlying_symbol", null: false
t.string "security_type", null: false
t.string "option_symbol", null: false
t.date "expiration_date", null: false
t.float "strike_price", default: 0.0, null: false
t.string "call_put_flag", null: false
t.integer "days_to_expire", default: 0, null: false
t.string "series_type", null: false
t.float "first_trade_price", default: 0.0, null: false
t.float "high_trade_price", default: 0.0, null: false
t.float "low_trade_price", default: 0.0, null: false
t.float "last_trade_price", default: 0.0, null: false
t.integer "total_exchange_vol", default: 0, null: false
t.integer "total_industry_vol", default: 0, null: false
t.integer "open_interest", default: 0, null: false
t.integer "previous_open_interest", default: 0, null: false
t.float "previous_close", default: 0.0, null: false
t.integer "firm_open_buy_qty", default: 0, null: false
t.integer "firm_open_buy_vol", default: 0, null: false
t.integer "firm_close_buy_qty", default: 0, null: false
t.integer "firm_close_buy_vol", default: 0, null: false
t.integer "firm_open_sell_qty", default: 0, null: false
t.integer "firm_open_sell_vol", default: 0, null: false
t.integer "firm_close_sell_qty", default: 0, null: false
t.integer "firm_close_sell_vol", default: 0, null: false
t.integer "bd_open_buy_qty", default: 0, null: false
t.integer "bd_open_buy_vol", default: 0, null: false
t.integer "bd_close_buy_qty", default: 0, null: false
t.integer "bd_close_buy_vol", default: 0, null: false
t.integer "bd_open_sell_qty", default: 0, null: false
t.integer "bd_open_sell_vol", default: 0, null: false
t.integer "bd_close_sell_qty", default: 0, null: false
t.integer "bd_close_sell_vol", default: 0, null: false
t.integer "mm_buy_qty", default: 0, null: false
t.integer "mm_buy_vol", default: 0, null: false
t.integer "mm_sell_qty", default: 0, null: false
t.integer "mm_sell_vol", default: 0, null: false
t.integer "cust_lt_100_open_buy_qty", default: 0, null: false
t.integer "cust_lt_100_open_buy_vol", default: 0, null: false
t.integer "cust_lt_100_close_buy_qty", default: 0, null: false
t.integer "cust_lt_100_close_buy_vol", default: 0, null: false
t.integer "cust_lt_100_open_sell_qty", default: 0, null: false
t.integer "cust_lt_100_open_sell_vol", default: 0, null: false
t.integer "cust_lt_100_close_sell_qty", default: 0, null: false
t.integer "cust_lt_100_close_sell_vol", default: 0, null: false
t.integer "cust_100_199_open_buy_qty", default: 0, null: false
t.integer "cust_100_199_open_buy_vol", default: 0, null: false
t.integer "cust_gt_199_clsoe_buy_qty", default: 0, null: false
t.integer "cust_100_199_close_buy_vol", default: 0, null: false
t.integer "cust_100_199_open_sell_qty", default: 0, null: false
t.integer "cust_100_199_open_sell_vol", default: 0, null: false
t.integer "cust_100_199_close_sell_qty", default: 0, null: false
t.integer "cust_100_199_close_sell_vol", default: 0, null: false
t.integer "cust_gt_199_open_buy_qty", default: 0, null: false
t.integer "cust_gt_199_open_buy_vol", default: 0, null: false
t.integer "cust_gt_199_close_buy_qty", default: 0, null: false
t.integer "cust_gt_199_close_buy_vol", default: 0, null: false
t.integer "cust_gt_199_open_sell_qty", default: 0, null: false
t.integer "cust_gt_199_open_sell_vol", default: 0, null: false
t.integer "cust_gt_199_close_sell_qty", default: 0, null: false
t.integer "cust_gt_199_close_sell_vol", default: 0, null: false
t.integer "procust_lt_100_open_buy_qty", default: 0, null: false
t.integer "procust_lt_100_open_buy_vol", default: 0, null: false
t.integer "procust_lt_100_close_buy_qty", default: 0, null: false
t.integer "procust_lt_100_close_buy_vol", default: 0, null: false
t.integer "procust_lt_100_open_sell_qty", default: 0, null: false
t.integer "procust_lt_100_open_sell_vol", default: 0, null: false
t.integer "procust_lt_100_close_sell_qty", default: 0, null: false
t.integer "procust_lt_100_close_sell_vol", default: 0, null: false
t.integer "procust_100_199_open_buy_qty", default: 0, null: false
t.integer "procust_100_199_open_buy_vol", default: 0, null: false
t.integer "procust_100_199_close_buy_qty", default: 0, null: false
t.integer "procust_100_199_close_buy_vol", default: 0, null: false
t.integer "procust_100_199_open_sell_qty", default: 0, null: false
t.integer "procust_100_199_open_sell_vol", default: 0, null: false
t.integer "procust_100_199_close_sell_qty", default: 0, null: false
t.integer "procust_100_199_close_sell_vol", default: 0, null: false
t.integer "procust_gt_199_open_buy_qty", default: 0, null: false
t.integer "procust_gt_199_open_buy_vol", default: 0, null: false
t.integer "procust_gt_199_close_buy_vol", default: 0, null: false
t.integer "procust_gt_199_open_sell_qty", default: 0, null: false
t.integer "procust_gt_199_open_sell_vol", default: 0, null: false
t.integer "procust_gt_199_close_sell_qty", default: 0, null: false
t.integer "procust_gt_199_close_sell_vol", default: 0, null: false
t.boolean "active", default: true, null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.integer "cust_100_199_close_buy_qty"
t.integer "procust_gt_199_close_buy_qty"
t.index ["active"], name: "index_cboe_raws_on_active"
t.index ["call_put_flag"], name: "index_cboe_raws_on_call_put_flag"
t.index ["expiration_date"], name: "index_cboe_raws_on_expiration_date"
t.index ["option_symbol"], name: "index_cboe_raws_on_option_symbol"
t.index ["quote_date"], name: "index_cboe_raws_on_quote_date"
t.index ["strike_price"], name: "index_cboe_raws_on_strike_price"

I then ran a migration to add indexes:

class IndexTable < ActiveRecord::Migration[7.0]
 def change
 add_index :cboe_raws, :expiration_date
 add_index :cboe_raws, :call_put_flag
 add_index :cboe_raws, :quote_date
 add_index :cboe_raws, :active
 add_index :cboe_raws, :option_symbol
 add_index :cboe_raws, :strike_price
 end
end

I then created a DB function as follows:

CREATE OR REPLACE FUNCTION market_maker_books (symbol VARCHAR)
 RETURNS TABLE (
 contract_underlying_symbol VARCHAR,
 contract_expiration_date DATE,
 contract_strike_price DOUBLE PRECISION,
 contract_call_put_flag VARCHAR,
 contract_mm_buy_vol_sum BIGINT,
 contract_mm_sell_vol_sum BIGINT,
 contract_buys_minus_sells BIGINT
 )
 AS $$
 BEGIN
 RETURN QUERY SELECT underlying_symbol,
 expiration_date,
 strike_price,
 call_put_flag,
 SUM(mm_buy_vol) AS mm_buy_vol_sum,
 SUM(mm_sell_vol) AS mm_sell_vol_sum,
 SUM(mm_buy_vol) - SUM(mm_sell_vol) AS buys_minus_sells
 FROM cboe_raws
 WHERE underlying_symbol = symbol AND expiration_date >= CURRENT_DATE
 GROUP BY underlying_symbol, expiration_date, strike_price, call_put_flag
 ORDER BY expiration_date, call_put_flag, strike_price;
 END; $$
 LANGUAGE 'plpgsql';

Calling the function above takes about 18-19 seconds on a databse with 8.6 million rows. I need that to be much much faster, because this database is only going to grow.

Does anyone have suggestions on how I can speed this up?

Here is the explain:

Function Scan on market_maker_books (cost=0.05..3.05 rows=1000 width=164) (actual time=241.870..241.906 rows=491 loops=1)
 Buffers: shared hit=678 read=1251
 I/O Timings: read=194.877
Planning Time: 0.029 ms
Execution Time: 241.956 ms
Erwin Brandstetter
186k28 gold badges463 silver badges636 bronze badges
asked May 20, 2022 at 17:43
8
  • 1
    Please consider reading this advice. Any particular reason you complicate things by wrapping a pretty simple query into a function? Commented May 20, 2022 at 17:49
  • @mustaccio I guess I just wanted it as an available function in the DB. I have another query based off this function. Am I taking a performance hit by wrapping it in a function? I create the function in a database migration Commented May 20, 2022 at 17:52
  • @Jrich138: there is a performance overhead in calling a PL/pgSQL however that's only something you would notice when calling the function many times. But to help you we need the execution plan of the query (not the function) using explain (analyze, buffers, format text) (not just a "simple" explain) as formatted text and make sure you preserve the indention of the plan. Paste the text, then put ``` on the line before the plan and on a line after the plan. Commented May 20, 2022 at 18:22
  • 1
    Btw: table and index definitions are better shown as SQL statement (CREATE TABLE, CREATE INDEX). I for one am not sure what that pseudo-code is really doing Commented May 20, 2022 at 18:22
  • @a_horse_with_no_name Ah! Got it. Thank you. I will try to use explain. I have never used it Commented May 20, 2022 at 22:00

1 Answer 1

1

Assuming current Postgres 14 for lack of declaration.

Index

A multicolumn B-tree index on (underlying_symbol, expiration_date) would make your query fast. Any variant will do as long as underlying_symbol is the leading column: equality first, range later. See:

Even a plain index on just (underlying_symbol) might go a long way - depending on undisclosed data distribution.

Seeing that your table has very wide rows, a "covering" index might be ideal - if you get index-only scans out of it:

CREATE INDEX foo ON cboe_raws (underlying_symbol, expiration_date DESC NULLS LAST)
INCLUDE (strike_price, call_put_flag, mm_buy_vol, mm_sell_vol, mm_buy_vol);

Function

You typically don't need PL/pgSQL for a single, plain SQL query. Use an SQL function instead. I suggest the new standard-SQL variant:

CREATE OR REPLACE FUNCTION market_maker_books(_symbol varchar)
 RETURNS TABLE (contract_underlying_symbol varchar
 , contract_expiration_date date
 , contract_strike_price float8
 , contract_call_put_flag varchar
 , contract_mm_buy_vol_sum bigint
 , contract_mm_sell_vol_sum bigint
 , contract_buys_minus_sells bigint)
 LANGUAGE sql STABLE STRICT PARALLEL SAFE
BEGIN ATOMIC
SELECT c.underlying_symbol
 , c.expiration_date
 , c.strike_price
 , c.call_put_flag
 , SUM(c.mm_buy_vol) -- AS mm_buy_vol_sum
 , SUM(c.mm_sell_vol) -- AS mm_sell_vol_sum
 , SUM(c.mm_buy_vol) - SUM(c.mm_sell_vol) -- AS buys_minus_sells
FROM cboe_raws c
WHERE c.underlying_symbol = _symbol
AND c.expiration_date >= CURRENT_DATE
GROUP BY c.underlying_symbol, c.expiration_date, c.strike_price, c.call_put_flag
ORDER BY c.expiration_date, c.call_put_flag, c.strike_price;
END;

See:

Might be untangled and compacted further:

CREATE OR REPLACE FUNCTION market_maker_books(_symbol varchar)
 RETURNS TABLE (expiration_date date
 , call_put_flag varchar
 , strike_price float8
 , mm_buy_vol_sum bigint
 , mm_sell_vol_sum bigint
 , buys_minus_sells bigint)
 LANGUAGE sql STABLE STRICT PARALLEL SAFE
BEGIN ATOMIC
SELECT c.expiration_date, c.call_put_flag, c.strike_price
 , SUM(c.mm_buy_vol)
 , SUM(c.mm_sell_vol)
 , SUM(c.mm_buy_vol) - SUM(c.mm_sell_vol)
FROM cboe_raws c
WHERE c.underlying_symbol = _symbol
AND c.expiration_date >= CURRENT_DATE
GROUP BY 1, 2, 3
ORDER BY 1, 2, 3;
END;
answered May 20, 2022 at 23:52

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.