View optimization in PostgreSQL


I have aggregated the last 'reading' of each table into a single view in order to optimize database access. I've noticed that executing many single queries costs less than using the view, and I'm wondering if there is something wrong in the view or whether it can be optimized.

Here are the tables:

create table hives (
  id character(20) not null,
  master character(20) default null::bpchar,
  owner integer,
  [...]
  constraint hives_pkey primary key (id),
  constraint hives_master_extk foreign key (master)
      references hives (id) match simple
      on update cascade on delete set null,
  constraint hives_owner_extk foreign key (owner)
      references users (id) match simple
      on update cascade on delete cascade
);

create table dt_rain (
  hive character(20) not null,
  hiveconnection integer,
  instant timestamp with time zone not null,
  rain integer,
  constraint dt_rain_pkey primary key (hive, instant),
  constraint dt_rain_hive_connections_extk foreign key (hiveconnection)
      references hives_connections (id) match simple
      on update cascade on delete set null,
  constraint dt_rain_hive_extk foreign key (hive)
      references hives (id) match simple
      on update cascade on delete cascade
);

create table dt_temperature (
  hive character(20) not null,
  hiveconnection integer,
  instant timestamp with time zone not null,
  internal integer,
  external integer,
  constraint dt_temperature_pkey primary key (hive, instant),
  constraint dt_temperature_hive_connections_extk foreign key (hiveconnection)
      references hives_connections (id) match simple
      on update cascade on delete set null,
  constraint dt_temperature_hive_extk foreign key (hive)
      references hives (id) match simple
      on update cascade on delete cascade
);

Every data table holds a history of readings, is large, and shares the same format: hive (key to the hives table), instant, data.

I'm interested in obtaining the last value of each; here is the view:

create or replace view dt_last as
select id as hive,
       b.instant as inout_instant, "input", "output", timeout,
       c.instant as temperature_instant, "internal", "external",
       d.instant as weight_instant, weight,
       e.instant as rain_instant, rain,
       f.instant as voltage_instant, operational, panel, cell,
       g.instant as gps_instant, latitude, longitude, altitude
from hives
left outer join (
    select hive, instant, "input", "output", timeout
    from dt_inout_summary x
    where x.instant = (
        select max(x1.instant) from dt_inout_summary x1 where x1.hive = x.hive
    )
) b on (id = b.hive)
left outer join (
    select hive, instant, "internal", "external"
    from dt_temperature x
    where x.instant = (
        select max(x1.instant) from dt_temperature x1 where x1.hive = x.hive
    )
) c on (id = c.hive)
left outer join (
    select hive, instant, weight
    from dt_weight x
    where x.instant = (
        select max(x1.instant) from dt_weight x1 where x1.hive = x.hive
    )
) d on (id = d.hive)
left outer join (
    select hive, instant, rain
    from dt_rain x
    where x.instant = (
        select max(x1.instant) from dt_inout_summary x1 where x1.hive = x.hive
    )
) e on (id = e.hive)
left outer join (
    select hive, instant, operational, panel, cell
    from dt_voltage x
    where x.instant = (
        select max(x1.instant) from dt_inout_summary x1 where x1.hive = x.hive
    )
) f on (id = f.hive)
left outer join (
    select hive, instant, latitude, longitude, altitude
    from dt_gps x
    where x.instant = (
        select max(x1.instant) from dt_gps x1 where x1.hive = x.hive
    )
) g on (id = g.hive);

Selecting from the view costs about 1 second per record, which is more expensive than executing `select * from <data table> where hive='' order by instant desc limit 1;` 6 times per hive. I'm puzzled.
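For reference, the per-table alternative being compared against looks roughly like this (a sketch; the table and the hive id are placeholders, one such query is run per data table):

```sql
-- One query per data table (dt_inout_summary, dt_temperature, dt_weight,
-- dt_rain, dt_voltage, dt_gps); the primary key (hive, instant) lets the
-- planner satisfy it with a single backward index scan.
select *
from dt_temperature
where hive = 'some-hive-id'
order by instant desc
limit 1;
```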

Here is the graphical view of the query analyzer, followed by the explain analyze output of the planner:

    Merge Left Join  (cost=127051.81..264142.76 rows=37741 width=153) (actual time=8862.466..8862.564 rows=13 loops=1)
      Merge Cond: (hives.id = x_1.hive)
      ->  Nested Loop Left Join  (cost=0.29..136523.11 rows=801 width=137) (actual time=4198.324..4198.395 rows=13 loops=1)
            Join Filter: (hives.id = x_5.hive)
            Rows Removed by Join Filter: 36
            ->  Nested Loop Left Join  (cost=0.29..136505.79 rows=801 width=85) (actual time=4198.300..4198.363 rows=13 loops=1)
                  Join Filter: (hives.id = x_4.hive)
                  ->  Nested Loop Left Join  (cost=0.29..135968.88 rows=801 width=65) (actual time=4198.254..4198.315 rows=13 loops=1)
                        Join Filter: (hives.id = x_3.hive)
                        Rows Removed by Join Filter: 12
                        ->  Nested Loop Left Join  (cost=0.29..135407.01 rows=801 width=53) (actual time=4198.171..4198.227 rows=13 loops=1)
                              Join Filter: (hives.id = x.hive)
                              Rows Removed by Join Filter: 108
                              ->  Nested Loop Left Join  (cost=0.29..345.31 rows=17 width=33) (actual time=0.011..0.049 rows=13 loops=1)
                                    ->  Index Scan using hives_pkey on hives  (cost=0.14..12.39 rows=17 width=21) (actual time=0.005..0.011 rows=13 loops=1)
                                          Heap Fetches: 13
                                    ->  Index Scan using dt_weight_pkey on dt_weight x_2  (cost=0.15..19.57 rows=1 width=96) (actual time=0.002..0.002 rows=0 loops=13)
                                          Index Cond: (hives.id = hive)
                                          Filter: (instant = (SubPlan 6))
                                          SubPlan 6
                                            ->  Result  (cost=5.50..5.51 rows=1 width=0) (actual time=0.009..0.009 rows=1 loops=1)
                                                  InitPlan 5 (returns $5)
                                                    ->  Limit  (cost=0.15..5.50 rows=1 width=8) (actual time=0.007..0.007 rows=1 loops=1)
                                                          ->  Index Scan Backward using dt_weight_pkey on dt_weight x1_2  (cost=0.15..16.21 rows=3 width=8) (actual time=0.003..0.003 rows=1 loops=1)
                                                                Index Cond: ((hive = x_2.hive) AND (instant IS NOT NULL))
                                                                Heap Fetches: 1
                              ->  Materialize  (cost=0.00..134859.45 rows=801 width=41) (actual time=198.865..322.935 rows=9 loops=13)
                                    ->  Seq Scan on dt_inout_summary x  (cost=0.00..134855.44 rows=801 width=41) (actual time=2585.238..4198.146 rows=9 loops=1)
                                          Filter: (instant = (SubPlan 2))
                                          Rows Removed by Filter: 160099
                                          SubPlan 2
                                            ->  Result  (cost=0.81..0.82 rows=1 width=0) (actual time=0.026..0.026 rows=1 loops=160108)
                                                  InitPlan 1 (returns $1)
                                                    ->  Limit  (cost=0.42..0.81 rows=1 width=8) (actual time=0.025..0.025 rows=1 loops=160108)
                                                          ->  Index Scan Backward using test on dt_inout_summary x1  (cost=0.42..6929.02 rows=17790 width=8) (actual time=0.025..0.025 rows=1 loops=160108)
                                                                Index Cond: ((hive = x.hive) AND (instant IS NOT NULL))
                                                                Heap Fetches: 160108
                        ->  Materialize  (cost=0.00..525.83 rows=3 width=96) (actual time=0.006..0.006 rows=1 loops=13)
                              ->  Seq Scan on dt_rain x_3  (cost=0.00..525.82 rows=3 width=96) (actual time=0.076..0.076 rows=1 loops=1)
                                    Filter: (instant = (SubPlan 8))
                                    Rows Removed by Filter: 2
                                    SubPlan 8
                                      ->  Result  (cost=0.81..0.82 rows=1 width=0) (actual time=0.023..0.023 rows=1 loops=3)
                                            InitPlan 7 (returns $7)
                                              ->  Limit  (cost=0.42..0.81 rows=1 width=8) (actual time=0.023..0.023 rows=1 loops=3)
                                                    ->  Index Scan Backward using test on dt_inout_summary x1_3  (cost=0.42..6929.02 rows=17790 width=8) (actual time=0.022..0.022 rows=1 loops=3)
                                                          Index Cond: ((hive = x_3.hive) AND (instant IS NOT NULL))
                                                          Heap Fetches: 3
                  ->  Materialize  (cost=0.00..500.88 rows=3 width=104) (actual time=0.004..0.004 rows=0 loops=13)
                        ->  Seq Scan on dt_voltage x_4  (cost=0.00..500.86 rows=3 width=104) (actual time=0.045..0.045 rows=0 loops=1)
                              Filter: (instant = (SubPlan 10))
                              Rows Removed by Filter: 2
                              SubPlan 10
                                ->  Result  (cost=0.81..0.82 rows=1 width=0) (actual time=0.021..0.021 rows=1 loops=2)
                                      InitPlan 9 (returns $9)
                                        ->  Limit  (cost=0.42..0.81 rows=1 width=8) (actual time=0.020..0.020 rows=1 loops=2)
                                              ->  Index Scan Backward using test on dt_inout_summary x1_4  (cost=0.42..6929.02 rows=17790 width=8) (actual time=0.019..0.019 rows=1 loops=2)
                                                    Index Cond: ((hive = x_4.hive) AND (instant IS NOT NULL))
                                                    Heap Fetches: 2
            ->  Materialize  (cost=0.00..5.30 rows=1 width=136) (actual time=0.001..0.002 rows=3 loops=13)
                  ->  Seq Scan on dt_gps x_5  (cost=0.00..5.30 rows=1 width=136) (actual time=0.009..0.018 rows=3 loops=1)
                        Filter: (instant = (SubPlan 11))
                        Rows Removed by Filter: 3
                        SubPlan 11
                          ->  Aggregate  (cost=1.05..1.06 rows=1 width=8) (actual time=0.002..0.002 rows=1 loops=6)
                                ->  Seq Scan on dt_gps x1_5  (cost=0.00..1.05 rows=1 width=8) (actual time=0.001..0.001 rows=3 loops=6)
                                      Filter: (hive = x_5.hive)
                                      Rows Removed by Filter: 3
      ->  Sort  (cost=127051.53..127053.53 rows=801 width=37) (actual time=4664.139..4664.139 rows=9 loops=1)
            Sort Key: x_1.hive
            Sort Method: quicksort  Memory: 25kB
            ->  Seq Scan on dt_temperature x_1  (cost=0.00..127012.90 rows=801 width=37) (actual time=2859.376..4664.118 rows=9 loops=1)
                  Filter: (instant = (SubPlan 4))
                  Rows Removed by Filter: 160098
                  SubPlan 4
                    ->  Result  (cost=0.76..0.77 rows=1 width=0) (actual time=0.029..0.029 rows=1 loops=160107)
                          InitPlan 3 (returns $3)
                            ->  Limit  (cost=0.42..0.76 rows=1 width=8) (actual time=0.028..0.028 rows=1 loops=160107)
                                  ->  Index Scan Backward using dt_temperature_pkey on dt_temperature x1_1  (cost=0.42..6834.25 rows=20013 width=8) (actual time=0.028..0.028 rows=1 loops=160107)
                                        Index Cond: ((hive = x_1.hive) AND (instant IS NOT NULL))
                                        Heap Fetches: 160107
    Total runtime: 8862.684 ms

Is there a way to optimize the view?

=== EDIT === As suggested by joop: max() replaced with not exists, and indexes on (hive, instant desc) added.
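The added indexes would have been created roughly like this (a sketch; `dt_inout_summary_hive_instant_idx` appears in the plan below, the other index names are assumptions, and one such index is needed per data table):

```sql
-- Composite (hive, instant desc) indexes so the "no newer row for this
-- hive" probe in the NOT EXISTS becomes a single index lookup.
create index dt_inout_summary_hive_instant_idx
    on dt_inout_summary (hive, instant desc);
create index dt_temperature_hive_instant_idx
    on dt_temperature (hive, instant desc);
```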

create or replace view dt_last4 as
select hives.id as hive,
    b.instant as inout_instant, b.input, b.output, b.timeout,
    c.instant as temperature_instant, c.internal, c.external,
    d.instant as weight_instant, d.weight,
    e.instant as rain_instant, e.rain,
    f.instant as voltage_instant, f.operational, f.panel, f.cell,
    g.instant as gps_instant, g.latitude, g.longitude, g.altitude
from hives
    left join dt_inout_summary b on b.hive = hives.id and not exists (
        select 1 from dt_inout_summary nx
        where nx.hive = b.hive and nx.instant > b.instant)
    left join dt_temperature c on c.hive = hives.id and not exists (
        select 1 from dt_temperature nx
        where nx.hive = c.hive and nx.instant > c.instant)
    left join dt_weight d on d.hive = hives.id and not exists (
        select 1 from dt_weight nx
        where nx.hive = d.hive and nx.instant > d.instant)
    left join dt_rain e on e.hive = hives.id and not exists (
        select 1 from dt_rain nx
        where nx.hive = e.hive and nx.instant > e.instant)
    left join dt_voltage f on f.hive = hives.id and not exists (
        select 1 from dt_voltage nx
        where nx.hive = f.hive and nx.instant > f.instant)
    left join dt_gps g on g.hive = hives.id and not exists (
        select 1 from dt_gps nx
        where nx.hive = g.hive and nx.instant > g.instant);

explain analyze:

    Hash Left Join  (cost=18746.60..17942207.21 rows=670182787 width=153) (actual time=771.791..3488.536 rows=13 loops=1)
      Hash Cond: (hives.id = e.hive)
      ->  Hash Left Join  (cost=18744.45..14640569.26 rows=670182787 width=141) (actual time=771.776..3488.513 rows=13 loops=1)
            Hash Cond: (hives.id = f.hive)
            ->  Hash Left Join  (cost=18742.35..11733156.54 rows=670182787 width=121) (actual time=771.759..3488.486 rows=13 loops=1)
                  Hash Cond: (hives.id = g.hive)
                  ->  Hash Right Join  (cost=18740.04..7643068.10 rows=670182787 width=69) (actual time=771.730..3488.447 rows=13 loops=1)
                        Hash Cond: (b.hive = hives.id)
                        ->  Nested Loop Anti Join  (cost=0.84..81545.46 rows=106739 width=41) (actual time=585.381..3301.662 rows=9 loops=1)
                              ->  Index Scan using dt_inout_summary_hive_idx on dt_inout_summary b  (cost=0.42..11549.35 rows=160108 width=41) (actual time=0.012..33.690 rows=160108 loops=1)
                              ->  Index Scan using dt_inout_summary_hive_instant_idx on dt_inout_summary nx  (cost=0.42..119.09 rows=5930 width=29) (actual time=0.020..0.020 rows=1 loops=160108)
                                    Index Cond: ((hive = b.hive) AND (instant > b.instant))
                                    Heap Fetches: 160099
                        ->  Hash  (cost=16361.97..16361.97 rows=106738 width=49) (actual time=186.324..186.324 rows=13 loops=1)
                              Buckets: 2048  Batches: 16  Memory Usage: 1kB
                              ->  Hash Right Join  (cost=6197.92..16361.97 rows=106738 width=49) (actual time=109.280..186.247 rows=13 loops=1)
                                    Hash Cond: (c.hive = hives.id)
                                    ->  Hash Anti Join  (cost=6194.41..14890.81 rows=106738 width=37) (actual time=109.245..186.196 rows=9 loops=1)
                                          Hash Cond: (c.hive = nx_1.hive)
                                          Join Filter: (nx_1.instant > c.instant)
                                          Rows Removed by Join Filter: 195309
                                          ->  Seq Scan on dt_temperature c  (cost=0.00..3098.07 rows=160107 width=37) (actual time=0.004..18.177 rows=160107 loops=1)
                                          ->  Hash  (cost=3098.07..3098.07 rows=160107 width=29) (actual time=48.792..48.792 rows=160107 loops=1)
                                                Buckets: 2048  Batches: 32 (originally 16)  Memory Usage: 4175kB
                                                ->  Seq Scan on dt_temperature nx_1  (cost=0.00..3098.07 rows=160107 width=29) (actual time=0.002..17.848 rows=160107 loops=1)
                                    ->  Hash  (cost=3.30..3.30 rows=17 width=33) (actual time=0.029..0.029 rows=13 loops=1)
                                          Buckets: 1024  Batches: 1  Memory Usage: 1kB
                                          ->  Hash Left Join  (cost=2.06..3.30 rows=17 width=33) (actual time=0.023..0.028 rows=13 loops=1)
                                                Hash Cond: (hives.id = d.hive)
                                                ->  Seq Scan on hives  (cost=0.00..1.17 rows=17 width=21) (actual time=0.004..0.006 rows=13 loops=1)
                                                ->  Hash  (cost=2.04..2.04 rows=1 width=33) (actual time=0.013..0.013 rows=1 loops=1)
                                                      Buckets: 1024  Batches: 1  Memory Usage: 1kB
                                                      ->  Nested Loop Anti Join  (cost=0.00..2.04 rows=1 width=33) (actual time=0.012..0.012 rows=1 loops=1)
                                                            Join Filter: ((nx_2.instant > d.instant) AND (nx_2.hive = d.hive))
                                                            Rows Removed by Join Filter: 1
                                                            ->  Seq Scan on dt_weight d  (cost=0.00..1.01 rows=1 width=33) (actual time=0.004..0.004 rows=1 loops=1)
                                                            ->  Materialize  (cost=0.00..1.01 rows=1 width=29) (actual time=0.003..0.003 rows=1 loops=1)
                                                                  ->  Seq Scan on dt_weight nx_2  (cost=0.00..1.01 rows=1 width=29) (actual time=0.002..0.002 rows=1 loops=1)
                  ->  Hash  (cost=2.26..2.26 rows=4 width=136) (actual time=0.019..0.019 rows=3 loops=1)
                        Buckets: 1024  Batches: 1  Memory Usage: 1kB
                        ->  Hash Anti Join  (cost=1.14..2.26 rows=4 width=136) (actual time=0.015..0.018 rows=3 loops=1)
                              Hash Cond: (g.hive = nx_5.hive)
                              Join Filter: (nx_5.instant > g.instant)
                              Rows Removed by Join Filter: 9
                              ->  Seq Scan on dt_gps g  (cost=0.00..1.06 rows=6 width=136) (actual time=0.004..0.004 rows=6 loops=1)
                              ->  Hash  (cost=1.06..1.06 rows=6 width=92) (actual time=0.004..0.004 rows=6 loops=1)
                                    Buckets: 1024  Batches: 1  Memory Usage: 1kB
                                    ->  Seq Scan on dt_gps nx_5  (cost=0.00..1.06 rows=6 width=92) (actual time=0.001..0.002 rows=6 loops=1)
            ->  Hash  (cost=2.08..2.08 rows=1 width=104) (actual time=0.014..0.014 rows=1 loops=1)
                  Buckets: 1024  Batches: 1  Memory Usage: 1kB
                  ->  Hash Anti Join  (cost=1.04..2.08 rows=1 width=104) (actual time=0.012..0.012 rows=1 loops=1)
                        Hash Cond: (f.hive = nx_4.hive)
                        Join Filter: (nx_4.instant > f.instant)
                        Rows Removed by Join Filter: 2
                        ->  Seq Scan on dt_voltage f  (cost=0.00..1.02 rows=2 width=104) (actual time=0.001..0.001 rows=2 loops=1)
                        ->  Hash  (cost=1.02..1.02 rows=2 width=92) (actual time=0.002..0.002 rows=2 loops=1)
                              Buckets: 1024  Batches: 1  Memory Usage: 1kB
                              ->  Seq Scan on dt_voltage nx_4  (cost=0.00..1.02 rows=2 width=92) (actual time=0.001..0.001 rows=2 loops=1)
      ->  Hash  (cost=2.13..2.13 rows=2 width=96) (actual time=0.011..0.011 rows=2 loops=1)
            Buckets: 1024  Batches: 1  Memory Usage: 1kB
            ->  Hash Anti Join  (cost=1.07..2.13 rows=2 width=96) (actual time=0.009..0.010 rows=2 loops=1)
                  Hash Cond: (e.hive = nx_3.hive)
                  Join Filter: (nx_3.instant > e.instant)
                  Rows Removed by Join Filter: 3
                  ->  Seq Scan on dt_rain e  (cost=0.00..1.03 rows=3 width=96) (actual time=0.001..0.002 rows=3 loops=1)
                  ->  Hash  (cost=1.03..1.03 rows=3 width=92) (actual time=0.002..0.002 rows=3 loops=1)
                        Buckets: 1024  Batches: 1  Memory Usage: 1kB
                        ->  Seq Scan on dt_rain nx_3  (cost=0.00..1.03 rows=3 width=92) (actual time=0.001..0.001 rows=3 loops=1)
    Total runtime: 3488.684 ms

This fourth version is much better. There still remain some sequential scans; without them the view would be state of the art!

This is the not exists(...) construct, which avoids the max() aggregate in the subquery. It benefits from the presence of a composite index on dt_inout_summary(hive, instant desc):

... from hives h
left outer join (
    select hive, instant, "input", "output", timeout
    from dt_inout_summary x
    where not exists (
        select 1
        from dt_inout_summary nx
        where nx.hive = x.hive
        and nx.instant > x.instant
    )
) b on (h.id = b.hive)
...

BTW: you don't need the subquery; a plain left join does the same:

... from hives h
left join dt_inout_summary x on x.hive = h.id
    and not exists (
        select 1
        from dt_inout_summary nx
        where nx.hive = x.hive
        and nx.instant > x.instant
    )
...

...and you'll have to reference the x.yyyyy fields in the main query ( ... x.hive, x.instant, x."input", x."output", x.timeout ).

UPDATE: the query requires 13 (1 + 2*6) range table entries. This might cause the optimizer to back off. You could try to add

set join_collapse_limit = 16; 

before the query. Another way would be to split off the subqueries into CTEs (CTEs are not broken up by the optimizer), but CTEs can be a bit slower than the corresponding subqueries:

create or replace view dt_last4cte as
with cte_b as (
    select * from dt_inout_summary b
    where not exists (select 1 from dt_inout_summary nx
        where nx.hive = b.hive and nx.instant > b.instant)
), cte_c as (
    select * from dt_temperature c
    where not exists (select 1 from dt_temperature nx
        where nx.hive = c.hive and nx.instant > c.instant)
), cte_d as (
    select * from dt_weight d
    where not exists (select 1 from dt_weight nx
        where nx.hive = d.hive and nx.instant > d.instant)
), cte_e as (
    select * from dt_rain e
    where not exists (select 1 from dt_rain nx
        where nx.hive = e.hive and nx.instant > e.instant)
), cte_f as (
    select * from dt_voltage f
    where not exists (select 1 from dt_voltage nx
        where nx.hive = f.hive and nx.instant > f.instant)
), cte_g as (
    select * from dt_gps g
    where not exists (select 1 from dt_gps nx
        where nx.hive = g.hive and nx.instant > g.instant)
)
select h0.id as hive,
    b.instant as inout_instant, b.input, b.output, b.timeout,
    c.instant as temperature_instant, c.internal, c.external,
    d.instant as weight_instant, d.weight,
    e.instant as rain_instant, e.rain,
    f.instant as voltage_instant, f.operational, f.panel, f.cell,
    g.instant as gps_instant, g.latitude, g.longitude, g.altitude
from hives h0
    left join cte_b b on b.hive = h0.id
    left join cte_c c on c.hive = h0.id
    left join cte_d d on d.hive = h0.id
    left join cte_e e on e.hive = h0.id
    left join cte_f f on f.hive = h0.id
    left join cte_g g on g.hive = h0.id
    -- __aditional__conditions__
    ;

If the typical usage of the view adds additional conditions to the resulting final query, a more selective plan may be chosen by the optimizer.
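As a further variant not tried above (an assumption on my part, requiring PostgreSQL 9.3+), a LATERAL join expresses "latest row per hive" directly and typically plans as one backward index scan per hive when a (hive, instant desc) index exists:

```sql
-- Sketch of a LATERAL variant for one data table; repeat one lateral
-- subquery per data table, just like the joins above.
select h0.id as hive,
       b.instant as inout_instant, b."input", b."output", b.timeout
from hives h0
left join lateral (
    select *
    from dt_inout_summary x
    where x.hive = h0.id      -- correlated: only this hive's rows
    order by x.instant desc   -- newest first, served by the index
    limit 1                   -- keep just the latest reading
) b on true;
```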

