OFFSET vs. ROW

Vytvořil jsem test, který porovnává OFFSET, kurzory a ROW_NUMBER(). Můj dojem z ROW_NUMBER(), že by rychlost byla konzistentní bez ohledu na to, kde se v sadě výsledků nacházíte, je správný. Tato rychlost je však dramaticky nižší než OFFSET nebo CURSOR, které, jak jsem měl také dojem, jsou z hlediska rychlosti v podstatě stejné, obě se snižují, čím dále se dostanete ke konci výsledku.

Výsledky:

offset(100,100): 0.016359
scroll(100,100): 0.018393
rownum(100,100): 15.535614

offset(100,480000): 1.761800
scroll(100,480000): 1.781913
rownum(100,480000): 15.158601

offset(100,999900): 3.670898
scroll(100,999900): 3.664517
rownum(100,999900): 14.581068

Testovací skript používá sqlalchemy k nastavení tabulek a 1000000 řádků testovacích dat. Potom použije kurzor psycopg2 k provedení každého příkazu SELECT a načtení výsledků pomocí tří různých metod.

from sqlalchemy import *

metadata = MetaData()
engine = create_engine('postgresql://scott:example@sqldat.com/test', echo=True)

t1 = Table('t1', metadata,
    Column('id', Integer, primary_key=True),
    Column('d1', String(50)),
    Column('d2', String(50)),
    Column('d3', String(50)),
    Column('d4', String(50)),
    Column('d5', String(50))
)

if not engine.has_table('t1'):
    conn = engine.connect()
    t1.create(conn)

    # 1000000 rows
    for i in range(100):
        conn.execute(t1.insert(), [
            dict(
                ('d%d' % col, "data data data %d %d" % (col, (i * 10000) + j))
                for col in range(1, 6)
            ) for j in xrange(1, 10001)
        ])

import time

def timeit(fn, count, *args):
    now = time.time()
    for i in xrange(count):
        fn(*args)
    total = time.time() - now
    print "%s(%s): %f" % (fn.__name__, ",".join(repr(x) for x in args), total)

# this is a raw psycopg2 connection.
conn = engine.raw_connection()

def offset(limit, offset):
    cursor = conn.cursor()
    cursor.execute("select * from t1 order by id limit %d offset %d" % (limit, offset))
    cursor.fetchall()
    cursor.close()

def rownum(limit, offset):
    cursor = conn.cursor()
    cursor.execute("select * from (select *, "
                    "row_number() over (order by id asc) as rownum from t1) as foo "
                    "where rownum>=%d and rownum<%d" % (offset, limit + offset))
    cursor.fetchall()
    cursor.close()

def scroll(limit, offset):
    cursor = conn.cursor('foo')
    cursor.execute("select * from t1 order by id")
    cursor.scroll(offset)
    cursor.fetchmany(limit)
    cursor.close()

print 

timeit(offset, 10, 100, 100)
timeit(scroll, 10, 100, 100)
timeit(rownum, 10, 100, 100)

print 

timeit(offset, 10, 100, 480000)
timeit(scroll, 10, 100, 480000)
timeit(rownum, 10, 100, 480000)

print 

timeit(offset, 10, 100, 999900)
timeit(scroll, 10, 100, 999900)
timeit(rownum, 10, 100, 999900)

OFFSET vs. ROW_NUMBER()