ऑफ़सेट बनाम ROW_NUMBER()

मैंने एक परीक्षण बनाया है जो OFFSET, कर्सर और ROW_NUMBER() की तुलना करता है। ROW_NUMBER() के बारे में मेरी यह धारणा सही निकली कि उसकी गति सुसंगत रहती है, चाहे आप परिणाम सेट में कहीं भी हों। हालाँकि, वह गति OFFSET या कर्सर की तुलना में नाटकीय रूप से धीमी है। OFFSET और कर्सर की गति, जैसा कि मेरा अनुमान था, आपस में लगभग समान है, और आप परिणाम सेट के अंत की ओर जितना आगे बढ़ते हैं, दोनों उतने ही धीमे होते जाते हैं।

परिणाम (प्रत्येक पंक्ति 10 बार चलाने का कुल समय है, सेकंड में; कोष्ठक में limit और offset हैं):

offset(100,100): 0.016359
scroll(100,100): 0.018393
rownum(100,100): 15.535614

offset(100,480000): 1.761800
scroll(100,480000): 1.781913
rownum(100,480000): 15.158601

offset(100,999900): 3.670898
scroll(100,999900): 3.664517
rownum(100,999900): 14.581068

टेस्ट स्क्रिप्ट sqlalchemy का उपयोग करके टेबल बनाती है और उसमें टेस्ट डेटा की 1000000 पंक्तियाँ डालती है। इसके बाद वह प्रत्येक SELECT स्टेटमेंट को निष्पादित करने के लिए एक psycopg2 कर्सर का उपयोग करती है और तीन अलग-अलग तरीकों से परिणाम प्राप्त करती है।

from sqlalchemy import *

metadata = MetaData()
engine = create_engine('postgresql://scott:example@sqldat.com/test', echo=True)

# Benchmark table: an integer primary key followed by five String(50)
# payload columns named d1..d5, generated in order.
t1 = Table(
    't1', metadata,
    Column('id', Integer, primary_key=True),
    *[Column('d%d' % n, String(50)) for n in range(1, 6)]
)

# Create and populate t1 only when it does not exist yet, so repeated runs of
# the script reuse the same data set.
if not engine.has_table('t1'):
    conn = engine.connect()
    try:
        t1.create(conn)

        # 100 batches x 10000 rows = 1000000 rows; each batch is passed to a
        # single execute() call as a list of parameter dicts.
        # range() is used instead of xrange() so this also runs on Python 3.
        for batch in range(100):
            base = batch * 10000
            conn.execute(t1.insert(), [
                dict(
                    ('d%d' % col, "data data data %d %d" % (col, base + j))
                    for col in range(1, 6)
                )
                for j in range(1, 10001)
            ])
    finally:
        # Release the seeding connection even if table creation or an insert
        # fails; the benchmarks below open their own raw connection.
        conn.close()

import time

def timeit(fn, count, *args):
    """Call ``fn(*args)`` ``count`` times and print the total wall-clock time.

    The printed line has the form ``name(arg1,arg2,...): seconds`` where
    ``name`` is ``fn.__name__``, the arguments are rendered with ``repr`` and
    the time is the total for all ``count`` calls (not an average).

    :param fn: callable to benchmark.
    :param count: number of times to invoke ``fn``.
    :param args: positional arguments forwarded to every call.
    """
    started = time.time()
    for _ in range(count):
        fn(*args)
    total = time.time() - started
    label = ",".join(repr(x) for x in args)
    # A single parenthesised argument prints identically whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print("%s(%s): %f" % (fn.__name__, label, total))

# Shared benchmark connection: the raw DBAPI (psycopg2) connection obtained
# from the engine. offset(), rownum() and scroll() create their cursors
# directly on it rather than executing through SQLAlchemy.
conn = engine.raw_connection()

def offset(limit, offset):
    """Fetch one page of t1 (ordered by id) using LIMIT/OFFSET.

    Runs on the module-level raw connection ``conn``; the fetched rows are
    discarded because only the elapsed time is of interest.

    :param limit: number of rows in the page.
    :param offset: number of leading rows to skip.
    """
    cur = conn.cursor()
    paged_sql = "select * from t1 order by id limit %d offset %d" % (limit, offset)
    cur.execute(paged_sql)
    cur.fetchall()
    cur.close()

def rownum(limit, offset):
    """Fetch one page of t1 by filtering on a ROW_NUMBER() window column.

    Runs on the module-level raw connection ``conn``; the fetched rows are
    discarded because only the elapsed time is of interest.

    :param limit: number of rows in the page.
    :param offset: number of leading rows to skip, with the same meaning as
        in ``offset()``.
    """
    cursor = conn.cursor()
    # row_number() starts at 1 while OFFSET n skips the first n rows, so the
    # page equivalent to "limit L offset N" is N < rownum <= N + L. The
    # previous ">= N and < N + L" window was shifted one row early and
    # returned only L - 1 rows for N == 0.
    cursor.execute("select * from (select *, "
                    "row_number() over (order by id asc) as rownum from t1) as foo "
                    "where rownum>%d and rownum<=%d" % (offset, limit + offset))
    cursor.fetchall()
    cursor.close()

def scroll(limit, offset):
    """Fetch one page of t1 (ordered by id) by scrolling a named cursor.

    Opens a cursor named 'foo' on the module-level raw connection ``conn``
    (per the psycopg2 documentation, a named cursor is server-side), moves it
    forward by ``offset`` rows and fetches ``limit`` rows. The rows are
    discarded because only the elapsed time is of interest.

    :param limit: number of rows to fetch after scrolling.
    :param offset: number of rows to scroll past before fetching.
    """
    named_cur = conn.cursor('foo')
    full_scan_sql = "select * from t1 order by id"
    named_cur.execute(full_scan_sql)
    named_cur.scroll(offset)
    named_cur.fetchmany(limit)
    named_cur.close()

# Run every paging strategy 10 times with a 100-row page at three positions in
# the 1000000-row table: near the start, around the middle and at the very end.
for page_offset in (100, 480000, 999900):
    # print("") emits a blank separator line on both Python 2 and Python 3;
    # a bare ``print`` does nothing on Python 3.
    print("")
    for bench in (offset, scroll, rownum):
        timeit(bench, 10, 100, page_offset)

ऑफ़सेट बनाम ROW_NUMBER()