looking for a loop optimization example
I’m working on a PyMOTW post about the dis module. As one of the examples, I’d like to show how to use dis to reduce the number of opcodes processed in a loop as an optimization technique. I’m looking for some “naive” code that looks OK, but doesn’t perform especially well on large datasets.
So far I have a class that reads /usr/share/dict/words
and organizes the words by their first letter:
#!/usr/bin/env python
# encoding: utf-8

import timeit
import dis


class Dictionary(object):

    def __init__(self, filename='/usr/share/dict/words'):
        self.by_letter = {}
        self.load_file(filename)

    def load_file(self, filename):
        f = open(filename, 'rt')
        try:
            words = [line.strip() for line in f.readlines()]
            for word in words:
                try:
                    # Append to the list for this first letter,
                    # creating the list on the first KeyError.
                    self.by_letter[word[0]].append(word)
                except KeyError:
                    self.by_letter[word[0]] = [word]
        finally:
            f.close()


if __name__ == '__main__':
    print 'DISASSEMBLY:'
    dis.dis(Dictionary)
    print
    t = timeit.Timer('d = Dictionary()', 'from dis_slow_loop import Dictionary')
    iterations = 2
    print 'TIME: %0.4f' % (t.timeit(iterations) / iterations)
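To see where the loop's cycles go, it helps to disassemble the loop body in isolation. Here is a minimal sketch (the helper functions are mine, not from the files above) contrasting an attribute lookup inside the loop with a hoisted local alias:

import dis

def append_via_attribute(d, words):
    # Every iteration pays for LOAD_FAST (d) followed by LOAD_ATTR
    # (by_letter) before the subscript and append even begin.
    for word in words:
        d.by_letter[word[0]].append(word)

def append_via_local(d, words):
    # The attribute lookup happens once, outside the loop; each
    # iteration then needs only a single LOAD_FAST for the alias.
    by_letter = d.by_letter
    for word in words:
        by_letter[word[0]].append(word)

dis.dis(append_via_attribute)
dis.dis(append_via_local)

Comparing the two listings shows the per-iteration LOAD_ATTR disappearing in the second version, which is exactly the kind of opcode reduction the post is meant to demonstrate.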
I’ve optimized it a bit (collections.defaultdict instead of the try/except, the by_letter attribute hoisted into a local, and the file iterated directly rather than via readlines()) to get this:
#!/usr/bin/env python
# encoding: utf-8

import collections
import dis
import timeit


class Dictionary(object):

    def __init__(self, filename='/usr/share/dict/words'):
        self.by_letter = collections.defaultdict(list)
        self.load_file(filename)

    def load_file(self, filename):
        # Hoist the attribute lookup into a local so the loop body does
        # a cheap LOAD_FAST instead of a LOAD_ATTR on every iteration.
        by_letter = self.by_letter
        with open(filename, 'rt') as f:
            for line in f:
                word = line.strip()
                by_letter[word[0]].append(word)


if __name__ == '__main__':
    print 'DISASSEMBLY:'
    dis.dis(Dictionary)
    print
    t = timeit.Timer('d = Dictionary()', 'from dis_faster_loop import Dictionary')
    iterations = 2
    print 'TIME: %0.4f' % (t.timeit(iterations) / iterations)
The first example gives me a run time of 0.2712 sec and the second 0.1998 sec.
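For anyone reproducing the numbers: the timeit setup strings imply the two versions live in dis_slow_loop.py and dis_faster_loop.py, so a small driver along these lines (my own sketch, not part of either file) times them under identical conditions:

import timeit

iterations = 2
for module in ('dis_slow_loop', 'dis_faster_loop'):
    # Import the class from each module and average a couple of full loads.
    t = timeit.Timer('d = Dictionary()',
                     'from %s import Dictionary' % module)
    print '%s: %0.4f' % (module, t.timeit(iterations) / iterations)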
I’m looking for either alternative loops or a way to increase that spread without making the “slow” version obviously obtuse (no sleeps, unnecessary sub-loops, etc.).