Skip to content

Commit

Permalink
Performance enhancements for RangesIO.
Browse files Browse the repository at this point in the history
RangesIO is critical to performance of ruby-ole, and has finally
received some attention. Short of adding a buffered stream on top,
the performance isn't going to improve much. Fixes include:

1. Precalculate cumulative offsets and use binary search for
   finding appropriate range when seeking.
2. Previously every read did a range search. Now we store and
   maintain a pointer to the active range when seeking, and simply
   increment when a read/write spills over to the next.
3. Avoid costly creation of large temporary arrays storing range
   list for read/write ops.
4. On top of all the above, include range combining, so that in
   the bulk of cases, theres not too many distinct ranges to
   consider, despite typical block size of 512 bytes.

git-svn-id: https://ruby-ole.googlecode.com/svn/trunk@67 f297d60c-f930-0410-b1c6-9ffd80c20a0c
  • Loading branch information
aquasync committed Jul 13, 2009
1 parent 8420fdf commit 9c3d310
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 69 deletions.
127 changes: 78 additions & 49 deletions lib/ole/ranges_io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,14 @@ def initialize io, mode='r', params={}
@params = {:close_parent => false}.merge params
@mode = IO::Mode.new mode
@io = io
# convert ranges to arrays. check for negative ranges?
ranges ||= [0, io.size]
@ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
# calculate size
@size = @ranges.inject(0) { |total, (pos, len)| total + len }
# initial position in the file
@pos = 0

self.ranges = ranges || [[0, io.size]]
# handle some mode flags
truncate 0 if @mode.truncate?
seek size if @mode.append?
end

#IOError: closed stream
# get this for reading, writing, everything...
#IOError: not opened for writing


# add block form. TODO add test for this
def self.open(*args, &block)
ranges_io = new(*args)
Expand All @@ -86,6 +77,36 @@ def self.open(*args, &block)
end
end

def ranges= ranges
# convert ranges to arrays. check for negative ranges?
ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
# combine ranges
if @params[:combine] == false
# might be useful for debugging...
@ranges = ranges
else
@ranges = []
next_pos = nil
ranges.each do |pos, len|
if next_pos == pos
@ranges.last[1] += len
next_pos += len
else
@ranges << [pos, len]
next_pos = pos + len
end
end
end
# calculate cumulative offsets from range sizes
@size = 0
@offsets = []
@ranges.each do |pos, len|
@offsets << @size
@size += len
end
self.pos = @pos
end

def pos= pos, whence=IO::SEEK_SET
case whence
when IO::SEEK_SET
Expand All @@ -95,30 +116,36 @@ def pos= pos, whence=IO::SEEK_SET
pos = @size + pos
else raise Errno::EINVAL
end
raise Errno::EINVAL unless (0...@size) === pos
raise Errno::EINVAL unless (0..@size) === pos
@pos = pos

# do a binary search throuh @offsets to find the active range.
a, c, b = 0, 0, @offsets.length
while a < b
c = (a + b) / 2
pivot = @offsets[c]
if pos == pivot
@active = c
return
elsif pos < pivot
b = c
else
a = c + 1
end
end

@active = a - 1
end

alias seek :pos=
alias tell :pos

def close
@io.close if @params[:close_parent]
def rewind
seek 0
end

# returns the [+offset+, +size+], pair inorder to read/write at +pos+
# (like a partial range), and its index.
def offset_and_size pos
total = 0
ranges.each_with_index do |(offset, size), i|
if pos <= total + size
diff = pos - total
return [offset + diff, size - diff], i
end
total += size
end
# should be impossible for any valid pos, (0...size) === pos
raise ArgumentError, "no range for pos #{pos.inspect}"
def close
@io.close if @params[:close_parent]
end

def eof?
Expand All @@ -130,24 +157,26 @@ def read limit=nil
data = ''
return data if eof?
limit ||= size
partial_range, i = offset_and_size @pos
# this may be conceptually nice (create sub-range starting where we are), but
# for a large range array its pretty wasteful. even the previous way was. but
# i'm not trying to optimize this atm. it may even go to c later if necessary.
([partial_range] + ranges[i+1..-1]).each do |pos, len|
pos, len = @ranges[@active]
diff = @pos - @offsets[@active]
pos += diff
len -= diff
loop do
@io.seek pos
if limit < len
# convoluted, to handle read errors. s may be nil
s = @io.read limit
@pos += s.length if s
break data << s
s = @io.read(limit).to_s
@pos += s.length
data << s
break
end
# convoluted, to handle ranges beyond the size of the file
s = @io.read len
@pos += s.length if s
s = @io.read(len).to_s
@pos += s.length
data << s
break if s.length != len
limit -= len
break if @active == @ranges.length - 1
@active += 1
pos, len = @ranges[@active]
end
data
end
Expand All @@ -164,8 +193,6 @@ def size= size
end

def write data
# short cut. needed because truncate 0 may return no ranges, instead of empty range,
# thus offset_and_size fails.
return 0 if data.empty?
data_pos = 0
# if we don't have room, we can use the truncate hook to make more space.
Expand All @@ -176,8 +203,11 @@ def write data
raise IOError, "unable to grow #{inspect} to write #{data.length} bytes"
end
end
partial_range, i = offset_and_size @pos
([partial_range] + ranges[i+1..-1]).each do |pos, len|
pos, len = @ranges[@active]
diff = @pos - @offsets[@active]
pos += diff
len -= diff
loop do
@io.seek pos
if data_pos + len > data.length
chunk = data[data_pos..-1]
Expand All @@ -189,6 +219,9 @@ def write data
@io.write data[data_pos, len]
@pos += len
data_pos += len
break if @active == @ranges.length - 1
@active += 1
pos, len = @ranges[@active]
end
data_pos
end
Expand All @@ -202,17 +235,13 @@ def write data
def gets
s = read 1024
i = s.index "\n"
@pos -= s.length - (i+1)
self.pos -= s.length - (i+1)
s[0..i]
end
alias readline :gets

def inspect
# the rescue is for empty files
pos, len = (@ranges[offset_and_size(@pos).last] rescue [nil, nil])
range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
"#<#{self.class} io=#{io.inspect}, size=#@size, pos=#@pos, "\
"range=#{range_str}>"
"#<#{self.class} io=#{io.inspect}, size=#{@size}, pos=#{@pos}>"
end
end

Expand Down
16 changes: 7 additions & 9 deletions lib/ole/storage/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Storage
class FormatError < StandardError # :nodoc:
end

VERSION = '1.2.8.2'
VERSION = '1.2.9'

# options used at creation time
attr_reader :params
Expand Down Expand Up @@ -602,8 +602,8 @@ def truncate size
# note that old_blocks is != @ranges.length necessarily. i'm planning to write a
# merge_ranges function that merges sequential ranges into one as an optimization.
@bat.resize_chain @blocks, size
@ranges = @bat.ranges @blocks, size
@pos = @size if @pos > size
@pos = size if @pos > size
self.ranges = @bat.ranges(@blocks, size)
self.first_block = @blocks.empty? ? AllocationTable::EOC : @blocks.first

# don't know if this is required, but we explicitly request our @io to grow if necessary
Expand All @@ -612,8 +612,6 @@ def truncate size
# maybe its ok to just seek out there later??
max = @ranges.map { |pos, len| pos + len }.max || 0
@io.truncate max if max > @io.size

@size = size
end
end

Expand All @@ -633,8 +631,8 @@ def truncate size
# bat migration needed! we need to backup some data. the amount of data
# should be <= @ole.header.threshold, so we can just hold it all in one buffer.
# backup this
pos = @pos
@pos = 0
pos = [@pos, size].min
self.pos = 0
keep = read [@size, size].min
# this does a normal truncate to 0, removing our presence from the old bat, and
# rewrite the dirent's first_block
Expand All @@ -645,9 +643,9 @@ def truncate size
# important to do this now, before the write. as the below write will always
# migrate us back to sbat! this will now allocate us +size+ in the new bat.
super
@pos = 0
self.pos = 0
write keep
@pos = pos
self.pos = pos
else
super
end
Expand Down
9 changes: 1 addition & 8 deletions test/test_ranges_io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,13 @@ def test_open

def test_basics
assert_equal 160, @io.size
assert_match %r{size=160,.*range=100\.\.200}, @io.inspect
assert_match %r{size=160}, @io.inspect
end

def test_truncate
assert_raises(NotImplementedError) { @io.size += 10 }
end

def test_offset_and_size
assert_equal [[100, 100], 0], @io.offset_and_size(0)
assert_equal [[150, 50], 0], @io.offset_and_size(50)
assert_equal [[5, 5], 1], @io.offset_and_size(105)
assert_raises(ArgumentError) { @io.offset_and_size 1000 }
end

def test_seek
@io.pos = 10
@io.seek(-100, IO::SEEK_END)
Expand Down
5 changes: 2 additions & 3 deletions test/test_storage.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#! /usr/bin/ruby

$: << File.dirname(__FILE__) + '/../lib'
#require 'rubygems'

require 'test/unit'
require 'ole/storage'
Expand Down Expand Up @@ -136,9 +137,7 @@ def test_dirent

dirent.open('r') { |f| assert_equal 2, f.first_block }
dirent.open('w') { |f| }
assert_raises Errno::EINVAL do
dirent.open('a') { |f| }
end
dirent.open('a') { |f| }
end

def test_delete
Expand Down

0 comments on commit 9c3d310

Please sign in to comment.