########################################################################## # # Array::IntSpan - a Module for handling arrays using IntSpan techniques # # Author: Toby Everett # Revision: 1.01 # Last Change: Fixed Makefile.PL ########################################################################## # Copyright 2000 Toby Everett. All rights reserved. # # This file is distributed under the Artistic License. See # http://www.ActiveState.com/corporate/artistic_license.htm or # the license that comes with your perl distribution. # # For comments, questions, bugs or general interest, feel free to # contact Toby Everett at teverett@alascom.att.com ########################################################################## use strict; package Array::IntSpan; use vars qw($VERSION); $VERSION = '1.01'; sub new { my $class = shift; my $self = [@_]; bless $self, $class; $self->_check_structure; return $self; } sub set_range { my $self = shift; my($new_elem) = [@_]; #Test that we were passed appropriate values @{$new_elem} == 3 or croak("Array::IntSpan::set_range should be called with three values."); $new_elem->[0] <= $new_elem->[1] or croak("Array::IntSpan::set_range called with bad indices: $new_elem->[0] and $new_elem->[1]."); my($start, $end) = (0, $#{$self}); #Before we binary search, we'll first check to see if this is an append operation if ($self->[$end]->[1] < $new_elem->[0]) { $start = $end+1; } else { #Binary search for the first element after the last element that is entirely #before the element to be inserted (say that ten times fast) while ($start < $end) { my $mid = int(($start+$end)/2); if ($self->[$mid]->[1] < $new_elem->[0]) { $start = $mid+1; } else { $end = $mid; } } } #If there is no conflict between this element and the new element, we are home free if ($start > $#{$self} || $new_elem->[1] < $self->[$start]->[0]) { splice(@{$self}, $start, 0, $new_elem); return 0; } else { #If we are here, we need to test for whether we need to frag the conflicting element if ($self->[$start]->[0] < $new_elem->[0]) { splice(@{$self}, $start, 1, [$self->[$start]->[0], $new_elem->[0]-1, $self->[$start]->[2]], [$new_elem->[0], $self->[$start]->[1], $self->[$start]->[2]]); $start++; } #Then we go a searching for where to end this insert my $start_elem = $start; $end = $#{$self}+1; while ($start < $end) { my $mid = int(($start+$end)/2); if ($self->[$mid]->[1] < $new_elem->[1]) { $start = $mid+1; } else { $end = $mid; } } #Do a fragmentation check if ($self->[$start]->[0] <= $new_elem->[1]) { if ($self->[$start]->[1] > $new_elem->[1]) { splice(@{$self}, $start, 1, [$self->[$start]->[0], $new_elem->[1], $self->[$start]->[2]], [$new_elem->[1]+1, $self->[$start]->[1], $self->[$start]->[2]]); } $start++; } #Then we splice in the new element. If the value is undef, we just remove the old stuff if (defined $new_elem->[2]) { splice(@{$self}, $start_elem, $start-$start_elem, $new_elem); } else { splice(@{$self}, $start_elem, $start-$start_elem); } return 1; } } sub lookup { my $self = shift; my($key) = @_; my($start, $end) = (0, $#{$self}); while ($start < $end) { my $mid = int(($start+$end)/2); if ($self->[$mid]->[1] < $key) { $start = $mid+1; } else { $end = $mid; } } if ($self->[$start]->[0] <= $key && $self->[$start]->[1] >= $key) { return $self->[$start]->[2]; } return undef; } sub _check_structure { my $self = shift; foreach my $i (0..$#{$self}) { @{$self->[$i]} == 3 or croak("Array::IntSpan::_check_structure failed - element $i lacks 3 entries."); $self->[$i]->[0] <= $self->[$i]->[1] or croak("Array::IntSpan::_check_structure failed - element $i has bad indices."); if ($i > 0) { $self->[$i-1]->[1] < $self->[$i]->[0] or croak("Array::IntSpan::_check_structure failed - element $i doesn't come after previous element."); } } } #The following code is courtesy of Mark Jacob-Dominus, sub croak { require Carp; *croak = \&Carp::croak; goto &croak; } 1; __END__ =head1 NAME Array::IntSpan - a Module for handling arrays using IntSpan techniques =head1 SYNOPSIS use Array::IntSpan; my $foo = Array::IntSpan->new([0, 59, 'F'], [60, 69, 'D'], [80, 89, 'B']); print "A score of 84% results in a ".$foo->lookup(84).".\n"; unless (defined($foo->lookup(70))) { print "The grade for the score 70% is currently undefined.\n"; } $foo->set_range(70, 79, 'C'); print "A score of 75% now results in a ".$foo->lookup(75).".\n"; $foo->set_range(0, 59, undef); unless (defined($foo->lookup(40))) { print "The grade for the score 40% is now undefined.\n"; } $foo->set_range(87, 89, 'B+'); $foo->set_range(85, 100, 'A'); $foo->set_range(100, 1_000_000, 'A+'); =head1 DESCRIPTION C brings the speed advantages of C (written by Steven McDougall) to arrays. Uses include manipulating grades, routing tables, or any other situation where you have mutually exclusive ranges of integers that map to given values. C is also provided with the distribution. It lets you use IP addresses in any of three forms (dotted decimal, network string, and integer) for the indices into the array. See the POD for that module for more information. =head2 Installation instructions Standard C approach or just copy C into C and C into C. =head1 METHODS =head2 new The C method takes an optional list of array elements. The elements should be in the form C<[start_index, end_index, value]>. They should be in sorted order and there should be no overlaps. The internal method C<_check_structure> will be called to verify the data is correct. If you wish to avoid the performance penalties of checking the structure, you can use C to dump an object and use that code to reconstitute it. =head2 set_range This method takes three parameters - the C, the C, and the C. If you wish to erase a range, specify C for the C. It properly deals with overlapping ranges and will replace existing data as appropriate. If the new range lies after the last existing range, the method will execute in O(1) time. If the new range lies within the existing ranges, the method executes in O(n) time, where n is the number of ranges. The code is not completely optimized and will make up to three calls to C if the new range intersects with existing ranges. It does not consolidate contiguous ranges that have the same C. If you have a large number of inserts to do, it would be beneficial to sort them first. Sorting is O(n lg(n)), and since appending is O(1), that will be considerably faster than the O(n^2) time for inserting n unsorted elements. The method returns C<0> if there were no overlapping ranges and C<1> if there were. =head2 lookup This method takes as a single parameter the C to look up. If there is an appropriate range, the method will return the associated value. Otherwise, it returns C. =head1 AUTHOR Toby Everett, teverett@alascom.att.com =cut