#AnyDBM handling from perlindex: # NDBM_File as LAST resort package AnyDBM_File; use vars '@ISA'; @ISA = qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File) unless @ISA; my $mod; for $mod (@ISA) { last if eval "require $mod" }; package Tk::Pod::Search_db; use strict; use vars qw($VERSION); $VERSION = substr(q$Revision: 1.4 $, 10) . ""; use Carp; use Fcntl; use Text::English; use Config; my $PREFIX = $Config::Config{prefix}; (my $IDXDIR = $Config::Config{man1dir}) =~ s|/[^/]+$||; sub new { my $class = shift; my $idir = shift; $idir ||= $IDXDIR; my (%self, %IF, %IDF, %FN); tie (%IF, 'AnyDBM_File', "$idir/index_if", O_RDONLY, 0644) or confess "Could not tie $idir/index_if: $!\n". "Did you run 'perlindex -index'?\n"; tie (%IDF, 'AnyDBM_File', "$idir/index_idf", O_RDONLY, 0644) or confess "Could not tie $idir/index_idf: $!\n"; tie (%FN, 'AnyDBM_File', "$idir/index_fn", O_RDONLY, 0644) or confess "Could not tie $idir/index_fn: $!\n"; $self{IF} = \%IF; $self{IDF} = \%IDF; $self{FN} = \%FN; #xxx: -idir depended but where can I get this info? # o A fourth index file? # o todo: check perlindex index routine $self{PREFIX} = $PREFIX; bless \%self, $class; } # changes to perlindex's normalize # o removed useless(?) stemmer check # o lexicalized $word sub normalize { my $line = join ' ', @_; my @result; $line =~ tr/A-Z/a-z/; $line =~ tr/a-z0-9/ /cs; my $word; for $word (split ' ', $line ) { $word =~ s/^\d+//; next unless length($word) > 2; push @result, &Text::English::stem($word); } @result; } # changes for perlindex's search slightly modified sub searchWords { my $self = shift; #print "try words|", join('|',@_),"\n"; my %score; my $maxhits = 15; my (@unknown, @stop); my $IF = $self->{IF}; my $IDF = $self->{IDF}; my $FN = $self->{FN}; #xxx &initstop if $opt_verbose; my ($word, $did, %post); #xxx for $word (normalize(@_)) { unless ($IF->{$word}) { #xxxif ($stop{$word}) { #xxx push @stop, $word; #xxx} else { #xxx push @unknown, $word; #xxx} next; } #my %post = unpack($p.'*',$IF->{$word}); %post = unpack('w*',$IF->{$word}); my $idf = log($FN->{'last'}/$IDF->{$word}); for $did (keys %post) { #xxx my ($maxtf) = unpack($p, $FN->{$did}); my ($maxtf) = unpack('w', $FN->{$did}); $score{$did} = 0 unless defined $score{$did}; # perl -w $score{$did} += $post{$did} / $maxtf * $idf; } } my @results; for $did (sort {$score{$b} <=> $score{$a}} keys %score) { my ($mtf, $path) = unpack('wa*', $FN->{$did}); push @results, $score{$did}, $path; last unless --$maxhits; } #print "results|", join('|',@results),"\n"; @results; } sub prefix { shift->{PREFIX}; } 1; __END__ =head1 NAME Tk::Pod::Search_db - dirty OO wrapper for C's search functionality =head1 SYNOPSIS ** THIS IS ALPHA SOFTWARE everything may and should change ** ** stuff here is more a scratch pad than docomentation! ** use Tk::Pod::Search_db; ... $idx = Tk::Pod::Search_db->new?(INDEXDIR)?; ... @hits = $idx->searchWords(WORD1,...); # @hits is a list of # relpath1,score1,... where # score is increasing $prefix = $idx->prefix(); @word = Tk::Pod::Search_db::normalize(STRING1,...); =head1 DESCRIPTION Module to search POD documentation. Before you can use the module one should create the indices with Cnew(INDEXDIR) Interface may change to support options like -maxhits =item $idx->seachWords(WORD1?,...?) search for WORD(s). Return a list of relpath1, score1, relpath2, score2, ... or empty list if no match is found. =item $pathprefix = $idx->pathprefix() The return path prefix and C<$relpath> give together the full path name of the POD documentation. $fullpath = $patchprefix . '/' . $relpath B Should make it easy to use Tk::Pod::Search with perlindex but index specific prefix handling is a mess up to know. =back =head1 SEE ALSO tkpod, perlindex perlpod, Tk::Pod::Search =head1 AUTHORS Achim Bohnet > Most of the code here is borrowed from L written by Ulrich Pfeifer >. Copyright (c) 1997-1998 Achim Bohnet. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut