0
0
Fork 0

added an abstract stemmer and an implementation of a simple stemmer

This commit is contained in:
Gibheer 2011-06-10 00:40:03 +02:00
parent 6285ef9fdf
commit aa153bc68c
7 changed files with 113 additions and 0 deletions

View File

@ -8,4 +8,5 @@ module Polecat
require 'polecat/index_searcher'
require 'polecat/query'
require 'polecat/term'
require 'polecat/stemmer'
end

25
lib/polecat/stemmer.rb Normal file
View File

@ -0,0 +1,25 @@
module Polecat
  # Abstract base class for stemmers.
  #
  # Inherit from this class to build your own stemmer. A stemmer is
  # responsible for converting a document into an array of fragments, which
  # are then merged into the index. Because every document can be made up of
  # different words and fragments, the choice of stemmer has a large effect
  # on the quality of the search results.
  #
  # Be warned: use the same stemmer for the index and for the search input!
  #
  # To build your own stemmer, subclass this class and override #stem.
  class Stemmer
    # Stems the word.
    #
    # Converts the word into the form that gets inserted into the index.
    # This base implementation is only a placeholder and always raises.
    #
    # @param [Object] word word to stem
    # @return [Object] the stemmed variant of the word or the same object
    # @raise [NotImplementedError] always, until a subclass overrides #stem
    def stem(word)
      raise(NotImplementedError, 'please implement #stem')
    end
  end
end

View File

@ -0,0 +1,18 @@
module Polecat
  # Simple stemmer which cleans strings in the most basic way.
  #
  # It only strips a trailing "ing" or "ed" from strings. If it does not do
  # what you intend it to do, inherit from Polecat::Stemmer and implement
  # your own.
  class SimpleStemmer < Polecat::Stemmer
    # Suffixes removed from the end of a word. \Z anchors to the end of the
    # string (tolerating one trailing newline), so suffixes at the end of
    # inner lines of a multi-line string are left untouched.
    SUFFIX_PATTERN = /(?:ing|ed)\Z/

    # Stems a word or every word of an array.
    #
    # @param [Object] word a String, an Array of words (may be nested) or any
    #   other object
    # @return [Object] for a String, the string without a trailing "ing" or
    #   "ed"; for an Array, a new array holding the stemmed elements; any
    #   other object (nil, numbers, ...) is returned unchanged
    def stem(word)
      case word
      when Array
        # map (not each) so that the stemmed elements are actually returned
        word.map { |element| stem(element) }
      when String
        # a one-character string can never carry a suffix, return it as is
        word.length > 1 ? word.sub(SUFFIX_PATTERN, '') : word
      else
        word
      end
    end
  end
end

View File

@ -0,0 +1,12 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
require 'polecat/stemmer/simple'
describe Polecat::SimpleStemmer do
  # The implicitly constructed subject has to be exactly a SimpleStemmer.
  it "creates a new SimpleStemmer" do
    subject.should be_instance_of(Polecat::SimpleStemmer)
  end

  # SimpleStemmer has to be usable wherever a Polecat::Stemmer is expected.
  it "is a stemmer" do
    subject.should be_kind_of(Polecat::Stemmer)
  end
end

View File

@ -0,0 +1,36 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
require 'polecat/stemmer/simple'
# Behaviour of SimpleStemmer#stem for strings, non-string objects and arrays.
describe Polecat::SimpleStemmer do
  it "takes one argument" do
    subject.method(:stem).arity.should be(1)
  end

  it "returns nil if nil was given" do
    subject.stem(nil).should be_nil
  end

  it "returns the word if nothing was done" do
    subject.stem("a").should == "a"
  end

  it "deletes 'ing' from the end" do
    subject.stem("finding").should == "find"
  end

  it "deletes 'ed' from the word end" do
    subject.stem("coded").should == "cod"
  end

  it "returns numbers not as a string" do
    # Checked against Integer instead of Fixnum: Fixnum is gone in current
    # Rubies, while every Fixnum of older Rubies is also an Integer.
    subject.stem(1).should be_kind_of(Integer)
  end

  it "returns a float not as a string" do
    subject.stem(1.1).should be_kind_of(Float)
  end

  it "takes an array and stems every element in it" do
    # the elements of the returned array have to be stemmed, as described
    subject.stem(['coding'])[0].should == 'cod'
  end
end

8
spec/stemmer/new_spec.rb Normal file
View File

@ -0,0 +1,8 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
# The abstract base class itself can still be instantiated.
describe "Stemmer#new" do
  it "creates a new stemmer" do
    Polecat::Stemmer.new.should be_instance_of(Polecat::Stemmer)
  end
end

13
spec/stemmer/stem_spec.rb Normal file
View File

@ -0,0 +1,13 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
# Contract of the abstract Stemmer#stem: one argument, and it always raises.
describe "Stemmer#stem" do
  let(:s) { Polecat::Stemmer.new }

  it "takes one argument" do
    s.method(:stem).arity.should eq(1)
  end

  it "raises an error, because it's an abstract class" do
    stemming = lambda { s.stem("word") }
    stemming.should raise_error(NotImplementedError)
  end
end