From d5057a5a2649f59aef533e6bb3f0db5ad8aade9e Mon Sep 17 00:00:00 2001
From: Gibheer
Date: Mon, 30 May 2011 18:36:51 +0200
Subject: [PATCH] * new basis for the save and load system of the index * the
 old index will disappear and IndexSearcher will be the interface for
 searching through the index

---
Review notes (everything up to the first "diff --git" is ignored by git am):
 * the line structure of the patch was restored; the indentation of the
   context lines of lib/polecat/index_writer.rb is a best guess
 * IndexReader#read returns the collected documents instead of the value of
   files.each, which is the list of file names
 * File.exists? (removed in Ruby 3.2) was replaced by File.exist?
 * IndexSearcher.new raises an ArgumentError for unsupported arguments
   instead of silently keeping a nil reader
 * the blob hashes on the "index" lines still belong to the original commit

 lib/polecat.rb                          |  3 ++
 lib/polecat/index_reader.rb             | 47 +++++++++++++++++++++++++
 lib/polecat/index_searcher.rb           | 40 +++++++++++++++++++++
 lib/polecat/index_writer.rb             |  7 ++++
 spec/index_reader/locked_spec.rb        | 18 ++++++++++
 spec/index_reader/new_spec.rb           | 21 +++++++++++
 spec/index_reader/read_spec.rb          | 40 +++++++++++++++++++++
 spec/index_searcher/new_spec.rb         | 21 +++++++++++
 spec/index_writer/create_reader_spec.rb | 22 ++++++++++++
 9 files changed, 219 insertions(+)
 create mode 100644 lib/polecat/index_reader.rb
 create mode 100644 lib/polecat/index_searcher.rb
 create mode 100644 spec/index_reader/locked_spec.rb
 create mode 100644 spec/index_reader/new_spec.rb
 create mode 100644 spec/index_reader/read_spec.rb
 create mode 100644 spec/index_searcher/new_spec.rb
 create mode 100644 spec/index_writer/create_reader_spec.rb

diff --git a/lib/polecat.rb b/lib/polecat.rb
index d746e86..3325856 100644
--- a/lib/polecat.rb
+++ b/lib/polecat.rb
@@ -1,4 +1,7 @@
 class Polecat
   require 'polecat/index'
+  require 'polecat/index_writer'
+  require 'polecat/index_reader'
+  require 'polecat/index_searcher'
   require 'polecat/document'
 end
diff --git a/lib/polecat/index_reader.rb b/lib/polecat/index_reader.rb
new file mode 100644
index 0000000..1817953
--- /dev/null
+++ b/lib/polecat/index_reader.rb
@@ -0,0 +1,47 @@
+class Polecat
+  # reads an index directory
+  #
+  # This class reads the content of an index directory and builds the
+  # necessary structures for the index type.
+  class IndexReader
+    attr_reader :path
+
+    # initialize a new reader
+    #
+    # Create a new reader for the given path. If the directory is empty, you
+    # will get an empty index, else all documents stored in that directory.
+    # @param [String] path the path to the index directory
+    # @raise [ArgumentError] raised when the path is not a directory
+    def initialize path
+      @path = path
+      raise ArgumentError, 'no valid directory' unless File.directory? @path
+    end
+
+    # read the content of the directory
+    #
+    # Load every file of the directory with Marshal and merge the results.
+    # Marshal.load must never be fed untrusted data, so the index directory
+    # has to be writable by trusted code only.
+    # @raise [IOError] raised when the directory is locked
+    # @return [Array, Hash] all documents found in the index files or an empty
+    #   Hash when the directory does not contain any file
+    def read
+      raise IOError, 'index is locked' if locked?
+      files = Dir[File.join(@path, '*')]
+      # the specs expect a Hash for an empty index directory
+      return {} if files.empty?
+      # collect the documents instead of returning the result of #each, which
+      # is the list of file names
+      files.inject([]) do |documents, file|
+        documents + Marshal.load(File.read(file))
+      end
+    end
+
+    # checks whether the directory is locked or not
+    #
+    # @return [Boolean] true when the file index.lock exists in the directory
+    def locked?
+      File.exist? File.join(@path, 'index.lock')
+    end
+  end
+end
diff --git a/lib/polecat/index_searcher.rb b/lib/polecat/index_searcher.rb
new file mode 100644
index 0000000..5cb280d
--- /dev/null
+++ b/lib/polecat/index_searcher.rb
@@ -0,0 +1,40 @@
+class Polecat
+  # interface for searching an index
+  #
+  # Built on top of a Polecat::IndexReader, this class lets you search through
+  # all documents stored in an index.
+  class IndexSearcher
+    attr_reader :reader
+
+    # creates a new Polecat::IndexSearcher
+    #
+    # Create a new Polecat::IndexSearcher to search documents. Either a path
+    # to a directory or a Polecat::IndexReader has to be given, to make this
+    # searcher work.
+    # @param [String, Polecat::IndexReader] args the first argument is the path
+    #   of the index directory or the reader to use
+    # @raise [ArgumentError] raised when the first argument is neither a
+    #   String nor a Polecat::IndexReader
+    # @example
+    #   # the following has the same meaning
+    #   IndexSearcher.new 'index_dir'
+    #   IndexSearcher.new(IndexReader.new 'index_dir')
+    def initialize *args
+      case args.first
+      when Polecat::IndexReader
+        @reader = args.first
+      when String
+        @reader = Polecat::IndexReader.new args.first
+      else
+        # fail early instead of keeping a nil reader around
+        raise ArgumentError, 'no path or IndexReader given'
+      end
+    end
+
+    # returns the path of the index directory
+    # @return [String] path of the index directory
+    def path
+      @reader.path
+    end
+  end
+end
diff --git a/lib/polecat/index_writer.rb b/lib/polecat/index_writer.rb
index 1cbab1c..1408d15 100644
--- a/lib/polecat/index_writer.rb
+++ b/lib/polecat/index_writer.rb
@@ -79,5 +79,12 @@ class Polecat
         return true
       end
     end
+
+    # creates an index reader with the writer's path
+    #
+    # @return [Polecat::IndexReader] an IndexReader with the same path
+    def create_reader
+      Polecat::IndexReader.new @path
+    end
   end
 end
diff --git a/spec/index_reader/locked_spec.rb b/spec/index_reader/locked_spec.rb
new file mode 100644
index 0000000..3eed73e
--- /dev/null
+++ b/spec/index_reader/locked_spec.rb
@@ -0,0 +1,18 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe "IndexReader#locked?" do
+  before do
+    @path = prepare_index_dir
+  end
+
+  it "returns false when the directory is not locked" do
+    r = Polecat::IndexReader.new @path
+    r.locked?.should == false
+  end
+
+  it "returns true when the directory is locked" do
+    FileUtils.touch @path + '/index.lock'
+    r = Polecat::IndexReader.new @path
+    r.locked?.should == true
+  end
+end
diff --git a/spec/index_reader/new_spec.rb b/spec/index_reader/new_spec.rb
new file mode 100644
index 0000000..8e09e9e
--- /dev/null
+++ b/spec/index_reader/new_spec.rb
@@ -0,0 +1,21 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe "IndexReader#new" do
+  before do
+    @path = prepare_index_dir
+  end
+
+  it "takes a directory path as an argument" do
+    r = Polecat::IndexReader.new @path
+    r.path.should == @path
+  end
+
+  it "raises an error when no path is given" do
+    lambda { Polecat::IndexReader.new }.should raise_error(ArgumentError)
+  end
+
+  it "raises an error when the path is not a directory" do
+    lambda { Polecat::IndexReader.new '/dev/null' }.should(
+      raise_error(ArgumentError))
+  end
+end
diff --git a/spec/index_reader/read_spec.rb b/spec/index_reader/read_spec.rb
new file mode 100644
index 0000000..76872ac
--- /dev/null
+++ b/spec/index_reader/read_spec.rb
@@ -0,0 +1,40 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe "IndexReader#read" do
+  before do
+    @path = prepare_index_dir
+  end
+
+  it "returns a hash with all documents" do
+    r = Polecat::IndexReader.new @path
+    r.read.class.should == Hash
+  end
+
+  it "returns an empty hash for a empty directory" do
+    r = Polecat::IndexReader.new @path
+    r.read.count.should == 0
+  end
+
+  it "returns the document count found in the index directory" do
+    w = Polecat::IndexWriter.new @path
+    w.add Spec::FooDocument.new(:id => 23)
+    w.write
+    r = Polecat::IndexReader.new @path
+    r.read.count.should == 1
+  end
+
+  it "merges all documents from different files together" do
+    w = Polecat::IndexWriter.new @path
+    w.add Spec::FooDocument.new(:id => 23)
+    w.write
+    w.add Spec::FooDocument.new(:id => 24)
+    w.write
+    w.create_reader.read.count.should == 2
+  end
+
+  it "raises an error when the directory is locked" do
+    FileUtils.touch @path + '/index.lock'
+    r = Polecat::IndexReader.new @path
+    lambda { r.read }.should raise_error(IOError)
+  end
+end
diff --git a/spec/index_searcher/new_spec.rb b/spec/index_searcher/new_spec.rb
new file mode 100644
index 0000000..6e7ab54
--- /dev/null
+++ b/spec/index_searcher/new_spec.rb
@@ -0,0 +1,21 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe "IndexSearcher#new" do
+  before do
+    @path = prepare_index_dir
+  end
+
+  it "takes a path as an arugment" do
+    s = Polecat::IndexSearcher.new @path
+    s.path.should == @path
+  end
+
+  it "takes an IndexReader as an argument" do
+    s = Polecat::IndexSearcher.new(Polecat::IndexReader.new @path)
+    s.path.should == @path
+  end
+
+  it "raises an error when neither a path nor an IndexReader is given" do
+    lambda { Polecat::IndexSearcher.new 23 }.should raise_error(ArgumentError)
+  end
+end
diff --git a/spec/index_writer/create_reader_spec.rb b/spec/index_writer/create_reader_spec.rb
new file mode 100644
index 0000000..487cdfc
--- /dev/null
+++ b/spec/index_writer/create_reader_spec.rb
@@ -0,0 +1,22 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe "IndexWriter#create_reader" do
+  before do
+    @path = prepare_index_dir
+  end
+
+  it "returns a IndexReader" do
+    w = Polecat::IndexWriter.new @path
+    w.create_reader.class.should == Polecat::IndexReader
+  end
+
+  it "returns a different object everytime it is called" do
+    w = Polecat::IndexWriter.new @path
+    w.create_reader.should_not == w.create_reader
+  end
+
+  it "returns an IndexReader with the same path" do
+    w = Polecat::IndexWriter.new @path
+    w.create_reader.path.should == w.path
+  end
+end