From 665109f5446c0d3eab86b6e5a140716a43783cbb Mon Sep 17 00:00:00 2001 From: Gibheer Date: Mon, 20 Jun 2011 11:06:36 +0200 Subject: [PATCH] modified benchmark to use the new storages --- benchmark/search_bench.rb | 49 ++++++++++++++++----- lib/polecat/index_searcher.rb | 82 ++++++++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 11 deletions(-) diff --git a/benchmark/search_bench.rb b/benchmark/search_bench.rb index f589d85..f28c059 100644 --- a/benchmark/search_bench.rb +++ b/benchmark/search_bench.rb @@ -3,10 +3,14 @@ require 'rubygems' require 'benchmark' require 'virtus' require 'polecat' +require 'polecat/storage/binary_storage' +require 'polecat/storage/hash_storage' -n = 2500 +n = 1500 +docs = 50000 +find = 25000 -def run_threads threadcount, count, searcher, query +def run_threads1 threadcount, count, searcher, query threads = [] count = count / threadcount threadcount.times do @@ -19,10 +23,24 @@ def run_threads threadcount, count, searcher, query threads.each {|t| t.join } end +def run_threads2 threadcount, count, searcher, query + threads = [] + count = count / threadcount + threadcount.times do + threads << Thread.new do + for i in 1..count do + searcher.search_with_index query + end + end + end + threads.each {|t| t.join } +end + class Document include Virtus - attribute :name, Integer + attribute :name1, Integer, :storage => Polecat::Storage::BinaryStorage + attribute :name2, Integer, :storage => Polecat::Storage::HashStorage attribute :text, String end @@ -33,17 +51,28 @@ end Dir.mkdir 'benchmark/index_dir' writer = Polecat::IndexWriter.new 'benchmark/index_dir' -(1..50000).each do |i| - writer.add(Document.new(:name => i, :text => "Lorem #{i} Ipsum")) +(1..docs).each do |i| + writer.add(Document.new(:name1 => i, :name2 => i, :text => "Lorem #{i} Ipsum")) end writer.write searcher = Polecat::IndexSearcher.new :reader => writer.create_reader -query = Polecat::Query.new.add(Polecat::Term.new(:name, :lt, 25000)) +searcher.load 
Benchmark.bm do |x| - x.report('1') { run_threads 1, n, searcher, query.dup } - x.report('2') { run_threads 2, n, searcher, query.dup } - x.report('4') { run_threads 4, n, searcher, query.dup } - x.report('8') { run_threads 8, n, searcher, query.dup } + query = Polecat::Query.new.add(Polecat::Term.new(:name1, :eq, find)) + x.report('1') { run_threads1 1, n, searcher, query.dup } + x.report('2') { run_threads1 2, n, searcher, query.dup } + x.report('4') { run_threads1 4, n, searcher, query.dup } + x.report('8') { run_threads1 8, n, searcher, query.dup } + query = Polecat::Query.new.add(Polecat::Term.new(:name1, :eq, find)) + x.report('1') { run_threads2 1, n, searcher, query.dup } + x.report('2') { run_threads2 2, n, searcher, query.dup } + x.report('4') { run_threads2 4, n, searcher, query.dup } + x.report('8') { run_threads2 8, n, searcher, query.dup } + query = Polecat::Query.new.add(Polecat::Term.new(:name2, :eq, find)) + x.report('1') { run_threads2 1, n, searcher, query.dup } + x.report('2') { run_threads2 2, n, searcher, query.dup } + x.report('4') { run_threads2 4, n, searcher, query.dup } + x.report('8') { run_threads2 8, n, searcher, query.dup } end diff --git a/lib/polecat/index_searcher.rb b/lib/polecat/index_searcher.rb index 0a7796c..5d417e8 100644 --- a/lib/polecat/index_searcher.rb +++ b/lib/polecat/index_searcher.rb @@ -44,7 +44,6 @@ module Polecat def search query @content = @reader.read if @content.nil? @content.select do |doc| - #doc.attributes.fetch(@default_field).fetch(:value) == query rs = [] query.terms.each do |term| if term.compare(doc.send(term.field)) @@ -58,5 +57,86 @@ module Polecat end end end + + # searches the documents with the help of the attribute indexes + # + # Looks up equality terms in the attribute storage and compares all other terms + # against every stored document to get a list of matching documents. 
+ # @param [Polecat::Query] query the query whose terms get matched against the documents + # @return [Array] a list of all matching documents + def search_with_index query + docs = [] + return docs if query.terms.empty? + load if @content.nil? + return docs if @content.nil? + index = {} + query.terms.each do |term| + if term.operator == :eq && term.value.class != Regexp + set = @attribute_storage[term.field][term.value] + else + set = @content.select do |doc| + term.compare(doc.send(term.field)) + end + end + + if !set.nil? && !set.empty? + if docs.empty? + docs = set + if query.relation == :and + docs.each do |value| + index[value] = nil + end + end + else + if query.relation == :or + docs += set + else + set.each do |value| + if !index.has_key? value + docs << value + index[value] = nil + end + end + end + end + end + end + docs + end + + # reads all documents from the reader and builds the attribute indexes + def load + @content = @reader.read + if @content.nil? + return + end + @attribute_storage = {} + attributes = @content.first.class.attributes + attributes.each do |key, attribute| + if attribute.options.has_key? :storage + @attribute_storage[key] = attribute.options[:storage].new + else + @attribute_storage[key] = Hash.new + end + end + @content.each do |doc| + add_doc doc + end + end + + def add_doc doc + doc.attributes.each do |key, value| + begin + store = @attribute_storage[key][value] + rescue + store = nil + end + if store.nil? + @attribute_storage[key][value] = [doc] + else + store << doc + end + end + end end end