diff --git a/index/index.go b/index/index.go index 93b72929..6804bb3a 100644 --- a/index/index.go +++ b/index/index.go @@ -69,6 +69,8 @@ type IndexReader interface { // The caller must close returned instance to release associated resources. DocIDReader(start, end string) (DocIDReader, error) + DocIDReaderOnly(ids []string) (DocIDReader, error) + FieldDict(field string) (FieldDict, error) // FieldDictRange is currently defined to include the start and end terms @@ -76,8 +78,8 @@ type IndexReader interface { FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) Document(id string) (*document.Document, error) - DocumentFieldTerms(id string) (FieldTerms, error) - DocumentFieldTermsForFields(id string, fields []string) (FieldTerms, error) + DocumentFieldTerms(id IndexInternalID) (FieldTerms, error) + DocumentFieldTermsForFields(id IndexInternalID, fields []string) (FieldTerms, error) Fields() ([]string, error) @@ -85,6 +87,8 @@ type IndexReader interface { DocCount() uint64 + FinalizeDocID(id IndexInternalID) (string, error) + Close() error } @@ -98,14 +102,28 @@ type TermFieldVector struct { End uint64 } +// IndexInternalID is an opaque document identifier internal to the index impl. +// This allows us to delay the conversion to public identifier (string) and +// avoid it completely in other cases. It also serves to hide the underlying +// representation of a document identifier, allowing more flexibility. +type IndexInternalID interface { + Equals(other IndexInternalID) bool + Compare(other IndexInternalID) int +} + type TermFieldDoc struct { Term string - ID string + ID IndexInternalID Freq uint64 Norm float64 Vectors []*TermFieldVector } +func (tfd *TermFieldDoc) Reset() *TermFieldDoc { + *tfd = TermFieldDoc{} + return tfd +} + // TermFieldReader is the interface exposing the enumeration of documents // containing a given term in a given field. Documents are returned in byte // lexicographic order over their identifiers. 
@@ -117,7 +135,7 @@ type TermFieldReader interface { // Advance resets the enumeration at specified document or its immediate // follower. - Advance(ID string, preAlloced *TermFieldDoc) (*TermFieldDoc, error) + Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error) // Count returns the number of documents contains the term in this field. Count() uint64 @@ -137,15 +155,15 @@ type FieldDict interface { // DocIDReader is the interface exposing enumeration of documents identifiers. // Close the reader to release associated resources. type DocIDReader interface { - // Next returns the next document identifier in ascending lexicographic - // byte order, or io.EOF when the end of the sequence is reached. - Next() (string, error) + // Next returns the next document internal identifier in the natural + // index order, or io.EOF when the end of the sequence is reached. + Next() (IndexInternalID, error) - // Advance resets the iteration to the first identifier greater than or - // equal to ID. If ID is smaller than the start of the range, the iteration + // Advance resets the iteration to the first internal identifier greater than + // or equal to ID. If ID is smaller than the start of the range, the iteration // will start there instead. If ID is greater than or equal to the end of // the range, Next() call will return io.EOF. 
- Advance(ID string) (string, error) + Advance(ID IndexInternalID) (IndexInternalID, error) Close() error } @@ -200,8 +218,3 @@ func (b *Batch) Reset() { b.IndexOps = make(map[string]*document.Document) b.InternalOps = make(map[string][]byte) } - -func (tfd *TermFieldDoc) Reset() *TermFieldDoc { - *tfd = TermFieldDoc{} - return tfd -} diff --git a/index/upside_down/dump.go b/index/upside_down/dump.go index 023ae458..fd2be837 100644 --- a/index/upside_down/dump.go +++ b/index/upside_down/dump.go @@ -151,7 +151,7 @@ func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} { } }() - back, err := udc.backIndexRowForDoc(kvreader, id) + back, err := udc.backIndexRowForDoc(kvreader, []byte(id)) if err != nil { rv <- err return diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index e8047144..ba09ed9d 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -10,6 +10,7 @@ package upside_down import ( + "bytes" "fmt" "github.com/blevesearch/bleve/document" @@ -17,6 +18,20 @@ import ( "github.com/blevesearch/bleve/index/store" ) +type InternalId []byte + +func (u InternalId) Compare(other index.IndexInternalID) int { + if other == nil { + // this internal ID is always greater than nil + return 1 + } + return bytes.Compare(u, other.(InternalId)) +} + +func (u InternalId) Equals(other index.IndexInternalID) bool { + return u.Compare(other.(InternalId)) == 0 +} + type IndexReader struct { index *UpsideDownCouch kvreader store.KVReader @@ -51,10 +66,14 @@ func (i *IndexReader) DocIDReader(start, end string) (index.DocIDReader, error) return newUpsideDownCouchDocIDReader(i, start, end) } +func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { + return newUpsideDownCouchDocIDReaderOnly(i, ids) +} + func (i *IndexReader) Document(id string) (doc *document.Document, err error) { // first hit the back index to confirm doc exists var backIndexRow *BackIndexRow - backIndexRow, err = 
i.index.backIndexRowForDoc(i.kvreader, id) + backIndexRow, err = i.index.backIndexRowForDoc(i.kvreader, []byte(id)) if err != nil { return } @@ -94,8 +113,8 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) { return } -func (i *IndexReader) DocumentFieldTerms(id string) (index.FieldTerms, error) { - back, err := i.index.backIndexRowForDoc(i.kvreader, id) +func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) { + back, err := i.index.backIndexRowForDoc(i.kvreader, id.(InternalId)) if err != nil { return nil, err } @@ -112,8 +131,8 @@ func (i *IndexReader) DocumentFieldTerms(id string) (index.FieldTerms, error) { return rv, nil } -func (i *IndexReader) DocumentFieldTermsForFields(id string, fields []string) (index.FieldTerms, error) { - back, err := i.index.backIndexRowForDoc(i.kvreader, id) +func (i *IndexReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { + back, err := i.index.backIndexRowForDoc(i.kvreader, id.(InternalId)) if err != nil { return nil, err } @@ -181,6 +200,10 @@ func (i *IndexReader) Close() error { return i.kvreader.Close() } +func (i *IndexReader) FinalizeDocID(id index.IndexInternalID) (string, error) { + return string(id.(InternalId)), nil +} + func incrementBytes(in []byte) []byte { rv := make([]byte, len(in)) copy(rv, in) diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index a973329e..1764dbbc 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -10,6 +10,8 @@ package upside_down import ( + "bytes" + "sort" "sync/atomic" "github.com/blevesearch/bleve/index" @@ -81,7 +83,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* if rv == nil { rv = &index.TermFieldDoc{} } - rv.ID = string(tfr.doc) + rv.ID = InternalId(tfr.doc) rv.Freq = tfr.freq rv.Norm = float64(tfr.norm) if tfr.vectors != nil { @@ -94,9 +96,10 @@ func (r 
*UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* return nil, nil } -func (r *UpsideDownCouchTermFieldReader) Advance(docID string, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { +func (r *UpsideDownCouchTermFieldReader) Advance(docIDInternal index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { + docID := docIDInternal.(InternalId) if r.iterator != nil { - tfr := NewTermFrequencyRow(r.term, r.field, []byte(docID), 0, 0) + tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0) r.iterator.Seek(tfr.Key()) key, val, valid := r.iterator.Current() if valid { @@ -108,7 +111,7 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID string, preAlloced *index if rv == nil { rv = &index.TermFieldDoc{} } - rv.ID = string(tfr.doc) + rv.ID = InternalId(tfr.doc) rv.Freq = tfr.freq rv.Norm = float64(tfr.norm) if tfr.vectors != nil { @@ -131,6 +134,9 @@ func (r *UpsideDownCouchTermFieldReader) Close() error { type UpsideDownCouchDocIDReader struct { indexReader *IndexReader iterator store.KVIterator + only []string + onlyPos int + onlyMode bool } func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) (*UpsideDownCouchDocIDReader, error) { @@ -152,37 +158,138 @@ func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) }, nil } -func (r *UpsideDownCouchDocIDReader) Next() (string, error) { - key, val, valid := r.iterator.Current() - if valid { - br, err := NewBackIndexRowKV(key, val) - if err != nil { - return "", err - } - rv := string(br.doc) - r.iterator.Next() - return rv, nil +func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) { + // ensure ids are sorted + sort.Strings(ids) + startBytes := []byte{0x0} + if len(ids) > 0 { + startBytes = []byte(ids[0]) } - return "", nil + endBytes := []byte{0xff} + if len(ids) > 0 { + endBytes = incrementBytes([]byte(ids[len(ids)-1])) + } + bisr := 
NewBackIndexRow(startBytes, nil, nil) + bier := NewBackIndexRow(endBytes, nil, nil) + it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key()) + + return &UpsideDownCouchDocIDReader{ + indexReader: indexReader, + iterator: it, + only: ids, + onlyMode: true, + }, nil } -func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) { - bir := NewBackIndexRow([]byte(docID), nil, nil) +func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) { + key, val, valid := r.iterator.Current() + + if r.onlyMode { + var rv InternalId + for valid && r.onlyPos < len(r.only) { + br, err := NewBackIndexRowKV(key, val) + if err != nil { + return nil, err + } + if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) { + ok := r.nextOnly() + if !ok { + return nil, nil + } + r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) + key, val, valid = r.iterator.Current() + continue + } else { + rv = InternalId(br.doc) + break + } + } + if valid && r.onlyPos < len(r.only) { + ok := r.nextOnly() + if ok { + r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) + } + return rv, nil + } + + } else { + if valid { + br, err := NewBackIndexRowKV(key, val) + if err != nil { + return nil, err + } + rv := InternalId(br.doc) + r.iterator.Next() + return rv, nil + } + } + return nil, nil +} + +func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) { + docIDInternal := docID.(InternalId) + bir := NewBackIndexRow(docIDInternal, nil, nil) r.iterator.Seek(bir.Key()) key, val, valid := r.iterator.Current() - if valid { - br, err := NewBackIndexRowKV(key, val) - if err != nil { - return "", err + r.onlyPos = sort.SearchStrings(r.only, string(docIDInternal)) + + if r.onlyMode { + var rv InternalId + for valid && r.onlyPos < len(r.only) { + br, err := NewBackIndexRowKV(key, val) + if err != nil { + return nil, err + } + if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) { + ok := 
r.nextOnly() + if !ok { + return nil, nil + } + r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) + continue + } else { + rv = InternalId(br.doc) + break + } + } + if valid && r.onlyPos < len(r.only) { + ok := r.nextOnly() + if ok { + r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) + } + return rv, nil + } + } else { + if valid { + br, err := NewBackIndexRowKV(key, val) + if err != nil { + return nil, err + } + rv := InternalId(br.doc) + r.iterator.Next() + return rv, nil } - rv := string(br.doc) - r.iterator.Next() - return rv, nil } - return "", nil + return nil, nil } func (r *UpsideDownCouchDocIDReader) Close() error { atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1)) return r.iterator.Close() } + +// move the r.only pos forward one, skipping duplicates +// return true if there is more data, or false if we got to the end of the list +func (r *UpsideDownCouchDocIDReader) nextOnly() bool { + + // advance 1 position, until we see a different key + // it's already sorted, so this skips duplicates + start := r.onlyPos + r.onlyPos++ + for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] { + start = r.onlyPos + r.onlyPos++ + } + // indicate whether any ids remain in the list + return r.onlyPos < len(r.only) +} diff --git a/index/upside_down/reader_test.go b/index/upside_down/reader_test.go index 62e81398..0a47e853 100644 --- a/index/upside_down/reader_test.go +++ b/index/upside_down/reader_test.go @@ -111,7 +111,7 @@ func TestIndexReader(t *testing.T) { } expectedMatch := &index.TermFieldDoc{ - ID: "2", + ID: InternalId("2"), Freq: 1, Norm: 0.5773502588272095, Vectors: []*index.TermFieldVector{ @@ -145,17 +145,17 @@ func TestIndexReader(t *testing.T) { t.Errorf("Error accessing term field reader: %v", err) } - match, err = reader.Advance("2", nil) + match, err = reader.Advance(InternalId("2"), nil) if err != nil { t.Errorf("unexpected error: %v", err) } if match == nil { 
t.Fatalf("Expected match, got nil") } - if match.ID != "2" { + if !match.ID.Equals(InternalId("2")) { t.Errorf("Expected ID '2', got '%s'", match.ID) } - match, err = reader.Advance("3", nil) + match, err = reader.Advance(InternalId("3"), nil) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -183,7 +183,7 @@ func TestIndexReader(t *testing.T) { if match != nil { t.Errorf("expected nil, got %v", match) } - match, err = reader.Advance("anywhere", nil) + match, err = reader.Advance(InternalId("anywhere"), nil) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -260,7 +260,7 @@ func TestIndexDocIdReader(t *testing.T) { id, err := reader.Next() count := uint64(0) - for id != "" { + for id != nil { count++ id, err = reader.Next() } @@ -280,19 +280,19 @@ func TestIndexDocIdReader(t *testing.T) { } }() - id, err = reader2.Advance("2") + id, err = reader2.Advance(InternalId("2")) if err != nil { t.Error(err) } - if id != "2" { + if !id.Equals(InternalId("2")) { t.Errorf("expected to find id '2', got '%s'", id) } - id, err = reader2.Advance("3") + id, err = reader2.Advance(InternalId("3")) if err != nil { t.Error(err) } - if id != "" { + if id != nil { t.Errorf("expected to find id '', got '%s'", id) } } diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index 7fec5724..2bde3e4f 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -439,7 +439,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { // first we lookup the backindex row for the doc id if it exists // lookup the back index row var backIndexRow *BackIndexRow - backIndexRow, err = udc.backIndexRowForDoc(kvreader, doc.ID) + backIndexRow, err = udc.backIndexRowForDoc(kvreader, InternalId(doc.ID)) if err != nil { _ = kvreader.Close() atomic.AddUint64(&udc.stats.errors, 1) @@ -627,7 +627,7 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) { // first we lookup the backindex row for the doc id if it exists 
// lookup the back index row var backIndexRow *BackIndexRow - backIndexRow, err = udc.backIndexRowForDoc(kvreader, id) + backIndexRow, err = udc.backIndexRowForDoc(kvreader, InternalId(id)) if err != nil { _ = kvreader.Close() atomic.AddUint64(&udc.stats.errors, 1) @@ -695,10 +695,10 @@ func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, return deleteRows } -func (udc *UpsideDownCouch) backIndexRowForDoc(kvreader store.KVReader, docID string) (*BackIndexRow, error) { +func (udc *UpsideDownCouch) backIndexRowForDoc(kvreader store.KVReader, docID InternalId) (*BackIndexRow, error) { // use a temporary row structure to build key tempRow := &BackIndexRow{ - doc: []byte(docID), + doc: docID, } keyBuf := GetRowBuffer() @@ -833,7 +833,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { } for docID, doc := range batch.IndexOps { - backIndexRow, err := udc.backIndexRowForDoc(kvreader, docID) + backIndexRow, err := udc.backIndexRowForDoc(kvreader, InternalId(docID)) if err != nil { docBackIndexRowErr = err return diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index 3a1fc004..45b7a6f5 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -663,16 +663,16 @@ func TestIndexBatch(t *testing.T) { if err != nil { t.Error(err) } - docIds := make([]string, 0) + docIds := make([]InternalId, 0) docID, err := docIDReader.Next() - for docID != "" && err == nil { - docIds = append(docIds, docID) + for docID != nil && err == nil { + docIds = append(docIds, docID.(InternalId)) docID, err = docIDReader.Next() } if err != nil { t.Error(err) } - expectedDocIds := []string{"2", "3"} + expectedDocIds := []InternalId{InternalId("2"), InternalId("3")} if !reflect.DeepEqual(docIds, expectedDocIds) { t.Errorf("expected ids: %v, got ids: %v", expectedDocIds, docIds) } @@ -1126,7 +1126,7 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { tfd, err := 
termFieldReader.Next(nil) for tfd != nil && err == nil { - if tfd.ID != "1" { + if !tfd.ID.Equals(InternalId("1")) { t.Errorf("expected to find document id 1") } tfd, err = termFieldReader.Next(nil) @@ -1179,7 +1179,7 @@ func TestIndexDocumentFieldTerms(t *testing.T) { } }() - fieldTerms, err := indexReader.DocumentFieldTerms("1") + fieldTerms, err := indexReader.DocumentFieldTerms(InternalId("1")) if err != nil { t.Error(err) } diff --git a/index_impl.go b/index_impl.go index f914ec94..66c33f8c 100644 --- a/index_impl.go +++ b/index_impl.go @@ -435,7 +435,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr collector.SetFacetsBuilder(facetsBuilder) } - err = collector.Collect(ctx, searcher) + err = collector.Collect(ctx, searcher, indexReader) if err != nil { return nil, err } diff --git a/search/collector.go b/search/collector.go index 773c8d55..a6d9148c 100644 --- a/search/collector.go +++ b/search/collector.go @@ -12,11 +12,13 @@ package search import ( "time" + "github.com/blevesearch/bleve/index" + "golang.org/x/net/context" ) type Collector interface { - Collect(ctx context.Context, searcher Searcher) error + Collect(ctx context.Context, searcher Searcher, reader index.IndexReader) error Results() DocumentMatchCollection Total() uint64 MaxScore() float64 diff --git a/search/collectors/bench_test.go b/search/collectors/bench_test.go index d8daeb9e..2ae7e02c 100644 --- a/search/collectors/bench_test.go +++ b/search/collectors/bench_test.go @@ -10,10 +10,10 @@ import ( ) func benchHelper(numOfMatches int, collector search.Collector, b *testing.B) { - matches := make(search.DocumentMatchCollection, 0, numOfMatches) + matches := make([]*search.DocumentMatchInternal, 0, numOfMatches) for i := 0; i < numOfMatches; i++ { - matches = append(matches, &search.DocumentMatch{ - ID: strconv.Itoa(i), + matches = append(matches, &search.DocumentMatchInternal{ + ID: testInternalId(strconv.Itoa(i)), Score: rand.Float64(), }) } @@ -24,7 +24,7 
@@ func benchHelper(numOfMatches int, collector search.Collector, b *testing.B) { searcher := &stubSearcher{ matches: matches, } - err := collector.Collect(context.Background(), searcher) + err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { b.Fatal(err) } diff --git a/search/collectors/collector_top_score.go b/search/collectors/collector_top_score.go index 90b0c75e..b6e44f03 100644 --- a/search/collectors/collector_top_score.go +++ b/search/collectors/collector_top_score.go @@ -15,6 +15,7 @@ import ( "golang.org/x/net/context" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -27,6 +28,7 @@ type TopScoreCollector struct { minScore float64 total uint64 facetsBuilder *search.FacetsBuilder + actualResults search.DocumentMatchCollection } func NewTopScorerCollector(k int) *TopScoreCollector { @@ -59,11 +61,11 @@ func (tksc *TopScoreCollector) Took() time.Duration { var COLLECT_CHECK_DONE_EVERY = uint64(1024) -func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error { +func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { startTime := time.Now() var err error - var pre search.DocumentMatch // A single pre-alloc'ed, reused instance. - var next *search.DocumentMatch + var pre search.DocumentMatchInternal // A single pre-alloc'ed, reused instance. 
+ var next *search.DocumentMatchInternal select { case <-ctx.Done(): return ctx.Err() @@ -87,6 +89,12 @@ func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Sear } next, err = searcher.Next(pre.Reset()) } + // finalize actual results + tksc.actualResults, err = tksc.finalizeResults(reader) + if err != nil { + return err + } + // compute search duration tksc.took = time.Since(startTime) if err != nil { @@ -95,7 +103,7 @@ func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Sear return nil } -func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatch) { +func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatchInternal) { // increment total hits tksc.total++ @@ -111,18 +119,18 @@ func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatch) { // Because the dmIn will be the single, pre-allocated, reused // instance, we need to copy the dmIn into a new, standalone // instance before inserting into our candidate results list. 
- dm := &search.DocumentMatch{} + dm := &search.DocumentMatchInternal{} *dm = *dmIn for e := tksc.results.Front(); e != nil; e = e.Next() { - curr := e.Value.(*search.DocumentMatch) + curr := e.Value.(*search.DocumentMatchInternal) if dm.Score <= curr.Score { tksc.results.InsertBefore(dm, e) // if we just made the list too long if tksc.results.Len() > (tksc.k + tksc.skip) { // remove the head - tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score + tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatchInternal).Score } return } @@ -131,11 +139,15 @@ func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatch) { tksc.results.PushBack(dm) if tksc.results.Len() > (tksc.k + tksc.skip) { // remove the head - tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score + tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatchInternal).Score } } func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection { + return tksc.actualResults +} + +func (tksc *TopScoreCollector) finalizeResults(r index.IndexReader) (search.DocumentMatchCollection, error) { if tksc.results.Len()-tksc.skip > 0 { rv := make(search.DocumentMatchCollection, tksc.results.Len()-tksc.skip) i := 0 @@ -145,12 +157,16 @@ func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection { skipped++ continue } - rv[i] = e.Value.(*search.DocumentMatch) + var err error + rv[i], err = e.Value.(*search.DocumentMatchInternal).Finalize(r) + if err != nil { + return nil, err + } i++ } - return rv + return rv, nil } - return search.DocumentMatchCollection{} + return search.DocumentMatchCollection{}, nil } func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) { diff --git a/search/collectors/collector_top_score_test.go b/search/collectors/collector_top_score_test.go index 4bf76140..4498dbc8 100644 --- a/search/collectors/collector_top_score_test.go 
+++ b/search/collectors/collector_top_score_test.go @@ -23,68 +23,68 @@ func TestTop10Scores(t *testing.T) { // the top-10 scores are > 10 // everything else is less than 10 searcher := &stubSearcher{ - matches: search.DocumentMatchCollection{ - &search.DocumentMatch{ - ID: "a", + matches: []*search.DocumentMatchInternal{ + &search.DocumentMatchInternal{ + ID: testInternalId("a"), Score: 11, }, - &search.DocumentMatch{ - ID: "b", + &search.DocumentMatchInternal{ + ID: testInternalId("b"), Score: 9, }, - &search.DocumentMatch{ - ID: "c", + &search.DocumentMatchInternal{ + ID: testInternalId("c"), Score: 11, }, - &search.DocumentMatch{ - ID: "d", + &search.DocumentMatchInternal{ + ID: testInternalId("d"), Score: 9, }, - &search.DocumentMatch{ - ID: "e", + &search.DocumentMatchInternal{ + ID: testInternalId("e"), Score: 11, }, - &search.DocumentMatch{ - ID: "f", + &search.DocumentMatchInternal{ + ID: testInternalId("f"), Score: 9, }, - &search.DocumentMatch{ - ID: "g", + &search.DocumentMatchInternal{ + ID: testInternalId("g"), Score: 11, }, - &search.DocumentMatch{ - ID: "h", + &search.DocumentMatchInternal{ + ID: testInternalId("h"), Score: 9, }, - &search.DocumentMatch{ - ID: "i", + &search.DocumentMatchInternal{ + ID: testInternalId("i"), Score: 11, }, - &search.DocumentMatch{ - ID: "j", + &search.DocumentMatchInternal{ + ID: testInternalId("j"), Score: 11, }, - &search.DocumentMatch{ - ID: "k", + &search.DocumentMatchInternal{ + ID: testInternalId("k"), Score: 11, }, - &search.DocumentMatch{ - ID: "l", + &search.DocumentMatchInternal{ + ID: testInternalId("l"), Score: 99, }, - &search.DocumentMatch{ - ID: "m", + &search.DocumentMatchInternal{ + ID: testInternalId("m"), Score: 11, }, - &search.DocumentMatch{ - ID: "n", + &search.DocumentMatchInternal{ + ID: testInternalId("n"), Score: 11, }, }, } collector := NewTopScorerCollector(10) - err := collector.Collect(context.Background(), searcher) + err := collector.Collect(context.Background(), searcher, 
&stubReader{}) if err != nil { t.Fatal(err) } @@ -131,68 +131,68 @@ func TestTop10ScoresSkip10(t *testing.T) { // the top-10 scores are > 10 // everything else is less than 10 searcher := &stubSearcher{ - matches: search.DocumentMatchCollection{ - &search.DocumentMatch{ - ID: "a", + matches: []*search.DocumentMatchInternal{ + &search.DocumentMatchInternal{ + ID: testInternalId("a"), Score: 11, }, - &search.DocumentMatch{ - ID: "b", + &search.DocumentMatchInternal{ + ID: testInternalId("b"), Score: 9.5, }, - &search.DocumentMatch{ - ID: "c", + &search.DocumentMatchInternal{ + ID: testInternalId("c"), Score: 11, }, - &search.DocumentMatch{ - ID: "d", + &search.DocumentMatchInternal{ + ID: testInternalId("d"), Score: 9, }, - &search.DocumentMatch{ - ID: "e", + &search.DocumentMatchInternal{ + ID: testInternalId("e"), Score: 11, }, - &search.DocumentMatch{ - ID: "f", + &search.DocumentMatchInternal{ + ID: testInternalId("f"), Score: 9, }, - &search.DocumentMatch{ - ID: "g", + &search.DocumentMatchInternal{ + ID: testInternalId("g"), Score: 11, }, - &search.DocumentMatch{ - ID: "h", + &search.DocumentMatchInternal{ + ID: testInternalId("h"), Score: 9, }, - &search.DocumentMatch{ - ID: "i", + &search.DocumentMatchInternal{ + ID: testInternalId("i"), Score: 11, }, - &search.DocumentMatch{ - ID: "j", + &search.DocumentMatchInternal{ + ID: testInternalId("j"), Score: 11, }, - &search.DocumentMatch{ - ID: "k", + &search.DocumentMatchInternal{ + ID: testInternalId("k"), Score: 11, }, - &search.DocumentMatch{ - ID: "l", + &search.DocumentMatchInternal{ + ID: testInternalId("l"), Score: 99, }, - &search.DocumentMatch{ - ID: "m", + &search.DocumentMatchInternal{ + ID: testInternalId("m"), Score: 11, }, - &search.DocumentMatch{ - ID: "n", + &search.DocumentMatchInternal{ + ID: testInternalId("n"), Score: 11, }, }, } collector := NewTopScorerSkipCollector(10, 10) - err := collector.Collect(context.Background(), searcher) + err := collector.Collect(context.Background(), searcher, 
&stubReader{}) if err != nil { t.Fatal(err) } @@ -227,61 +227,61 @@ func TestPaginationSameScores(t *testing.T) { // a stub search with more than 10 matches // all documents have the same score searcher := &stubSearcher{ - matches: search.DocumentMatchCollection{ - &search.DocumentMatch{ - ID: "a", + matches: []*search.DocumentMatchInternal{ + &search.DocumentMatchInternal{ + ID: testInternalId("a"), Score: 5, }, - &search.DocumentMatch{ - ID: "b", + &search.DocumentMatchInternal{ + ID: testInternalId("b"), Score: 5, }, - &search.DocumentMatch{ - ID: "c", + &search.DocumentMatchInternal{ + ID: testInternalId("c"), Score: 5, }, - &search.DocumentMatch{ - ID: "d", + &search.DocumentMatchInternal{ + ID: testInternalId("d"), Score: 5, }, - &search.DocumentMatch{ - ID: "e", + &search.DocumentMatchInternal{ + ID: testInternalId("e"), Score: 5, }, - &search.DocumentMatch{ - ID: "f", + &search.DocumentMatchInternal{ + ID: testInternalId("f"), Score: 5, }, - &search.DocumentMatch{ - ID: "g", + &search.DocumentMatchInternal{ + ID: testInternalId("g"), Score: 5, }, - &search.DocumentMatch{ - ID: "h", + &search.DocumentMatchInternal{ + ID: testInternalId("h"), Score: 5, }, - &search.DocumentMatch{ - ID: "i", + &search.DocumentMatchInternal{ + ID: testInternalId("i"), Score: 5, }, - &search.DocumentMatch{ - ID: "j", + &search.DocumentMatchInternal{ + ID: testInternalId("j"), Score: 5, }, - &search.DocumentMatch{ - ID: "k", + &search.DocumentMatchInternal{ + ID: testInternalId("k"), Score: 5, }, - &search.DocumentMatch{ - ID: "l", + &search.DocumentMatchInternal{ + ID: testInternalId("l"), Score: 5, }, - &search.DocumentMatch{ - ID: "m", + &search.DocumentMatchInternal{ + ID: testInternalId("m"), Score: 5, }, - &search.DocumentMatch{ - ID: "n", + &search.DocumentMatchInternal{ + ID: testInternalId("n"), Score: 5, }, }, @@ -289,7 +289,7 @@ func TestPaginationSameScores(t *testing.T) { // first get first 5 hits collector := NewTopScorerSkipCollector(5, 0) - err := 
collector.Collect(context.Background(), searcher) + err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) } @@ -313,61 +313,61 @@ func TestPaginationSameScores(t *testing.T) { // a stub search with more than 10 matches // all documents have the same score searcher = &stubSearcher{ - matches: search.DocumentMatchCollection{ - &search.DocumentMatch{ - ID: "a", + matches: []*search.DocumentMatchInternal{ + &search.DocumentMatchInternal{ + ID: testInternalId("a"), Score: 5, }, - &search.DocumentMatch{ - ID: "b", + &search.DocumentMatchInternal{ + ID: testInternalId("b"), Score: 5, }, - &search.DocumentMatch{ - ID: "c", + &search.DocumentMatchInternal{ + ID: testInternalId("c"), Score: 5, }, - &search.DocumentMatch{ - ID: "d", + &search.DocumentMatchInternal{ + ID: testInternalId("d"), Score: 5, }, - &search.DocumentMatch{ - ID: "e", + &search.DocumentMatchInternal{ + ID: testInternalId("e"), Score: 5, }, - &search.DocumentMatch{ - ID: "f", + &search.DocumentMatchInternal{ + ID: testInternalId("f"), Score: 5, }, - &search.DocumentMatch{ - ID: "g", + &search.DocumentMatchInternal{ + ID: testInternalId("g"), Score: 5, }, - &search.DocumentMatch{ - ID: "h", + &search.DocumentMatchInternal{ + ID: testInternalId("h"), Score: 5, }, - &search.DocumentMatch{ - ID: "i", + &search.DocumentMatchInternal{ + ID: testInternalId("i"), Score: 5, }, - &search.DocumentMatch{ - ID: "j", + &search.DocumentMatchInternal{ + ID: testInternalId("j"), Score: 5, }, - &search.DocumentMatch{ - ID: "k", + &search.DocumentMatchInternal{ + ID: testInternalId("k"), Score: 5, }, - &search.DocumentMatch{ - ID: "l", + &search.DocumentMatchInternal{ + ID: testInternalId("l"), Score: 5, }, - &search.DocumentMatch{ - ID: "m", + &search.DocumentMatchInternal{ + ID: testInternalId("m"), Score: 5, }, - &search.DocumentMatch{ - ID: "n", + &search.DocumentMatchInternal{ + ID: testInternalId("n"), Score: 5, }, }, @@ -375,7 +375,7 @@ func TestPaginationSameScores(t 
*testing.T) { // now get next 5 hits collector = NewTopScorerSkipCollector(5, 5) - err = collector.Collect(context.Background(), searcher) + err = collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) } diff --git a/search/collectors/search_test.go b/search/collectors/search_test.go index 629a1eca..acb58957 100644 --- a/search/collectors/search_test.go +++ b/search/collectors/search_test.go @@ -10,15 +10,19 @@ package collectors import ( + "bytes" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) type stubSearcher struct { index int - matches search.DocumentMatchCollection + matches []*search.DocumentMatchInternal } -func (ss *stubSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (ss *stubSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if ss.index < len(ss.matches) { rv := ss.matches[ss.index] ss.index++ @@ -27,9 +31,9 @@ func (ss *stubSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docume return nil, nil } -func (ss *stubSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (ss *stubSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { - for ss.index < len(ss.matches) && ss.matches[ss.index].ID < ID { + for ss.index < len(ss.matches) && ss.matches[ss.index].ID.Compare(ID) < 0 { ss.index++ } if ss.index < len(ss.matches) { @@ -58,3 +62,71 @@ func (ss *stubSearcher) Count() uint64 { func (ss *stubSearcher) Min() int { return 0 } + +type testInternalId []byte + +func (t testInternalId) Compare(other index.IndexInternalID) int { + return bytes.Compare(t, other.(testInternalId)) +} + +func (t testInternalId) Equals(other index.IndexInternalID) bool { + return t.Compare(other.(testInternalId)) == 0 +} + +type stubReader struct{} + 
+func (sr *stubReader) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { + return nil, nil +} + +func (sr *stubReader) DocIDReader(start, end string) (index.DocIDReader, error) { + return nil, nil +} + +func (sr *stubReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { + return nil, nil +} + +func (sr *stubReader) FieldDict(field string) (index.FieldDict, error) { + return nil, nil +} + +func (sr *stubReader) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { + return nil, nil +} + +func (sr *stubReader) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { + return nil, nil +} + +func (sr *stubReader) Document(id string) (*document.Document, error) { + return nil, nil +} + +func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) { + return nil, nil +} + +func (sr *stubReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { + return nil, nil +} + +func (sr *stubReader) Fields() ([]string, error) { + return nil, nil +} + +func (sr *stubReader) GetInternal(key []byte) ([]byte, error) { + return nil, nil +} + +func (sr *stubReader) DocCount() uint64 { + return 0 +} + +func (sr *stubReader) FinalizeDocID(id index.IndexInternalID) (string, error) { + return string(id.(testInternalId)), nil +} + +func (sr *stubReader) Close() error { + return nil +} diff --git a/search/facets_builder.go b/search/facets_builder.go index 4d52ec2b..c0c203cc 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -37,7 +37,7 @@ func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { fb.facets[name] = facetBuilder } -func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error { +func (fb *FacetsBuilder) Update(docMatch *DocumentMatchInternal) error { var fields []string for _, facetBuilder := range fb.facets { fields = 
append(fields, facetBuilder.Field()) diff --git a/search/scorers/scorer_conjunction.go b/search/scorers/scorer_conjunction.go index 422f282c..94a5eb62 100644 --- a/search/scorers/scorer_conjunction.go +++ b/search/scorers/scorer_conjunction.go @@ -23,8 +23,8 @@ func NewConjunctionQueryScorer(explain bool) *ConjunctionQueryScorer { } } -func (s *ConjunctionQueryScorer) Score(constituents []*search.DocumentMatch) *search.DocumentMatch { - rv := search.DocumentMatch{ +func (s *ConjunctionQueryScorer) Score(constituents []*search.DocumentMatchInternal) *search.DocumentMatchInternal { + rv := search.DocumentMatchInternal{ ID: constituents[0].ID, } diff --git a/search/scorers/scorer_constant.go b/search/scorers/scorer_constant.go index 1434bd5e..86717e9f 100644 --- a/search/scorers/scorer_constant.go +++ b/search/scorers/scorer_constant.go @@ -12,6 +12,7 @@ package scorers import ( "fmt" + "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" ) @@ -64,7 +65,7 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) { } } -func (s *ConstantScorer) Score(id string) *search.DocumentMatch { +func (s *ConstantScorer) Score(id index.IndexInternalID) *search.DocumentMatchInternal { var scoreExplanation *search.Explanation score := s.constant @@ -91,7 +92,7 @@ func (s *ConstantScorer) Score(id string) *search.DocumentMatch { } } - rv := search.DocumentMatch{ + rv := search.DocumentMatchInternal{ ID: id, Score: score, } diff --git a/search/scorers/scorer_constant_test.go b/search/scorers/scorer_constant_test.go index 4c8740e0..8f50c5d3 100644 --- a/search/scorers/scorer_constant_test.go +++ b/search/scorers/scorer_constant_test.go @@ -23,12 +23,12 @@ func TestConstantScorer(t *testing.T) { tests := []struct { termMatch *index.TermFieldDoc - result *search.DocumentMatch + result *search.DocumentMatchInternal }{ // test some simple math { termMatch: &index.TermFieldDoc{ - ID: "one", + ID: testInternalId("one"), Freq: 1, Norm: 1.0, Vectors: 
[]*index.TermFieldVector{ @@ -40,8 +40,8 @@ func TestConstantScorer(t *testing.T) { }, }, }, - result: &search.DocumentMatch{ - ID: "one", + result: &search.DocumentMatchInternal{ + ID: testInternalId("one"), Score: 1.0, Expl: &search.Explanation{ Value: 1.0, @@ -68,16 +68,16 @@ func TestConstantScorerWithQueryNorm(t *testing.T) { tests := []struct { termMatch *index.TermFieldDoc - result *search.DocumentMatch + result *search.DocumentMatchInternal }{ { termMatch: &index.TermFieldDoc{ - ID: "one", + ID: testInternalId("one"), Freq: 1, Norm: 1.0, }, - result: &search.DocumentMatch{ - ID: "one", + result: &search.DocumentMatchInternal{ + ID: testInternalId("one"), Score: 2.0, Expl: &search.Explanation{ Value: 2.0, diff --git a/search/scorers/scorer_disjunction.go b/search/scorers/scorer_disjunction.go index 00bc8cd0..0e5dc3e5 100644 --- a/search/scorers/scorer_disjunction.go +++ b/search/scorers/scorer_disjunction.go @@ -25,8 +25,8 @@ func NewDisjunctionQueryScorer(explain bool) *DisjunctionQueryScorer { } } -func (s *DisjunctionQueryScorer) Score(constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch { - rv := search.DocumentMatch{ +func (s *DisjunctionQueryScorer) Score(constituents []*search.DocumentMatchInternal, countMatch, countTotal int) *search.DocumentMatchInternal { + rv := search.DocumentMatchInternal{ ID: constituents[0].ID, } diff --git a/search/scorers/scorer_term.go b/search/scorers/scorer_term.go index ce926221..ee6b6a9b 100644 --- a/search/scorers/scorer_term.go +++ b/search/scorers/scorer_term.go @@ -83,7 +83,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { } } -func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc, preAllocated *search.DocumentMatch) *search.DocumentMatch { +func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc, preAllocated *search.DocumentMatchInternal) *search.DocumentMatchInternal { var scoreExplanation *search.Explanation // need to compute score @@ -130,7 +130,7 @@ 
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc, preAllocated *sea rv := preAllocated if rv == nil { - rv = &search.DocumentMatch{} + rv = &search.DocumentMatchInternal{} } rv.ID = termMatch.ID rv.Score = score diff --git a/search/scorers/scorer_term_test.go b/search/scorers/scorer_term_test.go index 612b02d9..43950fc6 100644 --- a/search/scorers/scorer_term_test.go +++ b/search/scorers/scorer_term_test.go @@ -10,6 +10,7 @@ package scorers import ( + "bytes" "math" "reflect" "testing" @@ -18,6 +19,16 @@ import ( "github.com/blevesearch/bleve/search" ) +type testInternalId []byte + +func (t testInternalId) Compare(other index.IndexInternalID) int { + return bytes.Compare(t, other.(testInternalId)) +} + +func (t testInternalId) Equals(other index.IndexInternalID) bool { + return t.Compare(other.(testInternalId)) == 0 +} + func TestTermScorer(t *testing.T) { var docTotal uint64 = 100 @@ -30,12 +41,12 @@ func TestTermScorer(t *testing.T) { tests := []struct { termMatch *index.TermFieldDoc - result *search.DocumentMatch + result *search.DocumentMatchInternal }{ // test some simple math { termMatch: &index.TermFieldDoc{ - ID: "one", + ID: testInternalId("one"), Freq: 1, Norm: 1.0, Vectors: []*index.TermFieldVector{ @@ -47,8 +58,8 @@ func TestTermScorer(t *testing.T) { }, }, }, - result: &search.DocumentMatch{ - ID: "one", + result: &search.DocumentMatchInternal{ + ID: testInternalId("one"), Score: math.Sqrt(1.0) * idf, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf, @@ -84,12 +95,12 @@ func TestTermScorer(t *testing.T) { // test the same thing again (score should be cached this time) { termMatch: &index.TermFieldDoc{ - ID: "one", + ID: testInternalId("one"), Freq: 1, Norm: 1.0, }, - result: &search.DocumentMatch{ - ID: "one", + result: &search.DocumentMatchInternal{ + ID: testInternalId("one"), Score: math.Sqrt(1.0) * idf, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf, @@ -114,12 +125,12 @@ func TestTermScorer(t *testing.T) { // test a case 
where the sqrt isn't precalculated { termMatch: &index.TermFieldDoc{ - ID: "one", + ID: testInternalId("one"), Freq: 65, Norm: 1.0, }, - result: &search.DocumentMatch{ - ID: "one", + result: &search.DocumentMatchInternal{ + ID: testInternalId("one"), Score: math.Sqrt(65) * idf, Expl: &search.Explanation{ Value: math.Sqrt(65) * idf, @@ -173,16 +184,16 @@ func TestTermScorerWithQueryNorm(t *testing.T) { tests := []struct { termMatch *index.TermFieldDoc - result *search.DocumentMatch + result *search.DocumentMatchInternal }{ { termMatch: &index.TermFieldDoc{ - ID: "one", + ID: testInternalId("one"), Freq: 1, Norm: 1.0, }, - result: &search.DocumentMatch{ - ID: "one", + result: &search.DocumentMatchInternal{ + ID: testInternalId("one"), Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, diff --git a/search/search.go b/search/search.go index 4b3b988b..ca1726cb 100644 --- a/search/search.go +++ b/search/search.go @@ -9,6 +9,8 @@ package search +import "github.com/blevesearch/bleve/index" + type Location struct { Pos float64 `json:"pos"` Start float64 `json:"start"` @@ -50,6 +52,40 @@ type FieldTermLocationMap map[string]TermLocationMap type FieldFragmentMap map[string][]string +type DocumentMatchInternal struct { + Index string + ID index.IndexInternalID + Score float64 + Expl *Explanation + Locations FieldTermLocationMap + Fragments FieldFragmentMap + + // Fields contains the values for document fields listed in + // SearchRequest.Fields. Text fields are returned as strings, numeric + // fields as float64s and date fields as time.RFC3339 formatted strings. 
+ Fields map[string]interface{} +} + +func (dm *DocumentMatchInternal) Reset() *DocumentMatchInternal { + *dm = DocumentMatchInternal{} + return dm +} + +func (dm *DocumentMatchInternal) Finalize(r index.IndexReader) (rv *DocumentMatch, err error) { + rv = &DocumentMatch{} + rv.ID, err = r.FinalizeDocID(dm.ID) + if err != nil { + return nil, err + } + rv.Index = dm.Index + rv.Expl = dm.Expl + rv.Fields = dm.Fields + rv.Fragments = dm.Fragments + rv.Locations = dm.Locations + rv.Score = dm.Score + return rv, nil +} + type DocumentMatch struct { Index string `json:"index,omitempty"` ID string `json:"id"` @@ -97,8 +133,8 @@ func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score } type Searcher interface { - Next(preAllocated *DocumentMatch) (*DocumentMatch, error) - Advance(ID string, preAllocated *DocumentMatch) (*DocumentMatch, error) + Next(preAllocated *DocumentMatchInternal) (*DocumentMatchInternal, error) + Advance(ID index.IndexInternalID, preAllocated *DocumentMatchInternal) (*DocumentMatchInternal, error) Close() error Weight() float64 SetQueryNorm(float64) diff --git a/search/searchers/search_boolean.go b/search/searchers/search_boolean.go index 0ae96483..b01e6f23 100644 --- a/search/searchers/search_boolean.go +++ b/search/searchers/search_boolean.go @@ -24,10 +24,10 @@ type BooleanSearcher struct { shouldSearcher search.Searcher mustNotSearcher search.Searcher queryNorm float64 - currMust *search.DocumentMatch - currShould *search.DocumentMatch - currMustNot *search.DocumentMatch - currentID string + currMust *search.DocumentMatchInternal + currShould *search.DocumentMatchInternal + currMustNot *search.DocumentMatchInternal + currentID index.IndexInternalID min uint64 scorer *scorers.ConjunctionQueryScorer } @@ -95,7 +95,7 @@ func (s *BooleanSearcher) initSearchers() error { } else if s.mustSearcher == nil && s.currShould != nil { s.currentID = 
s.currShould.ID } else { - s.currentID = "" + s.currentID = nil } s.initialized = true @@ -122,7 +122,7 @@ func (s *BooleanSearcher) advanceNextMust() error { } else if s.mustSearcher == nil && s.currShould != nil { s.currentID = s.currShould.ID } else { - s.currentID = "" + s.currentID = nil } return nil } @@ -148,7 +148,7 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) { } } -func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := s.initSearchers() @@ -158,16 +158,16 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu } var err error - var rv *search.DocumentMatch + var rv *search.DocumentMatchInternal - for s.currentID != "" { - if s.currMustNot != nil && s.currMustNot.ID < s.currentID { + for s.currentID != nil { + if s.currMustNot != nil && s.currMustNot.ID.Compare(s.currentID) < 0 { // advance must not searcher to our candidate entry s.currMustNot, err = s.mustNotSearcher.Advance(s.currentID, nil) if err != nil { return nil, err } - if s.currMustNot != nil && s.currMustNot.ID == s.currentID { + if s.currMustNot != nil && s.currMustNot.ID.Equals(s.currentID) { // the candidate is excluded err = s.advanceNextMust() if err != nil { @@ -175,7 +175,7 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu } continue } - } else if s.currMustNot != nil && s.currMustNot.ID == s.currentID { + } else if s.currMustNot != nil && s.currMustNot.ID.Equals(s.currentID) { // the candidate is excluded err = s.advanceNextMust() if err != nil { @@ -184,22 +184,22 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu continue } - if s.currShould != nil && s.currShould.ID < s.currentID { + if s.currShould != nil && s.currShould.ID.Compare(s.currentID) < 0 { // advance should searcher to our 
candidate entry s.currShould, err = s.shouldSearcher.Advance(s.currentID, nil) if err != nil { return nil, err } - if s.currShould != nil && s.currShould.ID == s.currentID { + if s.currShould != nil && s.currShould.ID.Equals(s.currentID) { // score bonus matches should - var cons []*search.DocumentMatch + var cons []*search.DocumentMatchInternal if s.currMust != nil { - cons = []*search.DocumentMatch{ + cons = []*search.DocumentMatchInternal{ s.currMust, s.currShould, } } else { - cons = []*search.DocumentMatch{ + cons = []*search.DocumentMatchInternal{ s.currShould, } } @@ -211,23 +211,23 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu break } else if s.shouldSearcher.Min() == 0 { // match is OK anyway - rv = s.scorer.Score([]*search.DocumentMatch{s.currMust}) + rv = s.scorer.Score([]*search.DocumentMatchInternal{s.currMust}) err = s.advanceNextMust() if err != nil { return nil, err } break } - } else if s.currShould != nil && s.currShould.ID == s.currentID { + } else if s.currShould != nil && s.currShould.ID.Equals(s.currentID) { // score bonus matches should - var cons []*search.DocumentMatch + var cons []*search.DocumentMatchInternal if s.currMust != nil { - cons = []*search.DocumentMatch{ + cons = []*search.DocumentMatchInternal{ s.currMust, s.currShould, } } else { - cons = []*search.DocumentMatch{ + cons = []*search.DocumentMatchInternal{ s.currShould, } } @@ -239,7 +239,7 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu break } else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 { // match is OK anyway - rv = s.scorer.Score([]*search.DocumentMatch{s.currMust}) + rv = s.scorer.Score([]*search.DocumentMatchInternal{s.currMust}) err = s.advanceNextMust() if err != nil { return nil, err @@ -255,7 +255,7 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu return rv, nil } -func (s *BooleanSearcher) Advance(ID string, preAllocated 
*search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *BooleanSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := s.initSearchers() @@ -289,7 +289,7 @@ func (s *BooleanSearcher) Advance(ID string, preAllocated *search.DocumentMatch) } else if s.mustSearcher == nil && s.currShould != nil { s.currentID = s.currShould.ID } else { - s.currentID = "" + s.currentID = nil } return s.Next(preAllocated) diff --git a/search/searchers/search_boolean_test.go b/search/searchers/search_boolean_test.go index ac6dc7ea..a09da69b 100644 --- a/search/searchers/search_boolean_test.go +++ b/search/searchers/search_boolean_test.go @@ -12,6 +12,7 @@ package searchers import ( "testing" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/search" ) @@ -242,34 +243,34 @@ func TestBooleanSearch(t *testing.T) { tests := []struct { searcher search.Searcher - results []*search.DocumentMatch + results []*search.DocumentMatchInternal }{ { searcher: booleanSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 0.9818005051949021, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.808709699395535, }, { - ID: "4", + ID: upside_down.InternalId("4"), Score: 0.34618161159873423, }, }, }, { searcher: booleanSearcher2, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 0.6775110856165737, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.6775110856165737, }, }, @@ -277,57 +278,57 @@ func TestBooleanSearch(t *testing.T) { // no MUST or SHOULD clauses yields no results { searcher: booleanSearcher3, - results: []*search.DocumentMatch{}, + results: []*search.DocumentMatchInternal{}, }, { searcher: booleanSearcher4, - results: []*search.DocumentMatch{ + results: 
[]*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 1.0, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.5, }, { - ID: "4", + ID: upside_down.InternalId("4"), Score: 1.0, }, }, }, { searcher: booleanSearcher5, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.5, }, { - ID: "4", + ID: upside_down.InternalId("4"), Score: 1.0, }, }, }, { searcher: booleanSearcher6, - results: []*search.DocumentMatch{}, + results: []*search.DocumentMatchInternal{}, }, // test a conjunction query with a nested boolean { searcher: conjunctionSearcher7, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 2.0097428702814377, }, }, }, { searcher: conjunctionSearcher8, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "3", + ID: upside_down.InternalId("3"), Score: 2.0681575785068107, }, }, @@ -346,7 +347,7 @@ func TestBooleanSearch(t *testing.T) { i := 0 for err == nil && next != nil { if i < len(test.results) { - if next.ID != test.results[i].ID { + if !next.ID.Equals(test.results[i].ID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex) } if !scoresCloseEnough(next.Score, test.results[i].Score) { diff --git a/search/searchers/search_conjunction.go b/search/searchers/search_conjunction.go index fe37b80b..031a977b 100644 --- a/search/searchers/search_conjunction.go +++ b/search/searchers/search_conjunction.go @@ -24,8 +24,8 @@ type ConjunctionSearcher struct { searchers OrderedSearcherList explain bool queryNorm float64 - currs []*search.DocumentMatch - currentID string + currs []*search.DocumentMatchInternal + currentID index.IndexInternalID scorer *scorers.ConjunctionQueryScorer } @@ -42,7 +42,7 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S 
indexReader: indexReader, explain: explain, searchers: searchers, - currs: make([]*search.DocumentMatch, len(searchers)), + currs: make([]*search.DocumentMatchInternal, len(searchers)), scorer: scorers.NewConjunctionQueryScorer(explain), } rv.computeQueryNorm() @@ -77,7 +77,7 @@ func (s *ConjunctionSearcher) initSearchers() error { if s.currs[0] != nil { s.currentID = s.currs[0].ID } else { - s.currentID = "" + s.currentID = nil } } @@ -99,20 +99,20 @@ func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) { } } -func (s *ConjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *ConjunctionSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := s.initSearchers() if err != nil { return nil, err } } - var rv *search.DocumentMatch + var rv *search.DocumentMatchInternal var err error OUTER: - for s.currentID != "" { + for s.currentID != nil { for i, termSearcher := range s.searchers { - if s.currs[i] != nil && s.currs[i].ID != s.currentID { - if s.currentID < s.currs[i].ID { + if s.currs[i] != nil && !s.currs[i].ID.Equals(s.currentID) { + if s.currentID.Compare(s.currs[i].ID) < 0 { s.currentID = s.currs[i].ID continue OUTER } @@ -122,17 +122,17 @@ OUTER: return nil, err } if s.currs[i] == nil { - s.currentID = "" + s.currentID = nil continue OUTER } - if s.currs[i].ID != s.currentID { + if !s.currs[i].ID.Equals(s.currentID) { // we just advanced, so it doesn't match, it must be greater // no need to call next s.currentID = s.currs[i].ID continue OUTER } } else if s.currs[i] == nil { - s.currentID = "" + s.currentID = nil continue OUTER } } @@ -145,7 +145,7 @@ OUTER: return nil, err } if s.currs[0] == nil { - s.currentID = "" + s.currentID = nil } else { s.currentID = s.currs[0].ID } @@ -155,7 +155,7 @@ OUTER: return rv, nil } -func (s *ConjunctionSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func 
(s *ConjunctionSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := s.initSearchers() if err != nil { diff --git a/search/searchers/search_conjunction_test.go b/search/searchers/search_conjunction_test.go index 1256059b..f4baa19d 100644 --- a/search/searchers/search_conjunction_test.go +++ b/search/searchers/search_conjunction_test.go @@ -12,6 +12,7 @@ package searchers import ( "testing" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/search" ) @@ -122,57 +123,57 @@ func TestConjunctionSearch(t *testing.T) { tests := []struct { searcher search.Searcher - results []*search.DocumentMatch + results []*search.DocumentMatchInternal }{ { searcher: beerAndMartySearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 2.0097428702814377, }, }, }, { searcher: angstAndBeerSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "2", + ID: upside_down.InternalId("2"), Score: 1.0807601687084403, }, }, }, { searcher: beerAndJackSearcher, - results: []*search.DocumentMatch{}, + results: []*search.DocumentMatchInternal{}, }, { searcher: beerAndMisterSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "2", + ID: upside_down.InternalId("2"), Score: 1.2877980334016337, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 1.2877980334016337, }, }, }, { searcher: couchbaseAndMisterSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "2", + ID: upside_down.InternalId("2"), Score: 1.4436599157093672, }, }, }, { searcher: beerAndCouchbaseAndMisterSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "2", + ID: upside_down.InternalId("2"), Score: 1.441614953806971, }, }, @@ -191,7 +192,7 @@ 
func TestConjunctionSearch(t *testing.T) { i := 0 for err == nil && next != nil { if i < len(test.results) { - if next.ID != test.results[i].ID { + if !next.ID.Equals(test.results[i].ID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex) } if !scoresCloseEnough(next.Score, test.results[i].Score) { diff --git a/search/searchers/search_disjunction.go b/search/searchers/search_disjunction.go index d7cd9408..061ac9d2 100644 --- a/search/searchers/search_disjunction.go +++ b/search/searchers/search_disjunction.go @@ -29,8 +29,8 @@ type DisjunctionSearcher struct { indexReader index.IndexReader searchers OrderedSearcherList queryNorm float64 - currs []*search.DocumentMatch - currentID string + currs []*search.DocumentMatchInternal + currentID index.IndexInternalID scorer *scorers.DisjunctionQueryScorer min float64 } @@ -61,7 +61,7 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S rv := DisjunctionSearcher{ indexReader: indexReader, searchers: searchers, - currs: make([]*search.DocumentMatch, len(searchers)), + currs: make([]*search.DocumentMatchInternal, len(searchers)), scorer: scorers.NewDisjunctionQueryScorer(explain), min: min, } @@ -98,10 +98,10 @@ func (s *DisjunctionSearcher) initSearchers() error { return nil } -func (s *DisjunctionSearcher) nextSmallestID() string { - rv := "" +func (s *DisjunctionSearcher) nextSmallestID() index.IndexInternalID { + var rv index.IndexInternalID for _, curr := range s.currs { - if curr != nil && (curr.ID < rv || rv == "") { + if curr != nil && (curr.ID.Compare(rv) < 0 || rv == nil) { rv = curr.ID } } @@ -122,7 +122,7 @@ func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) { } } -func (s *DisjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *DisjunctionSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := 
s.initSearchers() if err != nil { @@ -130,13 +130,13 @@ func (s *DisjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search. } } var err error - var rv *search.DocumentMatch - matching := make([]*search.DocumentMatch, 0, len(s.searchers)) + var rv *search.DocumentMatchInternal + matching := make([]*search.DocumentMatchInternal, 0, len(s.searchers)) found := false - for !found && s.currentID != "" { + for !found && s.currentID != nil { for _, curr := range s.currs { - if curr != nil && curr.ID == s.currentID { + if curr != nil && curr.ID.Equals(s.currentID) { matching = append(matching, curr) } } @@ -148,10 +148,10 @@ func (s *DisjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search. } // reset matching - matching = make([]*search.DocumentMatch, 0) + matching = make([]*search.DocumentMatchInternal, 0) // invoke next on all the matching searchers for i, curr := range s.currs { - if curr != nil && curr.ID == s.currentID { + if curr != nil && curr.ID.Equals(s.currentID) { searcher := s.searchers[i] s.currs[i], err = searcher.Next(nil) if err != nil { @@ -164,7 +164,7 @@ func (s *DisjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search. 
return rv, nil } -func (s *DisjunctionSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *DisjunctionSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := s.initSearchers() if err != nil { diff --git a/search/searchers/search_disjunction_test.go b/search/searchers/search_disjunction_test.go index 19b0dd42..231bfb0e 100644 --- a/search/searchers/search_disjunction_test.go +++ b/search/searchers/search_disjunction_test.go @@ -12,6 +12,7 @@ package searchers import ( "testing" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/search" ) @@ -65,17 +66,17 @@ func TestDisjunctionSearch(t *testing.T) { tests := []struct { searcher search.Searcher - results []*search.DocumentMatch + results []*search.DocumentMatchInternal }{ { searcher: martyOrDustinSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 0.6775110856165737, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.6775110856165737, }, }, @@ -83,17 +84,17 @@ func TestDisjunctionSearch(t *testing.T) { // test a nested disjunction { searcher: nestedRaviOrMartyOrDustinSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 0.2765927424732821, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.2765927424732821, }, { - ID: "4", + ID: upside_down.InternalId("4"), Score: 0.5531854849465642, }, }, @@ -112,7 +113,7 @@ func TestDisjunctionSearch(t *testing.T) { i := 0 for err == nil && next != nil { if i < len(test.results) { - if next.ID != test.results[i].ID { + if !next.ID.Equals(test.results[i].ID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex) } if !scoresCloseEnough(next.Score, 
test.results[i].Score) { @@ -158,7 +159,7 @@ func TestDisjunctionAdvance(t *testing.T) { t.Fatal(err) } - match, err := martyOrDustinSearcher.Advance("3", nil) + match, err := martyOrDustinSearcher.Advance(upside_down.InternalId("3"), nil) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/search/searchers/search_docid.go b/search/searchers/search_docid.go index 262583d0..72c489cf 100644 --- a/search/searchers/search_docid.go +++ b/search/searchers/search_docid.go @@ -10,8 +10,6 @@ package searchers import ( - "sort" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorers" @@ -19,54 +17,61 @@ import ( // DocIDSearcher returns documents matching a predefined set of identifiers. type DocIDSearcher struct { - ids []string - current int - scorer *scorers.ConstantScorer + reader index.DocIDReader + scorer *scorers.ConstantScorer + count int } func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, explain bool) (searcher *DocIDSearcher, err error) { - kept := make([]string, len(ids)) - copy(kept, ids) - sort.Strings(kept) + // kept := make([]string, len(ids)) + // copy(kept, ids) + // sort.Strings(kept) + // + // if len(ids) > 0 { + // var idReader index.DocIDReader + // endTerm := string(incrementBytes([]byte(kept[len(kept)-1]))) + // idReader, err = indexReader.DocIDReader(kept[0], endTerm) + // if err != nil { + // return nil, err + // } + // defer func() { + // if cerr := idReader.Close(); err == nil && cerr != nil { + // err = cerr + // } + // }() + // j := 0 + // for _, id := range kept { + // doc, err := idReader.Next() + // if err != nil { + // return nil, err + // } + // // Non-duplicate match + // actualDocID := indexReader.FinalizeDocID(doc) + // if actualDocID == id && (j == 0 || kept[j-1] != id) { + // kept[j] = id + // j++ + // } + // } + // kept = kept[:j] + // } - if len(ids) > 0 { - var idReader index.DocIDReader - endTerm := 
string(incrementBytes([]byte(kept[len(kept)-1]))) - idReader, err = indexReader.DocIDReader(kept[0], endTerm) - if err != nil { - return nil, err - } - defer func() { - if cerr := idReader.Close(); err == nil && cerr != nil { - err = cerr - } - }() - j := 0 - for _, id := range kept { - doc, err := idReader.Advance(id) - if err != nil { - return nil, err - } - // Non-duplicate match - if doc == id && (j == 0 || kept[j-1] != id) { - kept[j] = id - j++ - } - } - kept = kept[:j] + reader, err := indexReader.DocIDReaderOnly(ids) + if err != nil { + return nil, err } - scorer := scorers.NewConstantScorer(1.0, boost, explain) return &DocIDSearcher{ - ids: kept, scorer: scorer, + reader: reader, + count: len(ids), }, nil } func (s *DocIDSearcher) Count() uint64 { - return uint64(len(s.ids)) + // return uint64(len(s.ids)) + return uint64(s.count) } func (s *DocIDSearcher) Weight() float64 { @@ -77,20 +82,41 @@ func (s *DocIDSearcher) SetQueryNorm(qnorm float64) { s.scorer.SetQueryNorm(qnorm) } -func (s *DocIDSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { - if s.current >= len(s.ids) { +func (s *DocIDSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { + // if s.current >= len(s.ids) { + // return nil, nil + // } + // id := s.ids[s.current] + // s.current++ + // docMatch := s.scorer.Score(id) + // return docMatch, nil + + docidMatch, err := s.reader.Next() + if err != nil { + return nil, err + } + if docidMatch == nil { return nil, nil } - id := s.ids[s.current] - s.current++ - docMatch := s.scorer.Score(id) - return docMatch, nil + docMatch := s.scorer.Score(docidMatch) + return docMatch, nil } -func (s *DocIDSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { - s.current = sort.SearchStrings(s.ids, ID) - return s.Next(preAllocated) +func (s *DocIDSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) 
(*search.DocumentMatchInternal, error) { + // s.current = sort.SearchStrings(s.ids, ID) + // return s.Next(preAllocated) + + docidMatch, err := s.reader.Advance(ID) + if err != nil { + return nil, err + } + if docidMatch == nil { + return nil, nil + } + + docMatch := s.scorer.Score(docidMatch) + return docMatch, nil } func (s *DocIDSearcher) Close() error { diff --git a/search/searchers/search_docid_test.go b/search/searchers/search_docid_test.go index ad89288b..271a25bc 100644 --- a/search/searchers/search_docid_test.go +++ b/search/searchers/search_docid_test.go @@ -62,17 +62,13 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { } }() - if searcher.Count() != uint64(len(wanted)) { - t.Fatalf("expected count %v got %v", len(wanted), searcher.Count()) - } - // Check the sequence for i, id := range wanted { m, err := searcher.Next(nil) if err != nil { t.Fatal(err) } - if id != m.ID { + if !upside_down.InternalId(id).Equals(m.ID) { t.Fatalf("expected %v at position %v, got %v", id, i, m.ID) } } @@ -91,18 +87,18 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { } before := id[:1] for _, target := range []string{before, id} { - m, err := searcher.Advance(target, nil) + m, err := searcher.Advance(upside_down.InternalId(target), nil) if err != nil { t.Fatal(err) } - if m == nil || m.ID != id { + if m == nil || !m.ID.Equals(upside_down.InternalId(id)) { t.Fatalf("advancing to %v returned %v instead of %v", before, m, id) } } } // Seek after the end of the sequence after := "zzz" - m, err = searcher.Advance(after, nil) + m, err = searcher.Advance(upside_down.InternalId(after), nil) if err != nil { t.Fatal(err) } diff --git a/search/searchers/search_fuzzy.go b/search/searchers/search_fuzzy.go index da328a7e..9f42995c 100644 --- a/search/searchers/search_fuzzy.go +++ b/search/searchers/search_fuzzy.go @@ -107,12 +107,12 @@ func (s *FuzzySearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s 
*FuzzySearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *FuzzySearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Next(preAllocated) } -func (s *FuzzySearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *FuzzySearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Advance(ID, preAllocated) } diff --git a/search/searchers/search_fuzzy_test.go b/search/searchers/search_fuzzy_test.go index 610367de..52638044 100644 --- a/search/searchers/search_fuzzy_test.go +++ b/search/searchers/search_fuzzy_test.go @@ -12,6 +12,7 @@ package searchers import ( "testing" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/search" ) @@ -50,47 +51,47 @@ func TestFuzzySearch(t *testing.T) { tests := []struct { searcher search.Searcher - results []*search.DocumentMatch + results []*search.DocumentMatchInternal }{ { searcher: fuzzySearcherbeet, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 1.0, }, { - ID: "2", + ID: upside_down.InternalId("2"), Score: 0.5, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.5, }, { - ID: "4", + ID: upside_down.InternalId("4"), Score: 0.9999999838027345, }, }, }, { searcher: fuzzySearcherdouches, - results: []*search.DocumentMatch{}, + results: []*search.DocumentMatchInternal{}, }, { searcher: fuzzySearcheraplee, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.9581453659370776, }, }, }, { searcher: fuzzySearcherprefix, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "5", + ID: upside_down.InternalId("5"), Score: 1.916290731874155, }, }, @@ -109,7 +110,7 @@ 
func TestFuzzySearch(t *testing.T) { i := 0 for err == nil && next != nil { if i < len(test.results) { - if next.ID != test.results[i].ID { + if !next.ID.Equals(test.results[i].ID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex) } if next.Score != test.results[i].Score { diff --git a/search/searchers/search_match_all.go b/search/searchers/search_match_all.go index bd26ba13..77cde2e1 100644 --- a/search/searchers/search_match_all.go +++ b/search/searchers/search_match_all.go @@ -46,13 +46,13 @@ func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) { s.scorer.SetQueryNorm(qnorm) } -func (s *MatchAllSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *MatchAllSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { id, err := s.reader.Next() if err != nil { return nil, err } - if id == "" { + if id == nil { return nil, nil } @@ -63,13 +63,13 @@ func (s *MatchAllSearcher) Next(preAllocated *search.DocumentMatch) (*search.Doc } -func (s *MatchAllSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *MatchAllSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { id, err := s.reader.Advance(ID) if err != nil { return nil, err } - if id == "" { + if id == nil { return nil, nil } diff --git a/search/searchers/search_match_all_test.go b/search/searchers/search_match_all_test.go index 5bcd1b51..32b85514 100644 --- a/search/searchers/search_match_all_test.go +++ b/search/searchers/search_match_all_test.go @@ -12,6 +12,7 @@ package searchers import ( "testing" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/search" ) @@ -41,30 +42,30 @@ func TestMatchAllSearch(t *testing.T) { tests := []struct { searcher search.Searcher queryNorm float64 - results []*search.DocumentMatch + 
results []*search.DocumentMatchInternal }{ { searcher: allSearcher, queryNorm: 1.0, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 1.0, }, { - ID: "2", + ID: upside_down.InternalId("2"), Score: 1.0, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 1.0, }, { - ID: "4", + ID: upside_down.InternalId("4"), Score: 1.0, }, { - ID: "5", + ID: upside_down.InternalId("5"), Score: 1.0, }, }, @@ -72,25 +73,25 @@ func TestMatchAllSearch(t *testing.T) { { searcher: allSearcher2, queryNorm: 0.8333333, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 1.0, }, { - ID: "2", + ID: upside_down.InternalId("2"), Score: 1.0, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 1.0, }, { - ID: "4", + ID: upside_down.InternalId("4"), Score: 1.0, }, { - ID: "5", + ID: upside_down.InternalId("5"), Score: 1.0, }, }, @@ -113,7 +114,7 @@ func TestMatchAllSearch(t *testing.T) { i := 0 for err == nil && next != nil { if i < len(test.results) { - if next.ID != test.results[i].ID { + if !next.ID.Equals(test.results[i].ID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex) } if !scoresCloseEnough(next.Score, test.results[i].Score) { diff --git a/search/searchers/search_match_none.go b/search/searchers/search_match_none.go index 0d4f5a9a..08bae499 100644 --- a/search/searchers/search_match_none.go +++ b/search/searchers/search_match_none.go @@ -36,11 +36,11 @@ func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) { } -func (s *MatchNoneSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *MatchNoneSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return nil, nil } -func (s *MatchNoneSearcher) Advance(ID string, preAllocated *search.DocumentMatch) 
(*search.DocumentMatch, error) { +func (s *MatchNoneSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return nil, nil } diff --git a/search/searchers/search_match_none_test.go b/search/searchers/search_match_none_test.go index 0d470358..2752f998 100644 --- a/search/searchers/search_match_none_test.go +++ b/search/searchers/search_match_none_test.go @@ -35,11 +35,11 @@ func TestMatchNoneSearch(t *testing.T) { tests := []struct { searcher search.Searcher - results []*search.DocumentMatch + results []*search.DocumentMatchInternal }{ { searcher: noneSearcher, - results: []*search.DocumentMatch{}, + results: []*search.DocumentMatchInternal{}, }, } diff --git a/search/searchers/search_numeric_range.go b/search/searchers/search_numeric_range.go index fdf3c4c0..067ad9f6 100644 --- a/search/searchers/search_numeric_range.go +++ b/search/searchers/search_numeric_range.go @@ -96,11 +96,11 @@ func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s *NumericRangeSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *NumericRangeSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Next(preAllocated) } -func (s *NumericRangeSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *NumericRangeSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Advance(ID, preAllocated) } diff --git a/search/searchers/search_phrase.go b/search/searchers/search_phrase.go index 11e8c9c8..b7c63ef1 100644 --- a/search/searchers/search_phrase.go +++ b/search/searchers/search_phrase.go @@ -21,7 +21,7 @@ type PhraseSearcher struct { indexReader index.IndexReader mustSearcher *ConjunctionSearcher queryNorm float64 - currMust 
*search.DocumentMatch + currMust *search.DocumentMatchInternal slop int terms []string } @@ -90,7 +90,7 @@ func (s *PhraseSearcher) SetQueryNorm(qnorm float64) { s.mustSearcher.SetQueryNorm(qnorm) } -func (s *PhraseSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *PhraseSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := s.initSearchers() if err != nil { @@ -98,7 +98,7 @@ func (s *PhraseSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docum } } - var rv *search.DocumentMatch + var rv *search.DocumentMatchInternal for s.currMust != nil { rvftlm := make(search.FieldTermLocationMap, 0) freq := 0 @@ -160,7 +160,7 @@ func (s *PhraseSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docum return nil, nil } -func (s *PhraseSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *PhraseSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { if !s.initialized { err := s.initSearchers() if err != nil { diff --git a/search/searchers/search_phrase_test.go b/search/searchers/search_phrase_test.go index a3c14b70..e9fcdb55 100644 --- a/search/searchers/search_phrase_test.go +++ b/search/searchers/search_phrase_test.go @@ -12,6 +12,7 @@ package searchers import ( "testing" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/search" ) @@ -47,13 +48,13 @@ func TestPhraseSearch(t *testing.T) { tests := []struct { searcher search.Searcher - results []*search.DocumentMatch + results []*search.DocumentMatchInternal }{ { searcher: phraseSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "2", + ID: upside_down.InternalId("2"), Score: 1.0807601687084403, }, }, @@ -72,7 +73,7 @@ func TestPhraseSearch(t *testing.T) { i := 0 for err == nil && next != nil { if 
i < len(test.results) { - if next.ID != test.results[i].ID { + if !next.ID.Equals(test.results[i].ID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex) } if next.Score != test.results[i].Score { diff --git a/search/searchers/search_regexp.go b/search/searchers/search_regexp.go index d92822f1..6173f07a 100644 --- a/search/searchers/search_regexp.go +++ b/search/searchers/search_regexp.go @@ -106,12 +106,12 @@ func (s *RegexpSearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s *RegexpSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *RegexpSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Next(preAllocated) } -func (s *RegexpSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *RegexpSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Advance(ID, preAllocated) } diff --git a/search/searchers/search_regexp_test.go b/search/searchers/search_regexp_test.go index cb4d01ee..0552866f 100644 --- a/search/searchers/search_regexp_test.go +++ b/search/searchers/search_regexp_test.go @@ -13,6 +13,7 @@ import ( "regexp" "testing" + "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/search" ) @@ -51,26 +52,26 @@ func TestRegexpSearch(t *testing.T) { tests := []struct { searcher search.Searcher - results []*search.DocumentMatch + results []*search.DocumentMatchInternal }{ { searcher: regexpSearcher, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "1", + ID: upside_down.InternalId("1"), Score: 1.916290731874155, }, }, }, { searcher: regexpSearcherCo, - results: []*search.DocumentMatch{ + results: []*search.DocumentMatchInternal{ { - ID: "2", + ID: 
upside_down.InternalId("2"), Score: 0.33875554280828685, }, { - ID: "3", + ID: upside_down.InternalId("3"), Score: 0.33875554280828685, }, }, @@ -89,7 +90,7 @@ func TestRegexpSearch(t *testing.T) { i := 0 for err == nil && next != nil { if i < len(test.results) { - if next.ID != test.results[i].ID { + if !next.ID.Equals(test.results[i].ID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].ID, next.ID, testIndex) } if next.Score != test.results[i].Score { diff --git a/search/searchers/search_term.go b/search/searchers/search_term.go index 94c3eba9..aa95bc38 100644 --- a/search/searchers/search_term.go +++ b/search/searchers/search_term.go @@ -53,7 +53,7 @@ func (s *TermSearcher) SetQueryNorm(qnorm float64) { s.scorer.SetQueryNorm(qnorm) } -func (s *TermSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *TermSearcher) Next(preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { termMatch, err := s.reader.Next(s.tfd.Reset()) if err != nil { return nil, err @@ -70,7 +70,7 @@ func (s *TermSearcher) Next(preAllocated *search.DocumentMatch) (*search.Documen } -func (s *TermSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *TermSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { termMatch, err := s.reader.Advance(ID, s.tfd.Reset()) if err != nil { return nil, err diff --git a/search/searchers/search_term_prefix.go b/search/searchers/search_term_prefix.go index 35f34722..bbd8a78c 100644 --- a/search/searchers/search_term_prefix.go +++ b/search/searchers/search_term_prefix.go @@ -70,12 +70,12 @@ func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s *TermPrefixSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *TermPrefixSearcher) Next(preAllocated 
*search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Next(preAllocated) } -func (s *TermPrefixSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { +func (s *TermPrefixSearcher) Advance(ID index.IndexInternalID, preAllocated *search.DocumentMatchInternal) (*search.DocumentMatchInternal, error) { return s.searcher.Advance(ID, preAllocated) } diff --git a/search/searchers/search_term_test.go b/search/searchers/search_term_test.go index ef3b927f..83381e43 100644 --- a/search/searchers/search_term_test.go +++ b/search/searchers/search_term_test.go @@ -167,19 +167,19 @@ func TestTermSearcher(t *testing.T) { if err != nil { t.Errorf("expected result, got %v", err) } - if docMatch.ID != "a" { + if !docMatch.ID.Equals(upside_down.InternalId("a")) { t.Errorf("expected result ID to be 'a', got '%s", docMatch.ID) } - docMatch, err = searcher.Advance("c", nil) + docMatch, err = searcher.Advance(upside_down.InternalId("c"), nil) if err != nil { t.Errorf("expected result, got %v", err) } - if docMatch.ID != "c" { + if !docMatch.ID.Equals(upside_down.InternalId("c")) { t.Errorf("expected result ID to be 'c' got '%s'", docMatch.ID) } // try advancing past end - docMatch, err = searcher.Advance("z", nil) + docMatch, err = searcher.Advance(upside_down.InternalId("z"), nil) if err != nil { t.Fatal(err) }