2014-04-17 22:55:53 +02:00
|
|
|
// Copyright (c) 2014 Couchbase, Inc.
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
|
|
// and limitations under the License.
|
2014-09-02 16:54:50 +02:00
|
|
|
|
2014-04-17 22:55:53 +02:00
|
|
|
package upside_down
|
|
|
|
|
|
|
|
import (
|
2016-07-31 19:46:18 +02:00
|
|
|
"bytes"
|
|
|
|
"sort"
|
2016-03-05 13:50:25 +01:00
|
|
|
"sync/atomic"
|
|
|
|
|
2014-08-28 21:38:57 +02:00
|
|
|
"github.com/blevesearch/bleve/index"
|
|
|
|
"github.com/blevesearch/bleve/index/store"
|
2014-04-17 22:55:53 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
type UpsideDownCouchTermFieldReader struct {
|
2016-07-23 01:49:33 +02:00
|
|
|
count uint64
|
2015-09-23 20:25:47 +02:00
|
|
|
indexReader *IndexReader
|
|
|
|
iterator store.KVIterator
|
|
|
|
term []byte
|
2016-07-23 01:49:33 +02:00
|
|
|
tfrNext *TermFrequencyRow
|
2015-09-23 20:25:47 +02:00
|
|
|
field uint16
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2016-07-30 16:26:42 +02:00
|
|
|
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
2015-03-10 21:22:19 +01:00
|
|
|
dictionaryRow := NewDictionaryRow(term, field, 0)
|
|
|
|
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if val == nil {
|
2016-03-05 13:50:25 +01:00
|
|
|
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
2015-03-10 21:22:19 +01:00
|
|
|
return &UpsideDownCouchTermFieldReader{
|
2016-07-23 01:49:33 +02:00
|
|
|
count: 0,
|
|
|
|
term: term,
|
|
|
|
tfrNext: &TermFrequencyRow{},
|
|
|
|
field: field,
|
2015-03-10 21:22:19 +01:00
|
|
|
}, nil
|
|
|
|
}
|
2014-04-17 22:55:53 +02:00
|
|
|
|
2015-03-10 21:22:19 +01:00
|
|
|
err = dictionaryRow.parseDictionaryV(val)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2016-01-07 08:38:02 +01:00
|
|
|
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
|
2015-09-23 20:25:47 +02:00
|
|
|
it := indexReader.kvreader.PrefixIterator(tfr.Key())
|
2015-03-10 21:22:19 +01:00
|
|
|
|
2016-03-05 13:50:25 +01:00
|
|
|
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
2014-04-17 22:55:53 +02:00
|
|
|
return &UpsideDownCouchTermFieldReader{
|
2015-09-23 20:25:47 +02:00
|
|
|
indexReader: indexReader,
|
|
|
|
iterator: it,
|
|
|
|
count: dictionaryRow.count,
|
|
|
|
term: term,
|
2016-07-23 01:49:33 +02:00
|
|
|
tfrNext: &TermFrequencyRow{},
|
2015-09-23 20:25:47 +02:00
|
|
|
field: field,
|
2014-04-17 22:55:53 +02:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
|
|
|
|
return r.count
|
|
|
|
}
|
|
|
|
|
2016-07-21 01:53:30 +02:00
|
|
|
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
2015-03-10 21:22:19 +01:00
|
|
|
if r.iterator != nil {
|
|
|
|
key, val, valid := r.iterator.Current()
|
|
|
|
if valid {
|
2016-07-23 01:49:33 +02:00
|
|
|
tfr := r.tfrNext
|
|
|
|
err := tfr.parseKDoc(key)
|
2016-07-20 20:49:51 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
err = tfr.parseV(val)
|
2015-03-10 21:22:19 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-07-21 01:53:30 +02:00
|
|
|
rv := preAlloced
|
|
|
|
if rv == nil {
|
|
|
|
rv = &index.TermFieldDoc{}
|
2015-10-28 17:23:54 +01:00
|
|
|
}
|
2016-08-03 19:45:48 +02:00
|
|
|
rv.ID = append(rv.ID, tfr.doc...)
|
2016-07-21 01:53:30 +02:00
|
|
|
rv.Freq = tfr.freq
|
|
|
|
rv.Norm = float64(tfr.norm)
|
2016-07-23 01:49:33 +02:00
|
|
|
if tfr.vectors != nil {
|
|
|
|
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
|
|
|
|
}
|
2015-10-28 17:23:54 +01:00
|
|
|
r.iterator.Next()
|
2016-07-21 01:53:30 +02:00
|
|
|
return rv, nil
|
2014-04-19 03:07:41 +02:00
|
|
|
}
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
2014-09-04 00:47:02 +02:00
|
|
|
return nil, nil
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2016-08-01 20:26:50 +02:00
|
|
|
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
2015-03-10 21:22:19 +01:00
|
|
|
if r.iterator != nil {
|
2016-07-31 19:46:18 +02:00
|
|
|
tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0)
|
2015-03-10 21:22:19 +01:00
|
|
|
r.iterator.Seek(tfr.Key())
|
|
|
|
key, val, valid := r.iterator.Current()
|
|
|
|
if valid {
|
|
|
|
tfr, err := NewTermFrequencyRowKV(key, val)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-07-29 23:14:58 +02:00
|
|
|
rv := preAlloced
|
|
|
|
if rv == nil {
|
|
|
|
rv = &index.TermFieldDoc{}
|
|
|
|
}
|
2016-08-03 19:45:48 +02:00
|
|
|
rv.ID = append(rv.ID, tfr.doc...)
|
2016-07-29 23:14:58 +02:00
|
|
|
rv.Freq = tfr.freq
|
|
|
|
rv.Norm = float64(tfr.norm)
|
|
|
|
if tfr.vectors != nil {
|
|
|
|
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
|
2015-10-28 17:23:54 +01:00
|
|
|
}
|
|
|
|
r.iterator.Next()
|
2016-07-29 23:14:58 +02:00
|
|
|
return rv, nil
|
2014-04-19 03:07:41 +02:00
|
|
|
}
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
2014-09-04 00:47:02 +02:00
|
|
|
return nil, nil
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2015-03-06 20:46:29 +01:00
|
|
|
func (r *UpsideDownCouchTermFieldReader) Close() error {
|
2015-03-10 21:22:19 +01:00
|
|
|
if r.iterator != nil {
|
|
|
|
return r.iterator.Close()
|
|
|
|
}
|
|
|
|
return nil
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
2014-07-11 20:24:28 +02:00
|
|
|
|
2014-09-04 01:53:59 +02:00
|
|
|
type UpsideDownCouchDocIDReader struct {
|
2014-09-12 23:21:35 +02:00
|
|
|
indexReader *IndexReader
|
|
|
|
iterator store.KVIterator
|
2016-07-31 19:46:18 +02:00
|
|
|
only []string
|
|
|
|
onlyPos int
|
|
|
|
onlyMode bool
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
|
|
|
|
2014-09-12 23:21:35 +02:00
|
|
|
func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) (*UpsideDownCouchDocIDReader, error) {
|
2016-01-07 08:38:02 +01:00
|
|
|
startBytes := []byte(start)
|
2014-07-11 20:24:28 +02:00
|
|
|
if start == "" {
|
2016-01-07 08:38:02 +01:00
|
|
|
startBytes = []byte{0x0}
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
2016-01-07 08:38:02 +01:00
|
|
|
endBytes := []byte(end)
|
2014-07-11 20:24:28 +02:00
|
|
|
if end == "" {
|
2016-01-07 08:38:02 +01:00
|
|
|
endBytes = []byte{0xff}
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
2016-01-07 08:38:02 +01:00
|
|
|
bisr := NewBackIndexRow(startBytes, nil, nil)
|
|
|
|
bier := NewBackIndexRow(endBytes, nil, nil)
|
2015-09-23 20:25:47 +02:00
|
|
|
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
2014-07-11 20:24:28 +02:00
|
|
|
|
2014-09-04 01:53:59 +02:00
|
|
|
return &UpsideDownCouchDocIDReader{
|
2014-09-12 23:21:35 +02:00
|
|
|
indexReader: indexReader,
|
|
|
|
iterator: it,
|
2014-07-11 20:24:28 +02:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2016-07-31 19:46:18 +02:00
|
|
|
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
|
|
|
|
// ensure ids are sorted
|
|
|
|
sort.Strings(ids)
|
|
|
|
startBytes := []byte{0x0}
|
|
|
|
if len(ids) > 0 {
|
|
|
|
startBytes = []byte(ids[0])
|
|
|
|
}
|
|
|
|
endBytes := []byte{0xff}
|
|
|
|
if len(ids) > 0 {
|
|
|
|
endBytes = incrementBytes([]byte(ids[len(ids)-1]))
|
|
|
|
}
|
|
|
|
bisr := NewBackIndexRow(startBytes, nil, nil)
|
|
|
|
bier := NewBackIndexRow(endBytes, nil, nil)
|
|
|
|
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
|
|
|
|
|
|
|
return &UpsideDownCouchDocIDReader{
|
|
|
|
indexReader: indexReader,
|
|
|
|
iterator: it,
|
|
|
|
only: ids,
|
|
|
|
onlyMode: true,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
|
2014-07-11 20:24:28 +02:00
|
|
|
key, val, valid := r.iterator.Current()
|
2016-07-31 19:46:18 +02:00
|
|
|
|
|
|
|
if r.onlyMode {
|
2016-08-01 20:26:50 +02:00
|
|
|
var rv index.IndexInternalID
|
2016-07-31 19:46:18 +02:00
|
|
|
for valid && r.onlyPos < len(r.only) {
|
|
|
|
br, err := NewBackIndexRowKV(key, val)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
|
|
|
|
ok := r.nextOnly()
|
|
|
|
if !ok {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
|
|
|
key, val, valid = r.iterator.Current()
|
|
|
|
continue
|
|
|
|
} else {
|
2016-08-01 23:01:04 +02:00
|
|
|
rv = append([]byte(nil), br.doc...)
|
2016-07-31 19:46:18 +02:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if valid && r.onlyPos < len(r.only) {
|
|
|
|
ok := r.nextOnly()
|
|
|
|
if ok {
|
|
|
|
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
|
|
|
}
|
|
|
|
return rv, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
if valid {
|
|
|
|
br, err := NewBackIndexRowKV(key, val)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-08-01 23:01:04 +02:00
|
|
|
rv := append([]byte(nil), br.doc...)
|
2016-07-31 19:46:18 +02:00
|
|
|
r.iterator.Next()
|
|
|
|
return rv, nil
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
|
|
|
}
|
2016-07-31 19:46:18 +02:00
|
|
|
return nil, nil
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
|
|
|
|
2016-07-31 19:46:18 +02:00
|
|
|
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {
|
2016-08-01 20:26:50 +02:00
|
|
|
bir := NewBackIndexRow(docID, nil, nil)
|
2014-07-11 20:24:28 +02:00
|
|
|
r.iterator.Seek(bir.Key())
|
|
|
|
key, val, valid := r.iterator.Current()
|
2016-08-01 20:26:50 +02:00
|
|
|
r.onlyPos = sort.SearchStrings(r.only, string(docID))
|
2016-07-31 19:46:18 +02:00
|
|
|
|
|
|
|
if r.onlyMode {
|
2016-08-01 20:26:50 +02:00
|
|
|
var rv index.IndexInternalID
|
2016-07-31 19:46:18 +02:00
|
|
|
for valid && r.onlyPos < len(r.only) {
|
|
|
|
br, err := NewBackIndexRowKV(key, val)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
|
|
|
|
ok := r.nextOnly()
|
|
|
|
if !ok {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
|
|
|
continue
|
|
|
|
} else {
|
2016-08-01 23:01:04 +02:00
|
|
|
rv = append([]byte(nil), br.doc...)
|
2016-07-31 19:46:18 +02:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if valid && r.onlyPos < len(r.only) {
|
|
|
|
ok := r.nextOnly()
|
|
|
|
if ok {
|
|
|
|
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
|
|
|
}
|
|
|
|
return rv, nil
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if valid {
|
|
|
|
br, err := NewBackIndexRowKV(key, val)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-08-01 23:01:04 +02:00
|
|
|
rv := append([]byte(nil), br.doc...)
|
2016-07-31 19:46:18 +02:00
|
|
|
r.iterator.Next()
|
|
|
|
return rv, nil
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
|
|
|
}
|
2016-07-31 19:46:18 +02:00
|
|
|
return nil, nil
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
|
|
|
|
2015-03-06 20:46:29 +01:00
|
|
|
func (r *UpsideDownCouchDocIDReader) Close() error {
|
2016-03-05 13:50:25 +01:00
|
|
|
atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
|
2015-03-06 20:46:29 +01:00
|
|
|
return r.iterator.Close()
|
2014-07-11 20:24:28 +02:00
|
|
|
}
|
2016-07-31 19:46:18 +02:00
|
|
|
|
|
|
|
// move the r.only pos forward one, skipping duplicates
|
|
|
|
// return true if there is more data, or false if we got to the end of the list
|
|
|
|
func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
|
|
|
|
|
|
|
|
// advance 1 position, until we see a different key
|
|
|
|
// it's already sorted, so this skips duplicates
|
|
|
|
start := r.onlyPos
|
|
|
|
r.onlyPos++
|
|
|
|
for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] {
|
|
|
|
start = r.onlyPos
|
|
|
|
r.onlyPos++
|
|
|
|
}
|
|
|
|
// inidicate if we got to the end of the list
|
|
|
|
return r.onlyPos < len(r.only)
|
|
|
|
}
|