166 lines
4.1 KiB
Go
166 lines
4.1 KiB
Go
|
// Copyright (c) 2017 Couchbase, Inc.
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
package zap
|
||
|
|
||
|
import (
|
||
|
"encoding/binary"
|
||
|
"fmt"
|
||
|
|
||
|
"github.com/RoaringBitmap/roaring"
|
||
|
"github.com/blevesearch/bleve/index"
|
||
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||
|
"github.com/couchbaselabs/vellum"
|
||
|
"github.com/couchbaselabs/vellum/regexp"
|
||
|
)
|
||
|
|
||
|
// Dictionary is the zap representation of the term dictionary
|
||
|
type Dictionary struct {
|
||
|
segment *Segment
|
||
|
field string
|
||
|
fieldID uint16
|
||
|
fst *vellum.FST
|
||
|
}
|
||
|
|
||
|
// PostingsList returns the postings list for the specified term
|
||
|
func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
|
||
|
return d.postingsList(term, except)
|
||
|
}
|
||
|
|
||
|
func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*PostingsList, error) {
|
||
|
rv := &PostingsList{
|
||
|
dictionary: d,
|
||
|
term: term,
|
||
|
except: except,
|
||
|
}
|
||
|
|
||
|
if d.fst != nil {
|
||
|
postingsOffset, exists, err := d.fst.Get([]byte(term))
|
||
|
if err != nil {
|
||
|
return nil, fmt.Errorf("vellum err: %v", err)
|
||
|
}
|
||
|
if exists {
|
||
|
rv.postingsOffset = postingsOffset
|
||
|
// read the location of the freq/norm details
|
||
|
var n uint64
|
||
|
var read int
|
||
|
|
||
|
rv.freqOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+binary.MaxVarintLen64])
|
||
|
n += uint64(read)
|
||
|
rv.locOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
||
|
n += uint64(read)
|
||
|
var postingsLen uint64
|
||
|
postingsLen, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
|
||
|
n += uint64(read)
|
||
|
|
||
|
roaringBytes := d.segment.mm[postingsOffset+n : postingsOffset+n+postingsLen]
|
||
|
|
||
|
bitmap := roaring.NewBitmap()
|
||
|
_, err = bitmap.FromBuffer(roaringBytes)
|
||
|
if err != nil {
|
||
|
return nil, fmt.Errorf("error loading roaring bitmap: %v", err)
|
||
|
}
|
||
|
|
||
|
rv.postings = bitmap
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return rv, nil
|
||
|
}
|
||
|
|
||
|
// Iterator returns an iterator for this dictionary
|
||
|
func (d *Dictionary) Iterator() segment.DictionaryIterator {
|
||
|
|
||
|
rv := &DictionaryIterator{
|
||
|
d: d,
|
||
|
}
|
||
|
|
||
|
if d.fst != nil {
|
||
|
itr, err := d.fst.Iterator(nil, nil)
|
||
|
if err == nil {
|
||
|
rv.itr = itr
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return rv
|
||
|
}
|
||
|
|
||
|
// PrefixIterator returns an iterator which only visits terms having the
|
||
|
// the specified prefix
|
||
|
func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
|
||
|
rv := &DictionaryIterator{
|
||
|
d: d,
|
||
|
}
|
||
|
|
||
|
if d.fst != nil {
|
||
|
r, err := regexp.New(prefix + ".*")
|
||
|
if err == nil {
|
||
|
itr, err := d.fst.Search(r, nil, nil)
|
||
|
if err == nil {
|
||
|
rv.itr = itr
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return rv
|
||
|
}
|
||
|
|
||
|
// RangeIterator returns an iterator which only visits terms between the
|
||
|
// start and end terms. NOTE: bleve.index API specifies the end is inclusive.
|
||
|
func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
|
||
|
rv := &DictionaryIterator{
|
||
|
d: d,
|
||
|
}
|
||
|
|
||
|
// need to increment the end position to be inclusive
|
||
|
endBytes := []byte(end)
|
||
|
if endBytes[len(endBytes)-1] < 0xff {
|
||
|
endBytes[len(endBytes)-1]++
|
||
|
} else {
|
||
|
endBytes = append(endBytes, 0xff)
|
||
|
}
|
||
|
|
||
|
if d.fst != nil {
|
||
|
itr, err := d.fst.Iterator([]byte(start), endBytes)
|
||
|
if err == nil {
|
||
|
rv.itr = itr
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return rv
|
||
|
}
|
||
|
|
||
|
// DictionaryIterator is an iterator for term dictionary
|
||
|
type DictionaryIterator struct {
|
||
|
d *Dictionary
|
||
|
itr vellum.Iterator
|
||
|
err error
|
||
|
}
|
||
|
|
||
|
// Next returns the next entry in the dictionary
|
||
|
func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
|
||
|
if i.itr == nil || i.err == vellum.ErrIteratorDone {
|
||
|
return nil, nil
|
||
|
} else if i.err != nil {
|
||
|
return nil, i.err
|
||
|
}
|
||
|
term, count := i.itr.Current()
|
||
|
rv := &index.DictEntry{
|
||
|
Term: string(term),
|
||
|
Count: count,
|
||
|
}
|
||
|
i.err = i.itr.Next()
|
||
|
return rv, nil
|
||
|
}
|