2017-12-05 19:05:12 +01:00
|
|
|
// Copyright (c) 2017 Couchbase, Inc.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package bolt
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/binary"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
|
|
|
|
"github.com/RoaringBitmap/roaring"
|
|
|
|
"github.com/Smerity/govarint"
|
|
|
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
|
|
|
"github.com/boltdb/bolt"
|
|
|
|
"github.com/couchbaselabs/vellum"
|
|
|
|
"github.com/golang/snappy"
|
|
|
|
)
|
|
|
|
|
|
|
|
var readOnlyOptions = &bolt.Options{
|
|
|
|
ReadOnly: true,
|
|
|
|
}
|
|
|
|
|
|
|
|
// idFieldID is the field ID of the _id field, which is always guaranteed to
// be 0.
const idFieldID = uint16(0)
|
|
|
|
|
|
|
|
// Open returns a boltdb impl of a segment
|
|
|
|
func Open(path string) (segment.Segment, error) {
|
|
|
|
|
|
|
|
db, err := bolt.Open(path, 0600, readOnlyOptions)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
tx, err := db.Begin(false)
|
|
|
|
if err != nil {
|
|
|
|
_ = db.Close()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
rv := &Segment{
|
|
|
|
db: db,
|
|
|
|
tx: tx,
|
|
|
|
fieldsMap: make(map[string]uint16),
|
|
|
|
}
|
|
|
|
|
|
|
|
err = rv.loadConfig()
|
|
|
|
if err != nil {
|
|
|
|
_ = db.Close()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = rv.loadFields()
|
|
|
|
if err != nil {
|
|
|
|
_ = db.Close()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return rv, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Segment implements a boltdb based implementation of a segment
|
|
|
|
type Segment struct {
|
|
|
|
version uint8
|
|
|
|
chunkFactor uint32
|
|
|
|
db *bolt.DB
|
|
|
|
tx *bolt.Tx
|
|
|
|
|
|
|
|
fieldsMap map[string]uint16
|
|
|
|
fieldsInv []string
|
|
|
|
fieldsLoc []bool
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Segment) loadConfig() (err error) {
|
|
|
|
bucket := s.tx.Bucket(configBucket)
|
|
|
|
if bucket == nil {
|
|
|
|
return fmt.Errorf("config bucket missing")
|
|
|
|
}
|
|
|
|
|
|
|
|
ver := bucket.Get(versionKey)
|
|
|
|
if ver == nil {
|
|
|
|
return fmt.Errorf("version key missing")
|
|
|
|
}
|
|
|
|
s.version = ver[0]
|
|
|
|
|
|
|
|
chunk := bucket.Get(chunkKey)
|
|
|
|
if chunk == nil {
|
|
|
|
return fmt.Errorf("chunk key is missing")
|
|
|
|
}
|
|
|
|
s.chunkFactor = binary.BigEndian.Uint32(chunk)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// loadFields reads the fields info from the segment so that we never have to go
|
|
|
|
// back to disk to access this (small and used frequently)
|
|
|
|
func (s *Segment) loadFields() (err error) {
|
|
|
|
|
|
|
|
bucket := s.tx.Bucket(fieldsBucket)
|
|
|
|
if bucket == nil {
|
|
|
|
return fmt.Errorf("fields bucket missing")
|
|
|
|
}
|
|
|
|
|
|
|
|
indexLocs := roaring.NewBitmap()
|
|
|
|
err = bucket.ForEach(func(k []byte, v []byte) error {
|
|
|
|
|
|
|
|
// process index locations bitset
|
|
|
|
if k[0] == indexLocsKey[0] {
|
|
|
|
_, err2 := indexLocs.FromBuffer(v)
|
|
|
|
if err2 != nil {
|
|
|
|
return fmt.Errorf("error loading indexLocs: %v", err2)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
|
2017-12-07 00:33:47 +01:00
|
|
|
_, fieldID, err2 := segment.DecodeUvarintAscending(k)
|
2017-12-05 19:05:12 +01:00
|
|
|
if err2 != nil {
|
|
|
|
return err2
|
|
|
|
}
|
|
|
|
// we store fieldID+1 in so we can discern the zero value
|
|
|
|
s.fieldsMap[string(v)] = uint16(fieldID + 1)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// now setup the inverse (should have same size as map and be keyed 0-(len-1))
|
|
|
|
s.fieldsInv = make([]string, len(s.fieldsMap))
|
|
|
|
for k, v := range s.fieldsMap {
|
|
|
|
s.fieldsInv[int(v)-1] = k
|
|
|
|
}
|
|
|
|
s.fieldsLoc = make([]bool, len(s.fieldsInv))
|
|
|
|
for i := range s.fieldsInv {
|
|
|
|
if indexLocs.ContainsInt(i) {
|
|
|
|
s.fieldsLoc[i] = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fields returns the field names used in this segment
|
|
|
|
func (s *Segment) Fields() []string {
|
|
|
|
return s.fieldsInv
|
|
|
|
}
|
|
|
|
|
|
|
|
// Count returns the number of documents in this segment
|
|
|
|
// (this has no notion of deleted docs)
|
|
|
|
func (s *Segment) Count() uint64 {
|
|
|
|
return uint64(s.tx.Bucket(storedBucket).Stats().BucketN - 1)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Dictionary returns the term dictionary for the specified field
|
|
|
|
func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
|
2017-12-07 00:33:47 +01:00
|
|
|
dict, err := s.dictionary(field)
|
|
|
|
if err == nil && dict == nil {
|
|
|
|
return &segment.EmptyDictionary{}, nil
|
|
|
|
}
|
|
|
|
return dict, err
|
2017-12-05 19:05:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Segment) dictionary(field string) (*Dictionary, error) {
|
|
|
|
|
|
|
|
rv := &Dictionary{
|
|
|
|
segment: s,
|
|
|
|
field: field,
|
|
|
|
}
|
|
|
|
|
|
|
|
rv.fieldID = s.fieldsMap[field]
|
|
|
|
if rv.fieldID > 0 {
|
|
|
|
rv.fieldID = rv.fieldID - 1
|
2017-12-07 00:33:47 +01:00
|
|
|
fieldIDKey := segment.EncodeUvarintAscending(nil, uint64(rv.fieldID))
|
2017-12-05 19:05:12 +01:00
|
|
|
bucket := s.tx.Bucket(dictBucket)
|
|
|
|
if bucket == nil {
|
|
|
|
return nil, fmt.Errorf("dictionary bucket missing")
|
|
|
|
}
|
|
|
|
fstBytes := bucket.Get(fieldIDKey)
|
|
|
|
if fstBytes == nil {
|
|
|
|
return nil, fmt.Errorf("dictionary field %s bytes nil", field)
|
|
|
|
}
|
|
|
|
if fstBytes != nil {
|
|
|
|
fst, err := vellum.Load(fstBytes)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
|
|
|
|
}
|
|
|
|
if err == nil {
|
|
|
|
rv.fst = fst
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-07 00:33:47 +01:00
|
|
|
} else {
|
|
|
|
return nil, nil
|
2017-12-05 19:05:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return rv, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// VisitDocument invokes the DocFieldValueVistor for each stored field
|
|
|
|
// for the specified doc number
|
|
|
|
func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
|
|
|
|
storedBuucket := s.tx.Bucket(storedBucket)
|
|
|
|
if storedBuucket == nil {
|
|
|
|
return fmt.Errorf("stored bucket missing")
|
|
|
|
}
|
2017-12-07 00:33:47 +01:00
|
|
|
docNumKey := segment.EncodeUvarintAscending(nil, num)
|
2017-12-05 19:05:12 +01:00
|
|
|
docBucket := storedBuucket.Bucket(docNumKey)
|
|
|
|
if docBucket == nil {
|
|
|
|
return fmt.Errorf("segment has no doc number %d", num)
|
|
|
|
}
|
|
|
|
metaBytes := docBucket.Get(metaKey)
|
|
|
|
if metaBytes == nil {
|
|
|
|
return fmt.Errorf("stored meta bytes for doc number %d is nil", num)
|
|
|
|
}
|
|
|
|
dataBytes := docBucket.Get(dataKey)
|
|
|
|
if dataBytes == nil {
|
|
|
|
return fmt.Errorf("stored data bytes for doc number %d is nil", num)
|
|
|
|
}
|
|
|
|
uncompressed, err := snappy.Decode(nil, dataBytes)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
reader := bytes.NewReader(metaBytes)
|
|
|
|
decoder := govarint.NewU64Base128Decoder(reader)
|
|
|
|
|
|
|
|
keepGoing := true
|
|
|
|
for keepGoing {
|
|
|
|
field, err := decoder.GetU64()
|
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
typ, err := decoder.GetU64()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
offset, err := decoder.GetU64()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
l, err := decoder.GetU64()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
numap, err := decoder.GetU64()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
var arrayPos []uint64
|
|
|
|
if numap > 0 {
|
|
|
|
arrayPos = make([]uint64, numap)
|
|
|
|
for i := 0; i < int(numap); i++ {
|
|
|
|
ap, err := decoder.GetU64()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
arrayPos[i] = ap
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
value := uncompressed[offset : offset+l]
|
|
|
|
keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// DocNumbers returns a bitset corresponding to the doc numbers of all the
|
|
|
|
// provided _id strings
|
|
|
|
func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) {
|
|
|
|
rv := roaring.New()
|
|
|
|
|
|
|
|
if len(s.fieldsMap) > 0 {
|
|
|
|
idDict, err := s.dictionary("_id")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, id := range ids {
|
|
|
|
postings, err := idDict.postingsList(id, nil)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if postings.postings != nil {
|
|
|
|
rv.Or(postings.postings)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return rv, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close releases all resources associated with this segment
|
|
|
|
func (s *Segment) Close() error {
|
|
|
|
err := s.tx.Rollback()
|
|
|
|
if err != nil {
|
|
|
|
_ = s.db.Close()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return s.db.Close()
|
|
|
|
}
|
2017-12-07 00:33:47 +01:00
|
|
|
|
|
|
|
func (s *Segment) Path() string {
|
|
|
|
return s.db.Path()
|
|
|
|
}
|