2017-12-01 21:42:50 +01:00
|
|
|
// Copyright (c) 2017 Couchbase, Inc.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
package mem
|
|
|
|
|
|
|
|
import (
|
2017-12-07 00:33:47 +01:00
|
|
|
"fmt"
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
"github.com/RoaringBitmap/roaring"
|
|
|
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
|
|
|
)
|
|
|
|
|
2017-12-01 15:30:07 +01:00
|
|
|
// idFieldID is the field ID reserved for the _id field, which is always
// guaranteed to be 0.
const idFieldID = uint16(0)
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
// KNOWN ISSUES
|
|
|
|
// - LIMITATION - we decided whether or not to store term vectors for a field
|
|
|
|
// at the segment level, based on the first definition of a
|
|
|
|
// field we see. in normal bleve usage this is fine, all
|
|
|
|
// instances of a field definition will be the same. however,
|
|
|
|
// advanced users may violate this and provide unique field
|
|
|
|
// definitions with each document. this segment does not
|
|
|
|
// support this usage.
|
|
|
|
|
|
|
|
// TODO
|
|
|
|
// - need better testing of multiple docs, iterating freqs, locations and
|
|
|
|
//   verifying the correct results are returned
|
|
|
|
|
|
|
|
// Segment is an in memory implementation of scorch.Segment
|
|
|
|
type Segment struct {
|
|
|
|
|
2017-12-01 13:26:47 +01:00
|
|
|
// FieldsMap name -> id+1
|
|
|
|
FieldsMap map[string]uint16
|
2017-09-29 18:42:37 +02:00
|
|
|
// fields id -> name
|
2017-12-01 13:26:47 +01:00
|
|
|
FieldsInv []string
|
2017-09-29 18:42:37 +02:00
|
|
|
|
|
|
|
// term dictionary
|
|
|
|
// field id -> term -> posting id + 1
|
2017-12-01 13:26:47 +01:00
|
|
|
Dicts []map[string]uint64
|
2017-09-29 18:42:37 +02:00
|
|
|
|
|
|
|
// term dictionary keys
|
|
|
|
// field id -> []dictionary keys
|
2017-12-01 13:26:47 +01:00
|
|
|
DictKeys [][]string
|
2017-09-29 18:42:37 +02:00
|
|
|
|
2017-12-01 13:26:47 +01:00
|
|
|
// Postings list
|
|
|
|
// Postings list id -> Postings bitmap
|
|
|
|
Postings []*roaring.Bitmap
|
2017-09-29 18:42:37 +02:00
|
|
|
|
2017-12-11 21:47:41 +01:00
|
|
|
// Postings List has locations
|
|
|
|
PostingsLocs []*roaring.Bitmap
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
// term frequencies
|
2017-12-01 13:26:47 +01:00
|
|
|
// postings list id -> Freqs (one for each hit in bitmap)
|
|
|
|
Freqs [][]uint64
|
2017-09-29 18:42:37 +02:00
|
|
|
|
2017-12-01 13:26:47 +01:00
|
|
|
// field Norms
|
|
|
|
// postings list id -> Norms (one for each hit in bitmap)
|
|
|
|
Norms [][]float32
|
2017-09-29 18:42:37 +02:00
|
|
|
|
|
|
|
// field/start/end/pos/locarraypos
|
|
|
|
// postings list id -> start/end/pos/locarraypos (one for each freq)
|
2017-12-01 13:26:47 +01:00
|
|
|
Locfields [][]uint16
|
|
|
|
Locstarts [][]uint64
|
|
|
|
Locends [][]uint64
|
|
|
|
Locpos [][]uint64
|
|
|
|
Locarraypos [][][]uint64
|
2017-09-29 18:42:37 +02:00
|
|
|
|
2017-12-01 13:26:47 +01:00
|
|
|
// Stored field values
|
2017-09-29 18:42:37 +02:00
|
|
|
// docNum -> field id -> slice of values (each value []byte)
|
2017-12-01 13:26:47 +01:00
|
|
|
Stored []map[uint16][][]byte
|
2017-09-29 18:42:37 +02:00
|
|
|
|
|
|
|
// stored field types
|
|
|
|
// docNum -> field id -> slice of types (each type byte)
|
2017-12-01 13:26:47 +01:00
|
|
|
StoredTypes []map[uint16][]byte
|
2017-09-29 18:42:37 +02:00
|
|
|
|
|
|
|
// stored field array positions
|
|
|
|
// docNum -> field id -> slice of array positions (each is []uint64)
|
2017-12-01 13:26:47 +01:00
|
|
|
StoredPos []map[uint16][][]uint64
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// New builds a new empty Segment
|
|
|
|
func New() *Segment {
|
|
|
|
return &Segment{
|
2017-12-01 13:26:47 +01:00
|
|
|
FieldsMap: map[string]uint16{},
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-13 22:10:44 +01:00
|
|
|
func (s *Segment) AddRef() {
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Segment) DecRef() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
// Fields returns the field names used in this segment
|
|
|
|
func (s *Segment) Fields() []string {
|
2017-12-01 13:26:47 +01:00
|
|
|
return s.FieldsInv
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// VisitDocument invokes the DocFieldValueVistor for each stored field
|
|
|
|
// for the specified doc number
|
|
|
|
func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
|
|
|
|
// ensure document number exists
|
2017-12-01 13:26:47 +01:00
|
|
|
if int(num) > len(s.Stored)-1 {
|
2017-09-29 18:42:37 +02:00
|
|
|
return nil
|
|
|
|
}
|
2017-12-01 13:26:47 +01:00
|
|
|
docFields := s.Stored[int(num)]
|
2017-09-29 18:42:37 +02:00
|
|
|
for field, values := range docFields {
|
|
|
|
for i, value := range values {
|
2017-12-01 13:26:47 +01:00
|
|
|
keepGoing := visitor(s.FieldsInv[field], s.StoredTypes[int(num)][field][i], value, s.StoredPos[int(num)][field][i])
|
2017-09-29 18:42:37 +02:00
|
|
|
if !keepGoing {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-12-07 00:33:47 +01:00
|
|
|
func (s *Segment) getField(name string) (int, error) {
|
|
|
|
fieldID, ok := s.FieldsMap[name]
|
|
|
|
if !ok {
|
|
|
|
return 0, fmt.Errorf("no field named %s", name)
|
|
|
|
}
|
|
|
|
return int(fieldID - 1), nil
|
|
|
|
}
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
// Dictionary returns the term dictionary for the specified field
|
2017-12-05 00:06:06 +01:00
|
|
|
func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
|
2017-12-07 00:33:47 +01:00
|
|
|
fieldID, err := s.getField(field)
|
|
|
|
if err != nil {
|
|
|
|
// no such field, return empty dictionary
|
|
|
|
return &segment.EmptyDictionary{}, nil
|
|
|
|
}
|
2017-09-29 18:42:37 +02:00
|
|
|
return &Dictionary{
|
|
|
|
segment: s,
|
|
|
|
field: field,
|
2017-12-07 00:33:47 +01:00
|
|
|
fieldID: uint16(fieldID),
|
2017-12-05 00:06:06 +01:00
|
|
|
}, nil
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Count returns the number of documents in this segment
|
|
|
|
// (this has no notion of deleted docs)
|
|
|
|
func (s *Segment) Count() uint64 {
|
2017-12-01 13:26:47 +01:00
|
|
|
return uint64(len(s.Stored))
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// DocNumbers returns a bitset corresponding to the doc numbers of all the
|
|
|
|
// provided _id strings
|
2017-12-05 00:06:06 +01:00
|
|
|
func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) {
|
2017-09-29 18:42:37 +02:00
|
|
|
rv := roaring.New()
|
2017-12-01 15:50:27 +01:00
|
|
|
|
|
|
|
// guard against empty segment
|
|
|
|
if len(s.FieldsMap) > 0 {
|
|
|
|
idDictionary := s.Dicts[idFieldID]
|
|
|
|
|
|
|
|
for _, id := range ids {
|
|
|
|
postingID := idDictionary[id]
|
|
|
|
if postingID > 0 {
|
|
|
|
rv.Or(s.Postings[postingID-1])
|
|
|
|
}
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
}
|
2017-12-05 00:06:06 +01:00
|
|
|
return rv, nil
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
2017-12-05 15:31:02 +01:00
|
|
|
|
|
|
|
// Close releases all resources associated with this segment
|
|
|
|
func (s *Segment) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|