parent
b16c1d7f79
commit
292af78b9e
|
@ -22,6 +22,8 @@ type Index interface {
|
|||
TermFieldReader(term []byte, field string) (TermFieldReader, error)
|
||||
DocIdReader(start, end string) (DocIdReader, error)
|
||||
|
||||
FieldReader(field string, startTerm []byte, endTerm []byte) (FieldReader, error)
|
||||
|
||||
DocCount() uint64
|
||||
|
||||
Document(id string) (*document.Document, error)
|
||||
|
@ -41,6 +43,7 @@ type TermFieldVector struct {
|
|||
}
|
||||
|
||||
type TermFieldDoc struct {
|
||||
Term string
|
||||
ID string
|
||||
Freq uint64
|
||||
Norm float64
|
||||
|
@ -54,6 +57,11 @@ type TermFieldReader interface {
|
|||
Close()
|
||||
}
|
||||
|
||||
type FieldReader interface {
|
||||
Next() (*TermFieldDoc, error)
|
||||
Close()
|
||||
}
|
||||
|
||||
type DocIdReader interface {
|
||||
Next() (string, error)
|
||||
Advance(ID string) (string, error)
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/index/store"
|
||||
)
|
||||
|
||||
type UpsideDownCouchFieldReader struct {
|
||||
index *UpsideDownCouch
|
||||
iterator store.KVIterator
|
||||
endKey []byte
|
||||
field uint16
|
||||
}
|
||||
|
||||
func newUpsideDownCouchFieldReader(index *UpsideDownCouch, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldReader, error) {
|
||||
|
||||
startRow := NewTermFrequencyRow(startTerm, field, "", 0, 0)
|
||||
startKey := startRow.ScanPrefixForFieldTermPrefix()
|
||||
|
||||
endKey := NewTermFrequencyRow(endTerm, field, "", 0, 0).Key()
|
||||
|
||||
it := index.store.Iterator(startKey)
|
||||
|
||||
return &UpsideDownCouchFieldReader{
|
||||
index: index,
|
||||
iterator: it,
|
||||
field: field,
|
||||
endKey: endKey,
|
||||
}, nil
|
||||
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) {
|
||||
key, val, valid := r.iterator.Current()
|
||||
if !valid {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// past end term
|
||||
if bytes.Compare(key, r.endKey) > 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
currRow, err := NewTermFrequencyRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unexpected error parsing term freq row kv: %v", err)
|
||||
}
|
||||
rv := index.TermFieldDoc{
|
||||
Term: string(currRow.term),
|
||||
Freq: currRow.freq,
|
||||
}
|
||||
// advance the iterator to the next term
|
||||
// by using invalid doc id (higher sorting)
|
||||
nextTerm := incrementBytes(currRow.term)
|
||||
nextRow := NewTermFrequencyRow(nextTerm, r.field, "", 0, 0)
|
||||
r.iterator.Seek(nextRow.ScanPrefixForFieldTermPrefix())
|
||||
return &rv, nil
|
||||
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchFieldReader) Close() {
|
||||
r.iterator.Close()
|
||||
}
|
||||
|
||||
func incrementBytes(in []byte) []byte {
|
||||
rv := make([]byte, len(in))
|
||||
copy(rv, in)
|
||||
for i := len(rv) - 1; i >= 0; i-- {
|
||||
rv[i] = rv[i] + 1
|
||||
if rv[i] != 0 {
|
||||
// didnt' overflow, so stop
|
||||
break
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index/store/leveldb"
|
||||
)
|
||||
|
||||
func TestIndexFieldReader(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
store, err := leveldb.Open("test", true)
|
||||
idx := NewUpsideDownCouch(store)
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
var expectedCount uint64 = 0
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("prefix", []byte("bob cat cats catting dog doggy zoo"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
reader, err := idx.FieldReader("name", nil, nil)
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
termCount := 0
|
||||
curr, err := reader.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
if curr.Term != "test" {
|
||||
t.Errorf("expected term to be 'test', got '%s'", curr.Term)
|
||||
}
|
||||
curr, err = reader.Next()
|
||||
}
|
||||
if termCount != 1 {
|
||||
t.Errorf("expected 1 term for this field, got %d", termCount)
|
||||
}
|
||||
|
||||
reader, err = idx.FieldReader("desc", nil, nil)
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
termCount = 0
|
||||
terms := make([]string, 0)
|
||||
curr, err = reader.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = reader.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms := []string{"eat", "more", "rice"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
// test use case for prefix
|
||||
reader, err = idx.FieldReader("prefix", []byte("cat"), []byte("cat"))
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
termCount = 0
|
||||
terms = make([]string, 0)
|
||||
curr, err = reader.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = reader.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms = []string{"cats", "catting", "cat"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
}
|
|
@ -149,6 +149,30 @@ type TermFrequencyRow struct {
|
|||
vectors []*TermVector
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
|
||||
buf := make([]byte, 3)
|
||||
buf[0] = 't'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
|
||||
buf := make([]byte, 3+len(tfr.term))
|
||||
buf[0] = 't'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
|
||||
copy(buf[3:], tfr.term)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
|
||||
buf := make([]byte, 3+len(tfr.term)+1)
|
||||
buf[0] = 't'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
|
||||
termLen := copy(buf[3:], tfr.term)
|
||||
buf[3+termLen] = BYTE_SEPARATOR
|
||||
return buf
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Key() []byte {
|
||||
buf := make([]byte, 3+len(tfr.term)+1+len(tfr.doc))
|
||||
buf[0] = 't'
|
||||
|
|
|
@ -598,7 +598,15 @@ func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (inde
|
|||
if fieldExists {
|
||||
return newUpsideDownCouchTermFieldReader(udc, term, uint16(fieldIndex))
|
||||
}
|
||||
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, 0)
|
||||
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, ^uint16(0))
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) FieldReader(fieldName string, startTerm []byte, endTerm []byte) (index.FieldReader, error) {
|
||||
fieldIndex, fieldExists := udc.fieldIndexes[fieldName]
|
||||
if fieldExists {
|
||||
return newUpsideDownCouchFieldReader(udc, uint16(fieldIndex), startTerm, endTerm)
|
||||
}
|
||||
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, ^uint16(0))
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) DocIdReader(start, end string) (index.DocIdReader, error) {
|
||||
|
|
9
query.go
9
query.go
|
@ -106,5 +106,14 @@ func ParseQuery(input []byte) (Query, error) {
|
|||
}
|
||||
return &rv, nil
|
||||
}
|
||||
_, hasPrefix := tmp["prefix"]
|
||||
if hasPrefix {
|
||||
var rv PrefixQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
return nil, fmt.Errorf("Unrecognized query")
|
||||
}
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
type PrefixQuery struct {
|
||||
Prefix string `json:"prefix"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewPrefixQuery(prefix string) *PrefixQuery {
|
||||
return &PrefixQuery{
|
||||
Prefix: prefix,
|
||||
BoostVal: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *PrefixQuery) Boost() float64 {
|
||||
return q.BoostVal
|
||||
}
|
||||
|
||||
func (q *PrefixQuery) SetBoost(b float64) *PrefixQuery {
|
||||
q.BoostVal = b
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *PrefixQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *PrefixQuery) SetField(f string) *PrefixQuery {
|
||||
q.FieldVal = f
|
||||
return q
|
||||
}
|
||||
|
||||
func (q *PrefixQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, error) {
|
||||
field := q.FieldVal
|
||||
if q.FieldVal == "" {
|
||||
field = i.m.defaultField()
|
||||
}
|
||||
return search.NewTermPrefixSearcher(i.i, q.Prefix, field, q.BoostVal, explain)
|
||||
}
|
||||
|
||||
func (q *PrefixQuery) Validate() error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
type TermPrefixSearcher struct {
|
||||
index index.Index
|
||||
prefix string
|
||||
field string
|
||||
explain bool
|
||||
searcher *DisjunctionSearcher
|
||||
}
|
||||
|
||||
func NewTermPrefixSearcher(index index.Index, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
|
||||
// find the terms with this prefix
|
||||
fieldReader, err := index.FieldReader(field, []byte(prefix), []byte(prefix))
|
||||
|
||||
// enumerate all the terms in the range
|
||||
qsearchers := make([]Searcher, 0, 25)
|
||||
tfd, err := fieldReader.Next()
|
||||
for err == nil && tfd != nil {
|
||||
qsearcher, err := NewTermSearcher(index, string(tfd.Term), field, 1.0, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qsearchers = append(qsearchers, qsearcher)
|
||||
tfd, err = fieldReader.Next()
|
||||
}
|
||||
// build disjunction searcher of these ranges
|
||||
searcher, err := NewDisjunctionSearcher(index, qsearchers, 0, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &TermPrefixSearcher{
|
||||
index: index,
|
||||
prefix: prefix,
|
||||
field: field,
|
||||
explain: explain,
|
||||
searcher: searcher,
|
||||
}, nil
|
||||
}
|
||||
func (s *TermPrefixSearcher) Count() uint64 {
|
||||
return s.searcher.Count()
|
||||
}
|
||||
|
||||
func (s *TermPrefixSearcher) Weight() float64 {
|
||||
return s.searcher.Weight()
|
||||
}
|
||||
|
||||
func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) {
|
||||
s.searcher.SetQueryNorm(qnorm)
|
||||
}
|
||||
|
||||
func (s *TermPrefixSearcher) Next() (*DocumentMatch, error) {
|
||||
return s.searcher.Next()
|
||||
|
||||
}
|
||||
|
||||
func (s *TermPrefixSearcher) Advance(ID string) (*DocumentMatch, error) {
|
||||
return s.searcher.Next()
|
||||
}
|
||||
|
||||
func (s *TermPrefixSearcher) Close() {
|
||||
s.searcher.Close()
|
||||
}
|
Loading…
Reference in New Issue