0
0
bleve/search/fragmenter_simple.go

77 lines
2.2 KiB
Go
Raw Normal View History

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import ()
// DEFAULT_FRAGMENT_SIZE is the fragment size that NewSimpleFragmenter assigns
// to a SimpleFragmenter. Fragment adds it to byte offsets into the original
// text, so it is a length in bytes.
const DEFAULT_FRAGMENT_SIZE = 100
// SimpleFragmenter cuts a text into candidate fragments, one per term
// location, each spanning up to fragmentSize bytes (see Fragment).
type SimpleFragmenter struct {
// fragmentSize is the span, in bytes of the original text, that Fragment
// aims to cover with each fragment.
fragmentSize int
}
// NewSimpleFragmenter builds a SimpleFragmenter whose fragment size is
// DEFAULT_FRAGMENT_SIZE.
func NewSimpleFragmenter() *SimpleFragmenter {
	fragmenter := new(SimpleFragmenter)
	fragmenter.fragmentSize = DEFAULT_FRAGMENT_SIZE
	return fragmenter
}
// NewSimpleFragmenterWithSize builds a SimpleFragmenter that uses the given
// fragmentSize exactly as passed; the value is not validated here.
func NewSimpleFragmenterWithSize(fragmentSize int) *SimpleFragmenter {
	var fragmenter SimpleFragmenter
	fragmenter.fragmentSize = fragmentSize
	return &fragmenter
}
// Fragment builds one candidate Fragment of orig for every entry of ot, in the
// same order as ot. The returned slice is never nil.
//
// Each fragment initially runs from the term's Start for fragmentSize bytes.
// If that would run past the end of orig, the fragment is clamped to len(orig)
// and its start is pulled back to recover the lost length. The window is then
// shifted left by half of the available slack so the terms it contains sit
// nearer the middle. Neither adjustment moves the start below the End of the
// previous term location, so a fragment never backs up over the prior term.
//
// All offsets are byte offsets; fragment boundaries are not aligned to UTF-8
// rune boundaries. The inner scan stops at the first location that ends beyond
// the fragment, so ot is presumably sorted by position (verify against the
// caller).
func (s *SimpleFragmenter) Fragment(orig []byte, ot termLocations) []*Fragment {
	// exactly one fragment is appended per term location
	rv := make([]*Fragment, 0, len(ot))

	maxbegin := 0
	for currTermIndex, termLocation := range ot {
		// start with this term
		// it should be the highest scoring fragment with this term first
		start := termLocation.Start
		end := start + s.fragmentSize

		// leftLimit is the lowest offset this fragment may start at. It is
		// normally maxbegin, but when this term overlaps the previous one
		// (Start < previous End) it is capped at the term's own start.
		// Without the cap the slack computed below goes negative, which
		// shifts the fragment forward: it would cut off the beginning of its
		// own term and could push end past len(orig).
		leftLimit := maxbegin
		if leftLimit > start {
			leftLimit = start
		}

		if end > len(orig) {
			end = len(orig)
			// we hit end, so push back as far as we can without crossing leftLimit
			extra := s.fragmentSize - (end - start)
			if start-extra >= leftLimit {
				start -= extra
			} else {
				start = leftLimit
			}
		}

		// however, we'd rather have the tokens centered more in the frag
		// lets try to do that as best we can, without affecting the score
		// find the end of the last term in this fragment
		minend := end
		for _, innerTermLocation := range ot[currTermIndex:] {
			if innerTermLocation.End > end {
				break
			}
			minend = innerTermLocation.End
		}

		// find the smaller of the two rooms to move: the free space after the
		// last contained term, and the free space before start
		roomToMove := end - minend
		if start-leftLimit < roomToMove {
			roomToMove = start - leftLimit
		}

		// shift by half the room so the slack is split between both sides
		offset := roomToMove / 2

		rv = append(rv, &Fragment{orig: orig, start: start - offset, end: end - offset})

		// set maxbegin to the end of the current term location
		// so that next one won't back up to include it
		maxbegin = termLocation.End
	}
	return rv
}