413 lines
9.9 KiB
Go
413 lines
9.9 KiB
Go
// Copyright (c) 2014 Couchbase, Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
// and limitations under the License.
|
|
package upside_down
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"io"
|
|
)
|
|
|
|
const BYTE_SEPARATOR byte = 0xff
|
|
|
|
type UpsideDownCouchRowStream chan UpsideDownCouchRow
|
|
|
|
type UpsideDownCouchRow interface {
|
|
Key() []byte
|
|
Value() []byte
|
|
}
|
|
|
|
func ParseFromKeyValue(key, value []byte) UpsideDownCouchRow {
|
|
switch key[0] {
|
|
case 'v':
|
|
return NewVersionRowKV(key, value)
|
|
case 'f':
|
|
return NewFieldRowKV(key, value)
|
|
case 't':
|
|
return NewTermFrequencyRowKV(key, value)
|
|
case 'b':
|
|
return NewBackIndexRowKV(key, value)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// VERSION
|
|
|
|
type VersionRow struct {
|
|
version uint8
|
|
}
|
|
|
|
func (v *VersionRow) Key() []byte {
|
|
return []byte{'v'}
|
|
}
|
|
|
|
func (v *VersionRow) Value() []byte {
|
|
buf := new(bytes.Buffer)
|
|
err := binary.Write(buf, binary.LittleEndian, v.version)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (v *VersionRow) String() string {
|
|
return fmt.Sprintf("Version: %d", v.version)
|
|
}
|
|
|
|
func NewVersionRow(version uint8) *VersionRow {
|
|
return &VersionRow{
|
|
version: version,
|
|
}
|
|
}
|
|
|
|
func NewVersionRowKV(key, value []byte) *VersionRow {
|
|
rv := VersionRow{}
|
|
buf := bytes.NewBuffer(value)
|
|
err := binary.Read(buf, binary.LittleEndian, &rv.version)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
return &rv
|
|
}
|
|
|
|
// FIELD definition
|
|
|
|
type FieldRow struct {
|
|
index uint16
|
|
name string
|
|
}
|
|
|
|
func (f *FieldRow) Key() []byte {
|
|
buf := new(bytes.Buffer)
|
|
err := buf.WriteByte('f')
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, f.index)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (f *FieldRow) Value() []byte {
|
|
buf := new(bytes.Buffer)
|
|
_, err := buf.WriteString(f.name)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.WriteString failed: %v", err))
|
|
}
|
|
err = buf.WriteByte(BYTE_SEPARATOR)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (f *FieldRow) String() string {
|
|
return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
|
|
}
|
|
|
|
func NewFieldRow(index uint16, name string) *FieldRow {
|
|
return &FieldRow{
|
|
index: index,
|
|
name: name,
|
|
}
|
|
}
|
|
|
|
func NewFieldRowKV(key, value []byte) *FieldRow {
|
|
rv := FieldRow{}
|
|
|
|
buf := bytes.NewBuffer(key)
|
|
buf.ReadByte() // type
|
|
err := binary.Read(buf, binary.LittleEndian, &rv.index)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
|
|
buf = bytes.NewBuffer(value)
|
|
rv.name, err = buf.ReadString(BYTE_SEPARATOR)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
|
|
}
|
|
rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
|
|
|
|
return &rv
|
|
}
|
|
|
|
// TERM FIELD FREQUENCY
|
|
|
|
type TermVector struct {
|
|
field uint16
|
|
pos uint64
|
|
start uint64
|
|
end uint64
|
|
}
|
|
|
|
func (tv *TermVector) String() string {
|
|
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d", tv.field, tv.pos, tv.start, tv.end)
|
|
}
|
|
|
|
type TermFrequencyRow struct {
|
|
term []byte
|
|
field uint16
|
|
doc []byte
|
|
freq uint64
|
|
norm float32
|
|
vectors []*TermVector
|
|
}
|
|
|
|
func (tfr *TermFrequencyRow) Key() []byte {
|
|
buf := new(bytes.Buffer)
|
|
err := buf.WriteByte('t')
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
|
}
|
|
_, err = buf.Write(tfr.term)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.Write failed: %v", err))
|
|
}
|
|
err = buf.WriteByte(BYTE_SEPARATOR)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, tfr.field)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
_, err = buf.Write(tfr.doc)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.Write failed: %v", err))
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (tfr *TermFrequencyRow) Value() []byte {
|
|
buf := new(bytes.Buffer)
|
|
err := binary.Write(buf, binary.LittleEndian, tfr.freq)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, tfr.norm)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
for _, vector := range tfr.vectors {
|
|
err = binary.Write(buf, binary.LittleEndian, vector.field)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, vector.pos)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, vector.start)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, vector.end)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (tfr *TermFrequencyRow) String() string {
|
|
return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
|
|
}
|
|
|
|
func NewTermFrequencyRow(term []byte, field uint16, doc string, freq uint64, norm float32) *TermFrequencyRow {
|
|
return &TermFrequencyRow{
|
|
term: term,
|
|
field: field,
|
|
doc: []byte(doc),
|
|
freq: freq,
|
|
norm: norm,
|
|
}
|
|
}
|
|
|
|
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, doc string, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
|
|
return &TermFrequencyRow{
|
|
term: term,
|
|
field: field,
|
|
doc: []byte(doc),
|
|
freq: freq,
|
|
norm: norm,
|
|
vectors: vectors,
|
|
}
|
|
}
|
|
|
|
func NewTermFrequencyRowKV(key, value []byte) *TermFrequencyRow {
|
|
rv := TermFrequencyRow{
|
|
doc: []byte(""),
|
|
}
|
|
buf := bytes.NewBuffer(key)
|
|
buf.ReadByte() // type
|
|
|
|
var err error
|
|
rv.term, err = buf.ReadBytes(BYTE_SEPARATOR)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
|
|
}
|
|
rv.term = rv.term[:len(rv.term)-1] // trim off separator byte
|
|
|
|
err = binary.Read(buf, binary.LittleEndian, &rv.field)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
|
|
doc, err := buf.ReadBytes(BYTE_SEPARATOR)
|
|
if err != io.EOF {
|
|
panic(fmt.Sprintf("expected binary.ReadString to end in EOF: %v", err))
|
|
}
|
|
if doc != nil {
|
|
rv.doc = doc
|
|
}
|
|
|
|
buf = bytes.NewBuffer((value))
|
|
err = binary.Read(buf, binary.LittleEndian, &rv.freq)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
err = binary.Read(buf, binary.LittleEndian, &rv.norm)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
|
|
var field uint16
|
|
err = binary.Read(buf, binary.LittleEndian, &field)
|
|
if err != nil && err != io.EOF {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
for err != io.EOF {
|
|
tv := TermVector{}
|
|
tv.field = field
|
|
// at this point we expect at least one term vector
|
|
if rv.vectors == nil {
|
|
rv.vectors = make([]*TermVector, 0)
|
|
}
|
|
|
|
err = binary.Read(buf, binary.LittleEndian, &tv.pos)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
err = binary.Read(buf, binary.LittleEndian, &tv.start)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
err = binary.Read(buf, binary.LittleEndian, &tv.end)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
rv.vectors = append(rv.vectors, &tv)
|
|
// try to read next record (may not exist)
|
|
err = binary.Read(buf, binary.LittleEndian, &field)
|
|
}
|
|
|
|
return &rv
|
|
|
|
}
|
|
|
|
type BackIndexEntry struct {
|
|
term []byte
|
|
field uint16
|
|
}
|
|
|
|
func (bie *BackIndexEntry) String() string {
|
|
return fmt.Sprintf("Term: `%s` Field: %d", string(bie.term), bie.field)
|
|
}
|
|
|
|
type BackIndexRow struct {
|
|
doc []byte
|
|
entries []*BackIndexEntry
|
|
}
|
|
|
|
func (br *BackIndexRow) Key() []byte {
|
|
buf := new(bytes.Buffer)
|
|
err := buf.WriteByte('b')
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, br.doc)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (br *BackIndexRow) Value() []byte {
|
|
buf := new(bytes.Buffer)
|
|
for _, e := range br.entries {
|
|
_, err := buf.Write(e.term)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.Write failed: %v", err))
|
|
}
|
|
err = buf.WriteByte(BYTE_SEPARATOR)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
|
}
|
|
err = binary.Write(buf, binary.LittleEndian, e.field)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
|
}
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func (br *BackIndexRow) String() string {
|
|
return fmt.Sprintf("Backindex DocId: `%s` Entries: %v", string(br.doc), br.entries)
|
|
}
|
|
|
|
func NewBackIndexRow(doc string, entries []*BackIndexEntry) *BackIndexRow {
|
|
return &BackIndexRow{
|
|
doc: []byte(doc),
|
|
entries: entries,
|
|
}
|
|
}
|
|
|
|
func NewBackIndexRowKV(key, value []byte) *BackIndexRow {
|
|
rv := BackIndexRow{}
|
|
|
|
buf := bytes.NewBuffer(key)
|
|
buf.ReadByte() // type
|
|
|
|
var err error
|
|
rv.doc, err = buf.ReadBytes(BYTE_SEPARATOR)
|
|
if err != io.EOF {
|
|
panic(fmt.Sprintf("expected binary.ReadString to end in EOF: %v", err))
|
|
}
|
|
|
|
buf = bytes.NewBuffer(value)
|
|
rv.entries = make([]*BackIndexEntry, 0)
|
|
|
|
var term []byte
|
|
term, err = buf.ReadBytes(BYTE_SEPARATOR)
|
|
if err != nil && err != io.EOF {
|
|
panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
|
|
}
|
|
for err != io.EOF {
|
|
ent := BackIndexEntry{}
|
|
ent.term = term[:len(term)-1] // trim off separator byte
|
|
|
|
err = binary.Read(buf, binary.LittleEndian, &ent.field)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
|
}
|
|
rv.entries = append(rv.entries, &ent)
|
|
|
|
term, err = buf.ReadBytes(BYTE_SEPARATOR)
|
|
if err != nil && err != io.EOF {
|
|
panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
|
|
}
|
|
}
|
|
|
|
return &rv
|
|
}
|