diff --git a/index/upside_down/row.go b/index/upside_down/row.go index b2bd88c4..43583f36 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -150,12 +150,12 @@ type TermFrequencyRow struct { } func (tfr *TermFrequencyRow) Key() []byte { - buf := make([]byte, 1+len(tfr.term)+1+2+len(tfr.doc)) + buf := make([]byte, 3+len(tfr.term)+1+len(tfr.doc)) buf[0] = 't' - termLen := copy(buf[1:], tfr.term) - buf[1+termLen] = BYTE_SEPARATOR - binary.LittleEndian.PutUint16(buf[1+termLen+1:1+termLen+1+2], tfr.field) - copy(buf[1+termLen+1+2:], tfr.doc) + binary.LittleEndian.PutUint16(buf[1:3], tfr.field) + termLen := copy(buf[3:], tfr.term) + buf[3+termLen] = BYTE_SEPARATOR + copy(buf[3+termLen+1:], tfr.doc) return buf } @@ -211,17 +211,17 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { buf.ReadByte() // type var err error + err = binary.Read(buf, binary.LittleEndian, &rv.field) + if err != nil { + return nil, err + } + rv.term, err = buf.ReadBytes(BYTE_SEPARATOR) if err != nil { return nil, err } rv.term = rv.term[:len(rv.term)-1] // trim off separator byte - err = binary.Read(buf, binary.LittleEndian, &rv.field) - if err != nil { - return nil, err - } - doc, err := buf.ReadBytes(BYTE_SEPARATOR) if err != io.EOF { return nil, err diff --git a/index/upside_down/row_test.go b/index/upside_down/row_test.go index aa66da19..40f09555 100644 --- a/index/upside_down/row_test.go +++ b/index/upside_down/row_test.go @@ -41,17 +41,17 @@ func TestRows(t *testing.T) { }, { NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "", 3, 3.14), - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR}, []byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64}, }, { NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14), - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64}, }, { NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}), - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0}, }, { @@ -131,49 +131,49 @@ func TestInvalidRows(t *testing.T) { []byte{'f', 0, 0}, []byte{}, }, - // type t, invalid key (missing term) + // type t, invalid key (missing field) { []byte{'t'}, []byte{}, }, - // type t, invalid key (missing field) + // type t, invalid key (missing term) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR}, + []byte{'t', 0, 0}, []byte{}, }, // type t, invalid key (missing id) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR}, []byte{}, }, // type t, invalid val (misisng freq) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{}, }, // type t, invalid val (missing norm) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 0, 0, 0, 0, 0, 0, 0}, }, // type t, invalid val (half missing tv field, full missing is valid (no term vectors)) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0}, }, // type t, invalid val (missing tv pos) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0, 0}, }, // type t, invalid val (missing tv start) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, }, // type t, invalid val (missing tv end) { - []byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, + []byte{'t', 0, 0, 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0}, }, // type b, invalid key (missing id)