add initial version of zap file format

Marty Schoch 2017-12-09 14:28:33 -05:00
parent ff2e6b98e4
commit 9781d9b089
16 changed files with 3053 additions and 0 deletions

@@ -0,0 +1,120 @@
# zap file format
## stored fields section
- for each document
- preparation phase:
- produce a slice of metadata bytes and data bytes
- produce these slices in field id order
- field value is appended to the data slice
- metadata slice is govarint encoded with the following values for each field value
- field id (uint16)
- field type (byte)
- field value start offset in uncompressed data slice (uint64)
- field value length (uint64)
- field number of array positions (uint64)
- one additional value for each array position (uint64)
- compress the data slice using snappy
- file writing phase:
- remember the start offset for this document
- write out metadata length (varint uint64)
- write out compressed data length (varint uint64)
- write out the metadata bytes
- write out the compressed data bytes
## stored fields idx
- for each document
- write start offset (remembered from previous section) of stored data (big endian uint64)
With this index and a known document number, we have direct access to all the stored field data.
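
For illustration, a minimal reader-side sketch of that direct access (package and helper names here are hypothetical, not part of this commit):

```go
package zapdoc

import (
	"encoding/binary"

	"github.com/golang/snappy"
)

// readStoredDoc follows the stored fields idx entry for docNum and returns
// the metadata bytes and the snappy-decoded data bytes
func readStoredDoc(data []byte, storedIndexOffset, docNum uint64) (meta, uncompressed []byte, err error) {
	// each idx entry is a big endian uint64 start offset
	indexPos := storedIndexOffset + 8*docNum
	start := binary.BigEndian.Uint64(data[indexPos : indexPos+8])
	var n uint64
	metaLen, read := binary.Uvarint(data[start : start+binary.MaxVarintLen64])
	n += uint64(read)
	var dataLen uint64
	dataLen, read = binary.Uvarint(data[start+n : start+n+binary.MaxVarintLen64])
	n += uint64(read)
	meta = data[start+n : start+n+metaLen]
	compressed := data[start+n+metaLen : start+n+metaLen+dataLen]
	uncompressed, err = snappy.Decode(nil, compressed)
	return meta, uncompressed, err
}
```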
## posting details (freq/norm) section
- for each posting list
- produce a slice containing multiple consecutive chunks (each chunk is a govarint stream)
- produce a slice remembering offsets of where each chunk starts
- preparation phase:
- for each hit in the posting list
- if this hit is in the next chunk, close out encoding of the last chunk and record the start offset of the next
- encode term frequency (uint64)
- encode norm factor (float32)
- file writing phase:
- remember start position for this posting list details
- write out number of chunks that follow (varint uint64)
- write out length of each chunk (each a varint uint64)
- write out the byte slice containing all the chunk data
If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
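
A sketch of that seek, assuming `data` is the raw file bytes and `detailsStart` is where one posting's details begin; the location section below shares this chunk layout (helper name is hypothetical):

```go
package zapdoc

import "encoding/binary"

// seekChunk returns the start offset and length of the chunk holding docNum;
// the caller must ensure docNum belongs to this segment
func seekChunk(data []byte, detailsStart, docNum, chunkFactor uint64) (chunkStart, chunkLen uint64) {
	var n uint64
	numChunks, read := binary.Uvarint(data[detailsStart : detailsStart+binary.MaxVarintLen64])
	n += uint64(read)
	chunkLens := make([]uint64, numChunks)
	for i := range chunkLens {
		chunkLens[i], read = binary.Uvarint(data[detailsStart+n : detailsStart+n+binary.MaxVarintLen64])
		n += uint64(read)
	}
	target := docNum / chunkFactor // the chunk this doc falls in
	chunkStart = detailsStart + n
	for i := uint64(0); i < target; i++ {
		chunkStart += chunkLens[i]
	}
	return chunkStart, chunkLens[target]
}
```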
## posting details (location) section
- for each posting list
- produce a slice containing multiple consecutive chunks (each chunk is a govarint stream)
- produce a slice remembering offsets of where each chunk starts
- preparation phase:
- for each hit in the posting list
- if this hit is in the next chunk, close out encoding of the last chunk and record the start offset of the next
- encode field (uint16)
- encode field pos (uint64)
- encode field start (uint64)
- encode field end (uint64)
- encode number of array positions to follow (uint64)
- encode each array position (each uint64)
- file writing phase:
- remember start position for this posting list details
- write out number of chunks that follow (varint uint64)
- write out length of each chunk (each a varint uint64)
- write out the byte slice containing all the chunk data
If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
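
Within a chunk, each hit's locations are decoded from the govarint stream in the order listed above; a sketch (the `location` struct and helper are illustrative, not part of this commit):

```go
package zapdoc

import "github.com/Smerity/govarint"

// location mirrors the values encoded for each hit above
type location struct {
	field, pos, start, end uint64
	arrayPos               []uint64
}

// decodeLocation reads one location record from an open chunk stream
func decodeLocation(dec *govarint.Base128Decoder) (loc location, err error) {
	if loc.field, err = dec.GetU64(); err != nil {
		return loc, err
	}
	if loc.pos, err = dec.GetU64(); err != nil {
		return loc, err
	}
	if loc.start, err = dec.GetU64(); err != nil {
		return loc, err
	}
	if loc.end, err = dec.GetU64(); err != nil {
		return loc, err
	}
	var numAP uint64
	if numAP, err = dec.GetU64(); err != nil {
		return loc, err
	}
	loc.arrayPos = make([]uint64, numAP)
	for k := range loc.arrayPos {
		if loc.arrayPos[k], err = dec.GetU64(); err != nil {
			return loc, err
		}
	}
	return loc, nil
}
```

A chunk's decoder can be built with `govarint.NewU64Base128Decoder(bytes.NewReader(chunkBytes))`, as the iterator code in this commit does.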
## postings list section
- for each posting list
- preparation phase:
- encode roaring bitmap posting list to bytes (so we know the length)
- file writing phase:
- remember the start position for this posting list
- write freq/norm details offset (remembered from previous, as varint uint64)
- write location details offset (remembered from previous, as varint uint64)
- write length of encoded roaring bitmap
- write the serialized roaring bitmap data
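
A sketch of reading one such entry back (helper name hypothetical):

```go
package zapdoc

import (
	"encoding/binary"

	"github.com/RoaringBitmap/roaring"
)

// readPostings parses one postings list entry, returning the offsets of the
// freq/norm and location details plus the roaring bitmap of doc numbers
func readPostings(data []byte, postingsAddr uint64) (freqAddr, locAddr uint64, postings *roaring.Bitmap, err error) {
	var n uint64
	var read int
	freqAddr, read = binary.Uvarint(data[postingsAddr : postingsAddr+binary.MaxVarintLen64])
	n += uint64(read)
	locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
	n += uint64(read)
	var bitmapLen uint64
	bitmapLen, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
	n += uint64(read)
	postings = roaring.NewBitmap()
	_, err = postings.FromBuffer(data[postingsAddr+n : postingsAddr+n+bitmapLen])
	return freqAddr, locAddr, postings, err
}
```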
## dictionary
- for each field
- preparation phase:
- encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous)
- file writing phase:
- remember the start position of this field's dictionary
- write length of vellum data (varint uint64)
- write out vellum data
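
A sketch of a term lookup against a persisted dictionary (helper name hypothetical):

```go
package zapdoc

import (
	"encoding/binary"

	"github.com/couchbaselabs/vellum"
)

// termPostingsAddr loads the vellum FST for a field (dictStart comes from the
// fields section) and returns the postings list offset for term, if present
func termPostingsAddr(data []byte, dictStart uint64, term string) (uint64, bool, error) {
	vellumLen, read := binary.Uvarint(data[dictStart : dictStart+binary.MaxVarintLen64])
	fstBytes := data[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
	fst, err := vellum.Load(fstBytes)
	if err != nil {
		return 0, false, err
	}
	return fst.Get([]byte(term))
}
```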
## fields section
- for each field
- file writing phase:
- remember start offset for each field
- write 1 if field has location info indexed, 0 if not (varint uint64)
- write dictionary address (remembered from previous) (varint uint64)
- write length of field name (varint uint64)
- write field name bytes
## fields idx
- for each field
- file writing phase:
- write big endian uint64 of start offset for each field
NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size.
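
A sketch of walking this index at load time, given the known footer size (helper name hypothetical):

```go
package zapdoc

import "encoding/binary"

// loadFieldNames walks the fields idx, which runs from fieldsIndexOffset up to
// the footer, decoding each field entry it points at
func loadFieldNames(data []byte, fieldsIndexOffset uint64, footerSize int) []string {
	fieldsIndexEnd := uint64(len(data) - footerSize)
	var names []string
	for pos := fieldsIndexOffset; pos < fieldsIndexEnd; pos += 8 {
		addr := binary.BigEndian.Uint64(data[pos : pos+8])
		var n uint64
		_, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64]) // has-location flag
		n += uint64(read)
		_, read = binary.Uvarint(data[addr+n : addr+n+binary.MaxVarintLen64]) // dictionary address
		n += uint64(read)
		var nameLen uint64
		nameLen, read = binary.Uvarint(data[addr+n : addr+n+binary.MaxVarintLen64])
		n += uint64(read)
		names = append(names, string(data[addr+n:addr+n+nameLen]))
	}
	return names
}
```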
## footer
- file writing phase
- write number of docs (big endian uint64)
- write stored field index location (big endian uint64)
- write field index location (big endian uint64)
- write out chunk factor (big endian uint32)
- write out version (big endian uint32)
- write out file CRC of everything preceding this (big endian uint32)
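
A sketch of decoding the footer by walking backward from the end of the file (struct and helper names hypothetical):

```go
package zapdoc

import "encoding/binary"

// footer holds the fixed-size trailer fields in the order they are written
type footer struct {
	numDocs           uint64
	storedIndexOffset uint64
	fieldIndexOffset  uint64
	chunkFactor       uint32
	version           uint32
	crc               uint32
}

// parseFooter decodes the footer from the tail of the file
func parseFooter(data []byte) footer {
	var f footer
	p := len(data)
	p -= 4
	f.crc = binary.BigEndian.Uint32(data[p : p+4])
	p -= 4
	f.version = binary.BigEndian.Uint32(data[p : p+4])
	p -= 4
	f.chunkFactor = binary.BigEndian.Uint32(data[p : p+4])
	p -= 8
	f.fieldIndexOffset = binary.BigEndian.Uint64(data[p : p+8])
	p -= 8
	f.storedIndexOffset = binary.BigEndian.Uint64(data[p : p+8])
	p -= 8
	f.numDocs = binary.BigEndian.Uint64(data[p : p+8])
	return f
}
```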

@@ -0,0 +1,615 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bufio"
"bytes"
"encoding/binary"
"math"
"os"
"github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment/mem"
"github.com/couchbaselabs/vellum"
"github.com/golang/snappy"
)
var version uint32
// PersistSegment takes the in-memory segment and persists it to the specified
// path in the zap file format.
func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) {
flag := os.O_RDWR | os.O_CREATE
f, err := os.OpenFile(path, flag, 0600)
if err != nil {
return err
}
// buffer the output
br := bufio.NewWriter(f)
// wrap it for counting (tracking offsets)
cr := NewCountHashWriter(br)
var storedIndexOffset uint64
storedIndexOffset, err = persistStored(memSegment, cr)
if err != nil {
return err
}
var freqOffsets, locOffsets []uint64
freqOffsets, locOffsets, err = persistPostingDetails(memSegment, cr, chunkFactor)
if err != nil {
return err
}
var postingsLocs []uint64
postingsLocs, err = persistPostingsLists(memSegment, cr, freqOffsets, locOffsets)
if err != nil {
return err
}
var dictLocs []uint64
dictLocs, err = persistDictionary(memSegment, cr, postingsLocs)
if err != nil {
return err
}
var fieldIndexStart uint64
fieldIndexStart, err = persistFields(memSegment, cr, dictLocs)
if err != nil {
return err
}
err = persistFooter(uint64(len(memSegment.Stored)), storedIndexOffset,
fieldIndexStart, chunkFactor, cr)
if err != nil {
return err
}
err = br.Flush()
if err != nil {
return err
}
err = f.Close()
if err != nil {
return err
}
return nil
}
func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) {
var curr int
var metaBuf bytes.Buffer
var data, compressed []byte
docNumOffsets := make(map[int]uint64, len(memSegment.Stored))
for docNum, storedValues := range memSegment.Stored {
if docNum != 0 {
// reset buffer if necessary
metaBuf.Reset()
data = data[:0]
compressed = compressed[:0]
curr = 0
}
metaEncoder := govarint.NewU64Base128Encoder(&metaBuf)
// encode fields in order
for fieldID := range memSegment.FieldsInv {
if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok {
// has stored values for this field
num := len(storedFieldValues)
// process each value
for i := 0; i < num; i++ {
// encode field
_, err2 := metaEncoder.PutU64(uint64(fieldID))
if err2 != nil {
return 0, err2
}
// encode type
_, err2 = metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i]))
if err2 != nil {
return 0, err2
}
// encode start offset
_, err2 = metaEncoder.PutU64(uint64(curr))
if err2 != nil {
return 0, err2
}
// encode value length
_, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i])))
if err2 != nil {
return 0, err2
}
// encode number of array pos
_, err2 = metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i])))
if err2 != nil {
return 0, err2
}
// encode all array positions
for j := 0; j < len(memSegment.StoredPos[docNum][uint16(fieldID)][i]); j++ {
_, err2 = metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j])
if err2 != nil {
return 0, err2
}
}
// append data
data = append(data, storedFieldValues[i]...)
// update curr
curr += len(storedFieldValues[i])
}
}
}
metaEncoder.Close()
metaBytes := metaBuf.Bytes()
// compress the data
compressed = snappy.Encode(compressed, data)
// record where we're about to start writing
docNumOffsets[docNum] = uint64(w.Count())
buf := make([]byte, binary.MaxVarintLen64)
// write out the meta length
n := binary.PutUvarint(buf, uint64(len(metaBytes)))
_, err := w.Write(buf[:n])
if err != nil {
return 0, err
}
// write out the compressed data length
n = binary.PutUvarint(buf, uint64(len(compressed)))
_, err = w.Write(buf[:n])
if err != nil {
return 0, err
}
// now write the meta
_, err = w.Write(metaBytes)
if err != nil {
return 0, err
}
// now write the compressed data
_, err = w.Write(compressed)
if err != nil {
return 0, err
}
}
// return value is the start of the stored index
rv := uint64(w.Count())
// now write out the stored doc index
for docNum := range memSegment.Stored {
err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum])
if err != nil {
return 0, err
}
}
return rv, nil
}
func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
var freqOffsets, locOffsets []uint64
for postingID := range memSegment.Postings {
postingsListItr := memSegment.Postings[postingID].Iterator()
total := uint64(len(memSegment.Stored))/uint64(chunkFactor) + 1
var freqNormBuf []byte
var offset int
var encodingBuf bytes.Buffer
encoder := govarint.NewU64Base128Encoder(&encodingBuf)
chunkLens := make([]uint64, total)
var currChunk uint64
for postingsListItr.HasNext() {
docNum := postingsListItr.Next()
chunk := uint64(docNum) / uint64(chunkFactor)
if chunk != currChunk {
// starting a new chunk
if encoder != nil {
// close out last
encoder.Close()
encodingBytes := encodingBuf.Bytes()
chunkLens[currChunk] = uint64(len(encodingBytes))
freqNormBuf = append(freqNormBuf, encodingBytes...)
encodingBuf.Reset()
encoder = govarint.NewU64Base128Encoder(&encodingBuf)
}
currChunk = chunk
}
// put freq
_, err := encoder.PutU64(memSegment.Freqs[postingID][offset])
if err != nil {
return nil, nil, err
}
// put norm
norm := memSegment.Norms[postingID][offset]
normBits := math.Float32bits(norm)
_, err = encoder.PutU32(normBits)
if err != nil {
return nil, nil, err
}
offset++
}
// close out last chunk
if encoder != nil {
encoder.Close()
encodingBytes := encodingBuf.Bytes()
chunkLens[currChunk] = uint64(len(encodingBytes))
freqNormBuf = append(freqNormBuf, encodingBytes...)
}
// record where this postings freq info starts
freqOffsets = append(freqOffsets, uint64(w.Count()))
buf := make([]byte, binary.MaxVarintLen64)
// write out the number of chunks
n := binary.PutUvarint(buf, uint64(total))
_, err := w.Write(buf[:n])
if err != nil {
return nil, nil, err
}
// write out the chunk lens
for _, chunkLen := range chunkLens {
n := binary.PutUvarint(buf, uint64(chunkLen))
_, err = w.Write(buf[:n])
if err != nil {
return nil, nil, err
}
}
// write out the data
_, err = w.Write(freqNormBuf)
if err != nil {
return nil, nil, err
}
}
// now do it again for the locations
for postingID := range memSegment.Postings {
postingsListItr := memSegment.Postings[postingID].Iterator()
total := uint64(len(memSegment.Stored))/uint64(chunkFactor) + 1
var locBuf []byte
var offset int
var locOffset int
var encodingBuf bytes.Buffer
encoder := govarint.NewU64Base128Encoder(&encodingBuf)
chunkLens := make([]uint64, total)
var currChunk uint64
for postingsListItr.HasNext() {
docNum := postingsListItr.Next()
chunk := uint64(docNum) / uint64(chunkFactor)
if chunk != currChunk {
// starting a new chunk
if encoder != nil {
// close out last
encoder.Close()
encodingBytes := encodingBuf.Bytes()
chunkLens[currChunk] = uint64(len(encodingBytes))
locBuf = append(locBuf, encodingBytes...)
encodingBuf.Reset()
encoder = govarint.NewU64Base128Encoder(&encodingBuf)
}
currChunk = chunk
}
for i := 0; i < int(memSegment.Freqs[postingID][offset]); i++ {
if len(memSegment.Locfields[postingID]) > 0 {
// put field
_, err := encoder.PutU64(uint64(memSegment.Locfields[postingID][locOffset]))
if err != nil {
return nil, nil, err
}
// put pos
_, err = encoder.PutU64(memSegment.Locpos[postingID][locOffset])
if err != nil {
return nil, nil, err
}
// put start
_, err = encoder.PutU64(memSegment.Locstarts[postingID][locOffset])
if err != nil {
return nil, nil, err
}
// put end
_, err = encoder.PutU64(memSegment.Locends[postingID][locOffset])
if err != nil {
return nil, nil, err
}
// put array positions
num := len(memSegment.Locarraypos[postingID][locOffset])
// put the number of array positions to follow
_, err = encoder.PutU64(uint64(num))
if err != nil {
return nil, nil, err
}
// put each array position
for j := 0; j < num; j++ {
_, err = encoder.PutU64(memSegment.Locarraypos[postingID][locOffset][j])
if err != nil {
return nil, nil, err
}
}
}
locOffset++
}
offset++
}
// close out last chunk
if encoder != nil {
encoder.Close()
encodingBytes := encodingBuf.Bytes()
chunkLens[currChunk] = uint64(len(encodingBytes))
locBuf = append(locBuf, encodingBytes...)
}
// record where this postings loc info starts
locOffsets = append(locOffsets, uint64(w.Count()))
buf := make([]byte, binary.MaxVarintLen64)
// write out the number of chunks
n := binary.PutUvarint(buf, uint64(total))
_, err := w.Write(buf[:n])
if err != nil {
return nil, nil, err
}
// write out the chunk lens
for _, chunkLen := range chunkLens {
n := binary.PutUvarint(buf, uint64(chunkLen))
_, err = w.Write(buf[:n])
if err != nil {
return nil, nil, err
}
}
// write out the data
_, err = w.Write(locBuf)
if err != nil {
return nil, nil, err
}
}
return freqOffsets, locOffsets, nil
}
func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, freqOffsets, locOffsets []uint64) ([]uint64, error) {
var rv []uint64
var postingsBuf bytes.Buffer
for postingID := range memSegment.Postings {
if postingID != 0 {
postingsBuf.Reset()
}
// record where we start this posting list
rv = append(rv, uint64(w.Count()))
// write out postings list to memory so we know the len
postingsListLen, err := memSegment.Postings[postingID].WriteTo(&postingsBuf)
if err != nil {
return nil, err
}
// write out the start of the term info
buf := make([]byte, binary.MaxVarintLen64)
n := binary.PutUvarint(buf, freqOffsets[postingID])
_, err = w.Write(buf[:n])
if err != nil {
return nil, err
}
// write out the start of the loc info
n = binary.PutUvarint(buf, locOffsets[postingID])
_, err = w.Write(buf[:n])
if err != nil {
return nil, err
}
// write out the length of this postings list
n = binary.PutUvarint(buf, uint64(postingsListLen))
_, err = w.Write(buf[:n])
if err != nil {
return nil, err
}
// write out the postings list itself
_, err = w.Write(postingsBuf.Bytes())
if err != nil {
return nil, err
}
}
return rv, nil
}
func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) {
var rv []uint64
var buffer bytes.Buffer
for fieldID, fieldTerms := range memSegment.DictKeys {
if fieldID != 0 {
buffer.Reset()
}
// start a new vellum for this field
builder, err := vellum.New(&buffer, nil)
if err != nil {
return nil, err
}
dict := memSegment.Dicts[fieldID]
// now walk the dictionary in order of fieldTerms (already sorted)
for i := range fieldTerms {
postingID := dict[fieldTerms[i]] - 1
postingsAddr := postingsLocs[postingID]
err = builder.Insert([]byte(fieldTerms[i]), postingsAddr)
if err != nil {
return nil, err
}
}
err = builder.Close()
if err != nil {
return nil, err
}
// record where this dictionary starts
rv = append(rv, uint64(w.Count()))
vellumData := buffer.Bytes()
// write out the length of the vellum data
buf := make([]byte, binary.MaxVarintLen64)
n := binary.PutUvarint(buf, uint64(len(vellumData)))
_, err = w.Write(buf[:n])
if err != nil {
return nil, err
}
// write this vellum to disk
_, err = w.Write(vellumData)
if err != nil {
return nil, err
}
}
return rv, nil
}
func persistFields(memSegment *mem.Segment, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
var rv uint64
var fieldStarts []uint64
for fieldID, fieldName := range memSegment.FieldsInv {
// record start of this field
fieldStarts = append(fieldStarts, uint64(w.Count()))
buf := make([]byte, binary.MaxVarintLen64)
// write out if the field has indexed locs (0 or 1)
var indexedLoc uint64
if memSegment.FieldsLoc[fieldID] {
indexedLoc = 1
}
n := binary.PutUvarint(buf, indexedLoc)
_, err := w.Write(buf[:n])
if err != nil {
return 0, err
}
// write out dict location for this field
n = binary.PutUvarint(buf, dictLocs[fieldID])
_, err = w.Write(buf[:n])
if err != nil {
return 0, err
}
// write out the length of the field name
n = binary.PutUvarint(buf, uint64(len(fieldName)))
_, err = w.Write(buf[:n])
if err != nil {
return 0, err
}
// write out the field name
_, err = w.Write([]byte(fieldName))
if err != nil {
return 0, err
}
}
// record where the fields index starts
rv = uint64(w.Count())
// now write out the fields index
for fieldID := range memSegment.FieldsInv {
err := binary.Write(w, binary.BigEndian, fieldStarts[fieldID])
if err != nil {
return 0, err
}
}
return rv, nil
}
// NOTE: update if you make the footer bigger
// crc + ver + chunk + field offset + stored offset + num docs
const footerSize = 4 + 4 + 4 + 8 + 8 + 8
func persistFooter(numDocs, storedIndexOffset, fieldIndexOffset uint64,
chunkFactor uint32, w *CountHashWriter) error {
// write out the number of docs
err := binary.Write(w, binary.BigEndian, numDocs)
if err != nil {
return err
}
// write out the stored field index location:
err = binary.Write(w, binary.BigEndian, storedIndexOffset)
if err != nil {
return err
}
// write out the field index location
err = binary.Write(w, binary.BigEndian, fieldIndexOffset)
if err != nil {
return err
}
// write out 32-bit chunk factor
err = binary.Write(w, binary.BigEndian, chunkFactor)
if err != nil {
return err
}
// write out 32-bit version
err = binary.Write(w, binary.BigEndian, version)
if err != nil {
return err
}
// write out CRC-32 of everything up to but not including this CRC
err = binary.Write(w, binary.BigEndian, w.Sum32())
if err != nil {
return err
}
return nil
}

@@ -0,0 +1,288 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"os"
"testing"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment/mem"
)
func TestBuild(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.zap")
memSegment := buildMemSegment()
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
if err != nil {
t.Fatal(err)
}
}
func buildMemSegment() *mem.Segment {
doc := &document.Document{
ID: "a",
Fields: []document.Field{
document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil),
document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
},
CompositeFields: []*document.CompositeField{
document.NewCompositeField("_all", true, nil, []string{"_id"}),
},
}
// forge analyzed docs
results := []*index.AnalysisResult{
&index.AnalysisResult{
Document: doc,
Analyzed: []analysis.TokenFrequencies{
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 1,
Position: 1,
Term: []byte("a"),
},
}, nil, false),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("wow"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("some"),
},
&analysis.Token{
Start: 5,
End: 10,
Position: 2,
Term: []byte("thing"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("cold"),
},
}, []uint64{0}, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("dark"),
},
}, []uint64{1}, true),
},
Length: []int{
1,
1,
2,
1,
1,
},
},
}
// fix up composite fields
for _, ar := range results {
for i, f := range ar.Document.Fields {
for _, cf := range ar.Document.CompositeFields {
cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
}
}
}
return mem.NewFromAnalyzedDocs(results)
}
func buildMemSegmentMulti() *mem.Segment {
doc := &document.Document{
ID: "a",
Fields: []document.Field{
document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil),
document.NewTextFieldCustom("name", nil, []byte("wow"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
},
CompositeFields: []*document.CompositeField{
document.NewCompositeField("_all", true, nil, []string{"_id"}),
},
}
doc2 := &document.Document{
ID: "b",
Fields: []document.Field{
document.NewTextFieldCustom("_id", nil, []byte("b"), document.IndexField|document.StoreField, nil),
document.NewTextFieldCustom("name", nil, []byte("who"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("desc", nil, []byte("some thing"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("tag", []uint64{0}, []byte("cold"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
document.NewTextFieldCustom("tag", []uint64{1}, []byte("dark"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
},
CompositeFields: []*document.CompositeField{
document.NewCompositeField("_all", true, nil, []string{"_id"}),
},
}
// forge analyzed docs
results := []*index.AnalysisResult{
&index.AnalysisResult{
Document: doc,
Analyzed: []analysis.TokenFrequencies{
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 1,
Position: 1,
Term: []byte("a"),
},
}, nil, false),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("wow"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("some"),
},
&analysis.Token{
Start: 5,
End: 10,
Position: 2,
Term: []byte("thing"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("cold"),
},
}, []uint64{0}, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("dark"),
},
}, []uint64{1}, true),
},
Length: []int{
1,
1,
2,
1,
1,
},
},
&index.AnalysisResult{
Document: doc2,
Analyzed: []analysis.TokenFrequencies{
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 1,
Position: 1,
Term: []byte("b"),
},
}, nil, false),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 3,
Position: 1,
Term: []byte("who"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("some"),
},
&analysis.Token{
Start: 5,
End: 10,
Position: 2,
Term: []byte("thing"),
},
}, nil, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("cold"),
},
}, []uint64{0}, true),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 4,
Position: 1,
Term: []byte("dark"),
},
}, []uint64{1}, true),
},
Length: []int{
1,
1,
2,
1,
1,
},
},
}
// fix up composite fields
for _, ar := range results {
for i, f := range ar.Document.Fields {
for _, cf := range ar.Document.CompositeFields {
cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
}
}
}
segment := mem.NewFromAnalyzedDocs(results)
return segment
}

@@ -0,0 +1,3 @@
# zap command line utility
Kind of a hack, just put together quickly to let me debug some issues.
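
For example, against a segment persisted to /tmp/scorch.zap (as the tests do), a hypothetical session:

```
zap footer /tmp/scorch.zap
zap dict /tmp/scorch.zap desc
zap explore /tmp/scorch.zap desc apple
zap stored /tmp/scorch.zap 0
```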

@@ -0,0 +1,72 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/binary"
"fmt"
"github.com/couchbaselabs/vellum"
"github.com/spf13/cobra"
)
// dictCmd represents the dict command
var dictCmd = &cobra.Command{
Use: "dict [path] [field]",
Short: "dict prints the term dictionary for the specified field",
Long: `The dict command lets you print the term dictionary for the specified field.`,
RunE: func(cmd *cobra.Command, args []string) error {
if len(args) < 2 {
return fmt.Errorf("must specify field")
}
data := segment.Data()
addr, err := segment.DictAddr(args[1])
if err != nil {
return fmt.Errorf("error determing address: %v", err)
}
fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr)
vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64])
fmt.Printf("vellum length: %d\n", vellumLen)
fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen]
fmt.Printf("raw vellum data % x\n", fstBytes)
fmt.Printf("dictionary:\n\n")
if fstBytes != nil {
fst, err := vellum.Load(fstBytes)
if err != nil {
return fmt.Errorf("dictionary field %s vellum err: %v", args[1], err)
}
itr, err := fst.Iterator(nil, nil)
for err == nil {
currTerm, currVal := itr.Current()
fmt.Printf("%s - %d (%x)\n", currTerm, currVal, currVal)
err = itr.Next()
}
if err != nil && err != vellum.ErrIteratorDone {
return fmt.Errorf("error iterating dictionary: %v", err)
}
}
return nil
},
}
func init() {
RootCmd.AddCommand(dictCmd)
}

@@ -0,0 +1,124 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/binary"
"fmt"
"log"
"github.com/couchbaselabs/vellum"
"github.com/spf13/cobra"
)
// exploreCmd represents the explore command
var exploreCmd = &cobra.Command{
Use: "explore [path] [field] <term> <docNum>",
Short: "explores the index by field, then term (optional), and then docNum (optional)",
Long: `The explore command lets you explore the index in order of field, then optionally by term, then optionally again by doc number.`,
RunE: func(cmd *cobra.Command, args []string) error {
if len(args) < 2 {
return fmt.Errorf("must specify field")
}
data := segment.Data()
addr, err := segment.DictAddr(args[1])
if err != nil {
return fmt.Errorf("error determing address: %v", err)
}
fmt.Printf("dictionary for field starts at %d (%x)\n", addr, addr)
vellumLen, read := binary.Uvarint(data[addr : addr+binary.MaxVarintLen64])
fmt.Printf("vellum length: %d\n", vellumLen)
fstBytes := data[addr+uint64(read) : addr+uint64(read)+vellumLen]
fmt.Printf("raw vellum data % x\n", fstBytes)
if len(args) >= 3 {
if fstBytes != nil {
fst, err := vellum.Load(fstBytes)
if err != nil {
return fmt.Errorf("dictionary field %s vellum err: %v", args[1], err)
}
postingsAddr, exists, err := fst.Get([]byte(args[2]))
if err != nil {
return fmt.Errorf("error looking for term : %v", err)
}
if exists {
fmt.Printf("postings list begins at %d (%x)\n", postingsAddr, postingsAddr)
var n uint64
freqAddr, read := binary.Uvarint(data[postingsAddr : postingsAddr+binary.MaxVarintLen64])
n += uint64(read)
var locAddr uint64
locAddr, read = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
n += uint64(read)
var postingListLen uint64
postingListLen, _ = binary.Uvarint(data[postingsAddr+n : postingsAddr+n+binary.MaxVarintLen64])
fmt.Printf("Posting List Length: %d\n", postingListLen)
fmt.Printf("Freq details at: %d (%x)\n", freqAddr, freqAddr)
numChunks, r2 := binary.Uvarint(data[freqAddr : freqAddr+binary.MaxVarintLen64])
n = uint64(r2)
var freqChunkLens []uint64
for j := uint64(0); j < numChunks; j++ {
chunkLen, r3 := binary.Uvarint(data[freqAddr+n : freqAddr+n+binary.MaxVarintLen64])
n += uint64(r3)
freqChunkLens = append(freqChunkLens, chunkLen)
}
running := freqAddr + n
for k, chunkLen := range freqChunkLens {
fmt.Printf("freq chunk: %d, len %d, start at %d (%x) end %d (%x)\n", k, chunkLen, running, running, running+chunkLen, running+chunkLen)
running += chunkLen
}
fmt.Printf("Loc details at: %d (%x)\n", locAddr, locAddr)
numLChunks, r4 := binary.Uvarint(data[locAddr : locAddr+binary.MaxVarintLen64])
n = uint64(r4)
fmt.Printf("there are %d loc chunks\n", numLChunks)
var locChunkLens []uint64
for j := uint64(0); j < numLChunks; j++ {
log.Printf("reading from %d(%x)\n", locAddr+n, locAddr+n)
log.Printf("data i see here: % x\n", data[locAddr+n:locAddr+n+binary.MaxVarintLen64])
lchunkLen, r4 := binary.Uvarint(data[locAddr+n : locAddr+n+binary.MaxVarintLen64])
n += uint64(r4)
log.Printf("see chunk len %d(%x)\n", lchunkLen, lchunkLen)
locChunkLens = append(locChunkLens, lchunkLen)
}
running2 := locAddr + n
for k, chunkLen := range locChunkLens {
fmt.Printf("loc chunk: %d, len %d(%x), start at %d (%x) end %d (%x)\n", k, chunkLen, chunkLen, running2, running2, running2+chunkLen, running2+chunkLen)
running2 += chunkLen
}
} else {
fmt.Printf("dictionary does not contain term '%s'\n", args[2])
}
}
}
return nil
},
}
func init() {
RootCmd.AddCommand(exploreCmd)
}

@@ -0,0 +1,43 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"github.com/spf13/cobra"
)
// footerCmd represents the footer command
var footerCmd = &cobra.Command{
Use: "footer [path]",
Short: "prints the contents of the zap footer",
Long: `The footer command will print the contents of the footer.`,
RunE: func(cmd *cobra.Command, args []string) error {
data := segment.Data()
fmt.Printf("Length: %d\n", len(data))
fmt.Printf("CRC: %#x\n", segment.CRC())
fmt.Printf("Version: %d\n", segment.Version())
fmt.Printf("Chunk Factor: %d\n", segment.ChunkFactor())
fmt.Printf("Fields Idx: %d (%#x)\n", segment.FieldsIndexOffset(), segment.FieldsIndexOffset())
fmt.Printf("Stored Idx: %d (%#x)\n", segment.StoredIndexOffset(), segment.StoredIndexOffset())
fmt.Printf("Num Docs: %d\n", segment.NumDocs())
return nil
},
}
func init() {
RootCmd.AddCommand(footerCmd)
}

@@ -0,0 +1,58 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"os"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/spf13/cobra"
)
var segment *zap.Segment
// RootCmd represents the base command when called without any subcommands
var RootCmd = &cobra.Command{
Use: "zap",
Short: "command-line tool to interact with a zap file",
Long: `Zap is a command-line tool to interact with a zap file.`,
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
if len(args) < 1 {
return fmt.Errorf("must specify path to zap file")
}
segInf, err := zap.Open(args[0])
if err != nil {
return fmt.Errorf("error opening zap file: %v", err)
}
segment = segInf.(*zap.Segment)
return nil
},
PersistentPostRunE: func(cmd *cobra.Command, args []string) error {
return nil
},
}
// Execute adds all child commands to the root command and sets flags appropriately.
// This is called by main.main(). It only needs to happen once to the rootCmd.
func Execute() {
if err := RootCmd.Execute(); err != nil {
fmt.Println(err)
os.Exit(-1)
}
}

@@ -0,0 +1,73 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/binary"
"fmt"
"strconv"
"github.com/golang/snappy"
"github.com/spf13/cobra"
)
// storedCmd represents the stored command
var storedCmd = &cobra.Command{
Use: "stored [path] [docNum]",
Short: "prints the stored section for a doc number",
Long: `The stored command will print the raw stored data bytes for the specified document number.`,
RunE: func(cmd *cobra.Command, args []string) error {
if len(args) < 2 {
return fmt.Errorf("must specify doc number")
}
docNum, err := strconv.Atoi(args[1])
if err != nil {
return fmt.Errorf("unable to parse doc number: %v", err)
}
if docNum >= int(segment.NumDocs()) {
return fmt.Errorf("invalid doc number %d (valid 0 - %d)", docNum, segment.NumDocs()-1)
}
data := segment.Data()
storedIdx := segment.StoredIndexOffset()
// read docNum entry in the index
indexPos := storedIdx + (8 * uint64(docNum))
storedStartAddr := binary.BigEndian.Uint64(data[indexPos : indexPos+8])
fmt.Printf("Stored field starts at %d (%#x)\n", storedStartAddr, storedStartAddr)
var n uint64
metaLen, read := binary.Uvarint(data[storedStartAddr : storedStartAddr+binary.MaxVarintLen64])
n += uint64(read)
fmt.Printf("Meta Len: %d\n", metaLen)
var dataLen uint64
dataLen, read = binary.Uvarint(data[storedStartAddr+n : storedStartAddr+n+binary.MaxVarintLen64])
n += uint64(read)
fmt.Printf("Data Len: %d\n", dataLen)
meta := data[storedStartAddr+n : storedStartAddr+n+metaLen]
fmt.Printf("Raw meta: % x\n", meta)
raw := data[storedStartAddr+n+metaLen : storedStartAddr+n+metaLen+dataLen]
fmt.Printf("Raw data (len %d): % x\n", len(raw), raw)
uncompressed, err := snappy.Decode(nil, raw)
if err != nil {
return fmt.Errorf("error decoding snappy data: %v", err)
}
fmt.Printf("Uncompressed data (len %d): % x\n", len(uncompressed), uncompressed)
return nil
},
}
func init() {
RootCmd.AddCommand(storedCmd)
}

@@ -0,0 +1,23 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"github.com/blevesearch/bleve/index/scorch/segment/zap/cmd/zap/cmd"
)
func main() {
cmd.Execute()
}

@@ -0,0 +1,55 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"hash"
"hash/crc32"
"io"
)
// CountHashWriter is a wrapper around a Writer which counts the number of
// bytes written and maintains a running CRC-32 hash of them
type CountHashWriter struct {
w io.Writer
h hash.Hash32
n int
}
// NewCountHashWriter returns a CountHashWriter which wraps the provided Writer
func NewCountHashWriter(w io.Writer) *CountHashWriter {
return &CountHashWriter{
w: w,
h: crc32.NewIEEE(),
}
}
// Write writes the provided bytes to the wrapped writer and counts the bytes
func (c *CountHashWriter) Write(b []byte) (int, error) {
n, err := c.w.Write(b)
c.n += n
_, _ = c.h.Write(b)
return n, err
}
// Count returns the number of bytes written
func (c *CountHashWriter) Count() int {
return c.n
}
// Sum32 returns the CRC-32 hash of the content written to this writer
func (c *CountHashWriter) Sum32() uint32 {
return c.h.Sum32()
}

@@ -0,0 +1,165 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"encoding/binary"
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbaselabs/vellum"
"github.com/couchbaselabs/vellum/regexp"
)
// Dictionary is the zap representation of the term dictionary
type Dictionary struct {
segment *Segment
field string
fieldID uint16
fst *vellum.FST
}
// PostingsList returns the postings list for the specified term
func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
return d.postingsList(term, except)
}
func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*PostingsList, error) {
rv := &PostingsList{
dictionary: d,
term: term,
except: except,
}
if d.fst != nil {
postingsOffset, exists, err := d.fst.Get([]byte(term))
if err != nil {
return nil, fmt.Errorf("vellum err: %v", err)
}
if exists {
rv.postingsOffset = postingsOffset
// read the location of the freq/norm details
var n uint64
var read int
rv.freqOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
n += uint64(read)
rv.locOffset, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
n += uint64(read)
var postingsLen uint64
postingsLen, read = binary.Uvarint(d.segment.mm[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
n += uint64(read)
roaringBytes := d.segment.mm[postingsOffset+n : postingsOffset+n+postingsLen]
bitmap := roaring.NewBitmap()
_, err = bitmap.FromBuffer(roaringBytes)
if err != nil {
return nil, fmt.Errorf("error loading roaring bitmap: %v", err)
}
rv.postings = bitmap
}
}
return rv, nil
}
// Iterator returns an iterator for this dictionary
func (d *Dictionary) Iterator() segment.DictionaryIterator {
rv := &DictionaryIterator{
d: d,
}
if d.fst != nil {
itr, err := d.fst.Iterator(nil, nil)
if err == nil {
rv.itr = itr
}
}
return rv
}
// PrefixIterator returns an iterator which only visits terms having the
// specified prefix
func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
rv := &DictionaryIterator{
d: d,
}
if d.fst != nil {
r, err := regexp.New(prefix + ".*")
if err == nil {
itr, err := d.fst.Search(r, nil, nil)
if err == nil {
rv.itr = itr
}
}
}
return rv
}
// RangeIterator returns an iterator which only visits terms between the
// start and end terms. NOTE: bleve.index API specifies the end is inclusive.
func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
rv := &DictionaryIterator{
d: d,
}
// need to increment the end position to be inclusive
// an empty end means no upper bound
var endBytes []byte
if len(end) > 0 {
endBytes = []byte(end)
if endBytes[len(endBytes)-1] < 0xff {
endBytes[len(endBytes)-1]++
} else {
endBytes = append(endBytes, 0xff)
}
}
if d.fst != nil {
itr, err := d.fst.Iterator([]byte(start), endBytes)
if err == nil {
rv.itr = itr
}
}
return rv
}
// DictionaryIterator is an iterator for term dictionary
type DictionaryIterator struct {
d *Dictionary
itr vellum.Iterator
err error
}
// Next returns the next entry in the dictionary
func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
if i.itr == nil || i.err == vellum.ErrIteratorDone {
return nil, nil
} else if i.err != nil {
return nil, i.err
}
term, count := i.itr.Current()
rv := &index.DictEntry{
Term: string(term),
Count: count,
}
i.err = i.itr.Next()
return rv, nil
}

@@ -0,0 +1,183 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"os"
"reflect"
"testing"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment/mem"
)
func buildMemSegmentForDict() *mem.Segment {
doc := &document.Document{
ID: "a",
Fields: []document.Field{
document.NewTextFieldCustom("_id", nil, []byte("a"), document.IndexField|document.StoreField, nil),
document.NewTextFieldCustom("desc", nil, []byte("apple ball cat dog egg fish bat"), document.IndexField|document.StoreField|document.IncludeTermVectors, nil),
},
}
// forge analyzed docs
results := []*index.AnalysisResult{
&index.AnalysisResult{
Document: doc,
Analyzed: []analysis.TokenFrequencies{
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 1,
Position: 1,
Term: []byte("a"),
},
}, nil, false),
analysis.TokenFrequency(analysis.TokenStream{
&analysis.Token{
Start: 0,
End: 5,
Position: 1,
Term: []byte("apple"),
},
&analysis.Token{
Start: 6,
End: 10,
Position: 2,
Term: []byte("ball"),
},
&analysis.Token{
Start: 11,
End: 14,
Position: 3,
Term: []byte("cat"),
},
&analysis.Token{
Start: 15,
End: 18,
Position: 4,
Term: []byte("dog"),
},
&analysis.Token{
Start: 19,
End: 22,
Position: 5,
Term: []byte("egg"),
},
&analysis.Token{
Start: 20,
End: 24,
Position: 6,
Term: []byte("fish"),
},
&analysis.Token{
Start: 25,
End: 28,
Position: 7,
Term: []byte("bat"),
},
}, nil, true),
},
Length: []int{
1,
7,
},
},
}
segment := mem.NewFromAnalyzedDocs(results)
return segment
}
func TestDictionary(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.zap")
memSegment := buildMemSegmentForDict()
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}
segment, err := Open("/tmp/scorch.zap")
if err != nil {
t.Fatalf("error opening segment: %v", err)
}
defer func() {
cerr := segment.Close()
if cerr != nil {
t.Fatalf("error closing segment: %v", err)
}
}()
dict, err := segment.Dictionary("desc")
if err != nil {
t.Fatal(err)
}
// test basic full iterator
expected := []string{"apple", "ball", "bat", "cat", "dog", "egg", "fish"}
var got []string
itr := dict.Iterator()
next, err := itr.Next()
for next != nil && err == nil {
got = append(got, next.Term)
next, err = itr.Next()
}
if err != nil {
t.Fatalf("dict itr error: %v", err)
}
if !reflect.DeepEqual(expected, got) {
t.Errorf("expected: %v, got: %v", expected, got)
}
// test prefix iterator
expected = []string{"ball", "bat"}
got = got[:0]
itr = dict.PrefixIterator("b")
next, err = itr.Next()
for next != nil && err == nil {
got = append(got, next.Term)
next, err = itr.Next()
}
if err != nil {
t.Fatalf("dict itr error: %v", err)
}
if !reflect.DeepEqual(expected, got) {
t.Errorf("expected: %v, got: %v", expected, got)
}
// test range iterator
expected = []string{"cat", "dog", "egg"}
got = got[:0]
itr = dict.RangeIterator("cat", "egg")
next, err = itr.Next()
for next != nil && err == nil {
got = append(got, next.Term)
next, err = itr.Next()
}
if err != nil {
t.Fatalf("dict itr error: %v", err)
}
if !reflect.DeepEqual(expected, got) {
t.Errorf("expected: %v, got: %v", expected, got)
}
}

@@ -0,0 +1,362 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"fmt"
"math"
"github.com/RoaringBitmap/roaring"
"github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment"
)
// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
dictionary *Dictionary
term string
postingsOffset uint64
freqOffset uint64
locOffset uint64
postings *roaring.Bitmap
except *roaring.Bitmap
postingKey []byte
}
// Iterator returns an iterator for this postings list
func (p *PostingsList) Iterator() segment.PostingsIterator {
rv := &PostingsIterator{
postings: p,
}
if p.postings != nil {
// prepare the freq chunk details
var n uint64
var read int
var numFreqChunks uint64
numFreqChunks, read = binary.Uvarint(p.dictionary.segment.mm[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
rv.freqChunkLens = make([]uint64, int(numFreqChunks))
for i := 0; i < int(numFreqChunks); i++ {
rv.freqChunkLens[i], read = binary.Uvarint(p.dictionary.segment.mm[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
rv.freqChunkStart = p.freqOffset + n
// prepare the loc chunk details
n = 0
var numLocChunks uint64
numLocChunks, read = binary.Uvarint(p.dictionary.segment.mm[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
rv.locChunkLens = make([]uint64, int(numLocChunks))
for i := 0; i < int(numLocChunks); i++ {
rv.locChunkLens[i], read = binary.Uvarint(p.dictionary.segment.mm[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
rv.locChunkStart = p.locOffset + n
rv.all = p.postings.Iterator()
if p.except != nil {
allExcept := p.postings.Clone()
allExcept.AndNot(p.except)
rv.actual = allExcept.Iterator()
} else {
rv.actual = p.postings.Iterator()
}
}
return rv
}
// Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 {
var rv uint64
if p.postings != nil {
rv = p.postings.GetCardinality()
if p.except != nil {
except := p.except.GetCardinality()
if except > rv {
// avoid underflow
except = rv
}
rv -= except
}
}
return rv
}
// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
postings *PostingsList
all roaring.IntIterable
offset int
locoffset int
actual roaring.IntIterable
currChunk uint32
currChunkFreqNorm []byte
currChunkLoc []byte
freqNormDecoder *govarint.Base128Decoder
locDecoder *govarint.Base128Decoder
freqChunkLens []uint64
freqChunkStart uint64
locChunkLens []uint64
locChunkStart uint64
}
func (i *PostingsIterator) loadChunk(chunk int) error {
if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
}
// load correct chunk bytes
start := i.freqChunkStart
for j := 0; j < chunk; j++ {
start += i.freqChunkLens[j]
}
end := start + i.freqChunkLens[chunk]
i.currChunkFreqNorm = i.postings.dictionary.segment.mm[start:end]
i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm))
start = i.locChunkStart
for j := 0; j < chunk; j++ {
start += i.locChunkLens[j]
}
end = start + i.locChunkLens[chunk]
i.currChunkLoc = i.postings.dictionary.segment.mm[start:end]
i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc))
i.currChunk = uint32(chunk)
return nil
}
func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) {
freq, err := i.freqNormDecoder.GetU64()
if err != nil {
return 0, 0, fmt.Errorf("error reading frequency: %v", err)
}
normBits, err := i.freqNormDecoder.GetU64()
if err != nil {
return 0, 0, fmt.Errorf("error reading norm: %v", err)
}
return freq, normBits, err
}
// readLocation processes all the integers on the stream representing a single
// location. if you care about it, pass in a non-nil location struct, and we
// will fill it. if you don't care about it, pass in nil and we safely consume
// the contents.
func (i *PostingsIterator) readLocation(l *Location) error {
// read off field
fieldID, err := i.locDecoder.GetU64()
if err != nil {
return fmt.Errorf("error reading location field: %v", err)
}
// read off pos
pos, err := i.locDecoder.GetU64()
if err != nil {
return fmt.Errorf("error reading location pos: %v", err)
}
// read off start
start, err := i.locDecoder.GetU64()
if err != nil {
return fmt.Errorf("error reading location start: %v", err)
}
// read off end
end, err := i.locDecoder.GetU64()
if err != nil {
return fmt.Errorf("error reading location end: %v", err)
}
// read off num array pos
numArrayPos, err := i.locDecoder.GetU64()
if err != nil {
return fmt.Errorf("error reading location num array pos: %v", err)
}
// group these together for less branching
if l != nil {
l.field = i.postings.dictionary.segment.fieldsInv[fieldID]
l.pos = pos
l.start = start
l.end = end
if numArrayPos > 0 {
l.ap = make([]uint64, int(numArrayPos))
}
}
// read off array positions
for k := 0; k < int(numArrayPos); k++ {
ap, err := i.locDecoder.GetU64()
if err != nil {
return fmt.Errorf("error reading array position: %v", err)
}
if l != nil {
l.ap[k] = ap
}
}
return nil
}
// Next returns the next posting on the postings list, or nil at the end
func (i *PostingsIterator) Next() (segment.Posting, error) {
if i.actual == nil || !i.actual.HasNext() {
return nil, nil
}
n := i.actual.Next()
nChunk := n / i.postings.dictionary.segment.chunkFactor
allN := i.all.Next()
allNChunk := allN / i.postings.dictionary.segment.chunkFactor
// n is the next actual hit (excluding some postings)
// allN is the next hit in the full postings
// if they don't match, adjust offsets to factor in item we're skipping over
// incr the all iterator, and check again
for allN != n {
// in different chunks, reset offsets
if allNChunk != nChunk {
i.locoffset = 0
i.offset = 0
} else {
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
err := i.loadChunk(int(nChunk))
if err != nil {
return nil, fmt.Errorf("error loading chunk: %v", err)
}
}
// read off freq/norm even though we don't care about them here
freq, _, err := i.readFreqNorm()
if err != nil {
return nil, err
}
if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] {
for j := 0; j < int(freq); j++ {
err := i.readLocation(nil)
if err != nil {
return nil, err
}
}
}
// in same chunk, need to account for offsets
i.offset++
}
allN = i.all.Next()
}
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
err := i.loadChunk(int(nChunk))
if err != nil {
return nil, fmt.Errorf("error loading chunk: %v", err)
}
}
rv := &Posting{
iterator: i,
docNum: uint64(n),
}
var err error
var normBits uint64
rv.freq, normBits, err = i.readFreqNorm()
if err != nil {
return nil, err
}
rv.norm = math.Float32frombits(uint32(normBits))
if i.postings.dictionary.segment.fieldsLoc[i.postings.dictionary.fieldID] {
// read off 'freq' locations
rv.locs = make([]segment.Location, rv.freq)
locs := make([]Location, rv.freq)
for j := 0; j < int(rv.freq); j++ {
err := i.readLocation(&locs[j])
if err != nil {
return nil, err
}
rv.locs[j] = &locs[j]
}
}
return rv, nil
}
// Posting is a single entry in a postings list
type Posting struct {
iterator *PostingsIterator
docNum uint64
freq uint64
norm float32
locs []segment.Location
}
// Number returns the document number of this posting in this segment
func (p *Posting) Number() uint64 {
return p.docNum
}
// Frequency returns the frequency of occurrence of this term in this doc/field
func (p *Posting) Frequency() uint64 {
return p.freq
}
// Norm returns the normalization factor for this posting
func (p *Posting) Norm() float64 {
return float64(p.norm)
}
// Locations returns the location information for each occurrence
func (p *Posting) Locations() []segment.Location {
return p.locs
}
// Location represents the location of a single occurrence
type Location struct {
field string
pos uint64
start uint64
end uint64
ap []uint64
}
// Field returns the name of the field (useful in composite fields to know
// which original field the value came from)
func (l *Location) Field() string {
return l.field
}
// Start returns the start byte offset of this occurrence
func (l *Location) Start() uint64 {
return l.start
}
// End returns the end byte offset of this occurrence
func (l *Location) End() uint64 {
return l.end
}
// Pos returns the 1-based phrase position of this occurrence
func (l *Location) Pos() uint64 {
return l.pos
}
// ArrayPositions returns the array position vector associated with this occurrence
func (l *Location) ArrayPositions() []uint64 {
return l.ap
}

@@ -0,0 +1,352 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"os"
"github.com/RoaringBitmap/roaring"
"github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbaselabs/vellum"
mmap "github.com/edsrzf/mmap-go"
"github.com/golang/snappy"
)
// Open returns a zap impl of a segment
func Open(path string) (segment.Segment, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
mm, err := mmap.Map(f, mmap.RDONLY, 0)
if err != nil {
// mmap failed, try to close the file
_ = f.Close()
return nil, err
}
rv := &Segment{
f: f,
mm: mm,
path: path,
fieldsMap: make(map[string]uint16),
}
err = rv.loadConfig()
if err != nil {
_ = rv.Close()
return nil, err
}
err = rv.loadFields()
if err != nil {
_ = rv.Close()
return nil, err
}
return rv, nil
}
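// A minimal usage sketch (the path is hypothetical; PersistSegment, as used
// by the tests below, writes a compatible file):
//
//	seg, err := Open("/tmp/scorch.zap")
//	if err != nil {
//		// handle error
//	}
//	defer seg.Close()
//	fmt.Println(seg.Count())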
// Segment implements the segment.Segment interface on top of the zap file format
type Segment struct {
f *os.File
mm mmap.MMap
path string
crc uint32
version uint32
chunkFactor uint32
numDocs uint64
storedIndexOffset uint64
fieldsIndexOffset uint64
fieldsMap map[string]uint16
fieldsInv []string
fieldsLoc []bool
fieldsOffsets []uint64
}
func (s *Segment) loadConfig() error {
crcOffset := len(s.mm) - 4
s.crc = binary.BigEndian.Uint32(s.mm[crcOffset : crcOffset+4])
verOffset := crcOffset - 4
s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4])
if s.version != version {
return fmt.Errorf("unsupported version %d", s.version)
}
chunkOffset := verOffset - 4
s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4])
fieldsOffset := chunkOffset - 8
s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsOffset : fieldsOffset+8])
storedOffset := fieldsOffset - 8
s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedOffset : storedOffset+8])
docNumOffset := storedOffset - 8
s.numDocs = binary.BigEndian.Uint64(s.mm[docNumOffset : docNumOffset+8])
return nil
}
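// For reference, loadConfig walks a fixed-size footer backwards from the end
// of the mmap'd file; assuming footerSize covers exactly these fields (36
// bytes), the layout at the end of the file is:
//
//	numDocs           uint64 (big endian)
//	storedIndexOffset uint64 (big endian)
//	fieldsIndexOffset uint64 (big endian)
//	chunkFactor       uint32 (big endian)
//	version           uint32 (big endian)
//	crc               uint32 (big endian)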
func (s *Segment) loadFields() error {
// NOTE for now we assume the fields index immediately precedes the footer
// if this changes, we need to adjust accordingly (or store an explicit length)
fieldsIndexEnd := uint64(len(s.mm) - footerSize)
// iterate through fields index
var fieldID uint64
for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd {
addr := binary.BigEndian.Uint64(s.mm[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8])
var n uint64
hasStoredLoc, read := binary.Uvarint(s.mm[addr:fieldsIndexEnd])
n += uint64(read)
if hasStoredLoc == 1 {
s.fieldsLoc = append(s.fieldsLoc, true)
} else {
s.fieldsLoc = append(s.fieldsLoc, false)
}
var dictLoc uint64
dictLoc, read = binary.Uvarint(s.mm[addr+n : fieldsIndexEnd])
n += uint64(read)
s.fieldsOffsets = append(s.fieldsOffsets, dictLoc)
var nameLen uint64
nameLen, read = binary.Uvarint(s.mm[addr+n : fieldsIndexEnd])
n += uint64(read)
name := string(s.mm[addr+n : addr+n+nameLen])
s.fieldsInv = append(s.fieldsInv, name)
s.fieldsMap[name] = uint16(fieldID + 1)
fieldID++
}
return nil
}
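// As decoded above, the fields index is one big endian uint64 address per
// field, and the record at each address is:
//
//	hasStoredLoc (uvarint; 1 means the field has location data)
//	dictLoc      (uvarint; offset of the field's term dictionary)
//	nameLen      (uvarint)
//	name         (nameLen bytes)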
// Dictionary returns the term dictionary for the specified field
func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
dict, err := s.dictionary(field)
if err == nil && dict == nil {
return &segment.EmptyDictionary{}, nil
}
return dict, err
}
func (s *Segment) dictionary(field string) (*Dictionary, error) {
rv := &Dictionary{
segment: s,
field: field,
}
rv.fieldID = s.fieldsMap[field]
if rv.fieldID > 0 {
rv.fieldID = rv.fieldID - 1
dictStart := s.fieldsOffsets[rv.fieldID]
// read the length of the vellum data
vellumLen, read := binary.Uvarint(s.mm[dictStart : dictStart+binary.MaxVarintLen64])
fstBytes := s.mm[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
if fstBytes != nil {
fst, err := vellum.Load(fstBytes)
if err != nil {
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
}
rv.fst = fst
}
} else {
return nil, nil
}
return rv, nil
}
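// Note: fieldsMap stores the field id plus one, so a zero lookup can signal
// "no such field"; dictionary subtracts one again before indexing
// fieldsOffsets.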
// VisitDocument invokes the DocumentFieldValueVisitor for each stored field
// for the specified doc number
func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
// first make sure this is a valid number in this segment
if num < s.numDocs {
docStoredStartAddr := s.storedIndexOffset + (8 * num)
docStoredStart := binary.BigEndian.Uint64(s.mm[docStoredStartAddr : docStoredStartAddr+8])
var n uint64
metaLen, read := binary.Uvarint(s.mm[docStoredStart : docStoredStart+binary.MaxVarintLen64])
n += uint64(read)
var dataLen uint64
dataLen, read = binary.Uvarint(s.mm[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64])
n += uint64(read)
meta := s.mm[docStoredStart+n : docStoredStart+n+metaLen]
data := s.mm[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen]
uncompressed, err := snappy.Decode(nil, data)
if err != nil {
return err
}
// now decode meta and process
reader := bytes.NewReader(meta)
decoder := govarint.NewU64Base128Decoder(reader)
keepGoing := true
for keepGoing {
field, err := decoder.GetU64()
if err == io.EOF {
break
}
if err != nil {
return err
}
typ, err := decoder.GetU64()
if err != nil {
return err
}
offset, err := decoder.GetU64()
if err != nil {
return err
}
l, err := decoder.GetU64()
if err != nil {
return err
}
numap, err := decoder.GetU64()
if err != nil {
return err
}
var arrayPos []uint64
if numap > 0 {
arrayPos = make([]uint64, numap)
for i := 0; i < int(numap); i++ {
ap, err := decoder.GetU64()
if err != nil {
return err
}
arrayPos[i] = ap
}
}
value := uncompressed[offset : offset+l]
keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
}
}
return nil
}
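// A minimal visitor sketch (prints every stored field value; the signature
// matches the decode loop above, and seg is a hypothetical open segment):
//
//	err := seg.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool {
//		fmt.Printf("%s (%c): %s %v\n", field, typ, value, pos)
//		return true // returning false stops the visit early
//	})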
// Count returns the number of documents in this segment.
func (s *Segment) Count() uint64 {
return s.numDocs
}
// DocNumbers returns a bitmap corresponding to the doc numbers of all the
// provided _id strings
func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) {
rv := roaring.New()
if len(s.fieldsMap) > 0 {
idDict, err := s.dictionary("_id")
if err != nil {
return nil, err
}
for _, id := range ids {
postings, err := idDict.postingsList(id, nil)
if err != nil {
return nil, err
}
if postings.postings != nil {
rv.Or(postings.postings)
}
}
}
return rv, nil
}
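// A short sketch of the exclusion pattern exercised by the tests below
// (dict is assumed to come from Dictionary above, seg from Open):
//
//	exclude, err := seg.DocNumbers([]string{"a"})
//	if err == nil {
//		pl, _ := dict.PostingsList("thing", exclude)
//		itr := pl.Iterator()
//		_ = itr
//	}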
// Fields returns the field names used in this segment
func (s *Segment) Fields() []string {
return s.fieldsInv
}
// Path returns the path of this segment on disk
func (s *Segment) Path() string {
return s.path
}
// Close releases all resources associated with this segment
func (s *Segment) Close() (err error) {
if s.mm != nil {
err = s.mm.Unmap()
}
// try to close file even if unmap failed
if s.f != nil {
err2 := s.f.Close()
if err == nil {
// try to return first error
err = err2
}
}
return
}
// some helpers I started adding for the command-line utility
// Data returns the underlying mmaped data slice
func (s *Segment) Data() []byte {
return s.mm
}
// CRC returns the CRC value stored in the file footer
func (s *Segment) CRC() uint32 {
return s.crc
}
// Version returns the file version in the file footer
func (s *Segment) Version() uint32 {
return s.version
}
// ChunkFactor returns the chunk factor in the file footer
func (s *Segment) ChunkFactor() uint32 {
return s.chunkFactor
}
// FieldsIndexOffset returns the fields index offset in the file footer
func (s *Segment) FieldsIndexOffset() uint64 {
return s.fieldsIndexOffset
}
// StoredIndexOffset returns the stored value index offset in the file footer
func (s *Segment) StoredIndexOffset() uint64 {
return s.storedIndexOffset
}
// NumDocs returns the number of documents in the file footer
func (s *Segment) NumDocs() uint64 {
return s.numDocs
}
// DictAddr is a helper function to compute the file offset where the
// dictionary is stored for the specified field.
func (s *Segment) DictAddr(field string) (uint64, error) {
var fieldID uint16
var ok bool
if fieldID, ok = s.fieldsMap[field]; !ok {
return 0, fmt.Errorf("no such field '%s'", field)
}
return s.fieldsOffsets[fieldID-1], nil
}

View File

@ -0,0 +1,517 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"math"
"os"
"reflect"
"testing"
)
func TestOpen(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.zap")
memSegment := buildMemSegment()
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}
segment, err := Open("/tmp/scorch.zap")
if err != nil {
t.Fatalf("error opening segment: %v", err)
}
defer func() {
cerr := segment.Close()
if cerr != nil {
t.Fatalf("error closing segment: %v", err)
}
}()
expectFields := map[string]struct{}{
"_id": struct{}{},
"_all": struct{}{},
"name": struct{}{},
"desc": struct{}{},
"tag": struct{}{},
}
fields := segment.Fields()
if len(fields) != len(expectFields) {
t.Errorf("expected %d fields, only got %d", len(expectFields), len(fields))
}
for _, field := range fields {
if _, ok := expectFields[field]; !ok {
t.Errorf("got unexpected field: %s", field)
}
}
docCount := segment.Count()
if docCount != 1 {
t.Errorf("expected count 1, got %d", docCount)
}
// check the _id field
dict, err := segment.Dictionary("_id")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList, err := dict.PostingsList("a", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsItr := postingsList.Iterator()
if postingsItr == nil {
t.Fatal("got nil iterator, expected non-nil")
}
count := 0
nextPosting, err := postingsItr.Next()
for nextPosting != nil && err == nil {
count++
if nextPosting.Frequency() != 1 {
t.Errorf("expected frequency 1, got %d", nextPosting.Frequency())
}
if nextPosting.Number() != 0 {
t.Errorf("expected doc number 0, got %d", nextPosting.Number())
}
if nextPosting.Norm() != 1.0 {
t.Errorf("expected norm 1.0, got %f", nextPosting.Norm())
}
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
t.Errorf("expected count to be 1, got %d", count)
}
// check the name field
dict, err = segment.Dictionary("name")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList, err = dict.PostingsList("wow", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsItr = postingsList.Iterator()
if postingsItr == nil {
t.Fatal("got nil iterator, expected non-nil")
}
count = 0
nextPosting, err = postingsItr.Next()
for nextPosting != nil && err == nil {
count++
if nextPosting.Frequency() != 1 {
t.Errorf("expected frequency 1, got %d", nextPosting.Frequency())
}
if nextPosting.Number() != 0 {
t.Errorf("expected doc number 0, got %d", nextPosting.Number())
}
if nextPosting.Norm() != 1.0 {
t.Errorf("expected norm 1.0, got %f", nextPosting.Norm())
}
var numLocs uint64
for _, loc := range nextPosting.Locations() {
numLocs++
if loc.Field() != "name" {
t.Errorf("expected loc field to be 'name', got '%s'", loc.Field())
}
if loc.Start() != 0 {
t.Errorf("expected loc start to be 0, got %d", loc.Start())
}
if loc.End() != 3 {
t.Errorf("expected loc end to be 3, got %d", loc.End())
}
if loc.Pos() != 1 {
t.Errorf("expected loc pos to be 1, got %d", loc.Pos())
}
if loc.ArrayPositions() != nil {
t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions())
}
}
if numLocs != nextPosting.Frequency() {
t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs)
}
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
t.Errorf("expected count to be 1, got %d", count)
}
// check the _all field (composite)
dict, err = segment.Dictionary("_all")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList, err = dict.PostingsList("wow", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsItr = postingsList.Iterator()
if postingsItr == nil {
t.Fatal("got nil iterator, expected non-nil")
}
count = 0
nextPosting, err = postingsItr.Next()
for nextPosting != nil && err == nil {
count++
if nextPosting.Frequency() != 1 {
t.Errorf("expected frequency 1, got %d", nextPosting.Frequency())
}
if nextPosting.Number() != 0 {
t.Errorf("expected doc number 0, got %d", nextPosting.Number())
}
expectedNorm := float32(1.0 / math.Sqrt(float64(5)))
if nextPosting.Norm() != float64(expectedNorm) {
t.Errorf("expected norm %f, got %f", expectedNorm, nextPosting.Norm())
}
var numLocs uint64
for _, loc := range nextPosting.Locations() {
numLocs++
if loc.Field() != "name" {
t.Errorf("expected loc field to be 'name', got '%s'", loc.Field())
}
if loc.Start() != 0 {
t.Errorf("expected loc start to be 0, got %d", loc.Start())
}
if loc.End() != 3 {
t.Errorf("expected loc end to be 3, got %d", loc.End())
}
if loc.Pos() != 1 {
t.Errorf("expected loc pos to be 1, got %d", loc.Pos())
}
if loc.ArrayPositions() != nil {
t.Errorf("expect loc array pos to be nil, got %v", loc.ArrayPositions())
}
}
if numLocs != nextPosting.Frequency() {
t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs)
}
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
t.Errorf("expected count to be 1, got %d", count)
}
// now try a field with array positions
dict, err = segment.Dictionary("tag")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList, err = dict.PostingsList("dark", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsItr = postingsList.Iterator()
if postingsItr == nil {
t.Fatal("got nil iterator, expected non-nil")
}
nextPosting, err = postingsItr.Next()
for nextPosting != nil && err == nil {
if nextPosting.Frequency() != 1 {
t.Errorf("expected frequency 1, got %d", nextPosting.Frequency())
}
if nextPosting.Number() != 0 {
t.Errorf("expected doc number 0, got %d", nextPosting.Number())
}
var numLocs uint64
for _, loc := range nextPosting.Locations() {
numLocs++
if loc.Field() != "tag" {
t.Errorf("expected loc field to be 'name', got '%s'", loc.Field())
}
if loc.Start() != 0 {
t.Errorf("expected loc start to be 0, got %d", loc.Start())
}
if loc.End() != 4 {
t.Errorf("expected loc end to be 3, got %d", loc.End())
}
if loc.Pos() != 1 {
t.Errorf("expected loc pos to be 1, got %d", loc.Pos())
}
expectArrayPos := []uint64{1}
if !reflect.DeepEqual(loc.ArrayPositions(), expectArrayPos) {
t.Errorf("expect loc array pos to be %v, got %v", expectArrayPos, loc.ArrayPositions())
}
}
if numLocs != nextPosting.Frequency() {
t.Errorf("expected %d locations, got %d", nextPosting.Frequency(), numLocs)
}
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
// now try and visit a document
var fieldValuesSeen int
err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool {
fieldValuesSeen++
return true
})
if err != nil {
t.Fatal(err)
}
if fieldValuesSeen != 5 {
t.Errorf("expected 5 field values, got %d", fieldValuesSeen)
}
}
func TestOpenMulti(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.zap")
memSegment := buildMemSegmentMulti()
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1024)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}
segment, err := Open("/tmp/scorch.zap")
if err != nil {
t.Fatalf("error opening segment: %v", err)
}
defer func() {
cerr := segment.Close()
if cerr != nil {
t.Fatalf("error closing segment: %v", err)
}
}()
if segment.Count() != 2 {
t.Errorf("expected count 2, got %d", segment.Count())
}
// check the desc field
dict, err := segment.Dictionary("desc")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList, err := dict.PostingsList("thing", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsItr := postingsList.Iterator()
if postingsItr == nil {
t.Fatal("got nil iterator, expected non-nil")
}
count := 0
nextPosting, err := postingsItr.Next()
for nextPosting != nil && err == nil {
count++
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 2 {
t.Errorf("expected count to be 2, got %d", count)
}
// get docnum of a
exclude, err := segment.DocNumbers([]string{"a"})
if err != nil {
t.Fatal(err)
}
// look for term 'thing' excluding doc 'a'
postingsListExcluding, err := dict.PostingsList("thing", exclude)
if err != nil {
t.Fatal(err)
}
if postingsListExcluding == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsListExcludingCount := postingsListExcluding.Count()
if postingsListExcludingCount != 1 {
t.Errorf("expected count from postings list to be 1, got %d", postingsListExcludingCount)
}
postingsItrExcluding := postingsListExcluding.Iterator()
if postingsItrExcluding == nil {
t.Fatal("got nil iterator, expected non-nil")
}
count = 0
nextPosting, err = postingsItrExcluding.Next()
for nextPosting != nil && err == nil {
count++
nextPosting, err = postingsItrExcluding.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
t.Errorf("expected count to be 1, got %d", count)
}
}
func TestOpenMultiWithTwoChunks(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.zap")
memSegment := buildMemSegmentMulti()
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}
segment, err := Open("/tmp/scorch.zap")
if err != nil {
t.Fatalf("error opening segment: %v", err)
}
defer func() {
cerr := segment.Close()
if cerr != nil {
t.Fatalf("error closing segment: %v", err)
}
}()
if segment.Count() != 2 {
t.Errorf("expected count 2, got %d", segment.Count())
}
// check the desc field
dict, err := segment.Dictionary("desc")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList, err := dict.PostingsList("thing", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsItr := postingsList.Iterator()
if postingsItr == nil {
t.Fatal("got nil iterator, expected non-nil")
}
count := 0
nextPosting, err := postingsItr.Next()
for nextPosting != nil && err == nil {
count++
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 2 {
t.Errorf("expected count to be 2, got %d", count)
}
// get docnum of a
exclude, err := segment.DocNumbers([]string{"a"})
if err != nil {
t.Fatal(err)
}
// look for term 'thing' excluding doc 'a'
postingsListExcluding, err := dict.PostingsList("thing", exclude)
if err != nil {
t.Fatal(err)
}
if postingsListExcluding == nil {
t.Fatal("got nil postings list, expected non-nil")
}
postingsItrExcluding := postingsListExcluding.Iterator()
if postingsItrExcluding == nil {
t.Fatal("got nil iterator, expected non-nil")
}
count = 0
nextPosting, err = postingsItrExcluding.Next()
for nextPosting != nil && err == nil {
count++
nextPosting, err = postingsItrExcluding.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
t.Errorf("expected count to be 1, got %d", count)
}
}