initial commit
This commit is contained in:
commit
3d842dfaf2
|
@ -0,0 +1,10 @@
|
|||
#*
|
||||
*.sublime-*
|
||||
*~
|
||||
.#*
|
||||
.project
|
||||
.settings
|
||||
.DS_Store
|
||||
/examples/bleve_index_json/bleve_index_json
|
||||
/examples/bleve_query/bleve_query
|
||||
/utils/bleve_dump/bleve_dump
|
|
@ -0,0 +1,24 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package keyword_analyzer
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/single_token"
|
||||
)
|
||||
|
||||
func NewKeywordAnalyzer() (*analysis.Analyzer, error) {
|
||||
keyword := analysis.Analyzer{
|
||||
CharFilters: []analysis.CharFilter{},
|
||||
Tokenizer: single_token.NewSingleTokenTokenizer(),
|
||||
Filters: []analysis.TokenFilter{},
|
||||
}
|
||||
|
||||
return &keyword, nil
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package standard_analyzer
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/lower_case_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/token_filters/stop_words_filter"
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/unicode_word_boundary"
|
||||
)
|
||||
|
||||
func NewStandardAnalyzer() (*analysis.Analyzer, error) {
|
||||
lower_case_filter, err := lower_case_filter.NewLowerCaseFilter()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stop_words_filter, err := stop_words_filter.NewStopWordsFilter()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
standard := analysis.Analyzer{
|
||||
CharFilters: []analysis.CharFilter{},
|
||||
Tokenizer: unicode_word_boundary.NewUnicodeWordBoundaryTokenizer(),
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
lower_case_filter,
|
||||
stop_words_filter,
|
||||
},
|
||||
}
|
||||
|
||||
return &standard, nil
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package html_char_filter
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/char_filters/regexp_char_filter"
|
||||
)
|
||||
|
||||
// the origin of this regex is here:
|
||||
// http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/
|
||||
// slightly modified by me to also match the DOCTYPE
|
||||
const htmlTagPattern = `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`
|
||||
|
||||
var htmlRegex = regexp.MustCompile(htmlTagPattern)
|
||||
|
||||
type HtmlCharFilter struct {
|
||||
*regexp_char_filter.RegexpCharFilter
|
||||
}
|
||||
|
||||
func NewHtmlCharFilter() *HtmlCharFilter {
|
||||
return &HtmlCharFilter{
|
||||
regexp_char_filter.NewRegexpCharFilter(htmlRegex, []byte{' '}),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package html_char_filter
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestHtmlCharFilter(t *testing.T) {
|
||||
tests := []struct {
|
||||
input []byte
|
||||
output []byte
|
||||
}{
|
||||
{
|
||||
input: []byte(`<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
|
||||
<h1>My First Heading</h1>
|
||||
|
||||
<p>My first paragraph.</p>
|
||||
|
||||
</body>
|
||||
</html>`),
|
||||
output: []byte(`
|
||||
|
||||
|
||||
|
||||
My First Heading
|
||||
|
||||
My first paragraph.
|
||||
|
||||
|
||||
`),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
filter := NewHtmlCharFilter()
|
||||
output := filter.Filter(test.input)
|
||||
if !reflect.DeepEqual(output, test.output) {
|
||||
t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package regexp_char_filter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
type RegexpCharFilter struct {
|
||||
r *regexp.Regexp
|
||||
replacement []byte
|
||||
}
|
||||
|
||||
func NewRegexpCharFilter(r *regexp.Regexp, replacement []byte) *RegexpCharFilter {
|
||||
return &RegexpCharFilter{
|
||||
r: r,
|
||||
replacement: replacement,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *RegexpCharFilter) Filter(input []byte) []byte {
|
||||
return s.r.ReplaceAllFunc(input, func(in []byte) []byte { return bytes.Repeat(s.replacement, len(in)) })
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package analysis
|
||||
|
||||
type TokenLocation struct {
|
||||
Start int
|
||||
End int
|
||||
Position int
|
||||
}
|
||||
|
||||
type TokenFreq struct {
|
||||
Term []byte
|
||||
Locations []*TokenLocation
|
||||
}
|
||||
|
||||
func TokenFrequency(tokens TokenStream) []*TokenFreq {
|
||||
index := make(map[string]*TokenFreq)
|
||||
|
||||
for _, token := range tokens {
|
||||
curr, ok := index[string(token.Term)]
|
||||
if ok {
|
||||
curr.Locations = append(curr.Locations, &TokenLocation{
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
})
|
||||
} else {
|
||||
index[string(token.Term)] = &TokenFreq{
|
||||
Term: token.Term,
|
||||
Locations: []*TokenLocation{
|
||||
&TokenLocation{
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rv := make([]*TokenFreq, len(index))
|
||||
i := 0
|
||||
for _, tf := range index {
|
||||
rv[i] = tf
|
||||
i += 1
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package length_filter
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type LengthFilter struct {
|
||||
min int
|
||||
max int
|
||||
}
|
||||
|
||||
func NewLengthFilter(min, max int) (*LengthFilter, error) {
|
||||
return &LengthFilter{
|
||||
min: min,
|
||||
max: max,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f *LengthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
|
||||
for _, token := range input {
|
||||
wordLen := utf8.RuneCount(token.Term)
|
||||
if f.min > 0 && f.min > wordLen {
|
||||
continue
|
||||
}
|
||||
if f.max > 0 && f.max < wordLen {
|
||||
continue
|
||||
}
|
||||
rv = append(rv, token)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package length_filter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestLengthFilter(t *testing.T) {
|
||||
|
||||
inputTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("1"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("two"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("three"),
|
||||
},
|
||||
}
|
||||
|
||||
lengthFilter, err := NewLengthFilter(3, 4)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
|
||||
if len(ouputTokenStream) != 1 {
|
||||
t.Fatalf("expected 1 output token")
|
||||
}
|
||||
if string(ouputTokenStream[0].Term) != "two" {
|
||||
t.Errorf("expected term `two`, got `%s`", ouputTokenStream[0].Term)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLengthFilterNoMax(t *testing.T) {
|
||||
|
||||
inputTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("1"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("two"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("three"),
|
||||
},
|
||||
}
|
||||
|
||||
lengthFilter, err := NewLengthFilter(3, -1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
|
||||
if len(ouputTokenStream) != 2 {
|
||||
t.Fatalf("expected 2 output token")
|
||||
}
|
||||
if string(ouputTokenStream[0].Term) != "two" {
|
||||
t.Errorf("expected term `two`, got `%s`", ouputTokenStream[0].Term)
|
||||
}
|
||||
if string(ouputTokenStream[1].Term) != "three" {
|
||||
t.Errorf("expected term `three`, got `%s`", ouputTokenStream[0].Term)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLengthFilterNoMin(t *testing.T) {
|
||||
|
||||
inputTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("1"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("two"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("three"),
|
||||
},
|
||||
}
|
||||
|
||||
lengthFilter, err := NewLengthFilter(-1, 4)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ouputTokenStream := lengthFilter.Filter(inputTokenStream)
|
||||
if len(ouputTokenStream) != 2 {
|
||||
t.Fatalf("expected 2 output token")
|
||||
}
|
||||
if string(ouputTokenStream[0].Term) != "1" {
|
||||
t.Errorf("expected term `1`, got `%s`", ouputTokenStream[0].Term)
|
||||
}
|
||||
if string(ouputTokenStream[1].Term) != "two" {
|
||||
t.Errorf("expected term `two`, got `%s`", ouputTokenStream[0].Term)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package lower_case_filter
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type LowerCaseFilter struct {
|
||||
}
|
||||
|
||||
func NewLowerCaseFilter() (*LowerCaseFilter, error) {
|
||||
return &LowerCaseFilter{}, nil
|
||||
}
|
||||
|
||||
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
|
||||
for _, token := range input {
|
||||
word := string(token.Term)
|
||||
wordLowerCase := strings.ToLower(word)
|
||||
token.Term = []byte(wordLowerCase)
|
||||
rv = append(rv, token)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package lower_case_filter
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestLowerCaseFilter(t *testing.T) {
|
||||
|
||||
inputTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("ONE"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("two"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("ThReE"),
|
||||
},
|
||||
}
|
||||
|
||||
expectedTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("one"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("two"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("three"),
|
||||
},
|
||||
}
|
||||
|
||||
filter, err := NewLowerCaseFilter()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ouputTokenStream := filter.Filter(inputTokenStream)
|
||||
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
|
||||
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package stemmer_filter
|
||||
|
||||
import (
|
||||
"bitbucket.org/tebeka/snowball"
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type StemmerFilter struct {
|
||||
lang string
|
||||
stemmer *snowball.Stemmer
|
||||
}
|
||||
|
||||
func NewStemmerFilter(lang string) (*StemmerFilter, error) {
|
||||
stemmer, err := snowball.New(lang)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &StemmerFilter{
|
||||
lang: lang,
|
||||
stemmer: stemmer,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *StemmerFilter) List() []string {
|
||||
return snowball.LangList()
|
||||
}
|
||||
|
||||
func (s *StemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
|
||||
for _, token := range input {
|
||||
stemmed := s.stemmer.Stem(string(token.Term))
|
||||
token.Term = []byte(stemmed)
|
||||
rv = append(rv, token)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package stemmer_filter
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestStemmerFilter(t *testing.T) {
|
||||
|
||||
inputTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("walking"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("talked"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("business"),
|
||||
},
|
||||
}
|
||||
|
||||
expectedTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("walk"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("talk"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("busi"),
|
||||
},
|
||||
}
|
||||
|
||||
filter, err := NewStemmerFilter("english")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ouputTokenStream := filter.Filter(inputTokenStream)
|
||||
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
|
||||
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package stop_words_filter
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
var DEFAULT_STOP_WORDS []string = []string{
|
||||
"a", "an", "and", "are", "as", "at", "be", "but", "by",
|
||||
"for", "if", "in", "into", "is", "it",
|
||||
"no", "not", "of", "on", "or", "such",
|
||||
"that", "the", "their", "then", "there", "these",
|
||||
"they", "this", "to", "was", "will", "with",
|
||||
}
|
||||
|
||||
type StopWordsFilter struct {
|
||||
stopWords map[string]bool
|
||||
}
|
||||
|
||||
func NewStopWordsFilter() (*StopWordsFilter, error) {
|
||||
return &StopWordsFilter{
|
||||
stopWords: buildStopWordMap(DEFAULT_STOP_WORDS),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f *StopWordsFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
|
||||
for _, token := range input {
|
||||
word := string(token.Term)
|
||||
_, isStopWord := f.stopWords[word]
|
||||
if !isStopWord {
|
||||
rv = append(rv, token)
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func buildStopWordMap(words []string) map[string]bool {
|
||||
rv := make(map[string]bool, len(words))
|
||||
for _, word := range words {
|
||||
rv[word] = true
|
||||
}
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package stop_words_filter
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestStopWordsFilter(t *testing.T) {
|
||||
|
||||
inputTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("a"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("walk"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("in"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("the"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("park"),
|
||||
},
|
||||
}
|
||||
|
||||
expectedTokenStream := analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("walk"),
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("park"),
|
||||
},
|
||||
}
|
||||
|
||||
filter, err := NewStopWordsFilter()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ouputTokenStream := filter.Filter(inputTokenStream)
|
||||
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
|
||||
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package regexp_tokenizer
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type RegexpTokenizer struct {
|
||||
r *regexp.Regexp
|
||||
}
|
||||
|
||||
func NewRegexpTokenizer(r *regexp.Regexp) *RegexpTokenizer {
|
||||
return &RegexpTokenizer{
|
||||
r: r,
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *RegexpTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
matches := rt.r.FindAllIndex(input, -1)
|
||||
rv := make(analysis.TokenStream, len(matches))
|
||||
for i, match := range matches {
|
||||
token := analysis.Token{
|
||||
Term: input[match[0]:match[1]],
|
||||
Start: match[0],
|
||||
End: match[1],
|
||||
Position: i + 1,
|
||||
}
|
||||
rv[i] = &token
|
||||
}
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package simple_word_boundary
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis/tokenizers/regexp_tokenizer"
|
||||
)
|
||||
|
||||
const wordPattern = `\w+`
|
||||
|
||||
var wordRegex = regexp.MustCompile(wordPattern)
|
||||
|
||||
type SimpleWordBoundaryTokenizer struct {
|
||||
*regexp_tokenizer.RegexpTokenizer
|
||||
}
|
||||
|
||||
func NewSimpleWordBoundaryTokenizer() *SimpleWordBoundaryTokenizer {
|
||||
return &SimpleWordBoundaryTokenizer{
|
||||
regexp_tokenizer.NewRegexpTokenizer(wordRegex),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package simple_word_boundary
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestBoundary(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
input []byte
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
[]byte("Hello World."),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
0,
|
||||
5,
|
||||
[]byte("Hello"),
|
||||
1,
|
||||
},
|
||||
{
|
||||
6,
|
||||
11,
|
||||
[]byte("World"),
|
||||
2,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
tokenizer := NewSimpleWordBoundaryTokenizer()
|
||||
actual := tokenizer.Tokenize(test.input)
|
||||
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package single_token
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type SingleTokenTokenizer struct {
|
||||
}
|
||||
|
||||
func NewSingleTokenTokenizer() *SingleTokenTokenizer {
|
||||
return &SingleTokenTokenizer{}
|
||||
}
|
||||
|
||||
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
return analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: input,
|
||||
Position: 1,
|
||||
Start: 0,
|
||||
End: len(input),
|
||||
},
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package single_token
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestSingleTokenTokenizer(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
input []byte
|
||||
output analysis.TokenStream
|
||||
}{
|
||||
{
|
||||
[]byte("Hello World"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
0,
|
||||
11,
|
||||
[]byte("Hello World"),
|
||||
1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]byte("こんにちは世界"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
0,
|
||||
21,
|
||||
[]byte("こんにちは世界"),
|
||||
1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
|
||||
analysis.TokenStream{
|
||||
{
|
||||
0,
|
||||
72,
|
||||
[]byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
|
||||
1,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
tokenizer := NewSingleTokenTokenizer()
|
||||
actual := tokenizer.Tokenize(test.input)
|
||||
|
||||
if !reflect.DeepEqual(actual, test.output) {
|
||||
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package unicode_word_boundary
|
||||
|
||||
// #cgo pkg-config: icu-uc
|
||||
// #include <stdio.h>
|
||||
// #include <stdlib.h>
|
||||
// #include "unicode/utypes.h"
|
||||
// #include "unicode/uchar.h"
|
||||
// #include "unicode/ubrk.h"
|
||||
// #include "unicode/ustring.h"
|
||||
import "C"
|
||||
|
||||
import "log"
|
||||
import "unsafe"
|
||||
import "github.com/couchbaselabs/bleve/analysis"
|
||||
|
||||
// UnicodeWordBoundaryTokenizer splits text on Unicode word boundaries using
// ICU's break-iterator API via cgo.
type UnicodeWordBoundaryTokenizer struct {
	// locale is a C string handed to ubrk_open; nil selects ICU's default
	// locale.  Allocated by C.CString in the custom-locale constructor.
	locale *C.char
}

// NewUnicodeWordBoundaryTokenizer returns a tokenizer that uses ICU's
// default locale.
func NewUnicodeWordBoundaryTokenizer() *UnicodeWordBoundaryTokenizer {
	return &UnicodeWordBoundaryTokenizer{}
}

// NewUnicodeWordBoundaryCustomLocaleTokenizer returns a tokenizer for the
// given ICU locale name (e.g. "en_US", "th_TH").
// NOTE(review): the C string allocated here is freed inside Tokenize via a
// defer, which appears to make the tokenizer single-use — confirm ownership.
func NewUnicodeWordBoundaryCustomLocaleTokenizer(locale string) *UnicodeWordBoundaryTokenizer {
	return &UnicodeWordBoundaryTokenizer{
		locale: C.CString(locale),
	}
}
|
||||
|
||||
func (t *UnicodeWordBoundaryTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
// var bi *C.UBreakIterator
|
||||
rv := make(analysis.TokenStream, 0)
|
||||
defer C.free(unsafe.Pointer(t.locale))
|
||||
|
||||
if len(input) < 1 {
|
||||
return rv
|
||||
}
|
||||
|
||||
// works
|
||||
var myUnsafePointer = unsafe.Pointer(&(input[0]))
|
||||
var myCCharPointer *C.char = (*C.char)(myUnsafePointer)
|
||||
|
||||
var inlen C.int32_t = C.int32_t(len(input))
|
||||
var buflen C.int32_t = C.int32_t(2*len(input) + 1) // worse case each byte becomes 2
|
||||
var stringToExamine []C.UChar = make([]C.UChar, buflen)
|
||||
//log.Printf("new buff is: %v", stringToExamine)
|
||||
var myUnsafePointerToExamine = unsafe.Pointer(&(stringToExamine[0]))
|
||||
var myUCharPointer *C.UChar = (*C.UChar)(myUnsafePointerToExamine)
|
||||
C.u_uastrncpy(myUCharPointer, myCCharPointer, inlen)
|
||||
|
||||
//log.Printf("after copy new buff is: %v", stringToExamine)
|
||||
|
||||
var err C.UErrorCode = C.U_ZERO_ERROR
|
||||
bi := C.ubrk_open(C.UBRK_WORD, t.locale, myUCharPointer, -1, &err)
|
||||
|
||||
if err > C.U_ZERO_ERROR {
|
||||
log.Printf("error opening boundary iterator")
|
||||
return rv
|
||||
}
|
||||
|
||||
defer C.ubrk_close(bi)
|
||||
|
||||
position := 0
|
||||
var prev C.int32_t
|
||||
p := C.ubrk_first(bi)
|
||||
for p != C.UBRK_DONE {
|
||||
|
||||
q := C.ubrk_getRuleStatus(bi)
|
||||
|
||||
// convert boundaries back to utf8 positions
|
||||
var nilCString *C.char
|
||||
var indexA C.int32_t
|
||||
|
||||
C.u_strToUTF8(nilCString, 0, &indexA, myUCharPointer, prev, &err)
|
||||
if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR {
|
||||
log.Printf("error converting boundary %d", err)
|
||||
return rv
|
||||
} else {
|
||||
err = C.U_ZERO_ERROR
|
||||
}
|
||||
|
||||
var indexB C.int32_t
|
||||
C.u_strToUTF8(nilCString, 0, &indexB, myUCharPointer, p, &err)
|
||||
if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR {
|
||||
log.Printf("error converting boundary %d", err)
|
||||
return rv
|
||||
} else {
|
||||
err = C.U_ZERO_ERROR
|
||||
}
|
||||
|
||||
if q != 0 {
|
||||
position += 1
|
||||
token := analysis.Token{
|
||||
Start: int(indexA),
|
||||
End: int(indexB),
|
||||
Term: input[indexA:indexB],
|
||||
Position: position,
|
||||
}
|
||||
rv = append(rv, &token)
|
||||
}
|
||||
prev = p
|
||||
p = C.ubrk_next(bi)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package unicode_word_boundary
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
// TestBoundary verifies ICU word-boundary tokenization for English, Japanese
// and Thai input.  All Start/End values are UTF-8 byte offsets into the
// input, not rune indexes (e.g. "こんにちは" spans bytes 0-15).
func TestBoundary(t *testing.T) {

	tests := []struct {
		input  []byte
		locale string
		output analysis.TokenStream
	}{
		{
			[]byte("Hello World"),
			"en_US",
			analysis.TokenStream{
				{
					0,
					5,
					[]byte("Hello"),
					1,
				},
				{
					6,
					11,
					[]byte("World"),
					2,
				},
			},
		},
		{
			// Japanese is segmented even under en_US per this expectation
			[]byte("こんにちは世界"),
			"en_US",
			analysis.TokenStream{
				{
					0,
					15,
					[]byte("こんにちは"),
					1,
				},
				{
					15,
					21,
					[]byte("世界"),
					2,
				},
			},
		},
		{
			// Thai has no inter-word spaces; requires the th_TH locale
			[]byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
			"th_TH",
			analysis.TokenStream{
				{
					0,
					9,
					[]byte("แยก"),
					1,
				},
				{
					9,
					15,
					[]byte("คำ"),
					2,
				},
				{
					15,
					27,
					[]byte("ภาษา"),
					3,
				},
				{
					27,
					36,
					[]byte("ไทย"),
					4,
				},
				{
					36,
					42,
					[]byte("ก็"),
					5,
				},
				{
					42,
					57,
					[]byte("ทำได้"),
					6,
				},
				{
					57,
					63,
					[]byte("นะ"),
					7,
				},
				{
					63,
					72,
					[]byte("จ้ะ"),
					8,
				},
			},
		},
	}

	for _, test := range tests {
		tokenizer := NewUnicodeWordBoundaryCustomLocaleTokenizer(test.locale)
		actual := tokenizer.Tokenize(test.input)

		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
		}
	}
}
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// CharFilter transforms raw input bytes before tokenization.
type CharFilter interface {
	Filter([]byte) []byte
}

// Token is a single term produced by a Tokenizer.  Start and End are byte
// offsets into the original input; Position is the 1-based token ordinal.
type Token struct {
	Start    int
	End      int
	Term     []byte
	Position int
}

// String renders the token for debugging output.
func (t *Token) String() string {
	return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s", t.Start, t.End, t.Position, string(t.Term))
}

// TokenStream is an ordered sequence of tokens.
type TokenStream []*Token

// Tokenizer splits input bytes into a TokenStream.
type Tokenizer interface {
	Tokenize([]byte) TokenStream
}

// TokenFilter transforms a TokenStream (lowercasing, stop words, etc.).
type TokenFilter interface {
	Filter(TokenStream) TokenStream
}

// Analyzer is the complete text-analysis pipeline: character filters first,
// then the tokenizer, then token filters, each applied in declaration order.
type Analyzer struct {
	CharFilters  []CharFilter
	Tokenizer    Tokenizer
	TokenFilters []TokenFilter
}

// Analyze runs input through the pipeline and returns the resulting tokens.
func (a *Analyzer) Analyze(input []byte) TokenStream {
	// ranging over a nil slice is a no-op, so no explicit nil checks needed
	for _, cf := range a.CharFilters {
		input = cf.Filter(input)
	}
	stream := a.Tokenizer.Tokenize(input)
	for _, tf := range a.TokenFilters {
		stream = tf.Filter(stream)
	}
	return stream
}
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
)
|
||||
|
||||
type Document struct {
|
||||
ID string `json:"id"`
|
||||
Fields []*Field `json:"fields"`
|
||||
}
|
||||
|
||||
func NewDocument(id string) *Document {
|
||||
return &Document{
|
||||
ID: id,
|
||||
Fields: make([]*Field, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) AddField(f *Field) {
|
||||
d.Fields = append(d.Fields, f)
|
||||
}
|
||||
|
||||
func (d *Document) String() string {
|
||||
bytes, _ := json.MarshalIndent(d, "", " ")
|
||||
return string(bytes)
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
)
|
||||
|
||||
type Field struct {
|
||||
Name string
|
||||
IndexingOptions int
|
||||
Analyzer *analysis.Analyzer
|
||||
Value []byte
|
||||
}
|
||||
|
||||
func NewField(name string, value []byte, indexingOptions int, analyzer *analysis.Analyzer) *Field {
|
||||
return &Field{
|
||||
Name: name,
|
||||
IndexingOptions: indexingOptions,
|
||||
Analyzer: analyzer,
|
||||
Value: value,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
|
||||
)
|
||||
|
||||
// standardAnalyzer is the package-wide default analyzer applied to text
// fields created without an explicit analyzer.
var standardAnalyzer *analysis.Analyzer

func init() {
	var err error
	standardAnalyzer, err = standard_analyzer.NewStandardAnalyzer()
	if err != nil {
		// text fields cannot work without their default analyzer; abort.
		// NOTE(review): log.Fatal in init makes the whole program depend on
		// this succeeding at import time
		log.Fatal(err)
	}
}

// DEFAULT_TEXT_INDEXING_OPTIONS indexes the field only: not stored, no
// term vectors.
const DEFAULT_TEXT_INDEXING_OPTIONS = INDEX_FIELD

// NewTextField creates an indexed text field with the default options.
func NewTextField(name string, value []byte) *Field {
	return NewTextFieldWithIndexingOptions(name, value, DEFAULT_TEXT_INDEXING_OPTIONS)
}

// NewTextFieldWithIndexingOptions creates a text field analyzed by the
// standard analyzer with caller-chosen indexing options.
func NewTextFieldWithIndexingOptions(name string, value []byte, indexingOptions int) *Field {
	return &Field{
		Name:            name,
		IndexingOptions: indexingOptions,
		Analyzer:        standardAnalyzer,
		Value:           value,
	}
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
// Field indexing options; combine them with bitwise OR.
const (
	INDEX_FIELD = 1 << iota // the field's terms are searchable
	STORE_FIELD             // the field's raw value is stored
	INCLUDE_TERM_VECTORS    // per-occurrence position/offset vectors are kept
)

// IsIndexedField reports whether arg has the INDEX_FIELD bit set.
func IsIndexedField(arg int) bool {
	return arg&INDEX_FIELD == INDEX_FIELD
}

// IsStoredField reports whether arg has the STORE_FIELD bit set.
func IsStoredField(arg int) bool {
	return arg&STORE_FIELD == STORE_FIELD
}

// IncludeTermVectors reports whether arg has the INCLUDE_TERM_VECTORS bit set.
func IncludeTermVectors(arg int) bool {
	return arg&INCLUDE_TERM_VECTORS == INCLUDE_TERM_VECTORS
}
|
|
@ -0,0 +1,68 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package document
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestIndexingOptions checks the three bit-flag predicates against every
// interesting combination of INDEX_FIELD, STORE_FIELD and
// INCLUDE_TERM_VECTORS.
func TestIndexingOptions(t *testing.T) {
	tests := []struct {
		indexingOptions    int
		isIndexed          bool
		isStored           bool
		includeTermVectors bool
	}{
		{
			indexingOptions:    INDEX_FIELD | STORE_FIELD | INCLUDE_TERM_VECTORS,
			isIndexed:          true,
			isStored:           true,
			includeTermVectors: true,
		},
		{
			indexingOptions:    INDEX_FIELD | INCLUDE_TERM_VECTORS,
			isIndexed:          true,
			isStored:           false,
			includeTermVectors: true,
		},
		{
			indexingOptions:    STORE_FIELD | INCLUDE_TERM_VECTORS,
			isIndexed:          false,
			isStored:           true,
			includeTermVectors: true,
		},
		{
			indexingOptions:    INDEX_FIELD,
			isIndexed:          true,
			isStored:           false,
			includeTermVectors: false,
		},
		{
			indexingOptions:    STORE_FIELD,
			isIndexed:          false,
			isStored:           true,
			includeTermVectors: false,
		},
	}

	for _, test := range tests {
		actuallyIndexed := IsIndexedField(test.indexingOptions)
		if actuallyIndexed != test.isIndexed {
			t.Errorf("expected indexed to be %v, got %v for %d", test.isIndexed, actuallyIndexed, test.indexingOptions)
		}
		actuallyStored := IsStoredField(test.indexingOptions)
		if actuallyStored != test.isStored {
			t.Errorf("expected stored to be %v, got %v for %d", test.isStored, actuallyStored, test.indexingOptions)
		}
		actuallyIncludeTermVectors := IncludeTermVectors(test.indexingOptions)
		if actuallyIncludeTermVectors != test.includeTermVectors {
			t.Errorf("expected includeTermVectors to be %v, got %v for %d", test.includeTermVectors, actuallyIncludeTermVectors, test.indexingOptions)
		}
	}
}
|
|
@ -0,0 +1,63 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
"github.com/couchbaselabs/bleve/shredder"
|
||||
)
|
||||
|
||||
// jsonDir is the directory of JSON files to index; indexDir is where the
// upside_down index lives.
var jsonDir = flag.String("jsonDir", "json", "json directory")
var indexDir = flag.String("indexDir", "index", "index directory")

// main indexes every file under -jsonDir into the index at -indexDir,
// using each file's name as the document ID.
func main() {

	flag.Parse()

	// create a automatic JSON document shredder
	jsonShredder := shredder.NewAutoJsonShredder()

	// create a new index
	index := upside_down.NewUpsideDownCouch(*indexDir)
	err := index.Open()
	if err != nil {
		log.Fatal(err)
	}
	// NOTE(review): log.Fatal calls os.Exit, so this defer is skipped on
	// every fatal path below — confirm the index tolerates an unclean exit
	defer index.Close()

	// open the directory
	dirEntries, err := ioutil.ReadDir(*jsonDir)
	if err != nil {
		log.Fatal(err)
	}

	// walk the directory entries
	for _, dirEntry := range dirEntries {
		// read the bytes
		jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + dirEntry.Name())
		if err != nil {
			log.Fatal(err)
		}
		// shred them into a document
		doc, err := jsonShredder.Shred(dirEntry.Name(), jsonBytes)
		if err != nil {
			log.Fatal(err)
		}
		//log.Printf("%+v", doc)
		// update the index
		err = index.Update(doc)
		if err != nil {
			log.Fatal(err)
		}
	}
}
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
"github.com/couchbaselabs/bleve/search"
|
||||
)
|
||||
|
||||
var field = flag.String("field", "description", "field to query")
|
||||
var indexDir = flag.String("indexDir", "index", "index directory")
|
||||
var limit = flag.Int("limit", 10, "limit to first N results")
|
||||
|
||||
func main() {
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if flag.NArg() < 1 {
|
||||
log.Fatal("Specify search term")
|
||||
}
|
||||
|
||||
// open index
|
||||
index := upside_down.NewUpsideDownCouch(*indexDir)
|
||||
err := index.Open()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer index.Close()
|
||||
|
||||
tq := search.TermQuery{
|
||||
Term: flag.Arg(0),
|
||||
Field: *field,
|
||||
BoostVal: 1.0,
|
||||
Explain: true,
|
||||
}
|
||||
collector := search.NewTopScorerCollector(*limit)
|
||||
searcher, err := tq.Searcher(index)
|
||||
if err != nil {
|
||||
log.Fatalf("searcher error: %v", err)
|
||||
return
|
||||
}
|
||||
err = collector.Collect(searcher)
|
||||
if err != nil {
|
||||
log.Fatalf("search error: %v", err)
|
||||
return
|
||||
}
|
||||
results := collector.Results()
|
||||
if len(results) == 0 {
|
||||
fmt.Printf("No matches\n")
|
||||
} else {
|
||||
last := uint64(*limit)
|
||||
if searcher.Count() < last {
|
||||
last = searcher.Count()
|
||||
}
|
||||
fmt.Printf("%d matches, showing %d through %d\n", searcher.Count(), 1, last)
|
||||
for i, result := range results {
|
||||
fmt.Printf("%2d. %s (%f)\n", i+1, result.ID, result.Score)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package index
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
|
||||
// Index is the contract every index implementation satisfies: lifecycle
// management, document mutation, per-term lookup, and debugging.
type Index interface {
	Open() error
	Close()

	Update(doc *document.Document) error
	Delete(id string) error

	// TermFieldReader iterates the documents containing term in field.
	TermFieldReader(term []byte, field string) (TermFieldReader, error)

	DocCount() uint64

	Dump()
}

// TermFieldVector records one occurrence of a term within a field: its
// token position and start/end byte offsets.
type TermFieldVector struct {
	Field string
	Pos   uint64
	Start uint64
	End   uint64
}

// TermFieldDoc is one posting: the document containing the term, its
// frequency in the field, a length-normalization factor, and optional
// occurrence vectors.
type TermFieldDoc struct {
	ID      string
	Freq    uint64
	Norm    float64
	Vectors []*TermFieldVector
}

// TermFieldReader iterates the postings of a single (term, field) pair.
// Next returns (nil, nil) when exhausted; Advance skips forward to the
// first document with id >= ID.
type TermFieldReader interface {
	Next() (*TermFieldDoc, error)
	Advance(ID string) (*TermFieldDoc, error)
	Count() uint64
	Close()
}
|
|
@ -0,0 +1,227 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package mock
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// mockFreq holds the statistics for one (term, field, document) posting.
type mockFreq struct {
	freq    uint64
	norm    float64
	vectors []*index.TermFieldVector
}

// key doc id
type mockDocFreq map[string]*mockFreq

//key field
type mockFieldDocFreq map[string]mockDocFreq

// 2 dim array
// inner level are always pairs (field name, term)
// mockBackIndexEntry records every posting a document contributed, so the
// document can be cleanly removed later.
type mockBackIndexEntry [][]string

// MockIndex is an in-memory index implementation intended for tests.
type MockIndex struct {

	//this level of the map, the key is the term
	// full shape: term -> field -> doc id -> *mockFreq
	termIndex map[string]mockFieldDocFreq

	// key is docid
	backIndex map[string]mockBackIndexEntry

	docCount uint64
	// analyzer is keyed by field name; NOTE(review): not populated by any
	// method visible here — confirm whether it is still needed
	analyzer map[string]*analysis.Analyzer
}
|
||||
|
||||
func NewMockIndexWithDocs(docs []*document.Document) *MockIndex {
|
||||
rv := NewMockIndex()
|
||||
for _, doc := range docs {
|
||||
rv.Update(doc)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func NewMockIndex() *MockIndex {
|
||||
mi := MockIndex{
|
||||
termIndex: make(map[string]mockFieldDocFreq),
|
||||
backIndex: make(map[string]mockBackIndexEntry),
|
||||
analyzer: make(map[string]*analysis.Analyzer),
|
||||
}
|
||||
|
||||
return &mi
|
||||
}
|
||||
|
||||
func (index *MockIndex) Open() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (index *MockIndex) Close() {}
|
||||
|
||||
// for this implementation we dont care about performance
// update is simply delete then add
//
// Update analyzes each field of doc, records per-term frequencies (and term
// vectors when the field's options request them) in termIndex, and saves a
// back-index entry so the doc's postings can be removed on a later
// update/delete.
func (index *MockIndex) Update(doc *document.Document) error {
	index.Delete(doc.ID)

	backIndexEntry := make(mockBackIndexEntry, 0)
	for _, field := range doc.Fields {

		analyzer := field.Analyzer
		tokens := analyzer.Analyze(field.Value)
		fieldLength := len(tokens) // number of tokens in this doc field
		fieldNorm := 1.0 / math.Sqrt(float64(fieldLength))
		tokenFreqs := analysis.TokenFrequency(tokens)
		for _, tf := range tokenFreqs {
			mf := mockFreq{
				freq: uint64(len(tf.Locations)),
				norm: fieldNorm,
			}
			if document.IncludeTermVectors(field.IndexingOptions) {
				mf.vectors = index.mockVectorsFromTokenFreq(field.Name, tf)
			}
			termString := string(tf.Term)
			// walk/create the two map levels: term -> field -> doc
			fieldMap, ok := index.termIndex[termString]
			if !ok {
				fieldMap = make(map[string]mockDocFreq)
				index.termIndex[termString] = fieldMap
			}
			docMap, ok := fieldMap[field.Name]
			if !ok {
				docMap = make(map[string]*mockFreq)
				fieldMap[field.Name] = docMap
			}
			docMap[doc.ID] = &mf
			backIndexInnerEntry := []string{field.Name, termString}
			backIndexEntry = append(backIndexEntry, backIndexInnerEntry)
		}
	}
	index.backIndex[doc.ID] = backIndexEntry
	// the Delete above decremented the count for an existing doc, so the
	// unconditional increment keeps the net count correct
	index.docCount += 1
	return nil
}
|
||||
|
||||
// Delete removes every posting recorded for id (found via its back-index
// entry), pruning now-empty field and term map levels so lookups of dead
// terms miss cleanly, then drops the back-index entry itself.  Deleting an
// unknown id is a no-op.
func (index *MockIndex) Delete(id string) error {
	backIndexEntry, existed := index.backIndex[id]
	if existed {
		for _, backIndexPair := range backIndexEntry {
			// each back-index pair is (field name, term)
			if len(backIndexPair) == 2 {
				field := backIndexPair[0]
				term := backIndexPair[1]
				delete(index.termIndex[term][field], id)
				if len(index.termIndex[term][field]) == 0 {
					delete(index.termIndex[term], field)
					if len(index.termIndex[term]) == 0 {
						delete(index.termIndex, term)
					}
				}
			}
		}
		delete(index.backIndex, id)
		index.docCount -= 1
	}

	return nil
}
|
||||
|
||||
func (index *MockIndex) TermFieldReader(term []byte, field string) (index.TermFieldReader, error) {
|
||||
|
||||
fdf, ok := index.termIndex[string(term)]
|
||||
if !ok {
|
||||
fdf = make(mockFieldDocFreq)
|
||||
}
|
||||
docFreqs, ok := fdf[field]
|
||||
if !ok {
|
||||
docFreqs = make(mockDocFreq)
|
||||
}
|
||||
mtfr := mockTermFieldReader{
|
||||
index: docFreqs,
|
||||
sortedDocIds: make(sort.StringSlice, len(docFreqs)),
|
||||
curr: -1,
|
||||
}
|
||||
i := 0
|
||||
for k, _ := range docFreqs {
|
||||
mtfr.sortedDocIds[i] = k
|
||||
i += 1
|
||||
}
|
||||
sort.Sort(mtfr.sortedDocIds)
|
||||
|
||||
return &mtfr, nil
|
||||
}
|
||||
|
||||
func (index *MockIndex) DocCount() uint64 {
|
||||
return index.docCount
|
||||
}
|
||||
|
||||
// mockTermFieldReader iterates the postings of one (term, field) pair in
// ascending doc-id order.
type mockTermFieldReader struct {
	index        mockDocFreq      // doc id -> posting stats
	sortedDocIds sort.StringSlice // iteration order over index's keys
	curr         int              // index of the last doc returned; -1 before the first Next
}
|
||||
|
||||
// Next returns the posting for the next doc id in sorted order, advancing
// the cursor, or (nil, nil) once the postings are exhausted.
func (reader *mockTermFieldReader) Next() (*index.TermFieldDoc, error) {
	next := reader.curr + 1
	if next < len(reader.sortedDocIds) {
		nextTermKey := reader.sortedDocIds[next]
		nextTerm := reader.index[nextTermKey]
		reader.curr = next
		return &index.TermFieldDoc{ID: nextTermKey, Freq: nextTerm.freq, Norm: nextTerm.norm, Vectors: nextTerm.vectors}, nil
	}
	// (nil, nil) is the end-of-stream sentinel, not an error
	return nil, nil
}
|
||||
|
||||
func (reader *mockTermFieldReader) Advance(ID string) (*index.TermFieldDoc, error) {
|
||||
if reader.curr >= len(reader.sortedDocIds) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
i := reader.curr
|
||||
for currTermID := reader.sortedDocIds[i]; currTermID < ID && i < len(reader.sortedDocIds); i += 1 {
|
||||
reader.curr = i
|
||||
currTermID = reader.sortedDocIds[reader.curr]
|
||||
}
|
||||
|
||||
if reader.curr < len(reader.sortedDocIds) {
|
||||
nextTermKey := reader.sortedDocIds[reader.curr]
|
||||
nextTerm := reader.index[nextTermKey]
|
||||
return &index.TermFieldDoc{ID: nextTermKey, Freq: nextTerm.freq, Norm: nextTerm.norm, Vectors: nextTerm.vectors}, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Count returns the number of documents containing this term/field pair.
func (reader *mockTermFieldReader) Count() uint64 {
	return uint64(len(reader.sortedDocIds))
}

// Close is a no-op; the mock reader holds no external resources.
func (reader *mockTermFieldReader) Close() {}
|
||||
|
||||
// mockVectorsFromTokenFreq converts a token's occurrence list into
// index.TermFieldVector records tagged with the given field name.
func (mi *MockIndex) mockVectorsFromTokenFreq(field string, tf *analysis.TokenFreq) []*index.TermFieldVector {
	rv := make([]*index.TermFieldVector, len(tf.Locations))

	for i, l := range tf.Locations {
		mv := index.TermFieldVector{
			Field: field,
			Pos:   uint64(l.Position),
			Start: uint64(l.Start),
			End:   uint64(l.End),
		}
		rv[i] = &mv
	}

	return rv
}

// Dump satisfies the index.Index interface; not implemented for the mock.
func (mi *MockIndex) Dump() {
	fmt.Println("dump not implemented")
}
|
|
@ -0,0 +1,124 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package mock
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// TestCRUD exercises the full MockIndex lifecycle — create, read via
// TermFieldReader, update, delete — checking DocCount and posting contents
// at each step.
func TestCRUD(t *testing.T) {
	i := NewMockIndex()

	// create doc, assert doc count goes up
	doc1 := document.NewDocument("1")
	doc1.AddField(document.NewTextField("name", []byte("marty")))
	i.Update(doc1)
	count := i.DocCount()
	if count != 1 {
		t.Errorf("expected document count to be 1, was: %d", count)
	}

	// add another doc, assert doc count goes up again
	doc2 := document.NewDocument("2")
	doc2.AddField(document.NewTextField("name", []byte("bob")))
	i.Update(doc2)
	count = i.DocCount()
	if count != 2 {
		t.Errorf("expected document count to be 2, was: %d", count)
	}

	// search for doc with term that should exist
	// Norm is 1 because the field holds a single token (1/sqrt(1))
	expectedMatch := &index.TermFieldDoc{
		ID:   "1",
		Freq: 1,
		Norm: 1,
	}
	tfr, err := i.TermFieldReader([]byte("marty"), "name")
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	match, err := tfr.Next()
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if !reflect.DeepEqual(expectedMatch, match) {
		t.Errorf("got %v, expected %v", match, expectedMatch)
	}
	nomatch, err := tfr.Next()
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if nomatch != nil {
		t.Errorf("expected nil after last match")
	}

	// update doc, assert doc count doesn't go up
	doc1 = document.NewDocument("1")
	doc1.AddField(document.NewTextField("name", []byte("salad")))
	doc1.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
	i.Update(doc1)
	count = i.DocCount()
	if count != 2 {
		t.Errorf("expected document count to be 2, was: %d", count)
	}

	// perform the original search again, should NOT find anything this time
	tfr, err = i.TermFieldReader([]byte("marty"), "name")
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	nomatch, err = tfr.Next()
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if nomatch != nil {
		t.Errorf("expected no matches, found one")
		t.Logf("%v", i)
	}

	// delete a doc, ensure the count is 1
	err = i.Delete("2")
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	count = i.DocCount()
	if count != 1 {
		t.Errorf("expected document count to be 1, was: %d", count)
	}

	// "rice" is one of 3 tokens in the desc field, so Norm = 1/sqrt(3);
	// the vector records token position 3, byte offsets 9-13
	expectedMatch = &index.TermFieldDoc{
		ID:   "1",
		Freq: 1,
		Norm: 0.5773502691896258,
		Vectors: []*index.TermFieldVector{
			&index.TermFieldVector{
				Field: "desc",
				Pos:   3,
				Start: 9,
				End:   13,
			},
		},
	}
	tfr, err = i.TermFieldReader([]byte("rice"), "desc")
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	match, err = tfr.Next()
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if !reflect.DeepEqual(expectedMatch, match) {
		t.Errorf("got %#v, expected %#v", match, expectedMatch)
	}
}
|
|
@ -0,0 +1,101 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/jmhodges/levigo"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// UpsideDownCouchTermFieldReader iterates the term-frequency rows for a
// single (term, field) pair, yielding one entry per document containing
// the term.
type UpsideDownCouchTermFieldReader struct {
	index    *UpsideDownCouch // owning index; used to resolve term vectors
	iterator *levigo.Iterator // positioned within this term's key range
	count    uint64           // aggregate frequency taken from the term's stats row
	term     []byte           // the term being read
	field    uint16           // numeric index of the field being read
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(index *UpsideDownCouch, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) {
|
||||
ro := defaultReadOptions()
|
||||
it := index.db.NewIterator(ro)
|
||||
|
||||
tfr := NewTermFrequencyRow(term, field, "", 0, 0)
|
||||
it.Seek(tfr.Key())
|
||||
|
||||
var count uint64 = 0
|
||||
if it.Valid() {
|
||||
if bytes.Equal(it.Key(), tfr.Key()) {
|
||||
tfr = ParseFromKeyValue(it.Key(), it.Value()).(*TermFrequencyRow)
|
||||
count = tfr.freq
|
||||
}
|
||||
|
||||
} else {
|
||||
return nil, it.GetError()
|
||||
}
|
||||
|
||||
return &UpsideDownCouchTermFieldReader{
|
||||
index: index,
|
||||
iterator: it,
|
||||
count: count,
|
||||
term: term,
|
||||
field: field,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Count returns the total frequency recorded for this term/field across all
// documents; it is read once from the stats row at construction time.
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
	return r.count
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
|
||||
r.iterator.Next()
|
||||
if r.iterator.Valid() {
|
||||
tfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
|
||||
if !bytes.HasPrefix(r.iterator.Key(), tfr.Key()) {
|
||||
// end of the line
|
||||
return nil, nil
|
||||
}
|
||||
tfr = ParseFromKeyValue(r.iterator.Key(), r.iterator.Value()).(*TermFrequencyRow)
|
||||
return &index.TermFieldDoc{
|
||||
ID: string(tfr.doc),
|
||||
Freq: tfr.freq,
|
||||
Norm: float64(tfr.norm),
|
||||
Vectors: r.index.termFieldVectorsFromTermVectors(tfr.vectors),
|
||||
}, nil
|
||||
} else {
|
||||
return nil, r.iterator.GetError()
|
||||
}
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Advance(docId string) (*index.TermFieldDoc, error) {
|
||||
tfr := NewTermFrequencyRow(r.term, r.field, docId, 0, 0)
|
||||
r.iterator.Seek(tfr.Key())
|
||||
if r.iterator.Valid() {
|
||||
tfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
|
||||
if !bytes.HasPrefix(r.iterator.Key(), tfr.Key()) {
|
||||
// end of the line
|
||||
return nil, nil
|
||||
}
|
||||
tfr = ParseFromKeyValue(r.iterator.Key(), r.iterator.Value()).(*TermFrequencyRow)
|
||||
return &index.TermFieldDoc{
|
||||
ID: string(tfr.doc),
|
||||
Freq: tfr.freq,
|
||||
Norm: float64(tfr.norm),
|
||||
Vectors: r.index.termFieldVectorsFromTermVectors(tfr.vectors),
|
||||
}, nil
|
||||
} else {
|
||||
return nil, r.iterator.GetError()
|
||||
}
|
||||
}
|
||||
|
||||
// Close releases the underlying iterator; the reader must not be used
// afterwards.
func (r *UpsideDownCouchTermFieldReader) Close() {
	r.iterator.Close()
}
|
|
@ -0,0 +1,111 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
func TestIndexReader(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
idx := NewUpsideDownCouch("test")
|
||||
|
||||
err := idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
var expectedCount uint64 = 0
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test test test")))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
// first look for a term that doesnt exist
|
||||
reader, err := idx.TermFieldReader([]byte("nope"), "name")
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
count := reader.Count()
|
||||
if count != 0 {
|
||||
t.Errorf("Expected doc count to be: %d got: %d", 0, count)
|
||||
}
|
||||
reader.Close()
|
||||
|
||||
reader, err = idx.TermFieldReader([]byte("test"), "name")
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
expectedCount = 2
|
||||
count = reader.Count()
|
||||
if count != expectedCount {
|
||||
t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count)
|
||||
}
|
||||
|
||||
var match *index.TermFieldDoc
|
||||
var actualCount uint64
|
||||
match, err = reader.Next()
|
||||
for err == nil && match != nil {
|
||||
match, err = reader.Next()
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error reading next")
|
||||
}
|
||||
actualCount += 1
|
||||
}
|
||||
if actualCount != count {
|
||||
t.Errorf("count was 2, but only saw %d", actualCount)
|
||||
}
|
||||
|
||||
expectedMatch := &index.TermFieldDoc{
|
||||
ID: "2",
|
||||
Freq: 1,
|
||||
Norm: 0.5773502588272095,
|
||||
Vectors: []*index.TermFieldVector{
|
||||
&index.TermFieldVector{
|
||||
Field: "desc",
|
||||
Pos: 3,
|
||||
Start: 9,
|
||||
End: 13,
|
||||
},
|
||||
},
|
||||
}
|
||||
tfr, err := idx.TermFieldReader([]byte("rice"), "desc")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
match, err = tfr.Next()
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(expectedMatch, match) {
|
||||
t.Errorf("got %#v, expected %#v", match, expectedMatch)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,412 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// BYTE_SEPARATOR delimits variable-length components (terms, names, doc ids)
// inside row keys and values; presumably chosen because 0xff never occurs in
// valid UTF-8 text — TODO confirm.
const BYTE_SEPARATOR byte = 0xff

// UpsideDownCouchRowStream delivers rows over a channel.
type UpsideDownCouchRowStream chan UpsideDownCouchRow

// UpsideDownCouchRow is the common interface for every row type persisted
// to the underlying key/value store.
type UpsideDownCouchRow interface {
	Key() []byte
	Value() []byte
}
|
||||
|
||||
func ParseFromKeyValue(key, value []byte) UpsideDownCouchRow {
|
||||
switch key[0] {
|
||||
case 'v':
|
||||
return NewVersionRowKV(key, value)
|
||||
case 'f':
|
||||
return NewFieldRowKV(key, value)
|
||||
case 't':
|
||||
return NewTermFrequencyRowKV(key, value)
|
||||
case 'b':
|
||||
return NewBackIndexRowKV(key, value)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// VERSION
|
||||
|
||||
// VersionRow records the on-disk format version of the index.
type VersionRow struct {
	version uint8
}

// Key returns the fixed single-byte key under which the version is stored.
func (v *VersionRow) Key() []byte {
	return []byte{'v'}
}

// Value encodes the version number as a single little-endian byte.
func (v *VersionRow) Value() []byte {
	var buf bytes.Buffer
	if err := binary.Write(&buf, binary.LittleEndian, v.version); err != nil {
		panic(fmt.Sprintf("binary.Write failed: %v", err))
	}
	return buf.Bytes()
}

// String implements fmt.Stringer for debugging.
func (v *VersionRow) String() string {
	return fmt.Sprintf("Version: %d", v.version)
}

// NewVersionRow builds a VersionRow for the given format version.
func NewVersionRow(version uint8) *VersionRow {
	return &VersionRow{version: version}
}

// NewVersionRowKV decodes a VersionRow from its stored key/value form,
// panicking if the value cannot be read.
func NewVersionRowKV(key, value []byte) *VersionRow {
	var rv VersionRow
	if err := binary.Read(bytes.NewBuffer(value), binary.LittleEndian, &rv.version); err != nil {
		panic(fmt.Sprintf("binary.Read failed: %v", err))
	}
	return &rv
}
|
||||
|
||||
// FIELD definition
|
||||
|
||||
type FieldRow struct {
|
||||
index uint16
|
||||
name string
|
||||
}
|
||||
|
||||
func (f *FieldRow) Key() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
err := buf.WriteByte('f')
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, f.index)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func (f *FieldRow) Value() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
_, err := buf.WriteString(f.name)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.WriteString failed: %v", err))
|
||||
}
|
||||
err = buf.WriteByte(BYTE_SEPARATOR)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func (f *FieldRow) String() string {
|
||||
return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
|
||||
}
|
||||
|
||||
func NewFieldRow(index uint16, name string) *FieldRow {
|
||||
return &FieldRow{
|
||||
index: index,
|
||||
name: name,
|
||||
}
|
||||
}
|
||||
|
||||
func NewFieldRowKV(key, value []byte) *FieldRow {
|
||||
rv := FieldRow{}
|
||||
|
||||
buf := bytes.NewBuffer(key)
|
||||
buf.ReadByte() // type
|
||||
err := binary.Read(buf, binary.LittleEndian, &rv.index)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Read failed: %v", err))
|
||||
}
|
||||
|
||||
buf = bytes.NewBuffer(value)
|
||||
rv.name, err = buf.ReadString(BYTE_SEPARATOR)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
|
||||
}
|
||||
rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
|
||||
|
||||
return &rv
|
||||
}
|
||||
|
||||
// TERM FIELD FREQUENCY
|
||||
|
||||
// TermVector records one positional occurrence of a term: which field it
// appeared in, its token position, and its byte offsets.
type TermVector struct {
	field uint16
	pos   uint64
	start uint64
	end   uint64
}

// String implements fmt.Stringer for debugging.
func (tv *TermVector) String() string {
	return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d", tv.field, tv.pos, tv.start, tv.end)
}
|
||||
|
||||
// TermFrequencyRow records how often a term appears in one field of one
// document, plus its normalization factor and optional term vectors. A row
// with an empty doc id serves as the aggregate stats row for the term/field.
type TermFrequencyRow struct {
	term    []byte        // the indexed term
	field   uint16        // numeric field index
	doc     []byte        // document id (empty for the aggregate row)
	freq    uint64        // occurrences of the term in this doc/field
	norm    float32       // field-length normalization factor
	vectors []*TermVector // optional positional information
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Key() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
err := buf.WriteByte('t')
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
||||
}
|
||||
_, err = buf.Write(tfr.term)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.Write failed: %v", err))
|
||||
}
|
||||
err = buf.WriteByte(BYTE_SEPARATOR)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, tfr.field)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
_, err = buf.Write(tfr.doc)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.Write failed: %v", err))
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Value() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
err := binary.Write(buf, binary.LittleEndian, tfr.freq)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, tfr.norm)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
for _, vector := range tfr.vectors {
|
||||
err = binary.Write(buf, binary.LittleEndian, vector.field)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, vector.pos)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, vector.start)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, vector.end)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer for debugging.
func (tfr *TermFrequencyRow) String() string {
	return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
}
|
||||
|
||||
func NewTermFrequencyRow(term []byte, field uint16, doc string, freq uint64, norm float32) *TermFrequencyRow {
|
||||
return &TermFrequencyRow{
|
||||
term: term,
|
||||
field: field,
|
||||
doc: []byte(doc),
|
||||
freq: freq,
|
||||
norm: norm,
|
||||
}
|
||||
}
|
||||
|
||||
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, doc string, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
|
||||
return &TermFrequencyRow{
|
||||
term: term,
|
||||
field: field,
|
||||
doc: []byte(doc),
|
||||
freq: freq,
|
||||
norm: norm,
|
||||
vectors: vectors,
|
||||
}
|
||||
}
|
||||
|
||||
// NewTermFrequencyRowKV decodes a TermFrequencyRow from its stored
// key/value form: key = 't' + term + BYTE_SEPARATOR + field + doc id,
// value = freq + norm followed by zero or more term vectors. It panics on
// malformed input.
func NewTermFrequencyRowKV(key, value []byte) *TermFrequencyRow {
	rv := TermFrequencyRow{
		doc: []byte(""),
	}
	buf := bytes.NewBuffer(key)
	buf.ReadByte() // type

	var err error
	rv.term, err = buf.ReadBytes(BYTE_SEPARATOR)
	if err != nil {
		panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
	}
	rv.term = rv.term[:len(rv.term)-1] // trim off separator byte

	err = binary.Read(buf, binary.LittleEndian, &rv.field)
	if err != nil {
		panic(fmt.Sprintf("binary.Read failed: %v", err))
	}

	// the doc id is the unterminated remainder of the key, so ReadBytes is
	// expected to stop at EOF rather than at a separator
	doc, err := buf.ReadBytes(BYTE_SEPARATOR)
	if err != io.EOF {
		panic(fmt.Sprintf("expected binary.ReadString to end in EOF: %v", err))
	}
	if doc != nil {
		rv.doc = doc
	}

	buf = bytes.NewBuffer((value))
	err = binary.Read(buf, binary.LittleEndian, &rv.freq)
	if err != nil {
		panic(fmt.Sprintf("binary.Read failed: %v", err))
	}
	err = binary.Read(buf, binary.LittleEndian, &rv.norm)
	if err != nil {
		panic(fmt.Sprintf("binary.Read failed: %v", err))
	}

	// term vectors are optional: io.EOF on the first field read means there
	// are none; afterwards EOF is the loop's normal termination condition
	var field uint16
	err = binary.Read(buf, binary.LittleEndian, &field)
	if err != nil && err != io.EOF {
		panic(fmt.Sprintf("binary.Read failed: %v", err))
	}
	for err != io.EOF {
		tv := TermVector{}
		tv.field = field
		// at this point we expect at least one term vector
		if rv.vectors == nil {
			rv.vectors = make([]*TermVector, 0)
		}

		err = binary.Read(buf, binary.LittleEndian, &tv.pos)
		if err != nil {
			panic(fmt.Sprintf("binary.Read failed: %v", err))
		}
		err = binary.Read(buf, binary.LittleEndian, &tv.start)
		if err != nil {
			panic(fmt.Sprintf("binary.Read failed: %v", err))
		}
		err = binary.Read(buf, binary.LittleEndian, &tv.end)
		if err != nil {
			panic(fmt.Sprintf("binary.Read failed: %v", err))
		}
		rv.vectors = append(rv.vectors, &tv)
		// try to read next record (may not exist)
		err = binary.Read(buf, binary.LittleEndian, &field)
	}

	return &rv

}
|
||||
|
||||
// BackIndexEntry names one term/field pair that a document was indexed
// under.
type BackIndexEntry struct {
	term  []byte
	field uint16
}

// String implements fmt.Stringer for debugging.
func (bie *BackIndexEntry) String() string {
	return fmt.Sprintf("Term: `%s` Field: %d", string(bie.term), bie.field)
}
|
||||
|
||||
// BackIndexRow maps a document id to every term/field pair it was indexed
// under, enabling cleanup of stale term rows on update and delete.
type BackIndexRow struct {
	doc     []byte            // document id
	entries []*BackIndexEntry // term/field pairs indexed for this document
}
|
||||
|
||||
func (br *BackIndexRow) Key() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
err := buf.WriteByte('b')
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, br.doc)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func (br *BackIndexRow) Value() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
for _, e := range br.entries {
|
||||
_, err := buf.Write(e.term)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.Write failed: %v", err))
|
||||
}
|
||||
err = buf.WriteByte(BYTE_SEPARATOR)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Buffer.WriteByte failed: %v", err))
|
||||
}
|
||||
err = binary.Write(buf, binary.LittleEndian, e.field)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("binary.Write failed: %v", err))
|
||||
}
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer for debugging.
func (br *BackIndexRow) String() string {
	return fmt.Sprintf("Backindex DocId: `%s` Entries: %v", string(br.doc), br.entries)
}
|
||||
|
||||
func NewBackIndexRow(doc string, entries []*BackIndexEntry) *BackIndexRow {
|
||||
return &BackIndexRow{
|
||||
doc: []byte(doc),
|
||||
entries: entries,
|
||||
}
|
||||
}
|
||||
|
||||
// NewBackIndexRowKV decodes a BackIndexRow from its stored key/value form:
// key = 'b' + doc id, value = repeated (term + BYTE_SEPARATOR + field).
// It panics on malformed input.
func NewBackIndexRowKV(key, value []byte) *BackIndexRow {
	rv := BackIndexRow{}

	buf := bytes.NewBuffer(key)
	buf.ReadByte() // type

	// the doc id is the unterminated remainder of the key, so ReadBytes is
	// expected to stop at EOF rather than at a separator
	var err error
	rv.doc, err = buf.ReadBytes(BYTE_SEPARATOR)
	if err != io.EOF {
		panic(fmt.Sprintf("expected binary.ReadString to end in EOF: %v", err))
	}

	buf = bytes.NewBuffer(value)
	rv.entries = make([]*BackIndexEntry, 0)

	// io.EOF on the first read means no entries; afterwards EOF is the
	// loop's normal termination condition
	var term []byte
	term, err = buf.ReadBytes(BYTE_SEPARATOR)
	if err != nil && err != io.EOF {
		panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
	}
	for err != io.EOF {
		ent := BackIndexEntry{}
		ent.term = term[:len(term)-1] // trim off separator byte

		err = binary.Read(buf, binary.LittleEndian, &ent.field)
		if err != nil {
			panic(fmt.Sprintf("binary.Read failed: %v", err))
		}
		rv.entries = append(rv.entries, &ent)

		term, err = buf.ReadBytes(BYTE_SEPARATOR)
		if err != nil && err != io.EOF {
			panic(fmt.Sprintf("Buffer.ReadString failed: %v", err))
		}
	}

	return &rv
}
|
|
@ -0,0 +1,89 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestRows round-trips every row type: first struct -> key/value bytes
// against hand-computed fixtures, then bytes -> struct via
// ParseFromKeyValue. Fixture byte slices are little-endian.
func TestRows(t *testing.T) {
	tests := []struct {
		input  UpsideDownCouchRow
		outKey []byte
		outVal []byte
	}{
		{
			NewVersionRow(1),
			[]byte{'v'},
			[]byte{0x1},
		},
		{
			NewFieldRow(0, "name"),
			[]byte{'f', 0, 0},
			[]byte{'n', 'a', 'm', 'e', BYTE_SEPARATOR},
		},
		{
			NewFieldRow(1, "desc"),
			[]byte{'f', 1, 0},
			[]byte{'d', 'e', 's', 'c', BYTE_SEPARATOR},
		},
		{
			// 513 = 0x0201 little-endian -> bytes 1, 2
			NewFieldRow(513, "style"),
			[]byte{'f', 1, 2},
			[]byte{'s', 't', 'y', 'l', 'e', BYTE_SEPARATOR},
		},
		{
			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "", 3, 3.14),
			[]byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0},
			[]byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64},
		},
		{
			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14),
			[]byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64},
		},
		{
			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
			[]byte{'t', 'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0},
		},
		{
			NewBackIndexRow("budweiser", []*BackIndexEntry{&BackIndexEntry{[]byte{'b', 'e', 'e', 'r'}, 0}}),
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0},
		},
		{
			NewBackIndexRow("budweiser", []*BackIndexEntry{&BackIndexEntry{[]byte{'b', 'e', 'e', 'r'}, 0}, &BackIndexEntry{[]byte{'b', 'e', 'a', 't'}, 1}}),
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'e', 'a', 't', BYTE_SEPARATOR, 1, 0},
		},
	}

	// test going from struct to k/v bytes
	for _, test := range tests {
		rk := test.input.Key()
		if !reflect.DeepEqual(rk, test.outKey) {
			t.Errorf("Expected key to be %v got: %v", test.outKey, rk)
		}
		rv := test.input.Value()
		if !reflect.DeepEqual(rv, test.outVal) {
			t.Errorf("Expected value to be %v got: %v", test.outVal, rv)
		}
	}

	// now test going back from k/v bytes to struct
	for _, test := range tests {
		row := ParseFromKeyValue(test.outKey, test.outVal)
		if !reflect.DeepEqual(row, test.input) {
			t.Fatalf("Expected: %#v got: %#v", test.input, row)
		}
	}

}
|
|
@ -0,0 +1,466 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
"github.com/jmhodges/levigo"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// VERSION_KEY is the key of the single row recording the on-disk format
// version.
var VERSION_KEY []byte = []byte{'v'}

// VERSION is the current on-disk format version written by this code.
const VERSION uint8 = 1
|
||||
|
||||
// UpsideDownCouch is a LevelDB-backed index implementation.
type UpsideDownCouch struct {
	version        uint8                         // on-disk format version
	path           string                        // filesystem path of the LevelDB database
	opts           *levigo.Options               // options used to open the database
	db             *levigo.DB                    // open database handle (nil until Open)
	fieldIndexes   map[string]uint16             // field name -> numeric field index
	lastFieldIndex int                           // highest field index assigned so far
	analyzer       map[string]*analysis.Analyzer // per-field analyzers; not used in the code visible here — TODO confirm
	docCount       uint64                        // cached number of documents in the index
}
|
||||
|
||||
// NewUpsideDownCouch creates an index handle rooted at path. The database
// itself is not opened until Open is called; create-if-missing is enabled
// so Open will create a fresh database when none exists.
func NewUpsideDownCouch(path string) *UpsideDownCouch {
	opts := levigo.NewOptions()
	opts.SetCreateIfMissing(true)

	return &UpsideDownCouch{
		version:      VERSION,
		path:         path,
		opts:         opts,
		analyzer:     make(map[string]*analysis.Analyzer),
		fieldIndexes: make(map[string]uint16),
	}
}
|
||||
|
||||
func (udc *UpsideDownCouch) init() (err error) {
|
||||
// prepare a list of rows
|
||||
rows := make([]UpsideDownCouchRow, 0)
|
||||
|
||||
// version marker
|
||||
rows = append(rows, NewVersionRow(udc.version))
|
||||
|
||||
return udc.batchRows(nil, rows, nil)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) loadSchema() (err error) {
|
||||
// schema := make([]*index.Field, 0)
|
||||
|
||||
ro := defaultReadOptions()
|
||||
it := udc.db.NewIterator(ro)
|
||||
defer it.Close()
|
||||
|
||||
keyPrefix := []byte{'f'}
|
||||
it.Seek(keyPrefix)
|
||||
for it = it; it.Valid(); it.Next() {
|
||||
// stop when
|
||||
if !bytes.HasPrefix(it.Key(), keyPrefix) {
|
||||
break
|
||||
}
|
||||
fieldRow := NewFieldRowKV(it.Key(), it.Value())
|
||||
udc.fieldIndexes[fieldRow.name] = fieldRow.index
|
||||
if int(fieldRow.index) > udc.lastFieldIndex {
|
||||
udc.lastFieldIndex = int(fieldRow.index)
|
||||
}
|
||||
}
|
||||
err = it.GetError()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// batchRows applies adds, updates, and deletes atomically through a single
// LevelDB write batch. For term-frequency rows it also maintains the
// per-term aggregate stats row (the row with an empty doc id): adds
// increment its freq, deletes decrement it and remove it at zero.
//
// NOTE(review): the levigo ro, wb, and wo objects wrap C allocations and
// are presumably leaked here because Close is never called on them — verify
// against the levigo API and free them before returning.
func (udc *UpsideDownCouch) batchRows(addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {
	ro := defaultReadOptions()

	// prepare batch
	wb := levigo.NewWriteBatch()

	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to increment the aggregate counter for this term/field
			tr := NewTermFrequencyRow(tfr.term, tfr.field, "", 0, 0)
			val, err := udc.db.Get(ro, tr.Key())
			if err != nil {
				return err
			}
			if val != nil {
				tr = ParseFromKeyValue(tr.Key(), val).(*TermFrequencyRow)
				tr.freq += 1 // increment existing aggregate count
			} else {
				// first occurrence of this term/field: start the count at 1
				tr = NewTermFrequencyRow(tfr.term, tfr.field, "", 1, 0)
			}

			// now add this to the batch
			wb.Put(tr.Key(), tr.Value())
		}
		wb.Put(row.Key(), row.Value())
	}

	// update
	for _, row := range updateRows {
		wb.Put(row.Key(), row.Value())
	}

	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement the aggregate counter for this term/field
			tr := NewTermFrequencyRow(tfr.term, tfr.field, "", 0, 0)
			val, err := udc.db.Get(ro, tr.Key())
			if err != nil {
				return err
			}
			if val != nil {
				tr = ParseFromKeyValue(tr.Key(), val).(*TermFrequencyRow)
				tr.freq -= 1 // decrement (original comment said "incr")
			} else {
				// deleting a term whose aggregate row is missing is an
				// invariant violation (message typo: "exit" means "exist")
				log.Panic(fmt.Sprintf("unexpected missing row, deleting term, expected count row to exit: %v", tr.Key()))
			}

			if tr.freq == 0 {
				// last reference gone: drop the aggregate row entirely
				wb.Delete(tr.Key())
			} else {
				// now add this to the batch
				wb.Put(tr.Key(), tr.Value())
			}

		}
		wb.Delete(row.Key())
	}

	// write out the batch
	wo := defaultWriteOptions()
	err = udc.db.Write(wo, wb)
	return
}
|
||||
|
||||
// DocCount returns the cached number of documents in the index.
func (udc *UpsideDownCouch) DocCount() uint64 {
	return udc.docCount
}
|
||||
|
||||
// Open opens (creating if missing) the underlying LevelDB database. A
// missing version row means a brand-new database, which is initialized;
// otherwise the existing schema is loaded. Finally the document count is
// computed and cached.
func (udc *UpsideDownCouch) Open() (err error) {
	udc.db, err = levigo.Open(udc.path, udc.opts)
	if err != nil {
		return
	}

	ro := defaultReadOptions()
	var value []byte
	value, err = udc.db.Get(ro, VERSION_KEY)
	if err != nil {
		return
	}

	// init new index OR load schema
	if value == nil {
		// no version row: fresh database
		err = udc.init()
		if err != nil {
			return
		}
	} else {
		err = udc.loadSchema()
		if err != nil {
			return
		}
	}
	// set doc count
	udc.docCount = udc.countDocs()
	return
}
|
||||
|
||||
func (udc *UpsideDownCouch) countDocs() uint64 {
|
||||
ro := defaultReadOptions()
|
||||
ro.SetFillCache(false) // dont fill the cache with this
|
||||
it := udc.db.NewIterator(ro)
|
||||
defer it.Close()
|
||||
|
||||
// begining of back index
|
||||
it.Seek([]byte{'b'})
|
||||
|
||||
var rv uint64 = 0
|
||||
for it = it; it.Valid(); it.Next() {
|
||||
if !bytes.HasPrefix(it.Key(), []byte{'b'}) {
|
||||
break
|
||||
}
|
||||
rv += 1
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) rowCount() uint64 {
|
||||
ro := defaultReadOptions()
|
||||
ro.SetFillCache(false) // dont fill the cache with this
|
||||
it := udc.db.NewIterator(ro)
|
||||
defer it.Close()
|
||||
|
||||
it.Seek([]byte{0})
|
||||
|
||||
var rv uint64 = 0
|
||||
for it = it; it.Valid(); it.Next() {
|
||||
rv += 1
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Close closes the underlying database handle; the index must not be used
// afterwards.
func (udc *UpsideDownCouch) Close() {
	udc.db.Close()
}
|
||||
|
||||
func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
||||
// first we lookup the backindex row for the doc id if it exists
|
||||
// lookup the back index row
|
||||
backIndexRow, err := udc.backIndexRowForDoc(doc.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var isAdd = true
|
||||
// a map for each field, map key is term (string) bool true for existence
|
||||
// FIMXE hard-coded to max of 256 fields
|
||||
existingTermFieldMaps := make([]map[string]bool, 256)
|
||||
if backIndexRow != nil {
|
||||
isAdd = false
|
||||
for _, entry := range backIndexRow.entries {
|
||||
existingTermFieldMap := existingTermFieldMaps[entry.field]
|
||||
if existingTermFieldMap == nil {
|
||||
existingTermFieldMap = make(map[string]bool, 0)
|
||||
existingTermFieldMaps[entry.field] = existingTermFieldMap
|
||||
}
|
||||
existingTermFieldMap[string(entry.term)] = true
|
||||
}
|
||||
}
|
||||
|
||||
// prepare a list of rows
|
||||
updateRows := make([]UpsideDownCouchRow, 0)
|
||||
addRows := make([]UpsideDownCouchRow, 0)
|
||||
|
||||
// track our back index entries
|
||||
backIndexEntries := make([]*BackIndexEntry, 0)
|
||||
|
||||
for _, field := range doc.Fields {
|
||||
fieldIndex, fieldExists := udc.fieldIndexes[field.Name]
|
||||
if !fieldExists {
|
||||
// assign next field id
|
||||
fieldIndex = uint16(udc.lastFieldIndex + 1)
|
||||
udc.fieldIndexes[field.Name] = fieldIndex
|
||||
// ensure this batch adds a row for this field
|
||||
row := NewFieldRow(uint16(fieldIndex), field.Name)
|
||||
updateRows = append(updateRows, row)
|
||||
udc.lastFieldIndex = int(fieldIndex)
|
||||
}
|
||||
|
||||
existingTermFieldMap := existingTermFieldMaps[fieldIndex]
|
||||
|
||||
analyzer := field.Analyzer
|
||||
tokens := analyzer.Analyze(field.Value)
|
||||
fieldLength := len(tokens) // number of tokens in this doc field
|
||||
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
|
||||
tokenFreqs := analysis.TokenFrequency(tokens)
|
||||
for _, tf := range tokenFreqs {
|
||||
var termFreqRow *TermFrequencyRow
|
||||
if document.IncludeTermVectors(field.IndexingOptions) {
|
||||
tv := termVectorsFromTokenFreq(uint16(fieldIndex), tf)
|
||||
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, uint16(fieldIndex), doc.ID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
|
||||
} else {
|
||||
termFreqRow = NewTermFrequencyRow(tf.Term, uint16(fieldIndex), doc.ID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
|
||||
}
|
||||
|
||||
// record the back index entry
|
||||
backIndexEntry := BackIndexEntry{tf.Term, uint16(fieldIndex)}
|
||||
backIndexEntries = append(backIndexEntries, &backIndexEntry)
|
||||
|
||||
// remove the entry from the map of existing term fields if it exists
|
||||
if existingTermFieldMap != nil {
|
||||
termString := string(tf.Term)
|
||||
_, ok := existingTermFieldMap[termString]
|
||||
if ok {
|
||||
// this is an update
|
||||
updateRows = append(updateRows, termFreqRow)
|
||||
// this term existed last time, delete it from that map
|
||||
delete(existingTermFieldMap, termString)
|
||||
} else {
|
||||
// this is an add
|
||||
addRows = append(addRows, termFreqRow)
|
||||
}
|
||||
} else {
|
||||
// this is an add
|
||||
addRows = append(addRows, termFreqRow)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// build the back index row
|
||||
backIndexRow = NewBackIndexRow(doc.ID, backIndexEntries)
|
||||
updateRows = append(updateRows, backIndexRow)
|
||||
|
||||
// any of the existing rows that weren't updated need to be deleted
|
||||
deleteRows := make([]UpsideDownCouchRow, 0)
|
||||
for fieldIndex, existingTermFieldMap := range existingTermFieldMaps {
|
||||
if existingTermFieldMap != nil {
|
||||
for termString, _ := range existingTermFieldMap {
|
||||
termFreqRow := NewTermFrequencyRow([]byte(termString), uint16(fieldIndex), doc.ID, 0, 0)
|
||||
deleteRows = append(deleteRows, termFreqRow)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err = udc.batchRows(addRows, updateRows, deleteRows)
|
||||
if err == nil && isAdd {
|
||||
udc.docCount += 1
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Delete(id string) error {
|
||||
// lookup the back index row
|
||||
backIndexRow, err := udc.backIndexRowForDoc(id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if backIndexRow == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// prepare a list of rows to delete
|
||||
rows := make([]UpsideDownCouchRow, 0)
|
||||
for _, backIndexEntry := range backIndexRow.entries {
|
||||
tfr := NewTermFrequencyRow(backIndexEntry.term, backIndexEntry.field, id, 0, 0)
|
||||
rows = append(rows, tfr)
|
||||
}
|
||||
|
||||
// also delete the back entry itself
|
||||
rows = append(rows, backIndexRow)
|
||||
|
||||
err = udc.batchRows(nil, nil, rows)
|
||||
if err == nil {
|
||||
udc.docCount -= 1
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) backIndexRowForDoc(docId string) (*BackIndexRow, error) {
|
||||
ro := defaultReadOptions()
|
||||
|
||||
// use a temporary row structure to build key
|
||||
tempRow := &BackIndexRow{
|
||||
doc: []byte(docId),
|
||||
}
|
||||
key := tempRow.Key()
|
||||
value, err := udc.db.Get(ro, key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if value == nil {
|
||||
return nil, nil
|
||||
}
|
||||
backIndexRow := ParseFromKeyValue(key, value).(*BackIndexRow)
|
||||
return backIndexRow, nil
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Dump() {
|
||||
ro := defaultReadOptions()
|
||||
ro.SetFillCache(false)
|
||||
it := udc.db.NewIterator(ro)
|
||||
defer it.Close()
|
||||
it.SeekToFirst()
|
||||
for it = it; it.Valid(); it.Next() {
|
||||
//fmt.Printf("Key: `%v` Value: `%v`\n", string(it.Key()), string(it.Value()))
|
||||
row := ParseFromKeyValue(it.Key(), it.Value())
|
||||
if row != nil {
|
||||
fmt.Printf("%v\n", row)
|
||||
fmt.Printf("Key: % -100x\nValue: % -100x\n\n", it.Key(), it.Value())
|
||||
}
|
||||
}
|
||||
err := it.GetError()
|
||||
if err != nil {
|
||||
fmt.Printf("Error reading iterator: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (index.TermFieldReader, error) {
|
||||
fieldIndex, fieldExists := udc.fieldIndexes[fieldName]
|
||||
if fieldExists {
|
||||
return newUpsideDownCouchTermFieldReader(udc, term, uint16(fieldIndex))
|
||||
}
|
||||
log.Printf("fields: %v", udc.fieldIndexes)
|
||||
return nil, fmt.Errorf("No field named `%s` in the schema", fieldName)
|
||||
}
|
||||
|
||||
func defaultWriteOptions() *levigo.WriteOptions {
|
||||
wo := levigo.NewWriteOptions()
|
||||
// request fsync on write for safety
|
||||
wo.SetSync(true)
|
||||
return wo
|
||||
}
|
||||
|
||||
func defaultReadOptions() *levigo.ReadOptions {
|
||||
ro := levigo.NewReadOptions()
|
||||
return ro
|
||||
}
|
||||
|
||||
func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
|
||||
return len(tf.Locations)
|
||||
}
|
||||
|
||||
func termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) []*TermVector {
|
||||
rv := make([]*TermVector, len(tf.Locations))
|
||||
|
||||
for i, l := range tf.Locations {
|
||||
tv := TermVector{
|
||||
field: field,
|
||||
pos: uint64(l.Position),
|
||||
start: uint64(l.Start),
|
||||
end: uint64(l.End),
|
||||
}
|
||||
rv[i] = &tv
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
|
||||
rv := make([]*index.TermFieldVector, len(in))
|
||||
|
||||
for i, tv := range in {
|
||||
fieldName := udc.fieldIndexToName(tv.field)
|
||||
tfv := index.TermFieldVector{
|
||||
Field: fieldName,
|
||||
Pos: tv.pos,
|
||||
Start: tv.start,
|
||||
End: tv.end,
|
||||
}
|
||||
rv[i] = &tfv
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) fieldIndexToName(i uint16) string {
|
||||
for fieldName, fieldIndex := range udc.fieldIndexes {
|
||||
if i == fieldIndex {
|
||||
return fieldName
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
|
@ -0,0 +1,221 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package upside_down
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
_ "github.com/couchbaselabs/bleve/analysis/analyzers/standard_analyzer"
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
|
||||
func TestIndexOpenReopen(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
idx := NewUpsideDownCouch("test")
|
||||
err := idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
|
||||
var expectedCount uint64 = 0
|
||||
docCount := idx.DocCount()
|
||||
if docCount != expectedCount {
|
||||
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
|
||||
}
|
||||
|
||||
// opening database should have inserted version
|
||||
expectedLength := uint64(1)
|
||||
rowCount := idx.rowCount()
|
||||
if rowCount != expectedLength {
|
||||
t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
|
||||
}
|
||||
|
||||
// now close it
|
||||
idx.Close()
|
||||
|
||||
idx = NewUpsideDownCouch("test")
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
|
||||
// now close it
|
||||
idx.Close()
|
||||
}
|
||||
|
||||
func TestIndexInsert(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
idx := NewUpsideDownCouch("test")
|
||||
|
||||
err := idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
var expectedCount uint64 = 0
|
||||
docCount := idx.DocCount()
|
||||
if docCount != expectedCount {
|
||||
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
|
||||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
docCount = idx.DocCount()
|
||||
if docCount != expectedCount {
|
||||
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
|
||||
}
|
||||
|
||||
// should have 4 rows (1 for version, 1 for schema field, and 1 for single term, and 1 for the term count, and 1 for the back index entry)
|
||||
expectedLength := uint64(1 + 1 + 1 + 1 + 1)
|
||||
rowCount := idx.rowCount()
|
||||
if rowCount != expectedLength {
|
||||
t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexInsertThenDelete(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
idx := NewUpsideDownCouch("test")
|
||||
|
||||
err := idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
var expectedCount uint64 = 0
|
||||
docCount := idx.DocCount()
|
||||
if docCount != expectedCount {
|
||||
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
|
||||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
docCount = idx.DocCount()
|
||||
if docCount != expectedCount {
|
||||
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
|
||||
}
|
||||
|
||||
err = idx.Delete("1")
|
||||
if err != nil {
|
||||
t.Errorf("Error deleting entry from index: %v", err)
|
||||
}
|
||||
expectedCount -= 1
|
||||
|
||||
docCount = idx.DocCount()
|
||||
if docCount != expectedCount {
|
||||
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
|
||||
}
|
||||
|
||||
// should have 2 row (1 for version, 1 for schema field)
|
||||
expectedLength := uint64(1 + 1)
|
||||
rowCount := idx.rowCount()
|
||||
if rowCount != expectedLength {
|
||||
t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexInsertThenUpdate(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
idx := NewUpsideDownCouch("test")
|
||||
|
||||
err := idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
// this update should overwrite one term, and introduce one new one
|
||||
doc = document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test fail")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error deleting entry from index: %v", err)
|
||||
}
|
||||
|
||||
// should have 2 row (1 for version, 1 for schema field, and 2 for the two term, and 2 for the term counts, and 1 for the back index entry)
|
||||
expectedLength := uint64(1 + 1 + 2 + 2 + 1)
|
||||
rowCount := idx.rowCount()
|
||||
if rowCount != expectedLength {
|
||||
t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
|
||||
}
|
||||
|
||||
// now do another update that should remove one of term
|
||||
doc = document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("fail")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error deleting entry from index: %v", err)
|
||||
}
|
||||
|
||||
// should have 2 row (1 for version, 1 for schema field, and 1 for the remaining term, and 1 for the term count, and 1 for the back index entry)
|
||||
expectedLength = uint64(1 + 1 + 1 + 1 + 1)
|
||||
rowCount = idx.rowCount()
|
||||
if rowCount != expectedLength {
|
||||
t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexInsertMultiple(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
idx := NewUpsideDownCouch("test")
|
||||
|
||||
err := idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
// should have 4 rows (1 for version, 1 for schema field, and 2 for single term, and 1 for the term count, and 2 for the back index entries)
|
||||
expectedLength := uint64(1 + 1 + 2 + 1 + 2)
|
||||
rowCount := idx.rowCount()
|
||||
if rowCount != expectedLength {
|
||||
t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// Collector consumes every match a Searcher produces and accumulates
// the ones to be returned, along with summary statistics of the pass.
type Collector interface {
	// Collect drains the searcher, observing every match it yields.
	Collect(searcher Searcher) error
	// Results returns the retained matches.
	Results() DocumentMatchCollection
	// Total reports how many matches were observed in all.
	Total() uint64
	// MaxScore reports the highest score observed.
	MaxScore() float64
	// Took reports how long the collection pass took.
	Took() time.Duration
}
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TopScoreCollector retains the k highest-scoring matches seen so far.
// The matches are held in a linked list ordered ascending by score
// (lowest at the front) — see collectSingle.
type TopScoreCollector struct {
	k        int           // maximum number of results to retain
	results  *list.List    // retained matches, ascending by score
	took     time.Duration // duration of the last Collect pass
	maxScore float64       // highest score observed so far
	total    uint64        // total number of matches observed
}
|
||||
|
||||
func NewTopScorerCollector(k int) *TopScoreCollector {
|
||||
return &TopScoreCollector{
|
||||
k: k,
|
||||
results: list.New(),
|
||||
}
|
||||
}
|
||||
|
||||
// Total reports how many matches were observed during Collect.
func (tksc *TopScoreCollector) Total() uint64 {
	return tksc.total
}
|
||||
|
||||
// MaxScore reports the highest score observed during Collect.
func (tksc *TopScoreCollector) MaxScore() float64 {
	return tksc.maxScore
}
|
||||
|
||||
// Took reports how long the last Collect pass took.
func (tksc *TopScoreCollector) Took() time.Duration {
	return tksc.took
}
|
||||
|
||||
func (tksc *TopScoreCollector) Collect(searcher Searcher) error {
|
||||
startTime := time.Now()
|
||||
next, err := searcher.Next()
|
||||
for err == nil && next != nil {
|
||||
tksc.collectSingle(next)
|
||||
next, err = searcher.Next()
|
||||
}
|
||||
// compute search duration
|
||||
tksc.took = time.Since(startTime)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// collectSingle folds one match into the collector: it bumps the
// running total and max score, inserts the match into the
// ascending-by-score list, and trims the list back to k entries by
// dropping the lowest-scoring (front) element.
func (tksc *TopScoreCollector) collectSingle(dm *DocumentMatch) {
	// increment total hits
	tksc.total += 1

	// update max score
	if dm.Score > tksc.maxScore {
		tksc.maxScore = dm.Score
	}

	// walk the ascending list; insert before the first entry that
	// outscores this match to keep the ordering invariant
	for e := tksc.results.Front(); e != nil; e = e.Next() {
		curr := e.Value.(*DocumentMatch)
		if dm.Score < curr.Score {

			tksc.results.InsertBefore(dm, e)
			// if we just made the list too long
			if tksc.results.Len() > tksc.k {
				// remove the head (the current lowest score)
				tksc.results.Remove(tksc.results.Front())
			}
			return
		}
	}
	// if we got to the end, we still have to add it (it is the highest
	// score so far, or the list was empty)
	tksc.results.PushBack(dm)
	if tksc.results.Len() > tksc.k {
		// remove the head (the current lowest score)
		tksc.results.Remove(tksc.results.Front())
	}
}
|
||||
|
||||
func (tksc *TopScoreCollector) Results() DocumentMatchCollection {
|
||||
rv := make(DocumentMatchCollection, tksc.results.Len())
|
||||
i := 0
|
||||
for e := tksc.results.Back(); e != nil; e = e.Prev() {
|
||||
rv[i] = e.Value.(*DocumentMatch)
|
||||
i++
|
||||
}
|
||||
return rv
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTop10Scores(t *testing.T) {
|
||||
|
||||
// a stub search with more than 10 matches
|
||||
// the top-10 scores are > 10
|
||||
// everything else is less than 10
|
||||
searcher := &stubSearcher{
|
||||
matches: DocumentMatchCollection{
|
||||
&DocumentMatch{
|
||||
ID: "a",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "b",
|
||||
Score: 9,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "c",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "d",
|
||||
Score: 9,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "e",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "f",
|
||||
Score: 9,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "g",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "h",
|
||||
Score: 9,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "i",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "j",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "k",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "l",
|
||||
Score: 99,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "m",
|
||||
Score: 11,
|
||||
},
|
||||
&DocumentMatch{
|
||||
ID: "n",
|
||||
Score: 11,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
collector := NewTopScorerCollector(10)
|
||||
collector.Collect(searcher)
|
||||
results := collector.Results()
|
||||
|
||||
if len(results) != 10 {
|
||||
t.Fatalf("expected 10 results, got %d", len(results))
|
||||
}
|
||||
|
||||
if results[0].ID != "l" {
|
||||
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
|
||||
}
|
||||
|
||||
if results[0].Score != 99.0 {
|
||||
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
|
||||
}
|
||||
|
||||
minScore := 1000.0
|
||||
for _, result := range results {
|
||||
if result.Score < minScore {
|
||||
minScore = result.Score
|
||||
}
|
||||
}
|
||||
|
||||
if minScore < 10 {
|
||||
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Explanation records how a score was computed: the value produced at
// this step, a human-readable description, and the child steps (if
// any) the value was derived from.
type Explanation struct {
	Value    float64        `json:"value"`
	Message  string         `json:"message"`
	Children []*Explanation `json:"children,omitempty"`
}

// String renders the explanation tree as indented JSON.
func (expl *Explanation) String() string {
	js, err := json.MarshalIndent(expl, "", " ")
	if err != nil {
		// fixed typo in the message: was "explation"
		return fmt.Sprintf("error serializing explanation to json: %v", err)
	}
	return string(js)
}
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// Query describes a search against an index. Implementations know how
// to construct a Searcher producing their matches.
type Query interface {
	// Boost returns the score multiplier for this query.
	Boost() float64
	// Searcher builds a Searcher running this query against index.
	Searcher(index index.Index) (Searcher, error)
	// Validate reports whether the query is well formed.
	Validate() error
}
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// TermQuery matches documents containing an exact term in a field.
type TermQuery struct {
	Term     string  `json:"term"`              // the exact term to match
	Field    string  `json:"field,omitempty"`   // name of the field to search
	BoostVal float64 `json:"boost,omitempty"`   // score multiplier
	Explain  bool    `json:"explain,omitempty"` // record score explanations
}
|
||||
|
||||
// Boost returns the score multiplier configured for this query.
func (q *TermQuery) Boost() float64 {
	return q.BoostVal
}
|
||||
|
||||
// Searcher builds a term searcher running this query against index.
func (q *TermQuery) Searcher(index index.Index) (Searcher, error) {
	return NewTermSearcher(index, q)
}
|
||||
|
||||
// Validate reports whether the query is well formed; a term query is
// always considered valid.
func (q *TermQuery) Validate() error {
	return nil
}
|
|
@ -0,0 +1,172 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// MAX_SCORE_CACHE bounds the term frequencies for which computed
// scores (and explanations) are memoized by the scorer.
const MAX_SCORE_CACHE = 64

// TermQueryScorer computes tf-idf scores for matches of a single term
// query, optionally building score explanation trees.
type TermQueryScorer struct {
	query                  *TermQuery
	docTerm                uint64  // number of documents containing the term
	docTotal               uint64  // total number of documents in the index
	idf                    float64 // inverse document frequency of the term
	explain                bool    // whether to build explanations
	idfExplanation         *Explanation
	scoreCache             map[int]float64      // memoized scores, keyed by term frequency
	scoreExplanationCache  map[int]*Explanation // memoized explanations, keyed by term frequency
	queryNorm              float64
	queryWeight            float64 // boost * idf * queryNorm; 1.0 until SetQueryNorm
	queryWeightExplanation *Explanation
}
|
||||
|
||||
func NewTermQueryScorer(query *TermQuery, docTotal, docTerm uint64, explain bool) *TermQueryScorer {
|
||||
rv := TermQueryScorer{
|
||||
query: query,
|
||||
docTerm: docTerm,
|
||||
docTotal: docTotal,
|
||||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
|
||||
explain: explain,
|
||||
scoreCache: make(map[int]float64, MAX_SCORE_CACHE),
|
||||
scoreExplanationCache: make(map[int]*Explanation, MAX_SCORE_CACHE),
|
||||
queryWeight: 1.0,
|
||||
}
|
||||
|
||||
if explain {
|
||||
rv.idfExplanation = &Explanation{
|
||||
Value: rv.idf,
|
||||
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
|
||||
}
|
||||
}
|
||||
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (s *TermQueryScorer) Weight() float64 {
|
||||
sum := s.query.Boost() * s.idf
|
||||
return sum * sum
|
||||
}
|
||||
|
||||
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
|
||||
s.queryNorm = qnorm
|
||||
|
||||
// update the query weight
|
||||
s.queryWeight = s.query.Boost() * s.idf * s.queryNorm
|
||||
|
||||
if s.explain {
|
||||
childrenExplanations := make([]*Explanation, 3)
|
||||
childrenExplanations[0] = &Explanation{
|
||||
Value: s.query.Boost(),
|
||||
Message: "boost",
|
||||
}
|
||||
childrenExplanations[1] = s.idfExplanation
|
||||
childrenExplanations[2] = &Explanation{
|
||||
Value: s.queryNorm,
|
||||
Message: "queryNorm",
|
||||
}
|
||||
s.queryWeightExplanation = &Explanation{
|
||||
Value: s.queryWeight,
|
||||
Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.query.Field, string(s.query.Term), s.query.Boost()),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Score computes the tf-idf score of a single term match and wraps it
// in a DocumentMatch. Scores (and, in explain mode, explanation
// trees) for term frequencies below MAX_SCORE_CACHE are memoized.
// Stored term vectors on the match, if any, are converted into
// location information on the result.
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *DocumentMatch {

	var scoreExplanation *Explanation
	// see if the score was cached
	score, ok := s.scoreCache[int(termMatch.Freq)]
	if !ok {
		// need to compute score; tf = sqrt(freq), from the cache when small
		var tf float64
		if termMatch.Freq < MAX_SQRT_CACHE {
			tf = SQRT_CACHE[int(termMatch.Freq)]
		} else {
			tf = math.Sqrt(float64(termMatch.Freq))
		}

		// raw field weight: tf * fieldNorm * idf
		score = tf * termMatch.Norm * s.idf

		if s.explain {
			childrenExplanations := make([]*Explanation, 3)
			childrenExplanations[0] = &Explanation{
				Value:   tf,
				Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.query.Field, string(s.query.Term), termMatch.Freq),
			}
			childrenExplanations[1] = &Explanation{
				Value:   termMatch.Norm,
				Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.query.Field, termMatch.ID),
			}
			childrenExplanations[2] = s.idfExplanation
			scoreExplanation = &Explanation{
				Value:    score,
				Message:  fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.query.Field, string(s.query.Term), termMatch.ID),
				Children: childrenExplanations,
			}
		}

		// if the query weight isn't 1, multiply
		if s.queryWeight != 1.0 {
			score = score * s.queryWeight
			if s.explain {
				// wrap the field weight explanation together with the
				// query weight explanation built in SetQueryNorm
				childExplanations := make([]*Explanation, 2)
				childExplanations[0] = s.queryWeightExplanation
				childExplanations[1] = scoreExplanation
				scoreExplanation = &Explanation{
					Value:    score,
					Message:  fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.query.Field, string(s.query.Term), s.query.Boost(), termMatch.ID),
					Children: childExplanations,
				}
			}
		}

		// memoize the result for small term frequencies
		if termMatch.Freq < MAX_SCORE_CACHE {
			s.scoreCache[int(termMatch.Freq)] = score
			if s.explain {
				s.scoreExplanationCache[int(termMatch.Freq)] = scoreExplanation
			}
		}
	}

	// cache hit: reuse the memoized explanation
	if ok && s.explain {
		scoreExplanation = s.scoreExplanationCache[int(termMatch.Freq)]
	}

	rv := DocumentMatch{
		ID:    termMatch.ID,
		Score: score,
	}
	if s.explain {
		rv.Expl = scoreExplanation
	}

	// convert stored term vectors into per-field location information
	if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
		locations := make(Locations, len(termMatch.Vectors))
		for i, v := range termMatch.Vectors {
			loc := Location{
				Pos:   float64(v.Pos),
				Start: float64(v.Start),
				End:   float64(v.End),
			}
			locations[i] = &loc
		}
		tlm := make(TermLocationMap)
		tlm[s.query.Term] = locations
		rv.Locations = make(FieldTermLocationMap)
		rv.Locations[s.query.Field] = tlm
	}

	return &rv
}
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
// Location identifies one occurrence of a term within a field: its
// token position and its start/end offsets.
type Location struct {
	Pos   float64 `json:"pos"`
	Start float64 `json:"start"`
	End   float64 `json:"end"`
}

// Locations is a list of term occurrences.
type Locations []*Location

// TermLocationMap maps a term to the places it occurred.
type TermLocationMap map[string]Locations

// FieldTermLocationMap maps a field name to its term locations.
type FieldTermLocationMap map[string]TermLocationMap

// DocumentMatch is a single search hit.
type DocumentMatch struct {
	ID        string               `json:"id"`
	Score     float64              `json:"score"`
	Expl      *Explanation         `json:"explanation,omitempty"` // set only in explain mode
	Locations FieldTermLocationMap `json:"locations,omitempty"`   // set only when term vectors were stored
}

// DocumentMatchCollection is an ordered set of search hits.
type DocumentMatchCollection []*DocumentMatch

// Searcher produces the matches for a query.
type Searcher interface {
	// Next returns the next match, or nil when exhausted.
	Next() (*DocumentMatch, error)
	// Advance skips ahead to the match with the given ID.
	Advance(ID string) (*DocumentMatch, error)
	// Close releases any resources held by the searcher.
	Close()
	// Weight returns the raw query weight.
	Weight() float64
	// SetQueryNorm supplies the normalization factor used in scoring.
	SetQueryNorm(float64)
	// Count reports the number of candidate matches.
	Count() uint64
}
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
)
|
||||
|
||||
// TermSearcher produces scored matches for a single term query by
// walking the term's postings via a TermFieldReader.
type TermSearcher struct {
	index  index.Index
	query  *TermQuery
	reader index.TermFieldReader // postings for the query's term/field
	scorer *TermQueryScorer
}
|
||||
|
||||
func NewTermSearcher(index index.Index, query *TermQuery) (*TermSearcher, error) {
|
||||
reader, err := index.TermFieldReader([]byte(query.Term), query.Field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
scorer := NewTermQueryScorer(query, index.DocCount(), reader.Count(), query.Explain)
|
||||
return &TermSearcher{
|
||||
index: index,
|
||||
query: query,
|
||||
reader: reader,
|
||||
scorer: scorer,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Count returns the number of documents containing the term, as
// reported by the underlying term field reader.
func (s *TermSearcher) Count() uint64 {
	return s.reader.Count()
}

// Weight reports this searcher's raw query weight, delegated to the scorer.
func (s *TermSearcher) Weight() float64 {
	return s.scorer.Weight()
}

// SetQueryNorm passes the query normalization factor through to the scorer.
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
	s.scorer.SetQueryNorm(qnorm)
}
|
||||
|
||||
func (s *TermSearcher) Next() (*DocumentMatch, error) {
|
||||
termMatch, err := s.reader.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if termMatch == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// score match
|
||||
docMatch := s.scorer.Score(termMatch)
|
||||
// return doc match
|
||||
return docMatch, nil
|
||||
|
||||
}
|
||||
|
||||
func (s *TermSearcher) Advance(ID string) (*DocumentMatch, error) {
|
||||
termMatch, err := s.reader.Advance(ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if termMatch == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// score match
|
||||
docMatch := s.scorer.Score(termMatch)
|
||||
|
||||
// return doc match
|
||||
return docMatch, nil
|
||||
}
|
||||
|
||||
// Close releases the underlying term field reader.
func (s *TermSearcher) Close() {
	s.reader.Close()
}
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
// stubSearcher is a canned Searcher for tests: it serves a fixed
// in-memory collection of matches, assumed to be in ascending ID
// order (Advance relies on this).
type stubSearcher struct {
	index   int                     // position of the next match to serve
	matches DocumentMatchCollection // the canned matches
}
|
||||
|
||||
func (ss *stubSearcher) Next() (*DocumentMatch, error) {
|
||||
if ss.index < len(ss.matches) {
|
||||
rv := ss.matches[ss.index]
|
||||
ss.index++
|
||||
return rv, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (ss *stubSearcher) Advance(ID string) (*DocumentMatch, error) {
|
||||
|
||||
for ss.index < len(ss.matches) && ss.matches[ss.index].ID < ID {
|
||||
ss.index++
|
||||
}
|
||||
if ss.index < len(ss.matches) {
|
||||
rv := ss.matches[ss.index]
|
||||
ss.index++
|
||||
return rv, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Close is a no-op; the stub holds no resources.
func (ss *stubSearcher) Close() {
}

// Weight always reports zero for the stub.
func (ss *stubSearcher) Weight() float64 {
	return 0.0
}

// SetQueryNorm ignores the normalization factor.
func (ss *stubSearcher) SetQueryNorm(float64) {
}

// Count returns the total number of canned matches.
func (ss *stubSearcher) Count() uint64 {
	return uint64(len(ss.matches))
}
|
|
@ -0,0 +1,24 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package search
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
var SQRT_CACHE map[int]float64
|
||||
|
||||
const MAX_SQRT_CACHE = 64
|
||||
|
||||
func init() {
|
||||
SQRT_CACHE = make(map[int]float64, MAX_SQRT_CACHE)
|
||||
for i := 0; i < MAX_SQRT_CACHE; i++ {
|
||||
SQRT_CACHE[i] = math.Sqrt(float64(i))
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package shredder
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
|
||||
// A simple automatic JSON shredder which parses the whole document body.
|
||||
// Any strings found in the JSON are added as text fields
|
||||
|
||||
// AutoJsonShredder is stateless; it carries no configuration.
type AutoJsonShredder struct {
}

// NewAutoJsonShredder returns a new automatic JSON shredder.
func NewAutoJsonShredder() *AutoJsonShredder {
	return &AutoJsonShredder{}
}
|
||||
|
||||
func (s *AutoJsonShredder) Shred(id string, body []byte) (*document.Document, error) {
|
||||
rv := document.NewDocument(id)
|
||||
|
||||
var section interface{}
|
||||
err := json.Unmarshal(body, §ion)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
shredSection(rv, section, "")
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func shredSection(doc *document.Document, section interface{}, parent string) {
|
||||
nextParent := parent
|
||||
if nextParent != "" {
|
||||
nextParent = nextParent + "."
|
||||
}
|
||||
switch section := section.(type) {
|
||||
|
||||
case string:
|
||||
f := document.NewTextField(parent, []byte(section))
|
||||
doc.AddField(f)
|
||||
|
||||
case []interface{}:
|
||||
for i, sub := range section {
|
||||
shredSection(doc, sub, nextParent+strconv.Itoa(i))
|
||||
}
|
||||
|
||||
case map[string]interface{}:
|
||||
for k, sub := range section {
|
||||
shredSection(doc, sub, nextParent+k)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package shredder
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/dustin/go-jsonpointer"
|
||||
)
|
||||
|
||||
// A JSON shredder which extracts only the values at pre-registered
// JSON pointer paths from the document body; each extracted value is
// added as a text field.
|
||||
|
||||
// JsonPointerShredder extracts only the registered JSON pointer paths
// from a document body, indexing each as a named field.
type JsonPointerShredder struct {
	fieldPaths map[string]string // field name -> JSON pointer path
	paths      []string          // all registered paths, for batch lookup in Shred
}
|
||||
|
||||
// NewJsonPointerShredder returns an empty shredder; register fields
// with AddTextField/AddField before calling Shred.
func NewJsonPointerShredder() *JsonPointerShredder {
	return &JsonPointerShredder{
		fieldPaths: make(map[string]string),
		paths:      make([]string, 0),
	}
}
|
||||
|
||||
// AddTextField registers a text field named name whose value will be
// extracted from the JSON pointer path.
func (s *JsonPointerShredder) AddTextField(name string, path string) {
	s.fieldPaths[name] = path
	s.paths = append(s.paths, path)
}

// AddField registers a field named name whose value will be extracted
// from the JSON pointer path.
// NOTE(review): currently identical to AddTextField — Shred treats
// every registered field as text; presumably intended to diverge later.
func (s *JsonPointerShredder) AddField(name string, path string) {
	s.fieldPaths[name] = path
	s.paths = append(s.paths, path)
}
|
||||
|
||||
func (s *JsonPointerShredder) Shred(id string, body []byte) (*document.Document, error) {
|
||||
rv := document.NewDocument(id)
|
||||
|
||||
values, err := jsonpointer.FindMany(body, s.paths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for fieldName, fieldPath := range s.fieldPaths {
|
||||
field := document.NewTextField(fieldName, values[fieldPath])
|
||||
rv.AddField(field)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package shredder
|
||||
|
||||
import (
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
)
|
||||
|
||||
type Shredder interface {
|
||||
Shred(id string, body []byte) (document.Document, error)
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
)
|
||||
|
||||
// indexDir is the filesystem path of the index to dump.
var indexDir = flag.String("indexDir", "index", "index directory")

// main opens the upside_down index at -indexDir, dumps its contents,
// and exits; it logs fatally if the index cannot be opened.
func main() {
	flag.Parse()

	index := upside_down.NewUpsideDownCouch(*indexDir)
	err := index.Open()
	if err != nil {
		log.Fatal(err)
	}
	defer index.Close()

	index.Dump()
}
|
Loading…
Reference in New Issue