From 95ae51f59ddccb573eb4859442afea0cf961263d Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 19 Nov 2014 19:46:24 +0100 Subject: [PATCH] adding bleve_bulkindex utility Usage: bleve_bulkindex -index path file.ldj [file2.ldj, ...] where file.ldj is a line-delimited JSON, each representing a document. docIDs are autogenerated. --- utils/bleve_bulkindex/main.go | 91 +++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 utils/bleve_bulkindex/main.go diff --git a/utils/bleve_bulkindex/main.go b/utils/bleve_bulkindex/main.go new file mode 100644 index 00000000..8bdef1f5 --- /dev/null +++ b/utils/bleve_bulkindex/main.go @@ -0,0 +1,91 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package main + +import ( + "bufio" + "flag" + "log" + "math/rand" + "os" + + "github.com/blevesearch/bleve" +) + +var indexPath = flag.String("index", "", "index path") +var batchSize = flag.Int("size", 1000, "size of a single batch to index") + +func main() { + + flag.Parse() + + if *indexPath == "" { + log.Fatal("must specify index path") + } + + // open the index + index, err := bleve.Open(*indexPath) + if err != nil { + log.Fatal(err) + } + defer index.Close() + + if flag.NArg() < 1 { + log.Fatal("must specify at least one path to index") + } + + i := 0 + batch := bleve.NewBatch() + + for _, file := range flag.Args() { + + file, err := os.Open(file) + defer file.Close() + if err != nil { + log.Fatal(err) + } + + log.Printf("Indexing: %s\n", file.Name()) + r := bufio.NewReader(file) + + for { + if i%*batchSize == 0 { + log.Printf("Indexing batch (%d docs)...\n", i) + err := index.Batch(batch) + if err != nil { + log.Fatal(err) + } + batch = bleve.NewBatch() + } + + b, _ := r.ReadBytes('\n') + if len(b) == 0 { + break + } + docID := randomString(5) + batch.Index(docID, b) + i++ + } + err = index.Batch(batch) + if err != nil { + log.Fatal(err) + } + } +} + +var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + +func randomString(n int) string { + b := make([]rune, n) + for i := range b { + b[i] = letters[rand.Intn(len(letters))] + } + return string(b) +}