0
0
Fork 0
bleve/cmd/bleve/cmd/bulk.go

118 lines
2.6 KiB
Go

// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"os"

	"github.com/spf13/cobra"
)
// batchSize is the number of documents the bulk command adds to a batch
// before executing it. init binds it to the --batch/-b flag (default 1000).
var batchSize int
// bulkCmd represents the bulk command.
//
// args[0] is the index path and every following argument is a data file.
// Each file is read line by line; every non-empty line becomes one document
// (decoded as JSON when parseJSON is set, indexed as the raw line bytes
// otherwise) and is added to a batch on idx under a random 5-letter ID.
// A batch is executed once it holds batchSize documents and again at the end
// of every file so that no documents are left pending.
//
// The command returns an error when fewer than two arguments are given, when
// batchSize is not positive, or when opening, reading, parsing, indexing or
// closing fails.
var bulkCmd = &cobra.Command{
	Use:   "bulk [index path] [data paths ...]",
	Short: "bulk loads from newline delimited JSON files",
	Long:  `The bulk command will perform batch loading of documents in one or more newline delimited JSON files.`,
	Annotations: map[string]string{
		canMutateBleveIndex: "true",
	},
	RunE: func(cmd *cobra.Command, args []string) error {
		if len(args) < 2 {
			return fmt.Errorf("must specify at least one path")
		}
		// A batch size of zero used to panic in the modulo that decided when
		// to flush; reject non-positive sizes up front instead.
		if batchSize < 1 {
			return fmt.Errorf("batch size must be at least 1, got %d", batchSize)
		}

		total := 0   // documents indexed so far, across all files
		pending := 0 // documents added to batch since it was last executed
		batch := idx.NewBatch()

		// flush executes the current batch (if it holds anything) and starts
		// a fresh one, so already-executed documents are never sent twice.
		flush := func() error {
			if pending == 0 {
				return nil
			}
			if err := idx.Batch(batch); err != nil {
				return err
			}
			batch = idx.NewBatch()
			pending = 0
			return nil
		}

		// loadFile indexes every line of the file at path. It is a closure
		// so the deferred Close runs at the end of each file, including on
		// the error paths.
		loadFile := func(path string) (err error) {
			file, err := os.Open(path)
			if err != nil {
				return err
			}
			defer func() {
				// Report a Close failure only if nothing else went wrong.
				if cerr := file.Close(); cerr != nil && err == nil {
					err = cerr
				}
			}()

			fmt.Printf("Indexing: %s\n", file.Name())
			r := bufio.NewReader(file)
			for {
				// ReadBytes may return data together with io.EOF when the
				// last line has no trailing newline; that data is still a
				// document. Any other error aborts the load.
				b, readErr := r.ReadBytes('\n')
				if readErr != nil && readErr != io.EOF {
					return readErr
				}
				if len(b) > 0 {
					var doc interface{} = b
					if parseJSON {
						if err := json.Unmarshal(b, &doc); err != nil {
							return fmt.Errorf("error parsing JSON: %v", err)
						}
					}
					// NOTE(review): IDs are 5 random letters, so two
					// documents can receive the same ID; behavior kept as is.
					docID := randomString(5)
					if err := batch.Index(docID, doc); err != nil {
						return err
					}
					total++
					pending++
					if pending >= batchSize {
						fmt.Printf("Indexing batch (%d docs)...\n", total)
						if err := flush(); err != nil {
							return err
						}
					}
				}
				if readErr == io.EOF {
					break
				}
			}
			// Execute whatever is left over from this file.
			return flush()
		}

		for _, path := range args[1:] {
			if err := loadFile(path); err != nil {
				return err
			}
		}
		return nil
	},
}
// letters is the alphabet randomString draws from: ASCII a-z followed by A-Z.
var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

// randomString returns a string of n characters, each picked from letters
// using the package-level math/rand source.
func randomString(n int) string {
	out := make([]rune, 0, n)
	for k := 0; k < n; k++ {
		pick := rand.Intn(len(letters))
		out = append(out, letters[pick])
	}
	return string(out)
}
func init() {
RootCmd.AddCommand(bulkCmd)
bulkCmd.Flags().IntVarP(&batchSize, "batch", "b", 1000, "Batch size for loading, default 1000.")
bulkCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON, defaults true.")
}