2017-12-11 19:40:35 +01:00
|
|
|
|
// Copyright (c) 2017 Couchbase, Inc.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
|
//
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
//
|
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
2017-12-15 22:25:37 +01:00
|
|
|
|
// Package mergeplan provides a segment merge planning approach that's
|
|
|
|
|
// inspired by Lucene's TieredMergePolicy.java and descriptions like
|
|
|
|
|
// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html
|
2017-12-11 19:40:35 +01:00
|
|
|
|
package mergeplan
|
|
|
|
|
|
|
|
|
|
import (
|
2017-12-16 17:39:10 +01:00
|
|
|
|
"fmt"
|
2017-12-11 19:40:35 +01:00
|
|
|
|
"math"
|
|
|
|
|
"sort"
|
2017-12-16 17:39:10 +01:00
|
|
|
|
"strings"
|
2017-12-11 19:40:35 +01:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// A Segment represents the information that the planner needs to
|
|
|
|
|
// calculate segment merging.
|
|
|
|
|
type Segment interface {
|
|
|
|
|
// Unique id of the segment -- used for sorting.
|
|
|
|
|
Id() uint64
|
|
|
|
|
|
|
|
|
|
// Full segment size (the size before any logical deletions).
|
|
|
|
|
FullSize() int64
|
|
|
|
|
|
|
|
|
|
// Size of the live data of the segment; i.e., FullSize() minus
|
|
|
|
|
// any logical deletions.
|
|
|
|
|
LiveSize() int64
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Plan() will functionally compute a merge plan. A segment will be
|
|
|
|
|
// assigned to at most a single MergeTask in the output MergePlan. A
|
|
|
|
|
// segment not assigned to any MergeTask means the segment should
|
|
|
|
|
// remain unmerged.
|
2017-12-12 00:03:29 +01:00
|
|
|
|
func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
|
|
|
|
return plan(segments, o)
|
2017-12-11 19:40:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A MergePlan is the result of the Plan() API.
|
|
|
|
|
//
|
|
|
|
|
// The planner doesn’t know how or whether these tasks are executed --
|
|
|
|
|
// that’s up to a separate merge execution system, which might execute
|
|
|
|
|
// these tasks concurrently or not, and which might execute all the
|
|
|
|
|
// tasks or not.
|
|
|
|
|
type MergePlan struct {
|
|
|
|
|
Tasks []*MergeTask
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A MergeTask represents several segments that should be merged
|
|
|
|
|
// together into a single segment.
|
|
|
|
|
type MergeTask struct {
|
|
|
|
|
Segments []Segment
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The MergePlanOptions is designed to be reusable between planning calls.
|
|
|
|
|
type MergePlanOptions struct {
|
|
|
|
|
// Max # segments per logarithmic tier, or max width of any
|
|
|
|
|
// logarithmic “step”. Smaller values mean more merging but fewer
|
|
|
|
|
// segments. Should be >= SegmentsPerMergeTask, else you'll have
|
|
|
|
|
// too much merging.
|
|
|
|
|
MaxSegmentsPerTier int
|
|
|
|
|
|
|
|
|
|
// Max size of any segment produced after merging. Actual
|
|
|
|
|
// merging, however, may produce segment sizes different than the
|
|
|
|
|
// planner’s predicted sizes.
|
|
|
|
|
MaxSegmentSize int64
|
|
|
|
|
|
2017-12-16 23:22:15 +01:00
|
|
|
|
// The growth factor for each tier in a staircase of idealized
|
|
|
|
|
// segments computed by CalcBudget().
|
|
|
|
|
TierGrowth float64
|
|
|
|
|
|
2017-12-11 19:40:35 +01:00
|
|
|
|
// The number of segments in any resulting MergeTask. e.g.,
|
|
|
|
|
// len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask.
|
|
|
|
|
SegmentsPerMergeTask int
|
|
|
|
|
|
|
|
|
|
// Small segments are rounded up to this size, i.e., treated as
|
|
|
|
|
// equal (floor) size for consideration. This is to prevent lots
|
|
|
|
|
// of tiny segments from resulting in a long tail in the index.
|
|
|
|
|
FloorSegmentSize int64
|
|
|
|
|
|
|
|
|
|
// Controls how aggressively merges that reclaim more deletions
|
|
|
|
|
// are favored. Higher values will more aggressively target
|
|
|
|
|
// merges that reclaim deletions, but be careful not to go so high
|
|
|
|
|
// that way too much merging takes place; a value of 3.0 is
|
|
|
|
|
// probably nearly too high. A value of 0.0 means deletions don't
|
|
|
|
|
// impact merge selection.
|
|
|
|
|
ReclaimDeletesWeight float64
|
|
|
|
|
|
|
|
|
|
// Optional, defaults to mergeplan.CalcBudget().
|
|
|
|
|
CalcBudget func(totalSize int64, firstTierSize int64,
|
|
|
|
|
o *MergePlanOptions) (budgetNumSegments int)
|
|
|
|
|
|
|
|
|
|
// Optional, defaults to mergeplan.ScoreSegments().
|
|
|
|
|
ScoreSegments func(segments []Segment, o *MergePlanOptions) float64
|
|
|
|
|
|
|
|
|
|
// Optional.
|
|
|
|
|
Logger func(string)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns the higher of the input or FloorSegmentSize.
|
|
|
|
|
func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
|
|
|
|
|
if s > o.FloorSegmentSize {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
return o.FloorSegmentSize
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Suggested default options.
|
|
|
|
|
var DefaultMergePlanOptions = MergePlanOptions{
|
|
|
|
|
MaxSegmentsPerTier: 10,
|
|
|
|
|
MaxSegmentSize: 5000000,
|
2017-12-16 23:22:15 +01:00
|
|
|
|
TierGrowth: 10.0,
|
2017-12-11 19:40:35 +01:00
|
|
|
|
SegmentsPerMergeTask: 10,
|
|
|
|
|
FloorSegmentSize: 2000,
|
|
|
|
|
ReclaimDeletesWeight: 2.0,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// -------------------------------------------
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
2017-12-11 19:40:35 +01:00
|
|
|
|
if len(segmentsIn) <= 1 {
|
|
|
|
|
return nil, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if o == nil {
|
|
|
|
|
o = &DefaultMergePlanOptions
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
segments := append([]Segment(nil), segmentsIn...) // Copy.
|
|
|
|
|
|
|
|
|
|
sort.Sort(byLiveSizeDescending(segments))
|
|
|
|
|
|
|
|
|
|
var minLiveSize int64 = math.MaxInt64
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
var eligibles []Segment
|
|
|
|
|
var eligiblesLiveSize int64
|
2017-12-11 19:40:35 +01:00
|
|
|
|
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
|
if minLiveSize > segment.LiveSize() {
|
|
|
|
|
minLiveSize = segment.LiveSize()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Only small-enough segments are eligible.
|
|
|
|
|
if segment.LiveSize() < o.MaxSegmentSize/2 {
|
2017-12-12 00:03:29 +01:00
|
|
|
|
eligibles = append(eligibles, segment)
|
|
|
|
|
eligiblesLiveSize += segment.LiveSize()
|
2017-12-11 19:40:35 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
|
|
|
|
|
|
|
|
|
|
calcBudget := o.CalcBudget
|
|
|
|
|
if calcBudget == nil {
|
|
|
|
|
calcBudget = CalcBudget
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
budgetNumSegments := CalcBudget(eligiblesLiveSize, minLiveSize, o)
|
2017-12-11 19:40:35 +01:00
|
|
|
|
|
|
|
|
|
scoreSegments := o.ScoreSegments
|
|
|
|
|
if scoreSegments == nil {
|
|
|
|
|
scoreSegments = ScoreSegments
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rv := &MergePlan{}
|
|
|
|
|
|
2017-12-18 20:33:19 +01:00
|
|
|
|
var empties []Segment
|
|
|
|
|
for _, eligible := range eligibles {
|
|
|
|
|
if eligible.LiveSize() <= 0 {
|
|
|
|
|
empties = append(empties, eligible)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(empties) > 0 {
|
|
|
|
|
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties})
|
|
|
|
|
eligibles = removeSegments(eligibles, empties)
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-11 19:40:35 +01:00
|
|
|
|
// While we’re over budget, keep looping, which might produce
|
|
|
|
|
// another MergeTask.
|
2018-02-06 09:18:16 +01:00
|
|
|
|
for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments {
|
2017-12-11 19:40:35 +01:00
|
|
|
|
// Track a current best roster as we examine and score
|
|
|
|
|
// potential rosters of merges.
|
|
|
|
|
var bestRoster []Segment
|
|
|
|
|
var bestRosterScore float64 // Lower score is better.
|
|
|
|
|
|
2018-02-06 09:18:16 +01:00
|
|
|
|
for startIdx := 0; startIdx < len(eligibles); startIdx++ {
|
2017-12-11 19:40:35 +01:00
|
|
|
|
var roster []Segment
|
|
|
|
|
var rosterLiveSize int64
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ {
|
|
|
|
|
eligible := eligibles[idx]
|
2017-12-11 19:40:35 +01:00
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
if rosterLiveSize+eligible.LiveSize() < o.MaxSegmentSize {
|
|
|
|
|
roster = append(roster, eligible)
|
|
|
|
|
rosterLiveSize += eligible.LiveSize()
|
2017-12-11 19:40:35 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
if len(roster) > 0 {
|
|
|
|
|
rosterScore := scoreSegments(roster, o)
|
2017-12-11 19:40:35 +01:00
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
if len(bestRoster) <= 0 || rosterScore < bestRosterScore {
|
|
|
|
|
bestRoster = roster
|
|
|
|
|
bestRosterScore = rosterScore
|
|
|
|
|
}
|
2017-12-11 19:40:35 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(bestRoster) <= 0 {
|
|
|
|
|
return rv, nil
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
|
2017-12-11 19:40:35 +01:00
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
eligibles = removeSegments(eligibles, bestRoster)
|
2017-12-11 19:40:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return rv, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compute the number of segments that would be needed to cover the
|
2017-12-16 23:22:15 +01:00
|
|
|
|
// totalSize, by climbing up a logarithmically growing staircase of
|
|
|
|
|
// segment tiers.
|
2017-12-11 19:40:35 +01:00
|
|
|
|
func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) (
|
|
|
|
|
budgetNumSegments int) {
|
|
|
|
|
tierSize := firstTierSize
|
2017-12-12 00:03:29 +01:00
|
|
|
|
if tierSize < 1 {
|
|
|
|
|
tierSize = 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
maxSegmentsPerTier := o.MaxSegmentsPerTier
|
|
|
|
|
if maxSegmentsPerTier < 1 {
|
|
|
|
|
maxSegmentsPerTier = 1
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-16 23:22:15 +01:00
|
|
|
|
tierGrowth := o.TierGrowth
|
|
|
|
|
if tierGrowth < 1.0 {
|
|
|
|
|
tierGrowth = 1.0
|
2017-12-12 00:03:29 +01:00
|
|
|
|
}
|
2017-12-11 19:40:35 +01:00
|
|
|
|
|
|
|
|
|
for totalSize > 0 {
|
|
|
|
|
segmentsInTier := float64(totalSize) / float64(tierSize)
|
2017-12-12 00:03:29 +01:00
|
|
|
|
if segmentsInTier < float64(maxSegmentsPerTier) {
|
2017-12-11 19:40:35 +01:00
|
|
|
|
budgetNumSegments += int(math.Ceil(segmentsInTier))
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
budgetNumSegments += maxSegmentsPerTier
|
|
|
|
|
totalSize -= int64(maxSegmentsPerTier) * tierSize
|
2017-12-16 23:22:15 +01:00
|
|
|
|
tierSize = int64(float64(tierSize) * tierGrowth)
|
2017-12-11 19:40:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return budgetNumSegments
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
// Of note, removeSegments() keeps the ordering of the results stable.
|
|
|
|
|
func removeSegments(segments []Segment, toRemove []Segment) []Segment {
|
|
|
|
|
rv := make([]Segment, 0, len(segments)-len(toRemove))
|
2017-12-11 19:40:35 +01:00
|
|
|
|
OUTER:
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
|
for _, r := range toRemove {
|
|
|
|
|
if segment == r {
|
|
|
|
|
continue OUTER
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
rv = append(rv, segment)
|
|
|
|
|
}
|
|
|
|
|
return rv
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
// Smaller result score is better.
|
2017-12-11 19:40:35 +01:00
|
|
|
|
func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 {
|
2017-12-12 00:03:29 +01:00
|
|
|
|
var totBeforeSize int64
|
|
|
|
|
var totAfterSize int64
|
|
|
|
|
var totAfterSizeFloored int64
|
|
|
|
|
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
|
totBeforeSize += segment.FullSize()
|
|
|
|
|
totAfterSize += segment.LiveSize()
|
|
|
|
|
totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize())
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 19:36:16 +01:00
|
|
|
|
if totBeforeSize <= 0 || totAfterSize <= 0 || totAfterSizeFloored <= 0 {
|
|
|
|
|
return 0
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-12 00:03:29 +01:00
|
|
|
|
// Roughly guess the "balance" of the segments -- whether the
|
|
|
|
|
// segments are about the same size.
|
|
|
|
|
balance :=
|
|
|
|
|
float64(o.RaiseToFloorSegmentSize(segments[0].LiveSize())) /
|
|
|
|
|
float64(totAfterSizeFloored)
|
|
|
|
|
|
|
|
|
|
// Gently favor smaller merges over bigger ones. We don't want to
|
|
|
|
|
// make the exponent too large else we end up with poor merges of
|
|
|
|
|
// small segments in order to avoid the large merges.
|
|
|
|
|
score := balance * math.Pow(float64(totAfterSize), 0.05)
|
|
|
|
|
|
|
|
|
|
// Strongly favor merges that reclaim deletes.
|
|
|
|
|
nonDelRatio := float64(totAfterSize) / float64(totBeforeSize)
|
|
|
|
|
|
|
|
|
|
score *= math.Pow(nonDelRatio, o.ReclaimDeletesWeight)
|
|
|
|
|
|
|
|
|
|
return score
|
2017-12-11 19:40:35 +01:00
|
|
|
|
}
|
2017-12-16 17:39:10 +01:00
|
|
|
|
|
|
|
|
|
// ------------------------------------------
|
|
|
|
|
|
|
|
|
|
// ToBarChart returns an ASCII rendering of the segments and the plan.
|
|
|
|
|
// The barMax is the max width of the bars in the bar chart.
|
|
|
|
|
func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string {
|
|
|
|
|
rv := make([]string, 0, len(segments))
|
|
|
|
|
|
|
|
|
|
var maxFullSize int64
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
|
if maxFullSize < segment.FullSize() {
|
|
|
|
|
maxFullSize = segment.FullSize()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if maxFullSize < 0 {
|
|
|
|
|
maxFullSize = 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
|
barFull := int(segment.FullSize())
|
|
|
|
|
barLive := int(segment.LiveSize())
|
|
|
|
|
|
|
|
|
|
if maxFullSize > int64(barMax) {
|
|
|
|
|
barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize))
|
|
|
|
|
barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
barKind := " "
|
|
|
|
|
barChar := "."
|
|
|
|
|
|
|
|
|
|
if plan != nil {
|
|
|
|
|
TASK_LOOP:
|
|
|
|
|
for taski, task := range plan.Tasks {
|
|
|
|
|
for _, taskSegment := range task.Segments {
|
|
|
|
|
if taskSegment == segment {
|
|
|
|
|
barKind = "*"
|
|
|
|
|
barChar = fmt.Sprintf("%d", taski)
|
|
|
|
|
break TASK_LOOP
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bar :=
|
|
|
|
|
strings.Repeat(barChar, barLive)[0:barLive] +
|
|
|
|
|
strings.Repeat("x", barFull-barLive)[0:barFull-barLive]
|
|
|
|
|
|
|
|
|
|
rv = append(rv, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix,
|
|
|
|
|
segment.Id(),
|
|
|
|
|
segment.LiveSize(),
|
|
|
|
|
segment.FullSize(),
|
|
|
|
|
barKind, bar))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return strings.Join(rv, "\n")
|
|
|
|
|
}
|