This repository was archived by the owner on Mar 9, 2019. It is now read-only.

Memory usage depends on db size #253

Closed

@bobrik

Description

Here is a quick-and-dirty program showing that boltdb memory consumption depends on db size. Each transaction updates up to 1000 keys, and I would expect memory to be reused after each transaction, but RSS keeps growing with db size.

Linear write (100k unique keys):

# /tmp/test.linux -write -linear -iterations 100 2>&1 | tail -n3
2014/09/12 16:22:46 iteration 99
2014/09/12 16:22:47 compacted 99000 -> 100000
2014/09/12 16:22:47 rss memory usage 43516 kB

Linear write (200k unique keys):

web293 ~ # /tmp/test.linux -write -linear -iterations 200 2>&1 | tail -n3
2014/09/12 16:24:00 iteration 199
2014/09/12 16:24:00 compacted 199000 -> 200000
2014/09/12 16:24:00 rss memory usage 83456 kB

Kind of random write (50k unique keys, 100k updates):

web293 ~ # /tmp/test.linux -write -iterations 100 2>&1 | tail -n3
2014/09/12 16:26:10 iteration 99
2014/09/12 16:26:11 compacted 99000 -> 43116
2014/09/12 16:26:11 rss memory usage 27076 kB

Kind of random write (50k unique keys, 200k updates):

web293 ~ # /tmp/test.linux -write -iterations 200 2>&1 | tail -n3
2014/09/12 16:28:41 iteration 199
2014/09/12 16:28:41 compacted 199000 -> 49030
2014/09/12 16:28:41 rss memory usage 29984 kB

The program that produces this output:

package main

import (
    "encoding/binary"
    "encoding/json"
    "flag"
    "io/ioutil"
    "log"
    "math/rand"
    "os"
    "strconv"
    "strings"

    "github.com/boltdb/bolt"
)

const haha = "hahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahah"

func main() {
    write := flag.Bool("write", false, "do puts in db")
    linear := flag.Bool("linear", false, "make keys unique")
    iterations := flag.Int("iterations", 200, "how many thousands of updates to perform")

    flag.Parse()

    file := "test.db"

    // Start from a fresh database file.
    if err := os.Remove(file); err != nil && !os.IsNotExist(err) {
        log.Fatal(err)
    }

    db, err := bolt.Open(file, 0600, nil)
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    tx, err := db.Begin(true)
    if err != nil {
        log.Fatal(err)
    }

    if _, err := tx.CreateBucket([]byte("events")); err != nil {
        log.Fatal(err)
    }

    if err := tx.Commit(); err != nil {
        log.Fatal(err)
    }

    for i := 0; i < *iterations; i++ {
        log.Println("iteration", i)

        tx, err := db.Begin(true)
        if err != nil {
            log.Fatal(err)
        }

        bucket := tx.Bucket([]byte("events"))

        // Each transaction updates up to 1000 keys.
        for j := 0; j < 1000; j++ {
            increment := rand.Intn(500)

            key := map[string]interface{}{}
            if *linear {
                key[haha] = i*1000 + j
            } else {
                key[haha] = rand.Intn(50000)
            }

            serialized, err := json.Marshal(key)
            if err != nil {
                log.Fatal(err)
            }

            // Read the current counter for this key, if any.
            current := int64(0)
            if currentBuf := bucket.Get(serialized); currentBuf != nil {
                parsed, read := binary.Varint(currentBuf)
                if read <= 0 {
                    log.Fatal("failed to read int from buffer")
                }

                current = parsed
            }

            // bolt keeps a reference to the value until the transaction
            // commits, so each key gets its own buffer.
            buf := make([]byte, 8)
            binary.PutVarint(buf, current+int64(increment))

            if *write {
                if err := bucket.Put(serialized, buf); err != nil {
                    log.Fatal(err)
                }
            }
        }

        if err := tx.Commit(); err != nil {
            log.Fatal(err)
        }

        // Count how many unique keys are left after the updates.
        size := 0
        err = db.View(func(tx *bolt.Tx) error {
            size = tx.Bucket([]byte("events")).Stats().KeyN
            return nil
        })
        if err != nil {
            log.Fatal(err)
        }

        log.Printf("compacted %d -> %d\n", i*1000, size)
        log.Println("rss memory usage", rss())
    }
}

// rss returns the VmRSS value from /proc/<pid>/status.
func rss() string {
    buf, err := ioutil.ReadFile("/proc/" + strconv.Itoa(os.Getpid()) + "/status")
    if err != nil {
        log.Fatal(err)
    }

    for _, line := range strings.Split(string(buf), "\n") {
        if strings.HasPrefix(line, "VmRSS:") {
            return strings.TrimSpace(strings.TrimPrefix(line, "VmRSS:"))
        }
    }

    log.Fatal("no VmRSS line in /proc/<pid>/status")

    return ""
}
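
Worth noting: bolt mmaps the whole data file, so the VmRSS numbers above include resident pages of that mapping, not just Go heap. A minimal sketch of a hypothetical helper to log next to rss() in the loop (it assumes "fmt" and "runtime" are added to the imports):

// heap is a hypothetical helper to log alongside rss() in the loop above.
// HeapAlloc counts live Go heap bytes and Sys counts everything the Go
// runtime obtained from the OS; neither includes bolt's mmap of the data
// file. If these stay flat while VmRSS grows with db size, the growth is
// file-backed pages that the kernel can reclaim under memory pressure.
func heap() string {
    var m runtime.MemStats
    runtime.ReadMemStats(&m)
    return fmt.Sprintf("heap %d kB / sys %d kB", m.HeapAlloc/1024, m.Sys/1024)
}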

I'd like to take 80-100 million keys and compact them to 10-15 million keys, but memory usage is too high to do that with bolt.
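
To pin the RSS growth on the mapping itself, one could sum the Rss of the test.db mappings in /proc/self/smaps. A rough sketch of a hypothetical helper for the program above (the path-suffix matching is an assumption about how the mapping shows up in smaps; it uses only imports the program already has):

// mappedRss is a hypothetical helper: it sums the Rss (in kB) of every
// /proc/self/smaps mapping whose backing path ends with the given suffix,
// e.g. mappedRss("test.db"), to show how much of the process RSS is
// resident pages of bolt's file mapping.
func mappedRss(path string) int {
    buf, err := ioutil.ReadFile("/proc/self/smaps")
    if err != nil {
        log.Fatal(err)
    }

    total := 0
    inMapping := false
    for _, line := range strings.Split(string(buf), "\n") {
        fields := strings.Fields(line)
        if len(fields) == 0 {
            continue
        }
        // Mapping headers start with an address range like "7f3a...-7f3b...";
        // attribute lines start with a name like "Rss:".
        if strings.Contains(fields[0], "-") {
            inMapping = strings.HasSuffix(line, path)
            continue
        }
        if inMapping && fields[0] == "Rss:" {
            kb, err := strconv.Atoi(fields[1])
            if err != nil {
                log.Fatal(err)
            }
            total += kb
        }
    }

    return total
}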
