This repository was archived by the owner on Mar 9, 2019. It is now read-only.

Memory usage depends on db size #253

Closed

@bobrik

Description

Here is a quick-and-dirty program showing that boltdb memory consumption depends on db size. Each transaction updates up to 1000 keys, and I would expect memory to be reused after each transaction, but RSS keeps growing with db size.

Linear write (100k unique keys):

# /tmp/test.linux -write -linear -iterations 100 2>&1 | tail -n3
2014/09/12 16:22:46 iteration 99
2014/09/12 16:22:47 compacted 99000 -> 100000
2014/09/12 16:22:47 rss memory usage 43516 kB

Linear write (200k unique keys):

web293 ~ # /tmp/test.linux -write -linear -iterations 200 2>&1 | tail -n3
2014/09/12 16:24:00 iteration 199
2014/09/12 16:24:00 compacted 199000 -> 200000
2014/09/12 16:24:00 rss memory usage 83456 kB

Kind of random write (50k unique keys, 100k updates):

web293 ~ # /tmp/test.linux -write -iterations 100 2>&1 | tail -n3
2014/09/12 16:26:10 iteration 99
2014/09/12 16:26:11 compacted 99000 -> 43116
2014/09/12 16:26:11 rss memory usage 27076 kB

Kind of random write (50k unique keys, 200k updates):

web293 ~ # /tmp/test.linux -write -iterations 200 2>&1 | tail -n3
2014/09/12 16:28:41 iteration 199
2014/09/12 16:28:41 compacted 199000 -> 49030
2014/09/12 16:28:41 rss memory usage 29984 kB

The program that produces this output:

package main

import (
    "encoding/binary"
    "encoding/json"
    "flag"
    "io/ioutil"
    "log"
    "math/rand"
    "os"
    "strconv"
    "strings"

    "github.com/boltdb/bolt"
)

const haha = "hahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahah"

func main() {
    write := flag.Bool("write", false, "do puts in db")
    linear := flag.Bool("linear", false, "make keys unique")
    iterations := flag.Int("iterations", 200, "how many thousands of updates to perform")

    flag.Parse()

    file := "test.db"

    // Start from a fresh database file.
    if err := os.Remove(file); err != nil && !os.IsNotExist(err) {
        log.Fatal(err)
    }

    db, err := bolt.Open(file, 0600, nil)
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    tx, err := db.Begin(true)
    if err != nil {
        log.Fatal(err)
    }

    if _, err := tx.CreateBucket([]byte("events")); err != nil {
        log.Fatal(err)
    }

    if err := tx.Commit(); err != nil {
        log.Fatal(err)
    }

    for i := 0; i < *iterations; i++ {
        log.Println("iteration", i)

        tx, err := db.Begin(true)
        if err != nil {
            log.Fatal(err)
        }

        bucket := tx.Bucket([]byte("events"))

        // Each transaction updates up to 1000 keys.
        for j := 0; j < 1000; j++ {
            increment := rand.Intn(500)

            key := map[string]interface{}{}
            if *linear {
                key[haha] = i*1000 + j
            } else {
                key[haha] = rand.Intn(50000)
            }

            serialized, err := json.Marshal(key)
            if err != nil {
                log.Fatal(err)
            }

            // Read the current counter for this key, if any.
            current := int64(0)
            if currentBuf := bucket.Get(serialized); currentBuf != nil {
                parsed, read := binary.Varint(currentBuf)
                if read <= 0 {
                    log.Fatal("failed to read int from buffer")
                }

                current = parsed
            }

            // bolt keeps a reference to the value until the transaction
            // commits, so each key gets its own buffer.
            buf := make([]byte, 8)
            binary.PutVarint(buf, current+int64(increment))

            if *write {
                if err := bucket.Put(serialized, buf); err != nil {
                    log.Fatal(err)
                }
            }
        }

        if err := tx.Commit(); err != nil {
            log.Fatal(err)
        }

        // Count how many unique keys are left after the updates.
        size := 0
        err = db.View(func(tx *bolt.Tx) error {
            size = tx.Bucket([]byte("events")).Stats().KeyN
            return nil
        })
        if err != nil {
            log.Fatal(err)
        }

        log.Printf("compacted %d -> %d\n", i*1000, size)
        log.Println("rss memory usage", rss())
    }
}

// rss returns the VmRSS value from /proc/<pid>/status.
func rss() string {
    buf, err := ioutil.ReadFile("/proc/" + strconv.Itoa(os.Getpid()) + "/status")
    if err != nil {
        log.Fatal(err)
    }

    for _, line := range strings.Split(string(buf), "\n") {
        if strings.HasPrefix(line, "VmRSS:") {
            return strings.TrimSpace(strings.TrimPrefix(line, "VmRSS:"))
        }
    }

    log.Fatal("no VmRSS line in /proc/<pid>/status")

    return ""
}
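
Worth noting: bolt mmaps the whole data file, so the VmRSS numbers above include resident pages of that mapping, not just Go heap. A minimal sketch of a hypothetical helper to log next to rss() in the loop (it assumes "fmt" and "runtime" are added to the imports):

// heap is a hypothetical helper to log alongside rss() in the loop above.
// HeapAlloc counts live Go heap bytes and Sys counts everything the Go
// runtime obtained from the OS; neither includes bolt's mmap of the data
// file. If these stay flat while VmRSS grows with db size, the growth is
// file-backed pages that the kernel can reclaim under memory pressure.
func heap() string {
    var m runtime.MemStats
    runtime.ReadMemStats(&m)
    return fmt.Sprintf("heap %d kB / sys %d kB", m.HeapAlloc/1024, m.Sys/1024)
}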

I'd like to take 80-100 million keys and compact them to 10-15 million keys, but memory usage is too high to do that with bolt.
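
To pin the RSS growth on the mapping itself, one could sum the Rss of the test.db mappings in /proc/self/smaps. A rough sketch of a hypothetical helper for the program above (the path-suffix matching is an assumption about how the mapping shows up in smaps; it uses only imports the program already has):

// mappedRss is a hypothetical helper: it sums the Rss (in kB) of every
// /proc/self/smaps mapping whose backing path ends with the given suffix,
// e.g. mappedRss("test.db"), to show how much of the process RSS is
// resident pages of bolt's file mapping.
func mappedRss(path string) int {
    buf, err := ioutil.ReadFile("/proc/self/smaps")
    if err != nil {
        log.Fatal(err)
    }

    total := 0
    inMapping := false
    for _, line := range strings.Split(string(buf), "\n") {
        fields := strings.Fields(line)
        if len(fields) == 0 {
            continue
        }
        // Mapping headers start with an address range like "7f3a...-7f3b...";
        // attribute lines start with a name like "Rss:".
        if strings.Contains(fields[0], "-") {
            inMapping = strings.HasSuffix(line, path)
            continue
        }
        if inMapping && fields[0] == "Rss:" {
            kb, err := strconv.Atoi(fields[1])
            if err != nil {
                log.Fatal(err)
            }
            total += kb
        }
    }

    return total
}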
