This repository was archived by the owner on Mar 9, 2019. It is now read-only.
Memory usage depends on db size #253
Closed
Description
Here is a quick-and-dirty program showing that boltdb's memory consumption depends on the database size. Each transaction updates up to 1000 keys, so I would expect memory to be reused after each transaction, but RSS keeps growing with the db size.
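In short, every iteration does the equivalent of the sketch below (condensed, using db.Update and a simplified key scheme; the full program that produced the numbers below is included further down):

package main

import (
    "encoding/binary"
    "fmt"
    "log"

    "github.com/boltdb/bolt"
)

func main() {
    db, err := bolt.Open("sketch.db", 0600, nil)
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    // One write transaction per batch of 1000 puts.
    for i := 0; i < 10; i++ {
        if err := db.Update(func(tx *bolt.Tx) error {
            b, err := tx.CreateBucketIfNotExists([]byte("events"))
            if err != nil {
                return err
            }
            for j := 0; j < 1000; j++ {
                key := []byte(fmt.Sprintf("key-%d", i*1000+j))
                val := make([]byte, binary.MaxVarintLen64)
                n := binary.PutVarint(val, int64(j))
                if err := b.Put(key, val[:n]); err != nil {
                    return err
                }
            }
            return nil
        }); err != nil {
            log.Fatal(err)
        }
    }
}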
Linear write (100k unique keys):
# /tmp/test.linux -write -linear -iterations 100 2>&1 | tail -n3
2014/09/12 16:22:46 iteration 99
2014/09/12 16:22:47 compacted 99000 -> 100000
2014/09/12 16:22:47 rss memory usage 43516 kB
Linear write (200k unique keys):
web293 ~ # /tmp/test.linux -write -linear -iterations 200 2>&1 | tail -n3
2014/09/12 16:24:00 iteration 199
2014/09/12 16:24:00 compacted 199000 -> 200000
2014/09/12 16:24:00 rss memory usage 83456 kB
Kind of random write (50k unique keys, 100k updates):
web293 ~ # /tmp/test.linux -write -iterations 100 2>&1 | tail -n3
2014/09/12 16:26:10 iteration 99
2014/09/12 16:26:11 compacted 99000 -> 43116
2014/09/12 16:26:11 rss memory usage 27076 kB
Kind of random write (50k unique keys, 200k updates):
web293 ~ # /tmp/test.linux -write -iterations 200 2>&1 | tail -n3
2014/09/12 16:28:41 iteration 199
2014/09/12 16:28:41 compacted 199000 -> 49030
2014/09/12 16:28:41 rss memory usage 29984 kB
The program that produces this output:
package main

import (
    "encoding/binary"
    "encoding/json"
    "flag"
    "io/ioutil"
    "log"
    "math/rand"
    "os"
    "strconv"
    "strings"

    "github.com/boltdb/bolt"
)

const haha = "hahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahahhahahahahahahahahhahahahahahahahahahah"

func main() {
    write := flag.Bool("write", false, "do puts in db")
    linear := flag.Bool("linear", false, "make keys unique")
    iterations := flag.Int("iterations", 200, "how many thousands of updates to perform")
    flag.Parse()

    // Start from an empty database file on each run.
    file := "test.db"
    if err := os.Remove(file); err != nil && !os.IsNotExist(err) {
        log.Fatal(err)
    }

    db, err := bolt.Open(file, 0600, nil)
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    // Create the bucket up front in its own write transaction.
    tx, err := db.Begin(true)
    if err != nil {
        log.Fatal(err)
    }
    if _, err := tx.CreateBucket([]byte("events")); err != nil {
        log.Fatal(err)
    }
    if err := tx.Commit(); err != nil {
        log.Fatal(err)
    }

    for i := 0; i < *iterations; i++ {
        log.Println("iteration", i)

        // One write transaction per iteration, 1000 updates each.
        tx, err := db.Begin(true)
        if err != nil {
            log.Fatal(err)
        }
        bucket := tx.Bucket([]byte("events"))

        for j := 0; j < 1000; j++ {
            increment := rand.Intn(500)

            key := map[string]interface{}{}
            if *linear {
                key[haha] = i*1000 + j // unique key per update
            } else {
                key[haha] = rand.Intn(50000) // ~50k distinct keys, updated repeatedly
            }
            serialized, err := json.Marshal(key)
            if err != nil {
                log.Fatal(err)
            }

            // Read the current counter value, if the key already exists.
            current := int64(0)
            if currentBuf := bucket.Get(serialized); currentBuf != nil {
                parsed, read := binary.Varint(currentBuf)
                if read <= 0 {
                    log.Fatal("failed to read int from buffer")
                }
                current = parsed
            }

            // Bolt requires put values to stay valid for the life of the
            // transaction, so allocate a fresh buffer for every put.
            buf := make([]byte, binary.MaxVarintLen64)
            n := binary.PutVarint(buf, current+int64(increment))
            if *write {
                if err := bucket.Put(serialized, buf[:n]); err != nil {
                    log.Fatal(err)
                }
            }
        }
        if err := tx.Commit(); err != nil {
            log.Fatal(err)
        }

        // Count how many keys are actually stored so far.
        size := 0
        if err := db.View(func(tx *bolt.Tx) error {
            size = tx.Bucket([]byte("events")).Stats().KeyN
            return nil
        }); err != nil {
            log.Fatal(err)
        }
        log.Printf("compacted %d -> %d\n", i*1000, size)
        log.Println("rss memory usage", rss())
    }
}

// rss returns the resident set size of this process as reported by /proc.
func rss() string {
    buf, err := ioutil.ReadFile("/proc/" + strconv.Itoa(os.Getpid()) + "/status")
    if err != nil {
        log.Fatal(err)
    }
    for _, line := range strings.Split(string(buf), "\n") {
        if strings.HasPrefix(line, "VmRSS:") {
            return strings.TrimSpace(strings.TrimPrefix(line, "VmRSS:"))
        }
    }
    log.Fatal("VmRSS not found in /proc status")
    return ""
}
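To check how much of that RSS is actually the Go heap versus pages kept resident by Bolt's memory-mapped data file, one could print Go runtime memory statistics next to the /proc figure. A minimal self-contained sketch (not part of the test above; the rssKB helper just repeats the VmRSS lookup used earlier):

package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "os"
    "runtime"
    "strconv"
    "strings"
)

// rssKB returns the VmRSS line from /proc, as in the test program above.
func rssKB() string {
    buf, err := ioutil.ReadFile("/proc/" + strconv.Itoa(os.Getpid()) + "/status")
    if err != nil {
        log.Fatal(err)
    }
    for _, line := range strings.Split(string(buf), "\n") {
        if strings.HasPrefix(line, "VmRSS:") {
            return strings.TrimSpace(strings.TrimPrefix(line, "VmRSS:"))
        }
    }
    return "unknown"
}

func main() {
    var m runtime.MemStats
    runtime.ReadMemStats(&m)
    // HeapAlloc is the live Go heap; Sys is memory obtained from the OS by the
    // Go runtime. Neither includes pages resident only because of Bolt's mmap,
    // so a large gap between RSS and Sys points at the memory-mapped data file.
    fmt.Printf("rss=%s heap_alloc=%d KB runtime_sys=%d KB\n",
        rssKB(), m.HeapAlloc/1024, m.Sys/1024)
}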
I'd like to take 80-100 million keys and compact them to 10-15 million keys, but memory usage is too high to do that with bolt.