Skip to content

Commit

Permalink
runtime: improve timers scalability on multi-CPU systems
Browse files Browse the repository at this point in the history
Use per-P timers, so each P may work with its own timers.

This CL improves performance on multi-CPU systems
in the following cases:

- When serving high number of concurrent connections
  with read/write deadlines set (for instance, highly loaded
  net/http server).

- When using high number of concurrent timers. These timers
  may be implicitly created via context.WithDeadline
  or context.WithTimeout.

Production servers should usually set timeout on connections
and external requests in order to prevent from resource leakage.
See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/

Below are relevant benchmark results for various GOMAXPROCS values
on linux/amd64:

context package:

name                                     old time/op  new time/op  delta
WithTimeout/concurrency=40      4.92µs ± 0%  5.17µs ± 1%  +5.07%  (p=0.000 n=9+9)
WithTimeout/concurrency=4000    6.03µs ± 1%  6.49µs ± 0%  +7.63%  (p=0.000 n=8+10)
WithTimeout/concurrency=400000  8.58µs ± 7%  9.02µs ± 4%  +5.02%  (p=0.019 n=10+10)

name                                     old time/op  new time/op  delta
WithTimeout/concurrency=40-2      3.70µs ± 1%  2.78µs ± 4%  -24.90%  (p=0.000 n=8+9)
WithTimeout/concurrency=4000-2    4.49µs ± 4%  3.67µs ± 5%  -18.26%  (p=0.000 n=10+10)
WithTimeout/concurrency=400000-2  6.16µs ±10%  5.15µs ±13%  -16.30%  (p=0.000 n=10+10)

name                                     old time/op  new time/op  delta
WithTimeout/concurrency=40-4      3.58µs ± 1%  2.64µs ± 2%  -26.13%  (p=0.000 n=9+10)
WithTimeout/concurrency=4000-4    4.17µs ± 0%  3.32µs ± 1%  -20.36%  (p=0.000 n=10+10)
WithTimeout/concurrency=400000-4  5.57µs ± 9%  4.83µs ±10%  -13.27%  (p=0.001 n=10+10)

time package:

name                     old time/op  new time/op  delta
AfterFunc                6.15ms ± 3%  6.07ms ± 2%     ~     (p=0.133 n=10+9)
AfterFunc-2              3.43ms ± 1%  3.56ms ± 1%   +3.91%  (p=0.000 n=10+9)
AfterFunc-4              5.04ms ± 2%  2.36ms ± 0%  -53.20%  (p=0.000 n=10+9)
After                    6.54ms ± 2%  6.49ms ± 3%     ~     (p=0.393 n=10+10)
After-2                  3.68ms ± 1%  3.87ms ± 0%   +5.14%  (p=0.000 n=9+9)
After-4                  6.66ms ± 1%  2.87ms ± 1%  -56.89%  (p=0.000 n=10+10)
Stop                      698µs ± 2%   689µs ± 1%   -1.26%  (p=0.011 n=10+10)
Stop-2                    729µs ± 2%   434µs ± 3%  -40.49%  (p=0.000 n=10+10)
Stop-4                    837µs ± 3%   333µs ± 2%  -60.20%  (p=0.000 n=10+10)
SimultaneousAfterFunc     694µs ± 1%   692µs ± 7%     ~     (p=0.481 n=10+10)
SimultaneousAfterFunc-2   714µs ± 3%   569µs ± 2%  -20.33%  (p=0.000 n=10+10)
SimultaneousAfterFunc-4   782µs ± 2%   386µs ± 2%  -50.67%  (p=0.000 n=10+10)
StartStop                 267µs ± 3%   274µs ± 0%   +2.64%  (p=0.000 n=8+9)
StartStop-2               238µs ± 2%   140µs ± 3%  -40.95%  (p=0.000 n=10+8)
StartStop-4               320µs ± 1%   125µs ± 1%  -61.02%  (p=0.000 n=9+9)
Reset                    75.0µs ± 1%  77.5µs ± 2%   +3.38%  (p=0.000 n=10+10)
Reset-2                   150µs ± 2%    40µs ± 5%  -73.09%  (p=0.000 n=10+9)
Reset-4                   226µs ± 1%    33µs ± 1%  -85.42%  (p=0.000 n=10+10)
Sleep                     857µs ± 6%   878µs ± 9%     ~     (p=0.079 n=10+9)
Sleep-2                   617µs ± 4%   585µs ± 2%   -5.21%  (p=0.000 n=10+10)
Sleep-4                   689µs ± 3%   465µs ± 4%  -32.53%  (p=0.000 n=10+10)
Ticker                   55.9ms ± 2%  55.9ms ± 2%     ~     (p=0.971 n=10+10)
Ticker-2                 28.7ms ± 2%  28.1ms ± 1%   -2.06%  (p=0.000 n=10+10)
Ticker-4                 14.6ms ± 0%  13.6ms ± 1%   -6.80%  (p=0.000 n=9+10)

Fixes #15133

Change-Id: I6f4b09d2db8c5bec93146db6501b44dbfe5c0ac4
Reviewed-on: https://go-review.googlesource.com/34784
Reviewed-by: Austin Clements <austin@google.com>
  • Loading branch information
valyala authored and ianlancetaylor committed Sep 12, 2017
1 parent 7377d0c commit 76f4fd8
Show file tree
Hide file tree
Showing 9 changed files with 285 additions and 113 deletions.
56 changes: 55 additions & 1 deletion src/context/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,64 @@ package context_test
import (
. "context"
"fmt"
"runtime"
"sync"
"testing"
"time"
)

func BenchmarkContextCancelTree(b *testing.B) {
func BenchmarkWithTimeout(b *testing.B) {
for concurrency := 40; concurrency <= 4e5; concurrency *= 100 {
name := fmt.Sprintf("concurrency=%d", concurrency)
b.Run(name, func(b *testing.B) {
benchmarkWithTimeout(b, concurrency)
})
}
}

func benchmarkWithTimeout(b *testing.B, concurrentContexts int) {
gomaxprocs := runtime.GOMAXPROCS(0)
perPContexts := concurrentContexts / gomaxprocs
root := Background()

// Generate concurrent contexts.
var wg sync.WaitGroup
ccf := make([][]CancelFunc, gomaxprocs)
for i := range ccf {
wg.Add(1)
go func(i int) {
defer wg.Done()
cf := make([]CancelFunc, perPContexts)
for j := range cf {
_, cf[j] = WithTimeout(root, time.Hour)
}
ccf[i] = cf
}(i)
}
wg.Wait()

b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
wcf := make([]CancelFunc, 10)
for pb.Next() {
for i := range wcf {
_, wcf[i] = WithTimeout(root, time.Hour)
}
for _, f := range wcf {
f()
}
}
})
b.StopTimer()

for _, cf := range ccf {
for _, f := range cf {
f()
}
}
}

func BenchmarkCancelTree(b *testing.B) {
depths := []int{1, 10, 100, 1000}
for _, d := range depths {
b.Run(fmt.Sprintf("depth=%d", d), func(b *testing.B) {
Expand Down
7 changes: 4 additions & 3 deletions src/internal/trace/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,9 @@ func parseHeader(buf []byte) (int, error) {
// It does analyze and verify per-event-type arguments.
func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
var ticksPerSec, lastSeq, lastTs int64
var lastG, timerGoid uint64
var lastG uint64
var lastP int
timerGoids := make(map[uint64]bool)
lastGs := make(map[int]uint64) // last goroutine running on P
stacks = make(map[uint64][]*Frame)
batches := make(map[int][]*Event) // events by P
Expand Down Expand Up @@ -308,7 +309,7 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
return
}
case EvTimerGoroutine:
timerGoid = raw.args[0]
timerGoids[raw.args[0]] = true
case EvStack:
if len(raw.args) < 2 {
err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want at least 2, got %v",
Expand Down Expand Up @@ -431,7 +432,7 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
for _, ev := range events {
ev.Ts = int64(float64(ev.Ts-minTs) * freq)
// Move timers and syscalls to separate fake Ps.
if timerGoid != 0 && ev.G == timerGoid && ev.Type == EvGoUnblock {
if timerGoids[ev.G] && ev.Type == EvGoUnblock {
ev.P = TimerP
}
if ev.Type == EvGoSysExit {
Expand Down
12 changes: 4 additions & 8 deletions src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -3863,15 +3863,11 @@ func sysmon() {
}
shouldRelax := true
if osRelaxMinNS > 0 {
lock(&timers.lock)
if timers.sleeping {
now := nanotime()
next := timers.sleepUntil
if next-now < osRelaxMinNS {
shouldRelax = false
}
next := timeSleepUntil()
now := nanotime()
if next-now < osRelaxMinNS {
shouldRelax = false
}
unlock(&timers.lock)
}
if shouldRelax {
osRelax(true)
Expand Down
Loading

0 comments on commit 76f4fd8

Please sign in to comment.