From c539bda166bb0b04e0558d46799bb5e691b8fdbd Mon Sep 17 00:00:00 2001
From: Felix Lange <fjl@twurst.com>
Date: Fri, 11 Nov 2022 13:16:13 +0100
Subject: [PATCH] metrics: improve reading Go runtime metrics (#25886)

This changes how we read performance metrics from the Go runtime. Instead
of using runtime.ReadMemStats, we now rely on the API provided by package
runtime/metrics.

runtime/metrics provides more accurate information. For example, the new
interface has better reporting of memory use. In my testing, the reported
value of held memory more accurately reflects the usage reported by the OS.

The semantics of metrics system/memory/allocs and system/memory/frees have
changed to report amounts in bytes. ReadMemStats only reported the count of
allocations in number-of-objects. This is imprecise: 'tiny objects' are not
counted because the runtime allocates them in batches; and certain
improvements in allocation behavior, such as struct size optimizations,
will be less visible when the number of allocs doesn't change.

Changing allocation reports to be in bytes makes the graphs look as if a
lot more is being allocated. I don't think that's a problem because this
metric is primarily interesting for geth developers.

The metric system/memory/pauses has been changed to report statistical
values from the histogram provided by the runtime. Its name in influxdb has
changed from geth.system/memory/pauses.meter to
geth.system/memory/pauses.histogram.

We also have a new histogram metric, system/cpu/schedlatency, reporting the
Go scheduler latency.
---
 metrics/influxdb/influxdb.go        |  35 +--
 metrics/metrics.go                  | 176 ++++++++++-----
 metrics/metrics_test.go             |  32 +--
 metrics/runtime.go                  | 212 ------------------
 metrics/runtime_cgo.go              |  10 -
 metrics/runtime_gccpufraction.go    |  10 -
 metrics/runtime_no_cgo.go           |   8 -
 metrics/runtime_no_gccpufraction.go |  10 -
 metrics/runtime_test.go             |  88 --------
 metrics/runtimehistogram.go         | 319 ++++++++++++++++++++++++++++
 metrics/runtimehistogram_test.go    | 133 ++++++++++++
 11 files changed, 602 insertions(+), 431 deletions(-)
 delete mode 100644 metrics/runtime.go
 delete mode 100644 metrics/runtime_cgo.go
 delete mode 100644 metrics/runtime_gccpufraction.go
 delete mode 100644 metrics/runtime_no_cgo.go
 delete mode 100644 metrics/runtime_no_gccpufraction.go
 delete mode 100644 metrics/runtime_test.go
 create mode 100644 metrics/runtimehistogram.go
 create mode 100644 metrics/runtimehistogram_test.go

diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go
index e99717aeeb..1bf0c355ed 100644
--- a/metrics/influxdb/influxdb.go
+++ b/metrics/influxdb/influxdb.go
@@ -160,27 +160,28 @@ func (r *reporter) send() error {
 			})
 		case metrics.Histogram:
 			ms := metric.Snapshot()
-
 			if ms.Count() > 0 {
-				ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
+				ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
+				fields := map[string]interface{}{
+					"count":    ms.Count(),
+					"max":      ms.Max(),
+					"mean":     ms.Mean(),
+					"min":      ms.Min(),
+					"stddev":   ms.StdDev(),
+					"variance": ms.Variance(),
+					"p25":      ps[0],
+					"p50":      ps[1],
+					"p75":      ps[2],
+					"p95":      ps[3],
+					"p99":      ps[4],
+					"p999":     ps[5],
+					"p9999":    ps[6],
+				}
 				pts = append(pts, client.Point{
 					Measurement: fmt.Sprintf("%s%s.histogram", namespace, name),
 					Tags:        r.tags,
-					Fields: map[string]interface{}{
-						"count":    ms.Count(),
-						"max":      ms.Max(),
-						"mean":     ms.Mean(),
-						"min":      ms.Min(),
-						"stddev":   ms.StdDev(),
-						"variance": ms.Variance(),
-						"p50":      ps[0],
-						"p75":      ps[1],
-						"p95":      ps[2],
-						"p99":      ps[3],
-						"p999":     ps[4],
-						"p9999":    ps[5],
-					},
-					Time: now,
+					Fields:      fields,
+					Time:        now,
 				})
 			}
 		case metrics.Meter:
diff --git a/metrics/metrics.go b/metrics/metrics.go
index 747d6471a7..2edf8e35f1 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -7,7 +7,8 @@ package metrics
 
 import (
 	"os"
-	"runtime"
+	"runtime/metrics"
+	"runtime/pprof"
 	"strings"
 	"time"
 
@@ -54,38 +55,106 @@ func init() {
 	}
 }
 
-// CollectProcessMetrics periodically collects various metrics about the running
-// process.
+var threadCreateProfile = pprof.Lookup("threadcreate")
+
+type runtimeStats struct {
+	GCPauses     *metrics.Float64Histogram
+	GCAllocBytes uint64
+	GCFreedBytes uint64
+
+	MemTotal     uint64
+	HeapObjects  uint64
+	HeapFree     uint64
+	HeapReleased uint64
+	HeapUnused   uint64
+
+	Goroutines   uint64
+	SchedLatency *metrics.Float64Histogram
+}
+
+var runtimeSamples = []metrics.Sample{
+	{Name: "/gc/pauses:seconds"}, // histogram
+	{Name: "/gc/heap/allocs:bytes"},
+	{Name: "/gc/heap/frees:bytes"},
+	{Name: "/memory/classes/total:bytes"},
+	{Name: "/memory/classes/heap/objects:bytes"},
+	{Name: "/memory/classes/heap/free:bytes"},
+	{Name: "/memory/classes/heap/released:bytes"},
+	{Name: "/memory/classes/heap/unused:bytes"},
+	{Name: "/sched/goroutines:goroutines"},
+	{Name: "/sched/latencies:seconds"}, // histogram
+}
+
+func readRuntimeStats(v *runtimeStats) {
+	metrics.Read(runtimeSamples)
+	for _, s := range runtimeSamples {
+		// Skip invalid/unknown metrics. This is needed because some metrics
+		// are unavailable in older Go versions, and attempting to read a 'bad'
+		// metric panics.
+		if s.Value.Kind() == metrics.KindBad {
+			continue
+		}
+
+		switch s.Name {
+		case "/gc/pauses:seconds":
+			v.GCPauses = s.Value.Float64Histogram()
+		case "/gc/heap/allocs:bytes":
+			v.GCAllocBytes = s.Value.Uint64()
+		case "/gc/heap/frees:bytes":
+			v.GCFreedBytes = s.Value.Uint64()
+		case "/memory/classes/total:bytes":
+			v.MemTotal = s.Value.Uint64()
+		case "/memory/classes/heap/objects:bytes":
+			v.HeapObjects = s.Value.Uint64()
+		case "/memory/classes/heap/free:bytes":
+			v.HeapFree = s.Value.Uint64()
+		case "/memory/classes/heap/released:bytes":
+			v.HeapReleased = s.Value.Uint64()
+		case "/memory/classes/heap/unused:bytes":
+			v.HeapUnused = s.Value.Uint64()
+		case "/sched/goroutines:goroutines":
+			v.Goroutines = s.Value.Uint64()
+		case "/sched/latencies:seconds":
+			v.SchedLatency = s.Value.Float64Histogram()
+		}
+	}
+}
+
+// CollectProcessMetrics periodically collects various metrics about the running process.
 func CollectProcessMetrics(refresh time.Duration) {
 	// Short circuit if the metrics system is disabled
 	if !Enabled {
 		return
 	}
+
 	refreshFreq := int64(refresh / time.Second)
 
 	// Create the various data collectors
-	cpuStats := make([]*CPUStats, 2)
-	memstats := make([]*runtime.MemStats, 2)
-	diskstats := make([]*DiskStats, 2)
-	for i := 0; i < len(memstats); i++ {
-		cpuStats[i] = new(CPUStats)
-		memstats[i] = new(runtime.MemStats)
-		diskstats[i] = new(DiskStats)
-	}
-	// Define the various metrics to collect
 	var (
-		cpuSysLoad    = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry)
-		cpuSysWait    = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry)
-		cpuProcLoad   = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry)
-		cpuThreads    = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry)
-		cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry)
-
-		memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry)
-		memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry)
-		memFrees  = GetOrRegisterMeter("system/memory/frees", DefaultRegistry)
-		memHeld   = GetOrRegisterGauge("system/memory/held", DefaultRegistry)
-		memUsed   = GetOrRegisterGauge("system/memory/used", DefaultRegistry)
+		cpustats  = make([]CPUStats, 2)
+		diskstats = make([]DiskStats, 2)
+		rstats    = make([]runtimeStats, 2)
+	)
+
+	// This scale factor is used for the runtime's time metrics. It's useful to convert to
+	// ns here because the runtime gives times in float seconds, but runtimeHistogram can
+	// only provide integers for the minimum and maximum values.
+	const secondsToNs = float64(time.Second)
 
+	// Define the various metrics to collect
+	var (
+		cpuSysLoad            = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry)
+		cpuSysWait            = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry)
+		cpuProcLoad           = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry)
+		cpuThreads            = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry)
+		cpuGoroutines         = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry)
+		cpuSchedLatency       = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil)
+		memPauses             = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil)
+		memAllocs             = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry)
+		memFrees              = GetOrRegisterMeter("system/memory/frees", DefaultRegistry)
+		memTotal              = GetOrRegisterGauge("system/memory/held", DefaultRegistry)
+		heapUsed              = GetOrRegisterGauge("system/memory/used", DefaultRegistry)
+		heapObjects           = GetOrRegisterGauge("system/memory/objects", DefaultRegistry)
 		diskReads             = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry)
 		diskReadBytes         = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry)
 		diskReadBytesCounter  = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry)
@@ -93,34 +162,43 @@ func CollectProcessMetrics(refresh time.Duration) {
 		diskWriteBytes        = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry)
 		diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry)
 	)
-	// Iterate loading the different stats and updating the meters
-	for i := 1; ; i++ {
-		location1 := i % 2
-		location2 := (i - 1) % 2
-
-		ReadCPUStats(cpuStats[location1])
-		cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq)
-		cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq)
-		cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq)
+
+	// Iterate loading the different stats and updating the meters.
+	now, prev := 0, 1
+	for ; ; now, prev = prev, now {
+		// CPU
+		ReadCPUStats(&cpustats[now])
+		cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq)
+		cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq)
+		cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq)
+
+		// Threads
 		cpuThreads.Update(int64(threadCreateProfile.Count()))
-		cpuGoroutines.Update(int64(runtime.NumGoroutine()))
-
-		runtime.ReadMemStats(memstats[location1])
-		memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs))
-		memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs))
-		memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees))
-		memHeld.Update(int64(memstats[location1].HeapSys - memstats[location1].HeapReleased))
-		memUsed.Update(int64(memstats[location1].Alloc))
-
-		if ReadDiskStats(diskstats[location1]) == nil {
-			diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount)
-			diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes)
-			diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount)
-			diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes)
-
-			diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes)
-			diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes)
+
+		// Go runtime metrics
+		readRuntimeStats(&rstats[now])
+
+		cpuGoroutines.Update(int64(rstats[now].Goroutines))
+		cpuSchedLatency.update(rstats[now].SchedLatency)
+		memPauses.update(rstats[now].GCPauses)
+
+		memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes))
+		memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes))
+
+		memTotal.Update(int64(rstats[now].MemTotal))
+		heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased))
+		heapObjects.Update(int64(rstats[now].HeapObjects))
+
+		// Disk
+		if ReadDiskStats(&diskstats[now]) == nil {
+			diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount)
+			diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
+			diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount)
+			diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
+			diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
+			diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
 		}
+
 		time.Sleep(refresh)
 	}
 }
diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go
index 029c99870e..e3fde1ea62 100644
--- a/metrics/metrics_test.go
+++ b/metrics/metrics_test.go
@@ -2,8 +2,6 @@ package metrics
 
 import (
 	"fmt"
-	"io"
-	"log"
 	"sync"
 	"testing"
 	"time"
@@ -11,11 +9,11 @@ import (
 
 const FANOUT = 128
 
-// Stop the compiler from complaining during debugging.
-var (
-	_ = io.Discard
-	_ = log.LstdFlags
-)
+func TestReadRuntimeValues(t *testing.T) {
+	var v runtimeStats
+	readRuntimeStats(&v)
+	t.Logf("%+v", v)
+}
 
 func BenchmarkMetrics(b *testing.B) {
 	r := NewRegistry()
@@ -26,7 +24,6 @@ func BenchmarkMetrics(b *testing.B) {
 	m := NewRegisteredMeter("meter", r)
 	t := NewRegisteredTimer("timer", r)
 	RegisterDebugGCStats(r)
-	RegisterRuntimeMemStats(r)
 	b.ResetTimer()
 	ch := make(chan bool)
 
@@ -48,24 +45,6 @@ func BenchmarkMetrics(b *testing.B) {
 		}()
 	//*/
 
-	wgR := &sync.WaitGroup{}
-	//*
-	wgR.Add(1)
-	go func() {
-		defer wgR.Done()
-		//log.Println("go CaptureRuntimeMemStats")
-		for {
-			select {
-			case <-ch:
-				//log.Println("done CaptureRuntimeMemStats")
-				return
-			default:
-				CaptureRuntimeMemStatsOnce(r)
-			}
-		}
-	}()
-	//*/
-
 	wgW := &sync.WaitGroup{}
 	/*
 		wgW.Add(1)
@@ -104,7 +83,6 @@ func BenchmarkMetrics(b *testing.B) {
 	wg.Wait()
 	close(ch)
 	wgD.Wait()
-	wgR.Wait()
 	wgW.Wait()
 }
 
diff --git a/metrics/runtime.go b/metrics/runtime.go
deleted file mode 100644
index 9450c479ba..0000000000
--- a/metrics/runtime.go
+++ /dev/null
@@ -1,212 +0,0 @@
-package metrics
-
-import (
-	"runtime"
-	"runtime/pprof"
-	"time"
-)
-
-var (
-	memStats       runtime.MemStats
-	runtimeMetrics struct {
-		MemStats struct {
-			Alloc         Gauge
-			BuckHashSys   Gauge
-			DebugGC       Gauge
-			EnableGC      Gauge
-			Frees         Gauge
-			HeapAlloc     Gauge
-			HeapIdle      Gauge
-			HeapInuse     Gauge
-			HeapObjects   Gauge
-			HeapReleased  Gauge
-			HeapSys       Gauge
-			LastGC        Gauge
-			Lookups       Gauge
-			Mallocs       Gauge
-			MCacheInuse   Gauge
-			MCacheSys     Gauge
-			MSpanInuse    Gauge
-			MSpanSys      Gauge
-			NextGC        Gauge
-			NumGC         Gauge
-			GCCPUFraction GaugeFloat64
-			PauseNs       Histogram
-			PauseTotalNs  Gauge
-			StackInuse    Gauge
-			StackSys      Gauge
-			Sys           Gauge
-			TotalAlloc    Gauge
-		}
-		NumCgoCall   Gauge
-		NumGoroutine Gauge
-		NumThread    Gauge
-		ReadMemStats Timer
-	}
-	frees       uint64
-	lookups     uint64
-	mallocs     uint64
-	numGC       uint32
-	numCgoCalls int64
-
-	threadCreateProfile = pprof.Lookup("threadcreate")
-)
-
-// Capture new values for the Go runtime statistics exported in
-// runtime.MemStats.  This is designed to be called as a goroutine.
-func CaptureRuntimeMemStats(r Registry, d time.Duration) {
-	for range time.Tick(d) {
-		CaptureRuntimeMemStatsOnce(r)
-	}
-}
-
-// Capture new values for the Go runtime statistics exported in
-// runtime.MemStats.  This is designed to be called in a background
-// goroutine.  Giving a registry which has not been given to
-// RegisterRuntimeMemStats will panic.
-//
-// Be very careful with this because runtime.ReadMemStats calls the C
-// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld()
-// and that last one does what it says on the tin.
-func CaptureRuntimeMemStatsOnce(r Registry) {
-	t := time.Now()
-	runtime.ReadMemStats(&memStats) // This takes 50-200us.
-	runtimeMetrics.ReadMemStats.UpdateSince(t)
-
-	runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc))
-	runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys))
-	if memStats.DebugGC {
-		runtimeMetrics.MemStats.DebugGC.Update(1)
-	} else {
-		runtimeMetrics.MemStats.DebugGC.Update(0)
-	}
-	if memStats.EnableGC {
-		runtimeMetrics.MemStats.EnableGC.Update(1)
-	} else {
-		runtimeMetrics.MemStats.EnableGC.Update(0)
-	}
-
-	runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees))
-	runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc))
-	runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle))
-	runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse))
-	runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects))
-	runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased))
-	runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys))
-	runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC))
-	runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups))
-	runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs))
-	runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse))
-	runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys))
-	runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse))
-	runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys))
-	runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC))
-	runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC))
-	runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats))
-
-	// <https://code.google.com/p/go/source/browse/src/pkg/runtime/mgc0.c>
-	i := numGC % uint32(len(memStats.PauseNs))
-	ii := memStats.NumGC % uint32(len(memStats.PauseNs))
-	if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) {
-		for i = 0; i < uint32(len(memStats.PauseNs)); i++ {
-			runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
-		}
-	} else {
-		if i > ii {
-			for ; i < uint32(len(memStats.PauseNs)); i++ {
-				runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
-			}
-			i = 0
-		}
-		for ; i < ii; i++ {
-			runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
-		}
-	}
-	frees = memStats.Frees
-	lookups = memStats.Lookups
-	mallocs = memStats.Mallocs
-	numGC = memStats.NumGC
-
-	runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs))
-	runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse))
-	runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys))
-	runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys))
-	runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc))
-
-	currentNumCgoCalls := numCgoCall()
-	runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls)
-	numCgoCalls = currentNumCgoCalls
-
-	runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine()))
-
-	runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count()))
-}
-
-// Register runtimeMetrics for the Go runtime statistics exported in runtime and
-// specifically runtime.MemStats.  The runtimeMetrics are named by their
-// fully-qualified Go symbols, i.e. runtime.MemStats.Alloc.
-func RegisterRuntimeMemStats(r Registry) {
-	runtimeMetrics.MemStats.Alloc = NewGauge()
-	runtimeMetrics.MemStats.BuckHashSys = NewGauge()
-	runtimeMetrics.MemStats.DebugGC = NewGauge()
-	runtimeMetrics.MemStats.EnableGC = NewGauge()
-	runtimeMetrics.MemStats.Frees = NewGauge()
-	runtimeMetrics.MemStats.HeapAlloc = NewGauge()
-	runtimeMetrics.MemStats.HeapIdle = NewGauge()
-	runtimeMetrics.MemStats.HeapInuse = NewGauge()
-	runtimeMetrics.MemStats.HeapObjects = NewGauge()
-	runtimeMetrics.MemStats.HeapReleased = NewGauge()
-	runtimeMetrics.MemStats.HeapSys = NewGauge()
-	runtimeMetrics.MemStats.LastGC = NewGauge()
-	runtimeMetrics.MemStats.Lookups = NewGauge()
-	runtimeMetrics.MemStats.Mallocs = NewGauge()
-	runtimeMetrics.MemStats.MCacheInuse = NewGauge()
-	runtimeMetrics.MemStats.MCacheSys = NewGauge()
-	runtimeMetrics.MemStats.MSpanInuse = NewGauge()
-	runtimeMetrics.MemStats.MSpanSys = NewGauge()
-	runtimeMetrics.MemStats.NextGC = NewGauge()
-	runtimeMetrics.MemStats.NumGC = NewGauge()
-	runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64()
-	runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015))
-	runtimeMetrics.MemStats.PauseTotalNs = NewGauge()
-	runtimeMetrics.MemStats.StackInuse = NewGauge()
-	runtimeMetrics.MemStats.StackSys = NewGauge()
-	runtimeMetrics.MemStats.Sys = NewGauge()
-	runtimeMetrics.MemStats.TotalAlloc = NewGauge()
-	runtimeMetrics.NumCgoCall = NewGauge()
-	runtimeMetrics.NumGoroutine = NewGauge()
-	runtimeMetrics.NumThread = NewGauge()
-	runtimeMetrics.ReadMemStats = NewTimer()
-
-	r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc)
-	r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys)
-	r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC)
-	r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC)
-	r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees)
-	r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc)
-	r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle)
-	r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse)
-	r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects)
-	r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased)
-	r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys)
-	r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC)
-	r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups)
-	r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs)
-	r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse)
-	r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys)
-	r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse)
-	r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys)
-	r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC)
-	r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC)
-	r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction)
-	r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs)
-	r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs)
-	r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse)
-	r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys)
-	r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys)
-	r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc)
-	r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall)
-	r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine)
-	r.Register("runtime.NumThread", runtimeMetrics.NumThread)
-	r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats)
-}
diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go
deleted file mode 100644
index 4307ebdba6..0000000000
--- a/metrics/runtime_cgo.go
+++ /dev/null
@@ -1,10 +0,0 @@
-//go:build cgo && !appengine && !js
-// +build cgo,!appengine,!js
-
-package metrics
-
-import "runtime"
-
-func numCgoCall() int64 {
-	return runtime.NumCgoCall()
-}
diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go
deleted file mode 100644
index 28cd44752b..0000000000
--- a/metrics/runtime_gccpufraction.go
+++ /dev/null
@@ -1,10 +0,0 @@
-//go:build go1.5
-// +build go1.5
-
-package metrics
-
-import "runtime"
-
-func gcCPUFraction(memStats *runtime.MemStats) float64 {
-	return memStats.GCCPUFraction
-}
diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go
deleted file mode 100644
index 1799bef63b..0000000000
--- a/metrics/runtime_no_cgo.go
+++ /dev/null
@@ -1,8 +0,0 @@
-//go:build !cgo || appengine || js
-// +build !cgo appengine js
-
-package metrics
-
-func numCgoCall() int64 {
-	return 0
-}
diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go
deleted file mode 100644
index af1a4b63c8..0000000000
--- a/metrics/runtime_no_gccpufraction.go
+++ /dev/null
@@ -1,10 +0,0 @@
-//go:build !go1.5
-// +build !go1.5
-
-package metrics
-
-import "runtime"
-
-func gcCPUFraction(memStats *runtime.MemStats) float64 {
-	return 0
-}
diff --git a/metrics/runtime_test.go b/metrics/runtime_test.go
deleted file mode 100644
index f85f7868f7..0000000000
--- a/metrics/runtime_test.go
+++ /dev/null
@@ -1,88 +0,0 @@
-package metrics
-
-import (
-	"runtime"
-	"testing"
-	"time"
-)
-
-func BenchmarkRuntimeMemStats(b *testing.B) {
-	r := NewRegistry()
-	RegisterRuntimeMemStats(r)
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		CaptureRuntimeMemStatsOnce(r)
-	}
-}
-
-func TestRuntimeMemStats(t *testing.T) {
-	r := NewRegistry()
-	RegisterRuntimeMemStats(r)
-	CaptureRuntimeMemStatsOnce(r)
-	zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests.
-	runtime.GC()
-	CaptureRuntimeMemStatsOnce(r)
-	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 {
-		t.Fatal(count - zero)
-	}
-	runtime.GC()
-	runtime.GC()
-	CaptureRuntimeMemStatsOnce(r)
-	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 {
-		t.Fatal(count - zero)
-	}
-	for i := 0; i < 256; i++ {
-		runtime.GC()
-	}
-	CaptureRuntimeMemStatsOnce(r)
-	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 {
-		t.Fatal(count - zero)
-	}
-	for i := 0; i < 257; i++ {
-		runtime.GC()
-	}
-	CaptureRuntimeMemStatsOnce(r)
-	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures.
-		t.Fatal(count - zero)
-	}
-}
-
-func TestRuntimeMemStatsNumThread(t *testing.T) {
-	r := NewRegistry()
-	RegisterRuntimeMemStats(r)
-	CaptureRuntimeMemStatsOnce(r)
-
-	if value := runtimeMetrics.NumThread.Value(); value < 1 {
-		t.Fatalf("got NumThread: %d, wanted at least 1", value)
-	}
-}
-
-func TestRuntimeMemStatsBlocking(t *testing.T) {
-	if g := runtime.GOMAXPROCS(0); g < 2 {
-		t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g)
-	}
-	ch := make(chan int)
-	go testRuntimeMemStatsBlocking(ch)
-	var memStats runtime.MemStats
-	t0 := time.Now()
-	runtime.ReadMemStats(&memStats)
-	t1 := time.Now()
-	t.Log("i++ during runtime.ReadMemStats:", <-ch)
-	go testRuntimeMemStatsBlocking(ch)
-	d := t1.Sub(t0)
-	t.Log(d)
-	time.Sleep(d)
-	t.Log("i++ during time.Sleep:", <-ch)
-}
-
-func testRuntimeMemStatsBlocking(ch chan int) {
-	i := 0
-	for {
-		select {
-		case ch <- i:
-			return
-		default:
-			i++
-		}
-	}
-}
diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go
new file mode 100644
index 0000000000..c68939af1e
--- /dev/null
+++ b/metrics/runtimehistogram.go
@@ -0,0 +1,319 @@
+package metrics
+
+import (
+	"math"
+	"runtime/metrics"
+	"sort"
+	"sync/atomic"
+)
+
+func getOrRegisterRuntimeHistogram(name string, scale float64, r Registry) *runtimeHistogram {
+	if r == nil {
+		r = DefaultRegistry
+	}
+	constructor := func() Histogram { return newRuntimeHistogram(scale) }
+	return r.GetOrRegister(name, constructor).(*runtimeHistogram)
+}
+
+// runtimeHistogram wraps a runtime/metrics histogram.
+type runtimeHistogram struct {
+	v           atomic.Value
+	scaleFactor float64
+}
+
+func newRuntimeHistogram(scale float64) *runtimeHistogram {
+	h := &runtimeHistogram{scaleFactor: scale}
+	h.update(&metrics.Float64Histogram{})
+	return h
+}
+
+func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) {
+	if mh == nil {
+		// The update value can be nil if the current Go version doesn't support a
+		// requested metric. It's just easier to handle nil here than putting
+		// conditionals everywhere.
+		return
+	}
+
+	s := runtimeHistogramSnapshot{
+		Counts:  make([]uint64, len(mh.Counts)),
+		Buckets: make([]float64, len(mh.Buckets)),
+	}
+	copy(s.Counts, mh.Counts)
+	copy(s.Buckets, mh.Buckets)
+	for i, b := range s.Buckets {
+		s.Buckets[i] = b * h.scaleFactor
+	}
+	h.v.Store(&s)
+}
+
+func (h *runtimeHistogram) load() *runtimeHistogramSnapshot {
+	return h.v.Load().(*runtimeHistogramSnapshot)
+}
+
+func (h *runtimeHistogram) Clear() {
+	panic("runtimeHistogram does not support Clear")
+}
+func (h *runtimeHistogram) Update(int64) {
+	panic("runtimeHistogram does not support Update")
+}
+func (h *runtimeHistogram) Sample() Sample {
+	return NilSample{}
+}
+
+// Snapshot returns a non-changing copy of the histogram.
+func (h *runtimeHistogram) Snapshot() Histogram {
+	return h.load()
+}
+
+// Count returns the sample count.
+func (h *runtimeHistogram) Count() int64 {
+	return h.load().Count()
+}
+
+// Mean returns an approximation of the mean.
+func (h *runtimeHistogram) Mean() float64 {
+	return h.load().Mean()
+}
+
+// StdDev approximates the standard deviation of the histogram.
+func (h *runtimeHistogram) StdDev() float64 {
+	return h.load().StdDev()
+}
+
+// Variance approximates the variance of the histogram.
+func (h *runtimeHistogram) Variance() float64 {
+	return h.load().Variance()
+}
+
+// Percentile computes the p'th percentile value.
+func (h *runtimeHistogram) Percentile(p float64) float64 {
+	return h.load().Percentile(p)
+}
+
+// Percentiles computes all requested percentile values.
+func (h *runtimeHistogram) Percentiles(ps []float64) []float64 {
+	return h.load().Percentiles(ps)
+}
+
+// Max returns the highest sample value.
+func (h *runtimeHistogram) Max() int64 {
+	return h.load().Max()
+}
+
+// Min returns the lowest sample value.
+func (h *runtimeHistogram) Min() int64 {
+	return h.load().Min()
+}
+
+// Sum returns the sum of all sample values.
+func (h *runtimeHistogram) Sum() int64 {
+	return h.load().Sum()
+}
+
+type runtimeHistogramSnapshot metrics.Float64Histogram
+
+func (h *runtimeHistogramSnapshot) Clear() {
+	panic("runtimeHistogram does not support Clear")
+}
+func (h *runtimeHistogramSnapshot) Update(int64) {
+	panic("runtimeHistogram does not support Update")
+}
+func (h *runtimeHistogramSnapshot) Sample() Sample {
+	return NilSample{}
+}
+
+func (h *runtimeHistogramSnapshot) Snapshot() Histogram {
+	return h
+}
+
+// Count returns the sample count.
+func (h *runtimeHistogramSnapshot) Count() int64 {
+	var count int64
+	for _, c := range h.Counts {
+		count += int64(c)
+	}
+	return count
+}
+
+// Mean returns an approximation of the mean.
+func (h *runtimeHistogramSnapshot) Mean() float64 {
+	if len(h.Counts) == 0 {
+		return 0
+	}
+	mean, _ := h.mean()
+	return mean
+}
+
+// mean computes the mean and also the total sample count.
+func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) {
+	var sum float64
+	for i, c := range h.Counts {
+		midpoint := h.midpoint(i)
+		sum += midpoint * float64(c)
+		totalCount += float64(c)
+	}
+	return sum / totalCount, totalCount
+}
+
+// midpoint returns a representative value for the bucket with the given
+// index: the middle of its range, or the opposite edge when one of the
+// edges is infinite.
+func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 {
+	hi, lo := h.Buckets[bucket+1], h.Buckets[bucket]
+	switch {
+	case math.IsInf(hi, 1):
+		// The top bucket may be open-ended (+Inf), holding every sample
+		// above lo. An infinite range has no middle, so fall back to the
+		// lower edge.
+		return lo
+	case math.IsInf(lo, -1):
+		// Likewise the bottom bucket may start at -Inf, holding every
+		// sample below hi; use the upper edge for it.
+		return hi
+	default:
+		return (lo + hi) / 2
+	}
+}
+
+// StdDev approximates the standard deviation as the square root of
+// Variance.
+func (h *runtimeHistogramSnapshot) StdDev() float64 {
+	variance := h.Variance()
+	return math.Sqrt(variance)
+}
+
+// Variance approximates the variance of the histogram, treating every
+// sample as if it sat at the midpoint of its bucket. The sum of squared
+// deviations is divided by the sample count minus one.
+func (h *runtimeHistogramSnapshot) Variance() float64 {
+	if len(h.Counts) == 0 {
+		return 0
+	}
+	avg, n := h.mean()
+	if n <= 1 {
+		// Zero or one samples have no variance.
+		return 0
+	}
+
+	var sqDiffs float64
+	for i := range h.Counts {
+		delta := h.midpoint(i) - avg
+		sqDiffs += float64(h.Counts[i]) * (delta * delta)
+	}
+	return sqDiffs / (n - 1)
+}
+
+// Percentile computes the p'th percentile value. p is a fraction of the
+// sample count and is clamped to the range [0, 1], the same way Percentiles
+// treats its inputs.
+func (h *runtimeHistogramSnapshot) Percentile(p float64) float64 {
+	p = math.Max(0, math.Min(1.0, p))
+	// computePercentiles works in place on sample-count thresholds.
+	values := [1]float64{float64(h.Count()) * p}
+	h.computePercentiles(values[:])
+	return values[0]
+}
+
+// Percentiles computes all requested percentile values.
+func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 {
+	// Compute threshold values. We need these to be sorted
+	// for the percentile computation, but restore the original
+	// order later, so keep the indexes as well.
+	count := float64(h.Count())
+	thresholds := make([]float64, len(ps))
+	indexes := make([]int, len(ps))
+	for i, percentile := range ps {
+		thresholds[i] = count * math.Max(0, math.Min(1.0, percentile))
+		indexes[i] = i
+	}
+	sort.Sort(floatsAscendingKeepingIndex{thresholds, indexes})
+
+	// Now compute. The result is stored back into the thresholds slice.
+	h.computePercentiles(thresholds)
+
+	// Put the result back into the requested order.
+	sort.Sort(floatsByIndex{thresholds, indexes})
+	return thresholds
+}
+
+// computePercentiles resolves sample-count thresholds to values, in place.
+//
+// thresh must be sorted in ascending order. Each threshold is replaced by the
+// lower edge of the first bucket at which the cumulative sample count exceeds
+// it. Thresholds that are never exceeded (those >= the total count, e.g. the
+// 100th percentile) are replaced by the upper edge of the highest non-empty
+// bucket, or by its lower edge if the upper edge is +Inf. When there are no
+// samples at all they become 0.
+func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) {
+	var (
+		totalCount float64
+		last       = -1 // index of the highest non-empty bucket seen so far
+	)
+	for i, count := range h.Counts {
+		if count > 0 {
+			last = i
+		}
+		totalCount += float64(count)
+
+		for len(thresh) > 0 && thresh[0] < totalCount {
+			thresh[0] = h.Buckets[i]
+			thresh = thresh[1:]
+		}
+		if len(thresh) == 0 {
+			return
+		}
+	}
+
+	// Whatever is left was not below the total count. Leaving it untouched
+	// would return a sample count as if it were a value, so report the top
+	// of the occupied range instead.
+	var top float64
+	if last >= 0 {
+		top = h.Buckets[last+1]
+		if math.IsInf(top, 1) {
+			top = h.Buckets[last]
+		}
+	}
+	for i := range thresh {
+		thresh[i] = top
+	}
+}
+
+// Note: runtime/metrics.Float64Histogram is a collection of float64s, but the methods
+// below need to return int64 to satisfy the interface. The histogram provided by runtime
+// also doesn't keep track of individual samples, so results are approximated.
+
+// Max returns the highest sample value, approximated by the upper edge of
+// the highest non-empty bucket, rounded up. If that edge is +Inf, the lower
+// edge of the bucket is used instead. It returns 0 when no bucket holds a
+// sample.
+func (h *runtimeHistogramSnapshot) Max() int64 {
+	i := len(h.Counts) - 1
+	for i >= 0 && h.Counts[i] == 0 {
+		i--
+	}
+	if i < 0 {
+		return 0
+	}
+	upper := h.Buckets[i+1]
+	if math.IsInf(upper, 1) {
+		upper = h.Buckets[i]
+	}
+	return int64(math.Ceil(upper))
+}
+
+// Min returns the lowest sample value, approximated by the lower edge of the
+// lowest non-empty bucket, rounded down. If that edge is -Inf, the upper edge
+// of the bucket is used instead, mirroring how Max treats a +Inf edge;
+// converting -Inf to int64 has no defined result. It returns 0 when no bucket
+// holds a sample.
+func (h *runtimeHistogramSnapshot) Min() int64 {
+	for i, count := range h.Counts {
+		if count > 0 {
+			edge := h.Buckets[i]
+			if math.IsInf(edge, -1) {
+				edge = h.Buckets[i+1]
+			}
+			return int64(math.Floor(edge))
+		}
+	}
+	return 0
+}
+
+// Sum returns an approximation of the sum of all sample values, rounded up.
+// Every sample is counted at the lower edge of its bucket. For a bucket whose
+// lower edge is -Inf the upper edge is used instead.
+func (h *runtimeHistogramSnapshot) Sum() int64 {
+	var sum float64
+	for i, count := range h.Counts {
+		if count == 0 {
+			// Skip empty buckets: an infinite edge multiplied by a zero
+			// count is NaN and would poison the whole sum.
+			continue
+		}
+		edge := h.Buckets[i]
+		if math.IsInf(edge, -1) {
+			edge = h.Buckets[i+1]
+		}
+		sum += edge * float64(count)
+	}
+	return int64(math.Ceil(sum))
+}
+
+// floatsAscendingKeepingIndex orders values ascending and applies every swap
+// to indexes as well, so that the two slices stay paired.
+type floatsAscendingKeepingIndex struct {
+	values  []float64
+	indexes []int
+}
+
+// Len returns the number of values.
+func (s floatsAscendingKeepingIndex) Len() int { return len(s.values) }
+
+// Less reports whether the value at i is smaller than the value at j.
+func (s floatsAscendingKeepingIndex) Less(i, j int) bool {
+	vals := s.values
+	return vals[i] < vals[j]
+}
+
+// Swap exchanges elements i and j in both slices.
+func (s floatsAscendingKeepingIndex) Swap(i, j int) {
+	vals, idxs := s.values, s.indexes
+	vals[i], vals[j] = vals[j], vals[i]
+	idxs[i], idxs[j] = idxs[j], idxs[i]
+}
+
+// floatsByIndex orders elements by their entry in indexes and applies every
+// swap to values as well, so that the two slices stay paired.
+type floatsByIndex struct {
+	values  []float64
+	indexes []int
+}
+
+// Len returns the number of values.
+func (s floatsByIndex) Len() int { return len(s.values) }
+
+// Less reports whether the index at i is smaller than the index at j.
+func (s floatsByIndex) Less(i, j int) bool {
+	idxs := s.indexes
+	return idxs[i] < idxs[j]
+}
+
+// Swap exchanges elements i and j in both slices.
+func (s floatsByIndex) Swap(i, j int) {
+	vals, idxs := s.values, s.indexes
+	vals[i], vals[j] = vals[j], vals[i]
+	idxs[i], idxs[j] = idxs[j], idxs[i]
+}
diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go
new file mode 100644
index 0000000000..d53a014383
--- /dev/null
+++ b/metrics/runtimehistogram_test.go
@@ -0,0 +1,133 @@
+package metrics
+
+import (
+	"fmt"
+	"math"
+	"reflect"
+	"runtime/metrics"
+	"testing"
+)
+
+// Compile-time check that *runtimeHistogram implements Histogram.
+var _ Histogram = (*runtimeHistogram)(nil)
+
+// runtimeHistogramTest is a single case for TestRuntimeHistogramStats: an
+// input histogram together with the statistics expected from its snapshot.
+type runtimeHistogramTest struct {
+	h metrics.Float64Histogram // input histogram
+
+	// Expected results of the snapshot methods of the same name.
+	Count       int64
+	Min         int64
+	Max         int64
+	Sum         int64
+	Mean        float64
+	Variance    float64
+	StdDev      float64
+	Percentiles []float64 // .5 .8 .9 .99 .995
+}
+
+// This test checks the results of statistical functions implemented
+// by runtimeHistogramSnapshot.
+func TestRuntimeHistogramStats(t *testing.T) {
+	tests := []runtimeHistogramTest{
+		0: {
+			h: metrics.Float64Histogram{
+				Counts:  []uint64{},
+				Buckets: []float64{},
+			},
+			Count:       0,
+			Max:         0,
+			Min:         0,
+			Sum:         0,
+			Mean:        0,
+			Variance:    0,
+			StdDev:      0,
+			Percentiles: []float64{0, 0, 0, 0, 0},
+		},
+		1: {
+			// This checks the case where the highest bucket is +Inf.
+			h: metrics.Float64Histogram{
+				Counts:  []uint64{0, 1, 2},
+				Buckets: []float64{0, 0.5, 1, math.Inf(1)},
+			},
+			Count:       3,
+			Max:         1,
+			Min:         0,
+			Sum:         3,
+			Mean:        0.9166666,
+			Percentiles: []float64{1, 1, 1, 1, 1},
+			Variance:    0.020833,
+			StdDev:      0.144433,
+		},
+		2: {
+			h: metrics.Float64Histogram{
+				Counts:  []uint64{8, 6, 3, 1},
+				Buckets: []float64{12, 16, 18, 24, 25},
+			},
+			Count:       18,
+			Max:         25,
+			Min:         12,
+			Sum:         270,
+			Mean:        16.75,
+			Variance:    10.3015,
+			StdDev:      3.2096,
+			Percentiles: []float64{16, 18, 18, 24, 24},
+		},
+	}
+
+	for i, test := range tests {
+		t.Run(fmt.Sprint(i), func(t *testing.T) {
+			s := runtimeHistogramSnapshot(test.h)
+
+			if v := s.Count(); v != test.Count {
+				t.Errorf("Count() = %v, want %v", v, test.Count)
+			}
+			if v := s.Min(); v != test.Min {
+				t.Errorf("Min() = %v, want %v", v, test.Min)
+			}
+			if v := s.Max(); v != test.Max {
+				t.Errorf("Max() = %v, want %v", v, test.Max)
+			}
+			if v := s.Sum(); v != test.Sum {
+				t.Errorf("Sum() = %v, want %v", v, test.Sum)
+			}
+			if v := s.Mean(); !approxEqual(v, test.Mean, 0.0001) {
+				t.Errorf("Mean() = %v, want %v", v, test.Mean)
+			}
+			if v := s.Variance(); !approxEqual(v, test.Variance, 0.0001) {
+				t.Errorf("Variance() = %v, want %v", v, test.Variance)
+			}
+			if v := s.StdDev(); !approxEqual(v, test.StdDev, 0.0001) {
+				t.Errorf("StdDev() = %v, want %v", v, test.StdDev)
+			}
+			ps := []float64{.5, .8, .9, .99, .995}
+			if v := s.Percentiles(ps); !reflect.DeepEqual(v, test.Percentiles) {
+				t.Errorf("Percentiles(%v) = %v, want %v", ps, v, test.Percentiles)
+			}
+		})
+	}
+}
+
+// approxEqual reports whether x and y differ by less than ε. Two infinities
+// of the same sign, as well as two NaNs, also count as equal.
+func approxEqual(x, y, ε float64) bool {
+	switch {
+	case math.IsInf(x, -1) && math.IsInf(y, -1):
+		return true
+	case math.IsInf(x, 1) && math.IsInf(y, 1):
+		return true
+	case math.IsNaN(x) && math.IsNaN(y):
+		return true
+	}
+	return math.Abs(x-y) < ε
+}
+
+// This test verifies that requesting Percentiles in unsorted order
+// returns them in the requested order.
+func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) {
+	p := runtimeHistogramSnapshot{
+		Counts:  []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+	}
+	result := p.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2})
+	expected := []float64{10, 2, 5, 1, 2}
+	if !reflect.DeepEqual(result, expected) {
+		t.Fatal("wrong result:", result)
+	}
+}