container/intsets: popcount: use POPCNT on amd64, Hacker's Delight algorithm on 386
This function accounts for 2% of "godoc -analysis=pointer" and this change makes it twice as fast---and simpler. Added test and benchmark. Change-Id: I8578fa42dce34df057d81f6c522a7b4e0506d09d Reviewed-on: https://go-review.googlesource.com/15211 Run-TryBot: Robert Griesemer <gri@golang.org> Reviewed-by: Ilya Tocar <ilya.tocar@intel.com> Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
		
							parent
							
								
									3f8a7a0787
								
							
						
					
					
						commit
						b7f0150d16
					
				|  | @ -0,0 +1,20 @@ | |||
| // Copyright 2015 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build amd64
 | ||||
| 
 | ||||
| package intsets | ||||
| 
 | ||||
| func popcnt(x word) int | ||||
| func havePOPCNT() bool | ||||
| 
 | ||||
| var hasPOPCNT = havePOPCNT() | ||||
| 
 | ||||
| // popcount returns the population count (number of set bits) of x.
 | ||||
| func popcount(x word) int { | ||||
| 	if hasPOPCNT { | ||||
| 		return popcnt(x) | ||||
| 	} | ||||
| 	return popcountTable(x) // faster than Hacker's Delight
 | ||||
| } | ||||
|  | @ -0,0 +1,28 @@ | |||
| // Copyright 2015 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
// func havePOPCNT() bool
//
// Executes CPUID with AX=1 and returns bit 23 of CX, the POPCNT
// feature flag, as the 1-byte bool result.
TEXT ·havePOPCNT(SB),NOSPLIT,$0-1
	MOVQ	$1, AX
	CPUID
	SHRQ	$23, CX
	ANDQ	$1, CX
	MOVB	CX, ret+0(FP)
	RET
| 
 | ||||
// func popcnt(word) int
//
// Returns the number of set bits in the 64-bit argument x.
// The argument area is 16 bytes: x at 0(FP) and the result at 8(FP).
TEXT ·popcnt(SB),NOSPLIT,$0-16
	XORQ	AX, AX
	MOVQ	x+0(FP), SI
	// POPCNTQ SI, AX (register to register) is not recognized by
	// the Go assembler, so we assemble it ourselves:
	// F3 prefix, REX.W, opcode 0F B8, ModRM C6 (reg=AX, rm=SI).
	BYTE	$0xf3
	BYTE	$0x48
	BYTE	$0x0f
	BYTE	$0xb8
	BYTE	$0xc6
	MOVQ	AX, ret+8(FP)
	RET
|  | @ -0,0 +1,32 @@ | |||
| // Copyright 2015 The Go Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style
 | ||||
| // license that can be found in the LICENSE file.
 | ||||
| 
 | ||||
| // +build !amd64
 | ||||
| 
 | ||||
| package intsets | ||||
| 
 | ||||
| import "runtime" | ||||
| 
 | ||||
| // We compared three algorithms---Hacker's Delight, table lookup,
 | ||||
| // and AMD64's SSE4.2 hardware POPCNT---on a 2.67GHz Xeon X5550.
 | ||||
| //
 | ||||
| // % GOARCH=amd64 go test -run=NONE -bench=Popcount
 | ||||
| // POPCNT               5.12 ns/op
 | ||||
| // Table                8.53 ns/op
 | ||||
| // HackersDelight       9.96 ns/op
 | ||||
| //
 | ||||
| // % GOARCH=386 go test -run=NONE -bench=Popcount
 | ||||
| // Table               10.4  ns/op
 | ||||
| // HackersDelight       5.23 ns/op
 | ||||
| //
 | ||||
| // (AMD64's BMI1 and ABM extensions support ntz and nlz too,
 | ||||
| // but they aren't critical.)
 | ||||
| 
 | ||||
| // popcount returns the population count (number of set bits) of x.
 | ||||
| func popcount(x word) int { | ||||
| 	if runtime.GOARCH == "386" { | ||||
| 		return popcountHD(uint32(x)) | ||||
| 	} | ||||
| 	return popcountTable(x) | ||||
| } | ||||
|  | @ -4,6 +4,16 @@ | |||
| 
 | ||||
| package intsets | ||||
| 
 | ||||
// popcountHD returns the number of set bits in the 32-bit value x.
//
// It is the divide-and-conquer bit count from Hacker's Delight,
// Figure 5-2: adjacent fields are summed in place, doubling the field
// width at each step, until the low bits hold the total.
func popcountHD(x uint32) int {
	const (
		m1 = 0x55555555 // low bit of every 2-bit field
		m2 = 0x33333333 // low two bits of every 4-bit field
		m4 = 0x0f0f0f0f // low four bits of every byte
	)
	x = x - ((x >> 1) & m1)      // 2-bit sums
	x = (x & m2) + ((x >> 2) & m2) // 4-bit sums
	x = (x + (x >> 4)) & m4      // 8-bit sums
	x += x >> 8
	x += x >> 16
	// The total is at most 32, so six bits are enough to hold it.
	return int(x & 0x3f)
}
| 
 | ||||
| var a [1 << 8]byte | ||||
| 
 | ||||
| func init() { | ||||
|  | @ -18,8 +28,7 @@ func init() { | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| // popcount returns the population count (number of set bits) of x.
 | ||||
| func popcount(x word) int { | ||||
| func popcountTable(x word) int { | ||||
| 	return int(a[byte(x>>(0*8))] + | ||||
| 		a[byte(x>>(1*8))] + | ||||
| 		a[byte(x>>(2*8))] + | ||||
|  |  | |||
|  | @ -4,7 +4,10 @@ | |||
| 
 | ||||
| package intsets | ||||
| 
 | ||||
| import "testing" | ||||
| import ( | ||||
| 	"math/rand" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
| func TestNLZ(t *testing.T) { | ||||
| 	// Test the platform-specific edge case.
 | ||||
|  | @ -23,3 +26,33 @@ func TestNLZ(t *testing.T) { | |||
| 
 | ||||
| // Backdoor for testing.
 | ||||
| func (s *Sparse) Check() error { return s.check() } | ||||
| 
 | ||||
| func dumbPopcount(x word) int { | ||||
| 	var popcnt int | ||||
| 	for i := uint(0); i < bitsPerWord; i++ { | ||||
| 		if x&(1<<i) != 0 { | ||||
| 			popcnt++ | ||||
| 		} | ||||
| 	} | ||||
| 	return popcnt | ||||
| } | ||||
| 
 | ||||
| func TestPopcount(t *testing.T) { | ||||
| 	for i := 0; i < 1e5; i++ { | ||||
| 		x := word(rand.Uint32()) | ||||
| 		if bitsPerWord == 64 { | ||||
| 			x = x | (word(rand.Uint32()) << 32) | ||||
| 		} | ||||
| 		want := dumbPopcount(x) | ||||
| 		got := popcount(x) | ||||
| 		if got != want { | ||||
| 			t.Errorf("popcount(%d) = %d, want %d", x, got, want) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func BenchmarkPopcount(b *testing.B) { | ||||
| 	for i := 0; i < b.N; i++ { | ||||
| 		popcount(word(i)) | ||||
| 	} | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue