Skip to content

Commit d7c7d88

Browse files
bmakam-qdt authored and cherrymui committed
cmd/compile: intrinsify math/big.mulWW on ARM64
Performance numbers on amberwing:

pkg: math/big
name                 old time/op  new time/op  delta
QuoRem               3.08µs ± 0%  2.93µs ± 1%   -4.89%  (p=0.008 n=5+5)
ModSqrt225_Tonelli    721µs ± 0%   718µs ± 0%   -0.46%  (p=0.008 n=5+5)
ModSqrt224_3Mod4      218µs ± 0%   217µs ± 0%   -0.27%  (p=0.008 n=5+5)
ModSqrt5430_Tonelli   2.91s ± 0%   2.91s ± 0%        ~  (p=0.222 n=5+5)
ModSqrt5430_3Mod4     970ms ± 0%   970ms ± 0%        ~  (p=0.151 n=5+5)
Sqrt                 45.9µs ± 0%  43.8µs ± 0%   -4.63%  (p=0.008 n=5+5)
IntSqr/1             19.9ns ± 0%  17.3ns ± 0%  -13.07%  (p=0.008 n=5+5)
IntSqr/2             52.6ns ± 0%  50.8ns ± 0%   -3.35%  (p=0.008 n=5+5)
IntSqr/3             70.4ns ± 0%  69.4ns ± 0%        ~  (p=0.079 n=4+5)
IntSqr/5              103ns ± 0%    99ns ± 0%   -3.98%  (p=0.008 n=5+5)
IntSqr/8              179ns ± 0%   178ns ± 0%   -0.56%  (p=0.008 n=5+5)
IntSqr/10             272ns ± 0%   272ns ± 0%        ~  (all equal)
IntSqr/20             763ns ± 0%   787ns ± 0%   +3.15%  (p=0.016 n=5+4)
IntSqr/30            1.25µs ± 1%  1.29µs ± 1%   +3.27%  (p=0.008 n=5+5)
IntSqr/50            2.64µs ± 0%  2.71µs ± 0%   +2.61%  (p=0.008 n=5+5)
IntSqr/80            5.67µs ± 0%  5.72µs ± 0%   +0.88%  (p=0.008 n=5+5)
IntSqr/100           8.05µs ± 0%  8.09µs ± 0%   +0.45%  (p=0.008 n=5+5)
IntSqr/200           28.0µs ± 0%  28.1µs ± 0%        ~  (p=0.151 n=5+5)
IntSqr/300           59.4µs ± 0%  59.6µs ± 0%   +0.36%  (p=0.008 n=5+5)
IntSqr/500            141µs ± 0%   141µs ± 0%   +0.08%  (p=0.008 n=5+5)
IntSqr/800            280µs ± 0%   280µs ± 0%   -0.12%  (p=0.008 n=5+5)
IntSqr/1000           429µs ± 0%   428µs ± 0%   -0.27%  (p=0.008 n=5+5)

pkg: crypto-ecdsa
name                 old time/op  new time/op  delta
SignP384             7.85ms ± 1%  7.61ms ± 1%   -3.12%  (p=0.008 n=5+5)

Change-Id: I1ab30856cc0e570f6312f0bd8914779b55adbc16
Reviewed-on: https://go-review.googlesource.com/104135
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
1 parent e6ab614 commit d7c7d88

File tree

6 files changed

+52
-2
lines changed

6 files changed

+52
-2
lines changed

src/cmd/compile/internal/arm64/ssa.go

+15
Original file line number | Diff line number | Diff line change
@@ -413,6 +413,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
413413
p.Reg = v.Args[0].Reg()
414414
p.To.Type = obj.TYPE_REG
415415
p.To.Reg = v.Reg()
416+
case ssa.OpARM64LoweredMuluhilo:
417+
r0 := v.Args[0].Reg()
418+
r1 := v.Args[1].Reg()
419+
p := s.Prog(arm64.AUMULH)
420+
p.From.Type = obj.TYPE_REG
421+
p.From.Reg = r1
422+
p.Reg = r0
423+
p.To.Type = obj.TYPE_REG
424+
p.To.Reg = v.Reg0()
425+
p1 := s.Prog(arm64.AMUL)
426+
p1.From.Type = obj.TYPE_REG
427+
p1.From.Reg = r1
428+
p1.Reg = r0
429+
p1.To.Type = obj.TYPE_REG
430+
p1.To.Reg = v.Reg1()
416431
case ssa.OpARM64LoweredAtomicExchange64,
417432
ssa.OpARM64LoweredAtomicExchange32:
418433
// LDAXR (Rarg0), Rout

src/cmd/compile/internal/gc/ssa.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -3207,7 +3207,7 @@ func init() {
32073207
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
32083208
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
32093209
},
3210-
sys.ArchAMD64)
3210+
sys.ArchAMD64, sys.ArchARM64)
32113211
add("math/big", "divWW",
32123212
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
32133213
return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])

src/cmd/compile/internal/ssa/gen/ARM64.rules

+2-1
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
(Hmul64u x y) -> (UMULH x y)
3030
(Hmul32 x y) -> (SRAconst (MULL <typ.Int64> x y) [32])
3131
(Hmul32u x y) -> (SRAconst (UMULL <typ.UInt64> x y) [32])
32+
(Mul64uhilo x y) -> (LoweredMuluhilo x y)
3233

3334
(Div64 x y) -> (DIV x y)
3435
(Div64u x y) -> (UDIV x y)
@@ -1791,4 +1792,4 @@
17911792
(FSUBS a (FNMULS x y)) -> (FMADDS a x y)
17921793
(FSUBD a (FNMULD x y)) -> (FMADDD a x y)
17931794
(FSUBS (FNMULS x y) a) -> (FNMADDS a x y)
1794-
(FSUBD (FNMULD x y) a) -> (FNMADDD a x y)
1795+
(FSUBD (FNMULD x y) a) -> (FNMADDD a x y)

src/cmd/compile/internal/ssa/gen/ARM64Ops.go

+2
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,7 @@ func init() {
142142
gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
143143
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
144144
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
145+
gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
145146
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
146147
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
147148
gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
@@ -203,6 +204,7 @@ func init() {
203204
{name: "EON", argLength: 2, reg: gp21, asm: "EON"}, // arg0 ^ ^arg1
204205
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0 | ^arg1
205206

207+
{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
206208
// unary ops
207209
{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
208210
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0

src/cmd/compile/internal/ssa/opGen.go

+16
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteARM64.go

+16
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)