Skip to content

Commit 8eec2bb

Browse files
committed
cmd/compile: added some intrinsics to SSA back end
One intrinsic was needed to help get the very best performance out of a future GC; as long as that one was being added, I also added Bswap since that is sometimes a handy thing to have. I had intended to fill out the bit-scan intrinsic family, but the mismatch between the "scan forward" instruction and "count leading zeroes" was large enough to cause me to leave it out -- it poses a dilemma that I'd rather dodge right now. These intrinsics are not exposed for general use. That's a separate issue requiring an API proposal change (https://github.com/golang/proposal). All intrinsics are tested, both that they are substituted on the appropriate architecture, and that they produce the expected result. Change-Id: I5848037cfd97de4f75bdc33bdd89bba00af4a8ee Reviewed-on: https://go-review.googlesource.com/20564 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: David Chase <drchase@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
1 parent 2e90192 commit 8eec2bb

File tree

15 files changed

+1143
-36
lines changed

15 files changed

+1143
-36
lines changed

src/cmd/compile/internal/amd64/prog.go

+38-20
Original file line numberDiff line numberDiff line change
@@ -36,26 +36,44 @@ var progtable = [x86.ALAST & obj.AMask]obj.ProgInfo{
3636

3737
// NOP is an internal no-op that also stands
3838
// for USED and SET annotations, not the Intel opcode.
39-
obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite},
40-
x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
41-
x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
42-
x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
43-
x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
44-
x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
45-
x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
46-
x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
47-
x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
48-
x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
49-
x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
50-
x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
51-
x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
52-
x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
53-
obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
54-
x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
55-
x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
56-
x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
57-
x86.ACLD & obj.AMask: {Flags: gc.OK},
58-
x86.ASTD & obj.AMask: {Flags: gc.OK},
39+
obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite},
40+
x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
41+
x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
42+
x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
43+
x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
44+
x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
45+
x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
46+
x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
47+
x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
48+
x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
49+
x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
50+
x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
51+
x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
52+
x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
53+
54+
x86.ABSFL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
55+
x86.ABSFQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
56+
x86.ABSFW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
57+
x86.ABSRL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
58+
x86.ABSRQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
59+
x86.ABSRW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
60+
x86.ABSWAPL & obj.AMask: {Flags: gc.SizeL | RightRdwr},
61+
x86.ABSWAPQ & obj.AMask: {Flags: gc.SizeQ | RightRdwr},
62+
63+
obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
64+
x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
65+
x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
66+
x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
67+
x86.ACLD & obj.AMask: {Flags: gc.OK},
68+
x86.ASTD & obj.AMask: {Flags: gc.OK},
69+
70+
x86.ACMOVLEQ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
71+
x86.ACMOVLNE & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
72+
x86.ACMOVQEQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
73+
x86.ACMOVQNE & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
74+
x86.ACMOVWEQ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
75+
x86.ACMOVWNE & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
76+
5977
x86.ACMPB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | gc.RightRead | gc.SetCarry},
6078
x86.ACMPL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.SetCarry},
6179
x86.ACMPQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.SetCarry},

src/cmd/compile/internal/amd64/ssa.go

+33-3
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,33 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
477477
p.From.Offset = v.AuxInt2Int64()
478478
p.To.Type = obj.TYPE_REG
479479
p.To.Reg = r
480+
481+
case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
482+
ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
483+
r := gc.SSARegNum(v)
484+
x := gc.SSARegNum(v.Args[0])
485+
// Arg0 is in/out, move in to out if not already same
486+
if r != x {
487+
p := gc.Prog(moveByType(v.Type))
488+
p.From.Type = obj.TYPE_REG
489+
p.From.Reg = x
490+
p.To.Type = obj.TYPE_REG
491+
p.To.Reg = r
492+
}
493+
494+
// Constant into AX, after arg0 movement in case arg0 is in AX
495+
p := gc.Prog(moveByType(v.Type))
496+
p.From.Type = obj.TYPE_CONST
497+
p.From.Offset = v.AuxInt2Int64()
498+
p.To.Type = obj.TYPE_REG
499+
p.To.Reg = x86.REG_AX
500+
501+
p = gc.Prog(v.Op.Asm())
502+
p.From.Type = obj.TYPE_REG
503+
p.From.Reg = x86.REG_AX
504+
p.To.Type = obj.TYPE_REG
505+
p.To.Reg = r
506+
480507
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst:
481508
r := gc.SSARegNum(v)
482509
x := gc.SSARegNum(v.Args[0])
@@ -955,6 +982,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
955982
gc.Maxarg = v.AuxInt
956983
}
957984
case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB,
985+
ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
958986
ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB:
959987
x := gc.SSARegNum(v.Args[0])
960988
r := gc.SSARegNum(v)
@@ -968,7 +996,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
968996
p := gc.Prog(v.Op.Asm())
969997
p.To.Type = obj.TYPE_REG
970998
p.To.Reg = r
971-
case ssa.OpAMD64SQRTSD:
999+
case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
1000+
ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
1001+
ssa.OpAMD64SQRTSD:
9721002
p := gc.Prog(v.Op.Asm())
9731003
p.From.Type = obj.TYPE_REG
9741004
p.From.Reg = gc.SSARegNum(v.Args[0])
@@ -1008,9 +1038,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
10081038
opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)
10091039

10101040
case ssa.OpAMD64InvertFlags:
1011-
v.Fatalf("InvertFlags should never make it to codegen %v", v)
1041+
v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
10121042
case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
1013-
v.Fatalf("Flag* ops should never make it to codegen %v", v)
1043+
v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
10141044
case ssa.OpAMD64REPSTOSQ:
10151045
gc.Prog(x86.AREP)
10161046
gc.Prog(x86.ASTOSQ)

src/cmd/compile/internal/gc/go.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ func (v Val) Ctype() Ctype {
5555
}
5656

5757
type Pkg struct {
58-
Name string // package name
59-
Path string // string literal used in import statement
58+
Name string // package name, e.g. "sys"
59+
Path string // string literal used in import statement, e.g. "runtime/internal/sys"
6060
Pathsym *Sym
6161
Prefix string // escaped path for use in symbol table
6262
Imported bool // export data of this package was parsed
@@ -469,6 +469,9 @@ const (
469469

470470
// Set, use, or kill of carry bit.
471471
// Kill means we never look at the carry bit after this kind of instruction.
472+
// Originally for understanding ADC, RCR, and so on, but now also
473+
// tracks set, use, and kill of the zero and overflow bits.
474+
// TODO rename to {Set,Use,Kill}Flags
472475
SetCarry = 1 << 24
473476
UseCarry = 1 << 25
474477
KillCarry = 1 << 26

src/cmd/compile/internal/gc/inl.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ func inlnode(n *Node) *Node {
453453
if Debug['m'] > 3 {
454454
fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
455455
}
456-
if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 { // normal case
456+
if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 && !isIntrinsicCall1(n) { // normal case
457457
n = mkinlcall(n, n.Left, n.Isddd)
458458
} else if n.Left.Op == ONAME && n.Left.Left != nil && n.Left.Left.Op == OTYPE && n.Left.Right != nil && n.Left.Right.Op == ONAME { // methods called as functions
459459
if n.Left.Sym.Def != nil {

src/cmd/compile/internal/gc/ssa.go

+76-1
Original file line numberDiff line numberDiff line change
@@ -2052,7 +2052,13 @@ func (s *state) expr(n *Node) *ssa.Value {
20522052
p, l, c := s.slice(n.Left.Type, v, i, j, k)
20532053
return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c)
20542054

2055-
case OCALLFUNC, OCALLINTER, OCALLMETH:
2055+
case OCALLFUNC:
2056+
if isIntrinsicCall1(n) {
2057+
return s.intrinsicCall1(n)
2058+
}
2059+
fallthrough
2060+
2061+
case OCALLINTER, OCALLMETH:
20562062
a := s.call(n, callNormal)
20572063
return s.newValue2(ssa.OpLoad, n.Type, a, s.mem())
20582064

@@ -2373,6 +2379,75 @@ const (
23732379
callGo
23742380
)
23752381

2382+
// isSSAIntrinsic1 reports whether s names a recognized 1-arg intrinsic
2383+
// that can be handled by the SSA backend.
2384+
// SSA uses this, but so does the front end, to decide whether it should
2385+
// refrain from inlining a function because it is a candidate for
2386+
// intrinsic substitution.
2387+
func isSSAIntrinsic1(s *Sym) bool {
2388+
// The test below is not quite accurate -- in the event that
2389+
// SSA is disabled on a per-function basis, for example
2390+
// because of hash-keyed binary failure search, that would
2391+
// not be noted here (only the global ssaEnabled is consulted),
2392+
// and thus an inlining would not occur (in practice, inlining
2393+
// so far has only been noticed for Bswap32 and the 16-bit count
2394+
// leading/trailing instructions, but heuristics might change
2395+
// in the future or on different architectures).
2396+
if !ssaEnabled || ssa.IntrinsicsDisable || Thearch.Thechar != '6' {
2397+
return false
2398+
}
2399+
if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" { // match on the full import path, not the package name
2400+
switch s.Name {
2401+
case
2402+
"Ctz64", "Ctz32", "Ctz16",
2403+
"Bswap64", "Bswap32":
2404+
return true
2405+
}
2406+
}
2407+
return false
2408+
}
2409+
2410+
// isIntrinsicCall1 reports whether n is a call whose callee symbol
// (n.Left.Sym) is a recognized 1-arg SSA intrinsic, as decided by
// isSSAIntrinsic1. A nil n or a nil n.Left yields false.
func isIntrinsicCall1(n *Node) bool {
2411+
if n == nil || n.Left == nil {
2412+
return false
2413+
}
2414+
return isSSAIntrinsic1(n.Left.Sym)
2415+
}
2416+
2417+
// intrinsicFirstArg evaluates the first entry of n.List and returns the
// resulting SSA value. If that entry is an OAS node, its right-hand
// side (x.Right) is evaluated instead of the node itself.
func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
2419+
x := n.List.First()
2420+
if x.Op == OAS { // presumably an argument wrapped in an assignment to a temporary -- TODO confirm
2421+
x = x.Right
2422+
}
2423+
return s.expr(x)
2424+
}
2425+
2426+
// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
2427+
// into the corresponding generic SSA op (Ctz64/32/16, Bswap64/32),
// applied to the call's first argument, and returns that value.
// The callee is selected by n.Left.Sym.Name; an unhandled name is
// fatal. When ssa.IntrinsicsDebug > 0, a warning describing the
// substitution is reported at the call's line.
2428+
func (s *state) intrinsicCall1(n *Node) *ssa.Value {
2429+
var result *ssa.Value
2430+
switch n.Left.Sym.Name {
2431+
case "Ctz64":
2432+
result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
2433+
case "Ctz32":
2434+
result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
2435+
case "Ctz16":
2436+
result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n))
2437+
case "Bswap64":
2438+
result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
2439+
case "Bswap32":
2440+
result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
2441+
}
2442+
if result == nil { // the name matched none of the cases above
2443+
Fatalf("Unknown special call: %v", n.Left.Sym)
2444+
}
2445+
if ssa.IntrinsicsDebug > 0 {
2446+
Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
2447+
}
2448+
return result
2449+
}
2450+
23762451
// Calls the function n using the specified call type.
23772452
// Returns the address of the return value (or nil if none).
23782453
func (s *state) call(n *Node, k callKind) *ssa.Value {

src/cmd/compile/internal/ssa/compile.go

+18
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ type pass struct {
120120
// Run consistency checker between each phase
121121
var checkEnabled = false
122122

123+
// Debug output
124+
var IntrinsicsDebug int
125+
var IntrinsicsDisable bool
126+
123127
// PhaseOption sets the specified flag in the specified ssa phase,
124128
// returning empty string if this was successful or a string explaining
125129
// the error if it was not.
@@ -157,6 +161,20 @@ func PhaseOption(phase, flag string, val int) string {
157161
}
158162
}
159163

164+
if phase == "intrinsics" {
165+
switch flag {
166+
case "on":
167+
IntrinsicsDisable = val == 0
168+
case "off":
169+
IntrinsicsDisable = val != 0
170+
case "debug":
171+
IntrinsicsDebug = val
172+
default:
173+
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
174+
}
175+
return ""
176+
}
177+
160178
underphase := strings.Replace(phase, "_", " ", -1)
161179
var re *regexp.Regexp
162180
if phase[0] == '~' {

src/cmd/compile/internal/ssa/gen/AMD64.rules

+32
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,38 @@
9292
(Com16 x) -> (NOTW x)
9393
(Com8 x) -> (NOTB x)
9494

95+
// CMPQconst 0 below is redundant because BSF sets Z but how to remove?
96+
(Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
97+
(Ctz32 <t> x) -> (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
98+
(Ctz16 <t> x) -> (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
99+
100+
(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c])
101+
(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c])
102+
(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c])
103+
104+
(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c])
105+
(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c])
106+
(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c])
107+
108+
(CMOVQEQconst x (FlagLT_ULT)) -> x
109+
(CMOVLEQconst x (FlagLT_ULT)) -> x
110+
(CMOVWEQconst x (FlagLT_ULT)) -> x
111+
112+
(CMOVQEQconst x (FlagLT_UGT)) -> x
113+
(CMOVLEQconst x (FlagLT_UGT)) -> x
114+
(CMOVWEQconst x (FlagLT_UGT)) -> x
115+
116+
(CMOVQEQconst x (FlagGT_ULT)) -> x
117+
(CMOVLEQconst x (FlagGT_ULT)) -> x
118+
(CMOVWEQconst x (FlagGT_ULT)) -> x
119+
120+
(CMOVQEQconst x (FlagGT_UGT)) -> x
121+
(CMOVLEQconst x (FlagGT_UGT)) -> x
122+
(CMOVWEQconst x (FlagGT_UGT)) -> x
123+
124+
(Bswap64 x) -> (BSWAPQ x)
125+
(Bswap32 x) -> (BSWAPL x)
126+
95127
(Sqrt x) -> (SQRTSD x)
96128

97129
// Note: we always extend to 64 bits even though some ops don't need that many result bits.

src/cmd/compile/internal/ssa/gen/AMD64Ops.go

+26-3
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,13 @@ func init() {
103103
gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
104104
clobbers: ax | flags}
105105

106-
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
107-
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
108-
flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
106+
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
107+
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
108+
flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
109+
110+
// for CMOVconst -- uses AX to hold constant temporary. AX input is moved before temp.
111+
gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
112+
109113
readflags = regInfo{inputs: flagsonly, outputs: gponly}
110114
flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
111115

@@ -307,6 +311,25 @@ func init() {
307311
{name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
308312
{name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
309313

314+
{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero
315+
{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero
316+
{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // arg0 # of low-order zeroes ; undef if zero
317+
318+
{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // arg0 index of highest-order set bit ; undef if zero
319+
{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // arg0 index of highest-order set bit ; undef if zero
320+
{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // arg0 index of highest-order set bit ; undef if zero
321+
322+
// Note ASM for ops moves whole register
323+
{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set
324+
{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set
325+
{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set
326+
{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set
327+
{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set
328+
{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set
329+
330+
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 swap bytes
331+
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes
332+
310333
{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
311334

312335
{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.

src/cmd/compile/internal/ssa/gen/genericOps.go

+11
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,17 @@ var genericOps = []opData{
237237
{name: "Com32", argLength: 1},
238238
{name: "Com64", argLength: 1},
239239

240+
{name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16)
241+
{name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32)
242+
{name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64)
243+
244+
{name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16)
245+
{name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32)
246+
{name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64)
247+
248+
{name: "Bswap32", argLength: 1}, // Swap bytes
249+
{name: "Bswap64", argLength: 1}, // Swap bytes
250+
240251
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
241252

242253
// Data movement, max argument length for Phi is indefinite so just pick

0 commit comments

Comments
 (0)