Skip to content

Commit 9e09812

Browse files
committed
all: REVERSE MERGE dev.simd (c456ab7) into master
This commit is a REVERSE MERGE. It merges dev.simd back into its parent branch, master. This marks the end of development on dev.simd.

Merge List:

+ 2025-12-08 c456ab7 [dev.simd] all: merge master (a33bbf1) into dev.simd
+ 2025-12-08 1d8711e [dev.simd] internal/buildcfg: don't enable SIMD experiment by default
+ 2025-12-08 f38e968 [dev.simd] cmd/compile: zero only low 128-bit of X15
+ 2025-12-08 144cf17 [dev.simd] simd, cmd/compile: move "simd" to "simd/archsimd"
+ 2025-12-08 3417b48 [dev.simd] simd: add carryless multiply
+ 2025-12-05 f51ee08 [dev.simd] simd: replace checking loops with call to slice-checker
+ 2025-12-03 2b91d96 [dev.simd] internal/buildcfg: turn GOEXPERIMENT=simd back on

Change-Id: Ife3f2ca4f6d8ce131335c0f868358db6a6a1a534
2 parents a33bbf1 + c456ab7 commit 9e09812

File tree

133 files changed

+2434
-2267
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

133 files changed

+2434
-2267
lines changed

src/cmd/compile/internal/amd64/simdssa.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/amd64/ssa.go

Lines changed: 10 additions & 30 deletions
Original file line number · Diff line number · Diff line change
@@ -18,7 +18,6 @@ import (
1818
"cmd/internal/obj"
1919
"cmd/internal/obj/x86"
2020
"internal/abi"
21-
"internal/buildcfg"
2221
)
2322

2423
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
@@ -1718,7 +1717,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
17181717
case ssa.OpAMD64VZEROUPPER, ssa.OpAMD64VZEROALL:
17191718
s.Prog(v.Op.Asm())
17201719

1721-
case ssa.OpAMD64Zero128, ssa.OpAMD64Zero256, ssa.OpAMD64Zero512: // no code emitted
1720+
case ssa.OpAMD64Zero128: // no code emitted
1721+
1722+
case ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
1723+
p := s.Prog(v.Op.Asm())
1724+
p.From.Type = obj.TYPE_REG
1725+
p.From.Reg = simdReg(v)
1726+
p.AddRestSourceReg(simdReg(v))
1727+
p.To.Type = obj.TYPE_REG
1728+
p.To.Reg = simdReg(v)
17221729

17231730
case ssa.OpAMD64VMOVSSf2v, ssa.OpAMD64VMOVSDf2v:
17241731
// These are for initializing the least 32/64 bits of a SIMD register from a "float".
@@ -1871,34 +1878,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
18711878

18721879
// zeroX15 zeroes the X15 register.
18731880
func zeroX15(s *ssagen.State) {
1874-
if !buildcfg.Experiment.SIMD {
1875-
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1876-
return
1877-
}
1878-
vxorps := func(s *ssagen.State) {
1879-
p := s.Prog(x86.AVXORPS)
1880-
p.From.Type = obj.TYPE_REG
1881-
p.From.Reg = x86.REG_X15
1882-
p.AddRestSourceReg(x86.REG_X15)
1883-
p.To.Type = obj.TYPE_REG
1884-
p.To.Reg = x86.REG_X15
1885-
}
1886-
if buildcfg.GOAMD64 >= 3 {
1887-
vxorps(s)
1888-
return
1889-
}
1890-
// AVX may not be available, check before zeroing the high bits.
1891-
p := s.Prog(x86.ACMPB)
1892-
p.From.Type = obj.TYPE_MEM
1893-
p.From.Name = obj.NAME_EXTERN
1894-
p.From.Sym = ir.Syms.X86HasAVX
1895-
p.To.Type = obj.TYPE_CONST
1896-
p.To.Offset = 1
1897-
jmp := s.Prog(x86.AJNE)
1898-
jmp.To.Type = obj.TYPE_BRANCH
1899-
vxorps(s)
1900-
sse := opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1901-
jmp.To.SetTarget(sse)
1881+
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
19021882
}
19031883

19041884
// Example instruction: VRSQRTPS X1, X1

src/cmd/compile/internal/inline/inl.go

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -445,7 +445,7 @@ type hairyVisitor struct {
445445

446446
func isDebugFn(fn *ir.Func) bool {
447447
// if n := fn.Nname; n != nil {
448-
// if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd" {
448+
// if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd/archsimd" {
449449
// fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
450450
// return true
451451
// }

src/cmd/compile/internal/ssa/_gen/AMD64Ops.go

Lines changed: 7 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -214,6 +214,7 @@ func init() {
214214
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
215215
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
216216

217+
v01 = regInfo{inputs: nil, outputs: vonly}
217218
v11 = regInfo{inputs: vonly, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
218219
v21 = regInfo{inputs: []regMask{v, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
219220
vk = regInfo{inputs: vzonly, outputs: maskonly}
@@ -232,6 +233,7 @@ func init() {
232233
gpv = regInfo{inputs: []regMask{gp}, outputs: vonly}
233234
v2flags = regInfo{inputs: []regMask{vz, vz}}
234235

236+
w01 = regInfo{inputs: nil, outputs: wonly}
235237
w11 = regInfo{inputs: wonly, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
236238
w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
237239
wk = regInfo{inputs: wzonly, outputs: maskonly}
@@ -1398,12 +1400,15 @@ func init() {
13981400
{name: "VPMOVVec64x4ToM", argLength: 1, reg: vk, asm: "VPMOVQ2M"},
13991401
{name: "VPMOVVec64x8ToM", argLength: 1, reg: wk, asm: "VPMOVQ2M"},
14001402

1403+
// X15 is the zero register up to 128-bit. For larger values, we zero it on the fly.
14011404
{name: "Zero128", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
1402-
{name: "Zero256", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
1403-
{name: "Zero512", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
1405+
{name: "Zero256", argLength: 0, reg: v01, asm: "VPXOR"},
1406+
{name: "Zero512", argLength: 0, reg: w01, asm: "VPXORQ"},
14041407

1408+
// Move a 32/64 bit float to a 128-bit SIMD register.
14051409
{name: "VMOVSDf2v", argLength: 1, reg: fpv, asm: "VMOVSD"},
14061410
{name: "VMOVSSf2v", argLength: 1, reg: fpv, asm: "VMOVSS"},
1411+
14071412
{name: "VMOVQ", argLength: 1, reg: gpv, asm: "VMOVQ"},
14081413
{name: "VMOVD", argLength: 1, reg: gpv, asm: "VMOVD"},
14091414

src/cmd/compile/internal/ssa/_gen/simdAMD64.rules

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -1333,6 +1333,9 @@
13331333
(blendMaskedInt16x32 x y mask) => (VPBLENDMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
13341334
(blendMaskedInt32x16 x y mask) => (VPBLENDMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
13351335
(blendMaskedInt64x8 x y mask) => (VPBLENDMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
1336+
(carrylessMultiplyUint64x2 ...) => (VPCLMULQDQ128 ...)
1337+
(carrylessMultiplyUint64x4 ...) => (VPCLMULQDQ256 ...)
1338+
(carrylessMultiplyUint64x8 ...) => (VPCLMULQDQ512 ...)
13361339
(concatSelectedConstantFloat32x4 ...) => (VSHUFPS128 ...)
13371340
(concatSelectedConstantFloat64x2 ...) => (VSHUFPD128 ...)
13381341
(concatSelectedConstantInt32x4 ...) => (VSHUFPS128 ...)

src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/_gen/simdgenericOps.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 77 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteAMD64.go

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssagen/intrinsics.go

Lines changed: 22 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -1644,7 +1644,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
16441644
// Only enable intrinsics, if SIMD experiment.
16451645
simdIntrinsics(addF)
16461646

1647-
addF("simd", "ClearAVXUpperBits",
1647+
addF(simdPackage, "ClearAVXUpperBits",
16481648
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
16491649
s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
16501650
return nil
@@ -1668,15 +1668,18 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
16681668
addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
16691669
addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
16701670

1671+
// sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
16711672
sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
1672-
addF("simd", method,
1673+
addF(simdPackage, method,
16731674
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
16741675
x, a, b, c, d, y := args[0], args[1], args[2], args[3], args[4], args[5]
16751676
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 && c.Op == ssa.OpConst8 && d.Op == ssa.OpConst8 {
1676-
return select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
1677-
} else {
1678-
return s.callResult(n, callNormal)
1677+
z := select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
1678+
if z != nil {
1679+
return z
1680+
}
16791681
}
1682+
return s.callResult(n, callNormal)
16801683
},
16811684
sys.AMD64)
16821685
}
@@ -1693,15 +1696,18 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
16931696
sfp4("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
16941697
sfp4("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)
16951698

1699+
// sfp2 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
16961700
sfp2 := func(method string, hwop ssa.Op, vectype *types.Type, cscimm func(i, j uint8) int64) {
1697-
addF("simd", method,
1701+
addF(simdPackage, method,
16981702
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
16991703
x, a, b, y := args[0], args[1], args[2], args[3]
17001704
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 {
1701-
return select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
1702-
} else {
1703-
return s.callResult(n, callNormal)
1705+
z := select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
1706+
if z != nil {
1707+
return z
1708+
}
17041709
}
1710+
return s.callResult(n, callNormal)
17051711
},
17061712
sys.AMD64)
17071713
}
@@ -1767,6 +1773,9 @@ const (
17671773

17681774
func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type, csc func(a, b uint8) int64) *ssa.Value {
17691775
a, b := uint8(_a.AuxInt8()), uint8(_b.AuxInt8())
1776+
if a > 3 || b > 3 {
1777+
return nil
1778+
}
17701779
pattern := (a&2)>>1 + (b & 2)
17711780
a, b = a&1, b&1
17721781

@@ -1785,6 +1794,9 @@ func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type
17851794

17861795
func select4FromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value {
17871796
a, b, c, d := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()), uint8(_c.AuxInt8()), uint8(_d.AuxInt8())
1797+
if a > 7 || b > 7 || c > 7 || d > 7 {
1798+
return nil
1799+
}
17881800
pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
17891801

17901802
a, b, c, d = a&3, b&3, c&3, d&3
@@ -2154,7 +2166,7 @@ func findIntrinsic(sym *types.Sym) intrinsicBuilder {
21542166
fn := sym.Name
21552167
if ssa.IntrinsicsDisable {
21562168
if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GrtCallerSP" || fn == "GetClosurePtr") ||
2157-
pkg == "internal/simd" || pkg == "simd" { // TODO after simd has been moved to package simd, remove internal/simd
2169+
pkg == simdPackage {
21582170
// These runtime functions don't have definitions, must be intrinsics.
21592171
} else {
21602172
return nil

0 commit comments

Comments (0)