diff --git a/src/tests/JIT/opt/SVE/PredicateInstructions.cs b/src/tests/JIT/opt/SVE/PredicateInstructions.cs index 787e4c00a50f88..03f82eda1b0bce 100644 --- a/src/tests/JIT/opt/SVE/PredicateInstructions.cs +++ b/src/tests/JIT/opt/SVE/PredicateInstructions.cs @@ -11,6 +11,9 @@ public class PredicateInstructions { + private static readonly float[] s_floatValues = new float[64]; + private static readonly double[] s_doubleValues = new double[64]; + [MethodImpl(MethodImplOptions.NoInlining)] [Fact] public static void TestPredicateInstructions() @@ -40,6 +43,17 @@ public static void TestPredicateInstructions() UnzipEvenZipLowMask(vecs, vecs); TransposeEvenAndMask(vecs, vecs, vecs); + PredicateCastFloatLoad(s_floatValues, 0, s_floatValues.Length); + PredicateCastFloatLocalLoad(s_floatValues, 0, s_floatValues.Length); + PointerCastFloatLoad(s_floatValues, 0, s_floatValues.Length); + WhileLessThanSingleFloatLoad(s_floatValues, 0, s_floatValues.Length); + PredicateCastFloatLoop(s_floatValues, s_floatValues, s_floatValues.Length); + + PredicateCastDoubleLoad(s_doubleValues, 0, s_doubleValues.Length); + PredicateCastDoubleLocalLoad(s_doubleValues, 0, s_doubleValues.Length); + PointerCastDoubleLoad(s_doubleValues, 0, s_doubleValues.Length); + WhileLessThanDoubleLoad(s_doubleValues, 0, s_doubleValues.Length); + PredicateCastDoubleLoop(s_doubleValues, s_doubleValues, s_doubleValues.Length); } } @@ -179,4 +193,172 @@ static Vector TransposeEvenAndMask(Vector v, Vector a, Vect Sve.And(Sve.CompareGreaterThan(a, b), Sve.CompareEqual(a, b)), Sve.CompareLessThan(a, b))); } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector PredicateCastFloatLoad(float[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.s, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.s, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1w { {{z[0-9]+}}.s }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (float* ptr = values) + { + Vector mask = Sve.CreateWhileLessThanMaskUInt32(index, length); + return Sve.LoadVector((Vector)mask, ptr + index); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector PointerCastFloatLoad(float[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.s, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.s, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1w { {{z[0-9]+}}.s }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (float* ptr = values) + { + Vector mask = Sve.CreateWhileLessThanMaskUInt32(index, length); + return (Vector)Sve.LoadVector(mask, (uint*)(ptr + index)); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector PredicateCastFloatLocalLoad(float[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.s, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.s, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1w { {{z[0-9]+}}.s }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (float* ptr = values) + { + Vector uintMask = Sve.CreateWhileLessThanMaskUInt32(index, length); + Vector floatMask = (Vector)uintMask; + return Sve.LoadVector(floatMask, ptr + index); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector WhileLessThanSingleFloatLoad(float[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.s, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.s, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1w { {{z[0-9]+}}.s }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (float* ptr = values) + { + Vector mask = Sve.CreateWhileLessThanMaskSingle(index, length); + return Sve.LoadVector(mask, ptr + index); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe void PredicateCastFloatLoop(float[] input, float[] output, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.s, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.s, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1w { {{z[0-9]+}}.s }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + //ARM64-NOT: mov {{z[0-9]+}}.s, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: st1w { {{z[0-9]+}}.s }, {{p[0-9]+}}, [{{x[0-9]+}}] + fixed (float* inputPtr = input, outputPtr = output) + { + int i = 0; + int count = (int)Sve.Count32BitElements(); + + while (i < length) + { + Vector loopMask = Sve.CreateWhileLessThanMaskUInt32(i, length); + Vector floatMask = (Vector)loopMask; + Vector value = Sve.LoadVector(floatMask, inputPtr + i); + Sve.StoreAndZip(floatMask, outputPtr + i, value); + + i += count; + } + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector PredicateCastDoubleLoad(double[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.d, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.d, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1d { {{z[0-9]+}}.d }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (double* ptr = values) + { + Vector mask = Sve.CreateWhileLessThanMaskUInt64(index, length); + return Sve.LoadVector((Vector)mask, ptr + index); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector PointerCastDoubleLoad(double[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.d, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.d, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1d { {{z[0-9]+}}.d }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (double* ptr = values) + { + Vector mask = Sve.CreateWhileLessThanMaskUInt64(index, length); + return (Vector)Sve.LoadVector(mask, (ulong*)(ptr + index)); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector PredicateCastDoubleLocalLoad(double[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.d, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.d, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1d { {{z[0-9]+}}.d }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (double* ptr = values) + { + Vector ulongMask = Sve.CreateWhileLessThanMaskUInt64(index, length); + Vector doubleMask = (Vector)ulongMask; + return Sve.LoadVector(doubleMask, ptr + index); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe Vector WhileLessThanDoubleLoad(double[] values, int index, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.d, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.d, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1d { {{z[0-9]+}}.d }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + fixed (double* ptr = values) + { + Vector mask = Sve.CreateWhileLessThanMaskDouble(index, length); + return Sve.LoadVector(mask, ptr + index); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe void PredicateCastDoubleLoop(double[] input, double[] output, int length) + { + //ARM64-FULL-LINE: whilelt {{p[0-9]+}}.d, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-NOT: mov {{z[0-9]+}}.d, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: ld1d { {{z[0-9]+}}.d }, {{p[0-9]+}}/z, [{{x[0-9]+}}] + //ARM64-NOT: mov {{z[0-9]+}}.d, {{p[0-9]+}}/z, #1 + //ARM64-NOT: {{^ *}}cmp + //ARM64-FULL-LINE: st1d { {{z[0-9]+}}.d }, {{p[0-9]+}}, [{{x[0-9]+}}] + fixed (double* inputPtr = input, outputPtr = output) + { + int i = 0; + int count = (int)Sve.Count64BitElements(); + + while (i < length) + { + Vector loopMask = Sve.CreateWhileLessThanMaskUInt64(i, length); + Vector doubleMask = (Vector)loopMask; + Vector value = Sve.LoadVector(doubleMask, inputPtr + i); + Sve.StoreAndZip(doubleMask, outputPtr + i, value); + + i += count; + } + } + } }