; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; Tests showing replacement of variable rotates with immediate splat versions.
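; Each rotate amount below is a constant splat, so codegen is expected to select
; the immediate rotate instructions (vprold/vprolq and vprord/vprorq) rather than
; the variable-count vprolv*/vprorv* forms.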

define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_rol_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $5, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprold $6, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprold $7, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $5, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprold $6, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprold $7, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
}

define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_rol_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $5, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprolq $6, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprolq $7, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $5, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprolq $6, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprolq $7, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
}

define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_ror_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $5, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprord $6, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprord $7, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $5, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprord $6, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprord $7, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
}

define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_ror_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $5, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprorq $6, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprorq $7, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $5, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprorq $6, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprorq $7, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
}

; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.
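; The rotate amount is interpreted modulo the element width, so the out-of-range
; splat amounts used below (33, 65, 65534, -1) should still lower to in-range
; immediates, e.g. 33 -> vprold $1 for i32 elements and 65534 -> vprolq $62 for
; i64 elements.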

define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $1, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprold $31, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprold $30, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $1, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprold $31, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprold $30, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
}

define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $62, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprolq $1, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprolq $63, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $62, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprolq $1, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprolq $63, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
}

define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v16i32:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $1, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprord $31, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprord $30, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $1, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprord $31, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprord $30, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
}

define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v8i64:
; KNL:       # %bb.0:
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $62, %zmm0, %zmm3 {%k1}
; KNL-NEXT:    vprorq $1, %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vprorq $63, %zmm0, %zmm2
; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v8i64:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $62, %zmm0, %zmm3 {%k1}
; SKX-NEXT:    vprorq $1, %zmm0, %zmm1 {%k1} {z}
; SKX-NEXT:    vprorq $63, %zmm0, %zmm2
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
; SKX-NEXT:    retq
  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
}

; Constant folding.
; We also test with a target shuffle so that this can't be constant folded upon creation;
; it must wait until the target shuffle has been constant folded in
; combineX86ShufflesRecursively.
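; @test_fold_rol_v8i64 below has plain constant operands, so the rotate is expected
; to fold all the way to a single vector constant. The remaining tests produce the
; rotated value via a vpermt2var shuffle, so any folding has to wait until that
; shuffle has itself been simplified.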

define <8 x i64> @test_fold_rol_v8i64() {
; CHECK-LABEL: test_fold_rol_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_fold_rol_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res1
}

define <8 x i64> @test_fold_ror_v8i64() {
; CHECK-LABEL: test_fold_ror_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res1
}

define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_fold_ror_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res1
}