; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefixes=CHECK,SVE %s
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefixes=CHECK,SVE2 %s

; Tests lowering of (add nuw x, 1<<(sh-1)) + (lshr ..., sh) into the SVE2
; URSHR (unsigned rounding shift right) instruction, plus negative tests
; where the combine must not fire.

; Wrong add/shift amount. Should be 32 for shift of 6.
define <vscale x 2 x i64> @neg_urshr_1(<vscale x 2 x i64> %x) {
; CHECK-LABEL: neg_urshr_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #16 // =0x10
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 16)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}

; Vector Shift.
define <vscale x 2 x i64> @neg_urshr_2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: neg_urshr_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, %y
  ret <vscale x 2 x i64> %sh
}

; Vector Add.
define <vscale x 2 x i64> @neg_urshr_3(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: neg_urshr_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, %y
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}

; Add has two uses.
define <vscale x 2 x i64> @neg_urshr_4(<vscale x 2 x i64> %x, ptr %p) {
; CHECK-LABEL: neg_urshr_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, z0.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add z1.d, z1.d, #32 // =0x20
; CHECK-NEXT:    lsr z0.d, z1.d, #6
; CHECK-NEXT:    st1d { z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  store <vscale x 2 x i64> %add, ptr %p
  ret <vscale x 2 x i64> %sh
}

; Add can overflow.
define <vscale x 2 x i64> @neg_urshr_5(<vscale x 2 x i64> %x) {
; CHECK-LABEL: neg_urshr_5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    ret
  %add = add <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}

define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i8> %x) {
; SVE-LABEL: urshr_i8:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.b, z0.b, #32 // =0x20
; SVE-NEXT:    lsr z0.b, z0.b, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.b
; SVE2-NEXT:    urshr z0.b, p0/m, z0.b, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 16 x i8> %x, splat (i8 32)
  %sh = lshr <vscale x 16 x i8> %add, splat (i8 6)
  ret <vscale x 16 x i8> %sh
}

define <vscale x 16 x i8> @urshr_8_wide_trunc(<vscale x 16 x i8> %x) {
; SVE-LABEL: urshr_8_wide_trunc:
; SVE:       // %bb.0:
; SVE-NEXT:    uunpkhi z1.h, z0.b
; SVE-NEXT:    uunpklo z0.h, z0.b
; SVE-NEXT:    add z0.h, z0.h, #32 // =0x20
; SVE-NEXT:    add z1.h, z1.h, #32 // =0x20
; SVE-NEXT:    lsr z1.h, z1.h, #6
; SVE-NEXT:    lsr z0.h, z0.h, #6
; SVE-NEXT:    uzp1 z0.b, z0.b, z1.b
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_8_wide_trunc:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.b
; SVE2-NEXT:    urshr z0.b, p0/m, z0.b, #6
; SVE2-NEXT:    ret
  %ext = zext <vscale x 16 x i8> %x to <vscale x 16 x i16>
  %add = add nuw nsw <vscale x 16 x i16> %ext, splat (i16 32)
  %sh = lshr <vscale x 16 x i16> %add, splat (i16 6)
  %sht = trunc <vscale x 16 x i16> %sh to <vscale x 16 x i8>
  ret <vscale x 16 x i8> %sht
}

define <vscale x 16 x i8> @urshr_8_wide_trunc_nomerge(<vscale x 16 x i16> %ext) {
; SVE-LABEL: urshr_8_wide_trunc_nomerge:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.h, z0.h, #256 // =0x100
; SVE-NEXT:    add z1.h, z1.h, #256 // =0x100
; SVE-NEXT:    lsr z1.h, z1.h, #9
; SVE-NEXT:    lsr z0.h, z0.h, #9
; SVE-NEXT:    uzp1 z0.b, z0.b, z1.b
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_8_wide_trunc_nomerge:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h
; SVE2-NEXT:    urshr z1.h, p0/m, z1.h, #9
; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #9
; SVE2-NEXT:    uzp1 z0.b, z0.b, z1.b
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 16 x i16> %ext, splat (i16 256)
  %sh = lshr <vscale x 16 x i16> %add, splat (i16 9)
  %sht = trunc <vscale x 16 x i16> %sh to <vscale x 16 x i8>
  ret <vscale x 16 x i8> %sht
}

define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i16> %x) {
; SVE-LABEL: urshr_i16:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.h, z0.h, #32 // =0x20
; SVE-NEXT:    lsr z0.h, z0.h, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h
; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 8 x i16> %x, splat (i16 32)
  %sh = lshr <vscale x 8 x i16> %add, splat (i16 6)
  ret <vscale x 8 x i16> %sh
}

define <vscale x 8 x i16> @urshr_16_wide_trunc(<vscale x 8 x i16> %x) {
; SVE-LABEL: urshr_16_wide_trunc:
; SVE:       // %bb.0:
; SVE-NEXT:    uunpkhi z1.s, z0.h
; SVE-NEXT:    uunpklo z0.s, z0.h
; SVE-NEXT:    add z0.s, z0.s, #32 // =0x20
; SVE-NEXT:    add z1.s, z1.s, #32 // =0x20
; SVE-NEXT:    lsr z1.s, z1.s, #6
; SVE-NEXT:    lsr z0.s, z0.s, #6
; SVE-NEXT:    uzp1 z0.h, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_16_wide_trunc:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h
; SVE2-NEXT:    urshr z0.h, p0/m, z0.h, #6
; SVE2-NEXT:    ret
  %ext = zext <vscale x 8 x i16> %x to <vscale x 8 x i32>
  %add = add nuw nsw <vscale x 8 x i32> %ext, splat (i32 32)
  %sh = lshr <vscale x 8 x i32> %add, splat (i32 6)
  %sht = trunc <vscale x 8 x i32> %sh to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %sht
}

define <vscale x 8 x i16> @urshr_16_wide_trunc_nomerge(<vscale x 8 x i32> %ext) {
; SVE-LABEL: urshr_16_wide_trunc_nomerge:
; SVE:       // %bb.0:
; SVE-NEXT:    mov z2.s, #0x10000
; SVE-NEXT:    add z0.s, z0.s, z2.s
; SVE-NEXT:    add z1.s, z1.s, z2.s
; SVE-NEXT:    lsr z1.s, z1.s, #17
; SVE-NEXT:    lsr z0.s, z0.s, #17
; SVE-NEXT:    uzp1 z0.h, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_16_wide_trunc_nomerge:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    urshr z1.s, p0/m, z1.s, #17
; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #17
; SVE2-NEXT:    uzp1 z0.h, z0.h, z1.h
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 8 x i32> %ext, splat (i32 65536)
  %sh = lshr <vscale x 8 x i32> %add, splat (i32 17)
  %sht = trunc <vscale x 8 x i32> %sh to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %sht
}

define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i32> %x) {
; SVE-LABEL: urshr_i32:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.s, z0.s, #32 // =0x20
; SVE-NEXT:    lsr z0.s, z0.s, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 4 x i32> %x, splat (i32 32)
  %sh = lshr <vscale x 4 x i32> %add, splat (i32 6)
  ret <vscale x 4 x i32> %sh
}

define <vscale x 4 x i32> @urshr_32_wide_trunc(<vscale x 4 x i32> %x) {
; SVE-LABEL: urshr_32_wide_trunc:
; SVE:       // %bb.0:
; SVE-NEXT:    uunpkhi z1.d, z0.s
; SVE-NEXT:    uunpklo z0.d, z0.s
; SVE-NEXT:    add z0.d, z0.d, #32 // =0x20
; SVE-NEXT:    add z1.d, z1.d, #32 // =0x20
; SVE-NEXT:    lsr z1.d, z1.d, #6
; SVE-NEXT:    lsr z0.d, z0.d, #6
; SVE-NEXT:    uzp1 z0.s, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_32_wide_trunc:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    urshr z0.s, p0/m, z0.s, #6
; SVE2-NEXT:    ret
  %ext = zext <vscale x 4 x i32> %x to <vscale x 4 x i64>
  %add = add nuw nsw <vscale x 4 x i64> %ext, splat (i64 32)
  %sh = lshr <vscale x 4 x i64> %add, splat (i64 6)
  %sht = trunc <vscale x 4 x i64> %sh to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %sht
}

define <vscale x 4 x i32> @urshr_32_wide_trunc_nomerge(<vscale x 4 x i64> %ext) {
; SVE-LABEL: urshr_32_wide_trunc_nomerge:
; SVE:       // %bb.0:
; SVE-NEXT:    mov z2.d, #0x100000000
; SVE-NEXT:    add z0.d, z0.d, z2.d
; SVE-NEXT:    add z1.d, z1.d, z2.d
; SVE-NEXT:    lsr z1.d, z1.d, #33
; SVE-NEXT:    lsr z0.d, z0.d, #33
; SVE-NEXT:    uzp1 z0.s, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_32_wide_trunc_nomerge:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    urshr z1.d, p0/m, z1.d, #33
; SVE2-NEXT:    urshr z0.d, p0/m, z0.d, #33
; SVE2-NEXT:    uzp1 z0.s, z0.s, z1.s
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 4 x i64> %ext, splat (i64 4294967296)
  %sh = lshr <vscale x 4 x i64> %add, splat (i64 33)
  %sht = trunc <vscale x 4 x i64> %sh to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %sht
}

define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i64> %x) {
; SVE-LABEL: urshr_i64:
; SVE:       // %bb.0:
; SVE-NEXT:    add z0.d, z0.d, #32 // =0x20
; SVE-NEXT:    lsr z0.d, z0.d, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: urshr_i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    urshr z0.d, p0/m, z0.d, #6
; SVE2-NEXT:    ret
  %add = add nuw nsw <vscale x 2 x i64> %x, splat (i64 32)
  %sh = lshr <vscale x 2 x i64> %add, splat (i64 6)
  ret <vscale x 2 x i64> %sh
}