; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s

declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0

define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i1(
; OPT-NEXT:    [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 1
; OPT-NEXT:    store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
; OPT-NEXT:    ret void
;
  %val = load i1, ptr addrspace(4) %in
  store i1 %val, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i1_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i1_align2(
; OPT-NEXT:    [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %val = load i1, ptr addrspace(4) %in, align 2
  store i1 %val, ptr addrspace(1) %out, align 2
  ret void
}

define amdgpu_kernel void @constant_load_i1_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i1_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i1
; OPT-NEXT:    store i1 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %val = load i1, ptr addrspace(4) %in, align 4
  store i1 %val, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i8(
; OPT-NEXT:    [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 1
; OPT-NEXT:    store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
; OPT-NEXT:    ret void
;
  %val = load i8, ptr addrspace(4) %in
  store i8 %val, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i8_align2(
; OPT-NEXT:    [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %val = load i8, ptr addrspace(4) %in, align 2
  store i8 %val, ptr addrspace(1) %out, align 2
  ret void
}

define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i8_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; OPT-NEXT:    store i8 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %val = load i8, ptr addrspace(4) %in, align 4
  store i8 %val, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2i8(
; OPT-NEXT:    [[LD:%.*]] = load <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load <2 x i8>, ptr addrspace(4) %in
  store <2 x i8> %ld, ptr addrspace(1) %out
  ret void
}
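; The vector tests below check the same widening strategy applied to
; sub-dword vectors: the pass emits a wide i32 load, truncates to the
; vector's bit width, and bitcasts back to the vector type. A sketch of the
; expected rewrite for <2 x i8> at align 4 (value names here are
; illustrative only, not checked):
;
;   %wide  = load i32, ptr addrspace(4) %in, align 4
;   %trunc = trunc i32 %wide to i16
;   %ld    = bitcast i16 %trunc to <2 x i8>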
define amdgpu_kernel void @constant_load_v2i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2i8_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP3]] to <2 x i8>
; OPT-NEXT:    store <2 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <2 x i8>, ptr addrspace(4) %in, align 4
  store <2 x i8> %ld, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v3i8(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
; OPT-NEXT:    [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8>
; OPT-NEXT:    store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <3 x i8>, ptr addrspace(4) %in
  store <3 x i8> %ld, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v3i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v3i8_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
; OPT-NEXT:    [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8>
; OPT-NEXT:    store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <3 x i8>, ptr addrspace(4) %in, align 4
  store <3 x i8> %ld, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16(
; OPT-NEXT:    [[LD:%.*]] = load i16, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[LD]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @constant_load_f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_f16(
; OPT-NEXT:    [[LD:%.*]] = load half, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store half [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load half, ptr addrspace(4) %in
  store half %ld, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2f16(
; OPT-NEXT:    [[LD:%.*]] = load <2 x half>, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    store <2 x half> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <2 x half>, ptr addrspace(4) %in
  store <2 x half> %ld, ptr addrspace(1) %out
  ret void
}
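; Negative and special cases follow: volatile loads and loads from the
; global address space (addrspace(1)) are expected to be left untouched,
; while a sub-dword load through the dispatch-ptr intrinsic's constant
; pointer is still widened to i32 plus a trunc/zext.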
define amdgpu_kernel void @load_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) {
; OPT-LABEL: @load_volatile(
; OPT-NEXT:    [[A:%.*]] = load volatile i16, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store i16 [[A]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %a = load volatile i16, ptr addrspace(4) %in
  store i16 %a, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v2i8_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2i8_volatile(
; OPT-NEXT:    [[LD:%.*]] = load volatile <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load volatile <2 x i8>, ptr addrspace(4) %in
  store <2 x i8> %ld, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v2i8_addrspace1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; OPT-LABEL: @constant_load_v2i8_addrspace1(
; OPT-NEXT:    [[LD:%.*]] = load <2 x i8>, ptr addrspace(1) [[IN:%.*]], align 2
; OPT-NEXT:    store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load <2 x i8>, ptr addrspace(1) %in
  store <2 x i8> %ld, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
; OPT-LABEL: @use_dispatch_ptr(
; OPT-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; OPT-NEXT:    [[LD:%.*]] = zext i8 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[LD]], ptr addrspace(1) [[PTR:%.*]], align 4
; OPT-NEXT:    ret void
;
  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %val = load i8, ptr addrspace(4) %dispatch.ptr, align 4
  %ld = zext i8 %val to i32
  store i32 %ld, ptr addrspace(1) %ptr
  ret void
}
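; Metadata handling: when the original load carries !range metadata, the
; checks below expect the widened i32 load to keep a !range rewritten for
; the wider type, or to drop it entirely when nothing useful survives the
; widening (the range_from_0 case). !invariant.load is expected to be
; carried over unchanged.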
define amdgpu_kernel void @constant_load_i16_align4_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0:![0-9]+]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !0
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_max(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_max(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !1
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_complex_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_complex_range(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG1:![0-9]+]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !2
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_from_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_from_0(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !3
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2:![0-9]+]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !4
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !5
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_invariant(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_invariant(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !invariant.load !3
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !invariant.load !6
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind }

; OPT: !0 = !{i32 5, i32 0}
; OPT: !1 = !{i32 8, i32 0}
; OPT: !2 = !{i32 65520, i32 0}
; OPT: !3 = !{}

!0 = !{i16 5, i16 500}
!1 = !{i16 5, i16 -1}
!2 = !{i16 8, i16 12, i16 42, i16 99}
!3 = !{i16 0, i16 255}
!4 = !{i16 -16, i16 16}
!5 = !{i16 -16, i16 0}
!6 = !{}