; Verifies correctness of load/store of parameters and return values.
;
; Every test function below forwards its argument to a call of itself and
; returns the call's result, so a single function exercises the incoming
; parameter load, the outgoing argument store, the call's return value load
; and the function's own return value store.
;
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}

%s_i1 = type { i1 }
%s_i8 = type { i8 }
%s_i16 = type { i16 }
%s_f16 = type { half }
%s_i32 = type { i32 }
%s_f32 = type { float }
%s_i64 = type { i64 }
%s_f64 = type { double }

; More complicated types. i64 is used to increase natural alignment
; requirement for the type.
%s_i32x4 = type { i32, i32, i32, i32, i64}
%s_i32f32 = type { i32, float, i32, float, i64}
%s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
%s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
%s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}

; All scalar parameters must be at least 32 bits in size.
; i1 is loaded/stored as i8.

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i1(
; CHECK-NEXT: .param .b32 test_i1_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
; CHECK: setp.eq.b16 %p1, [[A]], 1
; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1;
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[C]]
; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK-NEXT: test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK: ret;
define i1 @test_i1(i1 %a) {
  %r = tail call i1 @test_i1(i1 %a);
  ret i1 %r;
}

; Signed i1 is a somewhat special case. We only care about one bit and
; then use neg.s32 to convert it to 32-bit -1 if it's set.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i1s(
; CHECK-NEXT: .param .b32 test_i1s_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define signext i1 @test_i1s(i1 signext %a) {
  %r = tail call signext i1 @test_i1s(i1 signext %a);
  ret i1 %r;
}

; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_v3i1(
; CHECK-NEXT: .param .align 1 .b8 test_v3i1_param_0[1]
; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]]
; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i1> @test_v3i1(<3 x i1> %a) {
  %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
  ret <3 x i1> %r;
}

; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_v4i1(
; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
; CHECK: ld.param.u8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[E0]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK: test_v4i1,
; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK: ld.param.b8 [[RE3:%rs[0-9]+]], [retval0+3];
; CHECK: st.param.b8 [func_retval0], [[RE0]];
; CHECK: st.param.b8 [func_retval0+1], [[RE1]];
; CHECK: st.param.b8 [func_retval0+2], [[RE2]];
; CHECK: st.param.b8 [func_retval0+3], [[RE3]];
; CHECK-NEXT: ret;
define <4 x i1> @test_v4i1(<4 x i1> %a) {
  %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
  ret <4 x i1> %r;
}

; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_v5i1(
; CHECK-NEXT: .param .align 1 .b8 test_v5i1_param_0[1]
; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]]
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i1> @test_v5i1(<5 x i1> %a) {
  %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
  ret <5 x i1> %r;
}

; Odd sub-byte integers (i2, i3) are expected to be loaded with a u8 load and
; passed/returned through 32-bit .b32 params.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i2(
; CHECK-NEXT: .param .b32 test_i2_param_0
; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i2_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i2,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i2 @test_i2(i2 %a) {
  %r = tail call i2 @test_i2(i2 %a);
  ret i2 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i3(
; CHECK-NEXT: .param .b32 test_i3_param_0
; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i3_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i3,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i3 @test_i3(i3 %a) {
  %r = tail call i3 @test_i3(i3 %a);
  ret i3 %r;
}

; Unsigned i8 is loaded into a 16-bit register, converted to 32 bits and
; masked with 255 before being passed on as a .b32 param.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i8(
; CHECK-NEXT: .param .b32 test_i8_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i8 @test_i8(i8 %a) {
  %r = tail call i8 @test_i8(i8 %a);
  ret i8 %r;
}

; signed i8 is loaded into 16-bit register which is then sign-extended to i32.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i8s(
; CHECK-NEXT: .param .b32 test_i8s_param_0
; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8s,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define signext i8 @test_i8s(i8 signext %a) {
  %r = tail call signext i8 @test_i8s(i8 signext %a);
  ret i8 %r;
}

; <3 x i8> is expected to travel as a single 32-bit word in a 4-byte,
; 4-byte-aligned param.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v3i8(
; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v3i8_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[R]]
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i8,
; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
; interesting here, so it's skipped.
; CHECK: st.param.b32 [func_retval0],
; CHECK-NEXT: ret;
define <3 x i8> @test_v3i8(<3 x i8> %a) {
  %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
  ret <3 x i8> %r;
}

; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v4i8(
; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0]
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[R]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i8,
; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[RET]];
; CHECK-NEXT: ret;
define <4 x i8> @test_v4i8(<4 x i8> %a) {
  %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
  ret <4 x i8> %r;
}

; <5 x i8> is expected to be split into a 4-element piece at offset 0 and a
; single trailing byte at offset 4.
; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v5i8(
; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_v5i8_param_0]
; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v4.b8 [param0],
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i8,
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v4.b8 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i8> @test_v5i8(<5 x i8> %a) {
  %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
  ret <5 x i8> %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i11(
; CHECK-NEXT: .param .b32 test_i11_param_0
; CHECK: ld.param.u16 {{%rs[0-9]+}}, [test_i11_param_0];
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i11,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i11 @test_i11(i11 %a) {
  %r = tail call i11 @test_i11(i11 %a);
  ret i11 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i16(
; CHECK-NEXT: .param .b32 test_i16_param_0
; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0];
; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i16 @test_i16(i16 %a) {
  %r = tail call i16 @test_i16(i16 %a);
  ret i16 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i16s(
; CHECK-NEXT: .param .b32 test_i16s_param_0
; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16s,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define signext i16 @test_i16s(i16 signext %a) {
  %r = tail call signext i16 @test_i16s(i16 signext %a);
  ret i16 %r;
}

; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3i16(
; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
; CHECK-DAG: ld.param.u32 [[R:%r[0-9]+]], [test_v3i16_param_0];
; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R]];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i16,
; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i16> @test_v3i16(<3 x i16> %a) {
  %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
  ret <3 x i16> %r;
}

; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v4i16(
; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i16,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
; CHECK-NEXT: ret;
define <4 x i16> @test_v4i16(<4 x i16> %a) {
  %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
  ret <4 x i16> %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5i16(
; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
; CHECK: .param .align 16 .b8 param0[16];
; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i16,
; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i16> @test_v5i16(<5 x i16> %a) {
  %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
  ret <5 x i16> %r;
}

; Scalar half/bfloat are expected in 2-byte, 2-byte-aligned byte-array params
; and are moved as raw 16-bit values; 2-element vectors move as one 32-bit word.
; CHECK: .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_f16(
; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
; CHECK-NEXT: ret;
define half @test_f16(half %a) {
  %r = tail call half @test_f16(half %a);
  ret half %r;
}

; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v2f16(
; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v2f16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
; CHECK-NEXT: ret;
define <2 x half> @test_v2f16(<2 x half> %a) {
  %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
  ret <2 x half> %r;
}

; CHECK: .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_bf16(
; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_bf16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
; CHECK-NEXT: ret;
define bfloat @test_bf16(bfloat %a) {
  %r = tail call bfloat @test_bf16(bfloat %a);
  ret bfloat %r;
}

; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v2bf16(
; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v2bf16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
; CHECK-NEXT: ret;
define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
  %r = tail call <2 x bfloat> @test_v2bf16(<2 x bfloat> %a);
  ret <2 x bfloat> %r;
}


; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3f16(
; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
; CHECK-DAG: ld.param.b32 [[HH01:%r[0-9]+]], [test_v3f16_param_0];
; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v3f16,
; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
; CHECK: ret;
define <3 x half> @test_v3f16(<3 x half> %a) {
  %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
  ret <3 x half> %r;
}

; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v4f16(
; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v4f16,
; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
; CHECK: ret;
define <4 x half> @test_v4f16(<4 x half> %a) {
  %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
  ret <4 x half> %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5f16(
; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK-DAG: st.param.v4.b16 [param0],
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v5f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
; CHECK: ret;
define <5 x half> @test_v5f16(<5 x half> %a) {
  %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
  ret <5 x half> %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v8f16(
; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v8f16,
; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
; CHECK: ret;
define <8 x half> @test_v8f16(<8 x half> %a) {
  %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
  ret <8 x half> %r;
}

; CHECK: .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v9f16(
; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
; CHECK: .param .align 32 .b8 param0[32];
; CHECK-DAG: st.param.v4.b16 [param0],
; CHECK-DAG: st.param.v4.b16 [param0+8],
; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK: test_v9f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
; CHECK: ret;
define <9 x half> @test_v9f16(<9 x half> %a) {
  %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
  ret <9 x half> %r;
}

; Integers of 17..24 bits are expected to be loaded in two pieces: a 16-bit
; load at offset 0 and an 8-bit load at offset 2.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i19(
; CHECK-NEXT: .param .b32 test_i19_param_0
; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i19_param_0];
; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i19_param_0+2];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i19,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i19 @test_i19(i19 %a) {
  %r = tail call i19 @test_i19(i19 %a);
  ret i19 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i23(
; CHECK-NEXT: .param .b32 test_i23_param_0
; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i23_param_0];
; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i23_param_0+2];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i23,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i23 @test_i23(i23 %a) {
  %r = tail call i23 @test_i23(i23 %a);
  ret i23 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i24(
; CHECK-NEXT: .param .b32 test_i24_param_0
; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i24_param_0+2];
; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i24_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i24,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i24 @test_i24(i24 %a) {
  %r = tail call i24 @test_i24(i24 %a);
  ret i24 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i29(
; CHECK-NEXT: .param .b32 test_i29_param_0
; CHECK: ld.param.u32 {{%r[0-9]+}}, [test_i29_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i29,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i29 @test_i29(i29 %a) {
  %r = tail call i29 @test_i29(i29 %a);
  ret i29 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i32(
; CHECK-NEXT: .param .b32 test_i32_param_0
; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i32 @test_i32(i32 %a) {
  %r = tail call i32 @test_i32(i32 %a);
  ret i32 %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v3i32(
; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b32 [param0+8], [[E2]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i32,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i32> @test_v3i32(<3 x i32> %a) {
  %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
  ret <3 x i32> %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v4i32(
; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i32,
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
define <4 x i32> @test_v4i32(<4 x i32> %a) {
  %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
  ret <4 x i32> %r;
}

; CHECK: .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v5i32(
; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
; CHECK: .param .align 32 .b8 param0[32];
; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i32,
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i32> @test_v5i32(<5 x i32> %a) {
  %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
  ret <5 x i32> %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_f32(
; CHECK-NEXT: .param .b32 test_f32_param_0
; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.f32 [param0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f32,
; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0];
; CHECK: st.param.f32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define float @test_f32(float %a) {
  %r = tail call float @test_f32(float %a);
  ret float %r;
}

; Integers of 33..56 bits use a 64-bit .b64 param and are expected to be
; loaded piecewise (32-bit word first, then 16-bit and/or 8-bit remainders).
; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i40(
; CHECK-NEXT: .param .b64 test_i40_param_0
; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i40_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i40_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i40,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i40 @test_i40(i40 %a) {
  %r = tail call i40 @test_i40(i40 %a);
  ret i40 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i47(
; CHECK-NEXT: .param .b64 test_i47_param_0
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i47_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i47_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i47,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i47 @test_i47(i47 %a) {
  %r = tail call i47 @test_i47(i47 %a);
  ret i47 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i48(
; CHECK-NEXT: .param .b64 test_i48_param_0
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i48_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i48_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i48,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i48 @test_i48(i48 %a) {
  %r = tail call i48 @test_i48(i48 %a);
  ret i48 %r;
}

; i51 is expected to need three pieces: 32 bits at offset 0, 16 bits at
; offset 4 and 8 bits at offset 6.
; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i51(
; CHECK-NEXT: .param .b64 test_i51_param_0
; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i51_param_0+6];
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i51_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i51_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i51,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i51 @test_i51(i51 %a) {
  %r = tail call i51 @test_i51(i51 %a);
  ret i51 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i56(
; CHECK-NEXT: .param .b64 test_i56_param_0
; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i56_param_0+6];
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i56_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i56_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
799; CHECK: .param .b64 retval0; 800; CHECK: call.uni (retval0), 801; CHECK-NEXT: test_i56, 802; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; 803; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; 804; CHECK-NEXT: ret; 805define i56 @test_i56(i56 %a) { 806 %r = tail call i56 @test_i56(i56 %a); 807 ret i56 %r; 808} 809 810; CHECK: .func (.param .b64 func_retval0) 811; CHECK-LABEL: test_i57( 812; CHECK-NEXT: .param .b64 test_i57_param_0 813; CHECK: ld.param.u64 {{%rd[0-9]+}}, [test_i57_param_0]; 814; CHECK: .param .b64 param0; 815; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; 816; CHECK: .param .b64 retval0; 817; CHECK: call.uni (retval0), 818; CHECK-NEXT: test_i57, 819; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; 820; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; 821; CHECK-NEXT: ret; 822define i57 @test_i57(i57 %a) { 823 %r = tail call i57 @test_i57(i57 %a); 824 ret i57 %r; 825} 826 827; CHECK: .func (.param .b64 func_retval0) 828; CHECK-LABEL: test_i64( 829; CHECK-NEXT: .param .b64 test_i64_param_0 830; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0]; 831; CHECK: .param .b64 param0; 832; CHECK: st.param.b64 [param0], [[E]]; 833; CHECK: .param .b64 retval0; 834; CHECK: call.uni (retval0), 835; CHECK-NEXT: test_i64, 836; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0]; 837; CHECK: st.param.b64 [func_retval0], [[R]]; 838; CHECK-NEXT: ret; 839define i64 @test_i64(i64 %a) { 840 %r = tail call i64 @test_i64(i64 %a); 841 ret i64 %r; 842} 843 844; CHECK: .func (.param .align 32 .b8 func_retval0[32]) 845; CHECK-LABEL: test_v3i64( 846; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32] 847; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16]; 848; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0]; 849; CHECK: .param .align 32 .b8 param0[32]; 850; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; 851; CHECK: st.param.b64 [param0+16], [[E2]]; 852; CHECK: .param .align 32 .b8 retval0[32]; 853; CHECK: 
call.uni (retval0), 854; CHECK-NEXT: test_v3i64, 855; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; 856; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16]; 857; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]}; 858; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]]; 859; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]}; 860; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]]; 861; CHECK-NEXT: ret; 862define <3 x i64> @test_v3i64(<3 x i64> %a) { 863 %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a); 864 ret <3 x i64> %r; 865} 866 867; For i64 vector loads are limited by PTX to 2 elements. 868; CHECK: .func (.param .align 32 .b8 func_retval0[32]) 869; CHECK-LABEL: test_v4i64( 870; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32] 871; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16]; 872; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0]; 873; CHECK: .param .align 32 .b8 param0[32]; 874; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; 875; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]}; 876; CHECK: .param .align 32 .b8 retval0[32]; 877; CHECK: call.uni (retval0), 878; CHECK-NEXT: test_v4i64, 879; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; 880; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16]; 881; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]}; 882; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]}; 883; CHECK-NEXT: ret; 884define <4 x i64> @test_v4i64(<4 x i64> %a) { 885 %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a); 886 ret <4 x i64> %r; 887} 888 889; Aggregates, on the other hand, do not get extended. 

; Single-field struct { i1 }: passed as a 1-byte, 1-byte-aligned byte array
; and moved with 8-bit accesses.
; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_s_i1(
; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1]
; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni
; CHECK-NEXT: test_s_i1,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define %s_i1 @test_s_i1(%s_i1 %a) {
  %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
  ret %s_i1 %r;
}

; Single-field struct { i8 }: 1-byte array, 8-bit accesses.
; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_s_i8(
; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni
; CHECK-NEXT: test_s_i8,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define %s_i8 @test_s_i8(%s_i8 %a) {
  %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
  ret %s_i8 %r;
}

; Single-field struct { i16 }: 2-byte array aligned to 2, 16-bit accesses.
; CHECK: .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_s_i16(
; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni
; CHECK-NEXT: test_s_i16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define %s_i16 @test_s_i16(%s_i16 %a) {
  %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
  ret %s_i16 %r;
}

; Single-field struct { half }: 2-byte array aligned to 2, untyped 16-bit
; accesses.
; CHECK: .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_s_f16(
; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni
; CHECK-NEXT: test_s_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define %s_f16 @test_s_f16(%s_f16 %a) {
  %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
  ret %s_f16 %r;
}

; Single-field struct { i32 }: 4-byte array aligned to 4, 32-bit accesses.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_s_i32(
; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define %s_i32 @test_s_i32(%s_i32 %a) {
  %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
  ret %s_i32 %r;
}

; Single-field struct { float }: 4-byte array aligned to 4, f32 accesses.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_s_f32(
; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.f32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_f32,
; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0];
; CHECK: st.param.f32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define %s_f32 @test_s_f32(%s_f32 %a) {
  %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
  ret %s_f32 %r;
}

; Single-field struct { i64 }: 8-byte array aligned to 8, 64-bit accesses.
; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_s_i64(
; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
; CHECK-NEXT: ret;
define %s_i64 @test_s_i64(%s_i64 %a) {
  %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
  ret %s_i64 %r;
}

; Fields that have different types, but identical sizes are not vectorized.
; CHECK: .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i32f32(
; CHECK: .param .align 8 .b8 test_s_i32f32_param_0[24]
; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
; CHECK-DAG: ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
; CHECK: .param .align 8 .b8 param0[24];
; CHECK-DAG: st.param.b32 [param0], [[E0]];
; CHECK-DAG: st.param.f32 [param0+4], [[E1]];
; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
; CHECK-DAG: st.param.f32 [param0+12], [[E3]];
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32f32,
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4];
; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12];
; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.b32 [func_retval0], [[RE0]];
; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]];
; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]];
; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
; CHECK: ret;
define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
  %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
  ret %s_i32f32 %r;
}

; We do vectorize consecutive fields with matching types.
; The trailing i64 field is captured as E4 by the load at offset 16 so that the
; store to param0+16 is verified against the register loaded in this function
; rather than against a capture left over from an earlier test.
; CHECK:.visible .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i32x4(
; CHECK: .param .align 8 .b8 test_s_i32x4_param_0[24]
; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
; CHECK: .param .align 8 .b8 param0[24];
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
; CHECK: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32x4,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
; CHECK: ret;

define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
  %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
  ret %s_i32x4 %r;
}

; Despite the i1 in its name, this test passes %s_i8i32x4, whose third field
; is an i8. The first two i32 fields are moved as one two-element vector; the
; i8 at offset 8 and the i32 fields at offsets 12 and 16 are moved
; individually, followed by the i64 at offset 24.
; CHECK:.visible .func (.param .align 8 .b8 func_retval0[32])
; CHECK-LABEL: test_s_i1i32x4(
; CHECK: .param .align 8 .b8 test_s_i1i32x4_param_0[32]
; CHECK: ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
; CHECK: ld.param.u32 [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
; CHECK: ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
; CHECK: .param .align 8 .b8 param0[32];
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b8 [param0+8], [[E2]];
; CHECK: st.param.b32 [param0+12], [[E3]];
; CHECK: st.param.b32 [param0+16], [[E4]];
; CHECK: st.param.b64 [param0+24], [[E5]];
; CHECK: .param .align 8 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK: test_s_i1i32x4,
; CHECK: (
; CHECK: param0
; CHECK: );
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24];
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
; CHECK: st.param.b8 [func_retval0+8], [[RE2]];
; CHECK: st.param.b32 [func_retval0+12], [[RE3]];
; CHECK: st.param.b32 [func_retval0+16], [[RE4]];
; CHECK: st.param.b64 [func_retval0+24], [[RE5]];
; CHECK: ret;

define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
  %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
  ret %s_i8i32x4 %r;
}

; -- All loads/stores from parameters aligned by one must be done one
; -- byte at a time.
; Despite the i1 in its name, this test passes the packed struct %s_i8i32x4p
; (<{ i32, i32, i8, i32, i32, i64 }>), which is 25 bytes with alignment 1.
; CHECK:.visible .func (.param .align 1 .b8 func_retval0[25])
; CHECK-LABEL: test_s_i1i32x4p(
; CHECK-DAG: .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
; Incoming argument: one 8-bit load for every byte offset 0..24, in any order.
; The destination register class is deliberately left loose.
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+24];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+23];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+22];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+21];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+20];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+19];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+18];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+17];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+16];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+15];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+14];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+13];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+12];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+11];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+10];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+9];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+8];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+7];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+6];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+5];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+4];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+3];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+2];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0];
; Outgoing argument: one 8-bit store per byte offset. Only the destination
; address is verified; the stored register is left unconstrained.
; CHECK: .param .align 1 .b8 param0[25];
; CHECK-DAG: st.param.b8 [param0],
; CHECK-DAG: st.param.b8 [param0+1],
; CHECK-DAG: st.param.b8 [param0+2],
; CHECK-DAG: st.param.b8 [param0+3],
; CHECK-DAG: st.param.b8 [param0+4],
; CHECK-DAG: st.param.b8 [param0+5],
; CHECK-DAG: st.param.b8 [param0+6],
; CHECK-DAG: st.param.b8 [param0+7],
; CHECK-DAG: st.param.b8 [param0+8],
; CHECK-DAG: st.param.b8 [param0+9],
; CHECK-DAG: st.param.b8 [param0+10],
; CHECK-DAG: st.param.b8 [param0+11],
; CHECK-DAG: st.param.b8 [param0+12],
; CHECK-DAG: st.param.b8 [param0+13],
; CHECK-DAG: st.param.b8 [param0+14],
; CHECK-DAG: st.param.b8 [param0+15],
; CHECK-DAG: st.param.b8 [param0+16],
; CHECK-DAG: st.param.b8 [param0+17],
; CHECK-DAG: st.param.b8 [param0+18],
; CHECK-DAG: st.param.b8 [param0+19],
; CHECK-DAG: st.param.b8 [param0+20],
; CHECK-DAG: st.param.b8 [param0+21],
; CHECK-DAG: st.param.b8 [param0+22],
; CHECK-DAG: st.param.b8 [param0+23],
; CHECK-DAG: st.param.b8 [param0+24],
; CHECK: .param .align 1 .b8 retval0[25];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i1i32x4p,
; Callee's return value: one 8-bit load per byte offset into a 16-bit register.
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+3];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+4];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+5];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+6];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+7];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+9];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+10];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+11];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+12];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+13];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+14];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+15];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+16];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+17];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+18];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+19];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+20];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+21];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+22];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+23];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+24];
; The closing brace of the call sequence separates the retval loads above from
; the func_retval0 stores below.
; CHECK: } // callseq
; CHECK-DAG: st.param.b8 [func_retval0],
; CHECK-DAG: st.param.b8 [func_retval0+1],
; CHECK-DAG: st.param.b8 [func_retval0+2],
; CHECK-DAG: st.param.b8 [func_retval0+3],
; CHECK-DAG: st.param.b8 [func_retval0+4],
; CHECK-DAG: st.param.b8 [func_retval0+5],
; CHECK-DAG: st.param.b8 [func_retval0+6],
; CHECK-DAG: st.param.b8 [func_retval0+7],
; CHECK-DAG: st.param.b8 [func_retval0+8],
; CHECK-DAG: st.param.b8 [func_retval0+9],
; CHECK-DAG: st.param.b8 [func_retval0+10],
; CHECK-DAG: st.param.b8 [func_retval0+11],
; CHECK-DAG: st.param.b8 [func_retval0+12],
; CHECK-DAG: st.param.b8 [func_retval0+13],
; CHECK-DAG: st.param.b8 [func_retval0+14],
; CHECK-DAG: st.param.b8 [func_retval0+15],
; CHECK-DAG: st.param.b8 [func_retval0+16],
; CHECK-DAG: st.param.b8 [func_retval0+17],
; CHECK-DAG: st.param.b8 [func_retval0+18],
; CHECK-DAG: st.param.b8 [func_retval0+19],
; CHECK-DAG: st.param.b8 [func_retval0+20],
; CHECK-DAG: st.param.b8 [func_retval0+21],
; CHECK-DAG: st.param.b8 [func_retval0+22],
; CHECK-DAG: st.param.b8 [func_retval0+23],
; CHECK-DAG: st.param.b8 [func_retval0+24],

define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
  %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
  ret %s_i8i32x4p %r;
}

; Check that we can vectorize loads that span multiple aggregate fields.
; %s_crossfield is { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}] }, passed
; as an 80-byte array aligned to 16. The expected accesses are:
;   offset 0  - two-element vector: the leading i32 and the first array element
;   offset 8  - scalar: the second array element
;   offset 16 - four-element vector: the <4 x i32> field
;   offsets 32 and 48 - four-element vectors over the first eight i32 values of
;                       the trailing array of structs
;   offset 64 - scalar: the ninth and last i32 of that array
; CHECK:.visible .func (.param .align 16 .b8 func_retval0[80])
; CHECK-LABEL: test_s_crossfield(
; CHECK: .param .align 16 .b8 test_s_crossfield_param_0[80]
; Incoming argument loads, expected from the highest offset down to offset 0.
; CHECK: ld.param.u32 [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
; CHECK: ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
; CHECK: ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
; CHECK: ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
; CHECK: ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
; Outgoing argument stores, in ascending offset order, forwarding the loaded
; registers unchanged.
; CHECK: .param .align 16 .b8 param0[80];
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b32 [param0+8], [[E2]];
; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
; CHECK: st.param.b32 [param0+64], [[E15]];
; CHECK: .param .align 16 .b8 retval0[80];
; CHECK: call.uni (retval0),
; CHECK: test_s_crossfield,
; Return value: loaded from retval0 and stored to func_retval0 using the same
; vector/scalar split as the argument.
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
; CHECK: ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
; CHECK: ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
; CHECK: ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64];
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
; CHECK: st.param.b32 [func_retval0+8], [[RE2]];
; CHECK: st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
; CHECK: st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
; CHECK: st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
; CHECK: st.param.b32 [func_retval0+64], [[RE15]];
; CHECK: ret;

define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
  %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
  ret %s_crossfield %r;
}
