; Verifies correctness of load/store of parameters and return values.
;
; Two lit invocations follow. Both compile this file with llc for nvptx64 /
; sm_35 at -O0 with -verify-machineinstrs; the first pipes the output into
; FileCheck, the second (guarded by "%if ptxas") pipes it into %ptxas-verify.
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}

; Aggregate types under test. Each one is a packed inner struct <{ T, i8, T }>
; followed by an i64. Because the inner struct is packed, the i8 in the middle
; puts the second T at byte offset sizeof(T) + 1.
%s_i8i16p = type { <{ i16, i8, i16 }>, i64 }
%s_i8i32p = type { <{ i32, i8, i32 }>, i64 }
%s_i8i64p = type { <{ i64, i8, i64 }>, i64 }
%s_i8f16p = type { <{ half, i8, half }>, i64 }
%s_i8f16x2p = type { <{ <2 x half>, i8, <2 x half> }>, i64 }
%s_i8f32p = type { <{ float, i8, float }>, i64 }
%s_i8f64p = type { <{ double, i8, double }>, i64 }

; -- All loads/stores of parameter fields that are aligned by only one byte
;    must be done one byte at a time.
; -- Notes:
;   -- There are two fields of interest in the packed part of each struct: one
;      at a properly aligned offset and one at a misaligned offset. The former
;      should be loaded or stored as a whole, and the latter byte by byte.
;   -- To keep the tests concise, only the loads and stores of those two
;      fields are verified in the series of tests that follows.
; ---------------------------------------------------------------------------
; test_s_i8i16p: a %s_i8i16p value is forwarded to a self-recursive tail call
; and the call result is returned. Expectations encoded below:
;   - parameter, call argument, call result and return value are all 16-byte
;     .b8 arrays with 8-byte alignment;
;   - the first i16 (offset 0) moves with single 16-bit ld/st.param accesses;
;   - the second i16 is accessed as two individual bytes at offsets +3 and +4.
; ---------------------------------------------------------------------------
; CHECK: .visible .func (.param .align 8 .b8 func_retval0[16])
; CHECK-LABEL: test_s_i8i16p(
; CHECK: .param .align 8 .b8 test_s_i8i16p_param_0[16]
; Incoming parameter: one u16 load, then two u8 loads merged with shl/or.
; CHECK-DAG: ld.param.u16 [[P0:%rs[0-9]+]], [test_s_i8i16p_param_0];
; CHECK-DAG: ld.param.u8 [[P2_0:%rs[0-9]+]], [test_s_i8i16p_param_0+3];
; CHECK-DAG: ld.param.u8 [[P2_1:%rs[0-9]+]], [test_s_i8i16p_param_0+4];
; CHECK-DAG: shl.b16 [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8;
; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
; Outgoing call argument: byte +3 takes the merged value, byte +4 the raw
; high byte.
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[16];
; CHECK-DAG: st.param.b16 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8i16p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; Call result: same split as the incoming parameter.
; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+4];
; CHECK: } // callseq
; Function return value: the bytes are masked with 255 before being combined
; and stored.
; CHECK-DAG: st.param.b16 [func_retval0], [[R0]];
; CHECK-DAG: shl.b16 [[R2_1_shl:%rs[0-9]+]], [[R2_1]], 8;
; CHECK-DAG: and.b16 [[R2_0_and:%rs[0-9]+]], [[R2_0]], 255;
; CHECK-DAG: or.b16 [[R2:%rs[0-9]+]], [[R2_0_and]], [[R2_1_shl]];
; CHECK-DAG: st.param.b8 [func_retval0+3], [[R2]];
; CHECK-DAG: and.b16 [[R2_1_and:%rs[0-9]+]], [[R2_1]], 255;
; CHECK-DAG: st.param.b8 [func_retval0+4], [[R2_1_and]];
; CHECK: ret;

; %a is passed through unchanged and the callee's result is returned as-is.
define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
  %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
  ret %s_i8i16p %r
}

; ---------------------------------------------------------------------------
; test_s_i8i32p: same shape of test for %s_i8i32p. Expectations:
;   - all four param/retval arrays are 24 bytes with 8-byte alignment;
;   - the first i32 (offset 0) moves with single 32-bit accesses;
;   - the second i32 is accessed as four bytes at offsets +5 .. +8.
; For the final stores into func_retval0 only the byte addresses are matched;
; the stored registers are left unverified (R2_0 .. R2_3 are captured but not
; referenced again).
; ---------------------------------------------------------------------------
; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i8i32p(
; CHECK: .param .align 8 .b8 test_s_i8i32p_param_0[24]
; Incoming parameter: four u8 loads merged pairwise with shl/or into P2.
; CHECK-DAG: ld.param.u32 [[P0:%r[0-9]+]], [test_s_i8i32p_param_0];
; CHECK-DAG: ld.param.u8 [[P2_0:%r[0-9]+]], [test_s_i8i32p_param_0+5];
; CHECK-DAG: ld.param.u8 [[P2_1:%r[0-9]+]], [test_s_i8i32p_param_0+6];
; CHECK-DAG: ld.param.u8 [[P2_2:%r[0-9]+]], [test_s_i8i32p_param_0+7];
; CHECK-DAG: ld.param.u8 [[P2_3:%r[0-9]+]], [test_s_i8i32p_param_0+8];
; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
; The right shifts produce the values stored to bytes +6 and +7 of the
; outgoing argument.
; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
; Outgoing call argument.
; CHECK: { // callseq
; CHECK-DAG: .param .align 8 .b8 param0[24];
; CHECK-DAG: st.param.b32 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8i32p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; Call result.
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
; CHECK: } // callseq
; Function return value (addresses only for the byte stores).
; CHECK-DAG: st.param.b32 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+5],
; CHECK-DAG: st.param.b8 [func_retval0+6],
; CHECK-DAG: st.param.b8 [func_retval0+7],
; CHECK-DAG: st.param.b8 [func_retval0+8],
; CHECK: ret;

; %a is passed through unchanged and the callee's result is returned as-is.
define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
  %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
  ret %s_i8i32p %r
}

; ---------------------------------------------------------------------------
; test_s_i8i64p: same shape of test for %s_i8i64p. Expectations:
;   - all four param/retval arrays are 32 bytes with 8-byte alignment;
;   - the first i64 (offset 0) moves with single 64-bit accesses;
;   - the second i64 is accessed as eight bytes at offsets +9 .. +16.
; For the final stores into func_retval0 only the byte addresses are matched.
; ---------------------------------------------------------------------------
; CHECK: .visible .func (.param .align 8 .b8 func_retval0[32])
; CHECK-LABEL: test_s_i8i64p(
; CHECK: .param .align 8 .b8 test_s_i8i64p_param_0[32]
; Incoming parameter: eight u8 loads.
; CHECK-DAG: ld.param.u64 [[P0:%rd[0-9]+]], [test_s_i8i64p_param_0];
; CHECK-DAG: ld.param.u8 [[P2_0:%rd[0-9]+]], [test_s_i8i64p_param_0+9];
; CHECK-DAG: ld.param.u8 [[P2_1:%rd[0-9]+]], [test_s_i8i64p_param_0+10];
; CHECK-DAG: ld.param.u8 [[P2_2:%rd[0-9]+]], [test_s_i8i64p_param_0+11];
; CHECK-DAG: ld.param.u8 [[P2_3:%rd[0-9]+]], [test_s_i8i64p_param_0+12];
; CHECK-DAG: ld.param.u8 [[P2_4:%rd[0-9]+]], [test_s_i8i64p_param_0+13];
; CHECK-DAG: ld.param.u8 [[P2_5:%rd[0-9]+]], [test_s_i8i64p_param_0+14];
; CHECK-DAG: ld.param.u8 [[P2_6:%rd[0-9]+]], [test_s_i8i64p_param_0+15];
; CHECK-DAG: ld.param.u8 [[P2_7:%rd[0-9]+]], [test_s_i8i64p_param_0+16];
; Bytes 0..3 are merged into P2_or_2.
; CHECK-DAG: shl.b64 [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8;
; CHECK-DAG: shl.b64 [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16;
; CHECK-DAG: shl.b64 [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24;
; CHECK-DAG: or.b64 [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]];
; CHECK-DAG: or.b64 [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
; CHECK-DAG: or.b64 [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]];
; Bytes 4..7 are merged into P2_or_5, which is then shifted left by 32 and
; combined with P2_or_2 to form P2.
; CHECK-DAG: shl.b64 [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8;
; CHECK-DAG: shl.b64 [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16;
; CHECK-DAG: shl.b64 [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24;
; CHECK-DAG: or.b64 [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]];
; CHECK-DAG: or.b64 [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]];
; CHECK-DAG: or.b64 [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]];
; CHECK-DAG: shl.b64 [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32;
; CHECK-DAG: or.b64 [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]];
; Values for outgoing bytes +10 .. +12 come from shr.u64 on P2; values for
; +14 .. +16 come from bfe.u64 on the upper half P2_or_5.
; CHECK-DAG: shr.u64 [[P2_shr_1:%rd[0-9]+]], [[P2]], 8;
; CHECK-DAG: shr.u64 [[P2_shr_2:%rd[0-9]+]], [[P2]], 16;
; CHECK-DAG: shr.u64 [[P2_shr_3:%rd[0-9]+]], [[P2]], 24;
; CHECK-DAG: bfe.u64 [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24;
; CHECK-DAG: bfe.u64 [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16;
; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
; Outgoing call argument.
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[32];
; CHECK-DAG: st.param.b64 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+9], [[P2]];
; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]];
; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]];
; CHECK-DAG: st.param.b8 [param0+12], [[P2_shr_3]];
; CHECK-DAG: st.param.b8 [param0+13], [[P2_or_5]];
; CHECK-DAG: st.param.b8 [param0+14], [[P2_bfe_4]];
; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
; CHECK: .param .align 8 .b8 retval0[32];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8i64p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; Call result.
; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+12];
; CHECK-DAG: ld.param.b8 [[R2_4:%rs[0-9]+]], [retval0+13];
; CHECK-DAG: ld.param.b8 [[R2_5:%rs[0-9]+]], [retval0+14];
; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15];
; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16];
; CHECK: } // callseq
; Function return value (addresses only for the byte stores).
; CHECK-DAG: st.param.b64 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+9],
; CHECK-DAG: st.param.b8 [func_retval0+10],
; CHECK-DAG: st.param.b8 [func_retval0+11],
; CHECK-DAG: st.param.b8 [func_retval0+12],
; CHECK-DAG: st.param.b8 [func_retval0+13],
; CHECK-DAG: st.param.b8 [func_retval0+14],
; CHECK-DAG: st.param.b8 [func_retval0+15],
; CHECK-DAG: st.param.b8 [func_retval0+16],
; CHECK: ret;

; %a is passed through unchanged and the callee's result is returned as-is.
define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
  %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
  ret %s_i8i64p %r
}

; ---------------------------------------------------------------------------
; test_s_i8f16p: half-precision variant of the i16 test. Expectations:
;   - all four param/retval arrays are 16 bytes with 8-byte alignment;
;   - the first half (offset 0) moves with single b16 accesses;
;   - the second half is accessed as two bytes at offsets +3 and +4.
; ---------------------------------------------------------------------------
; CHECK: .visible .func (.param .align 8 .b8 func_retval0[16])
; CHECK-LABEL: test_s_i8f16p(
; CHECK: .param .align 8 .b8 test_s_i8f16p_param_0[16]
; Incoming parameter.
; CHECK-DAG: ld.param.b16 [[P0:%rs[0-9]+]], [test_s_i8f16p_param_0];
; CHECK-DAG: ld.param.u8 [[P2_0:%rs[0-9]+]], [test_s_i8f16p_param_0+3];
; CHECK-DAG: ld.param.u8 [[P2_1:%rs[0-9]+]], [test_s_i8f16p_param_0+4];
; CHECK-DAG: shl.b16 [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8;
; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
; Outgoing call argument.
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[16];
; CHECK-DAG: st.param.b16 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f16p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; Call result.
; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2I_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2I_1:%rs[0-9]+]], [retval0+4];
; CHECK: } // callseq
; Function return value: the bytes are masked with 255 before being combined
; and stored.
; CHECK-DAG: st.param.b16 [func_retval0], [[R0]];
; CHECK-DAG: shl.b16 [[R2I_1_shl:%rs[0-9]+]], [[R2I_1]], 8;
; CHECK-DAG: and.b16 [[R2I_0_and:%rs[0-9]+]], [[R2I_0]], 255;
; CHECK-DAG: or.b16 [[R2I:%rs[0-9]+]], [[R2I_0_and]], [[R2I_1_shl]];
; CHECK-DAG: st.param.b8 [func_retval0+3], [[R2I]];
; CHECK-DAG: and.b16 [[R2I_1_and:%rs[0-9]+]], [[R2I_1]], 255;
; CHECK-DAG: st.param.b8 [func_retval0+4], [[R2I_1_and]];
; CHECK: ret;

; %a is passed through unchanged and the callee's result is returned as-is.
define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
  %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
  ret %s_i8f16p %r
}

; ---------------------------------------------------------------------------
; test_s_i8f16x2p: <2 x half> variant of the i32 test. Expectations:
;   - all four param/retval arrays are 24 bytes with 8-byte alignment;
;   - the first <2 x half> (offset 0) moves with single b32 accesses;
;   - the second <2 x half> is accessed as four bytes at offsets +5 .. +8.
; For the final stores into func_retval0 only the byte addresses are matched.
; ---------------------------------------------------------------------------
; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i8f16x2p(
; CHECK: .param .align 8 .b8 test_s_i8f16x2p_param_0[24]
; Incoming parameter.
; CHECK-DAG: ld.param.b32 [[P0:%r[0-9]+]], [test_s_i8f16x2p_param_0];
; CHECK-DAG: ld.param.u8 [[P2_0:%r[0-9]+]], [test_s_i8f16x2p_param_0+5];
; CHECK-DAG: ld.param.u8 [[P2_1:%r[0-9]+]], [test_s_i8f16x2p_param_0+6];
; CHECK-DAG: ld.param.u8 [[P2_2:%r[0-9]+]], [test_s_i8f16x2p_param_0+7];
; CHECK-DAG: ld.param.u8 [[P2_3:%r[0-9]+]], [test_s_i8f16x2p_param_0+8];
; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
; Outgoing call argument.
; CHECK: { // callseq
; CHECK-DAG: .param .align 8 .b8 param0[24];
; CHECK-DAG: st.param.b32 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f16x2p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; Call result.
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
; CHECK: } // callseq
; Function return value (addresses only for the byte stores).
; CHECK-DAG: st.param.b32 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+5],
; CHECK-DAG: st.param.b8 [func_retval0+6],
; CHECK-DAG: st.param.b8 [func_retval0+7],
; CHECK-DAG: st.param.b8 [func_retval0+8],
; CHECK: ret;

; %a is passed through unchanged and the callee's result is returned as-is.
define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
  %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
  ret %s_i8f16x2p %r
}

; ---------------------------------------------------------------------------
; test_s_i8f32p: float variant of the i32 test. Expectations:
;   - all four param/retval arrays are 24 bytes with 8-byte alignment;
;   - the first float (offset 0) moves with single f32 accesses through a
;     %f register;
;   - the second float is accessed as four bytes at offsets +5 .. +8, using
;     integer registers.
; For the final stores into func_retval0 only the byte addresses are matched.
; ---------------------------------------------------------------------------
; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i8f32p(
; CHECK: .param .align 8 .b8 test_s_i8f32p_param_0[24]
; Incoming parameter.
; CHECK-DAG: ld.param.f32 [[P0:%f[0-9]+]], [test_s_i8f32p_param_0];
; CHECK-DAG: ld.param.u8 [[P2_0:%r[0-9]+]], [test_s_i8f32p_param_0+5];
; CHECK-DAG: ld.param.u8 [[P2_1:%r[0-9]+]], [test_s_i8f32p_param_0+6];
; CHECK-DAG: ld.param.u8 [[P2_2:%r[0-9]+]], [test_s_i8f32p_param_0+7];
; CHECK-DAG: ld.param.u8 [[P2_3:%r[0-9]+]], [test_s_i8f32p_param_0+8];
; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
; Outgoing call argument.
; CHECK: { // callseq
; CHECK-DAG: .param .align 8 .b8 param0[24];
; CHECK-DAG: st.param.f32 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f32p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; Call result.
; CHECK-DAG: ld.param.f32 [[R0:%f[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
; CHECK: } // callseq
; Function return value (addresses only for the byte stores).
; CHECK-DAG: st.param.f32 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+5],
; CHECK-DAG: st.param.b8 [func_retval0+6],
; CHECK-DAG: st.param.b8 [func_retval0+7],
; CHECK-DAG: st.param.b8 [func_retval0+8],
; CHECK: ret;

; %a is passed through unchanged and the callee's result is returned as-is.
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
  %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
  ret %s_i8f32p %r
}

; ---------------------------------------------------------------------------
; test_s_i8f64p: double variant of the i64 test. Expectations:
;   - all four param/retval arrays are 32 bytes with 8-byte alignment;
;   - the first double (offset 0) moves with single f64 accesses through a
;     %fd register;
;   - the second double is accessed as eight bytes at offsets +9 .. +16,
;     using integer registers.
; For the final stores into func_retval0 only the byte addresses are matched.
; ---------------------------------------------------------------------------
; CHECK: .visible .func (.param .align 8 .b8 func_retval0[32])
; CHECK-LABEL: test_s_i8f64p(
; CHECK: .param .align 8 .b8 test_s_i8f64p_param_0[32]
; Incoming parameter: eight u8 loads.
; CHECK-DAG: ld.param.f64 [[P0:%fd[0-9]+]], [test_s_i8f64p_param_0];
; CHECK-DAG: ld.param.u8 [[P2_0:%rd[0-9]+]], [test_s_i8f64p_param_0+9];
; CHECK-DAG: ld.param.u8 [[P2_1:%rd[0-9]+]], [test_s_i8f64p_param_0+10];
; CHECK-DAG: ld.param.u8 [[P2_2:%rd[0-9]+]], [test_s_i8f64p_param_0+11];
; CHECK-DAG: ld.param.u8 [[P2_3:%rd[0-9]+]], [test_s_i8f64p_param_0+12];
; CHECK-DAG: ld.param.u8 [[P2_4:%rd[0-9]+]], [test_s_i8f64p_param_0+13];
; CHECK-DAG: ld.param.u8 [[P2_5:%rd[0-9]+]], [test_s_i8f64p_param_0+14];
; CHECK-DAG: ld.param.u8 [[P2_6:%rd[0-9]+]], [test_s_i8f64p_param_0+15];
; CHECK-DAG: ld.param.u8 [[P2_7:%rd[0-9]+]], [test_s_i8f64p_param_0+16];
; Bytes 0..3 are merged into P2_or_2.
; CHECK-DAG: shl.b64 [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8;
; CHECK-DAG: shl.b64 [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16;
; CHECK-DAG: shl.b64 [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24;
; CHECK-DAG: or.b64 [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]];
; CHECK-DAG: or.b64 [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
; CHECK-DAG: or.b64 [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]];
; Bytes 4..7 are merged into P2_or_5, which is then shifted left by 32 and
; combined with P2_or_2 to form P2.
; CHECK-DAG: shl.b64 [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8;
; CHECK-DAG: shl.b64 [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16;
; CHECK-DAG: shl.b64 [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24;
; CHECK-DAG: or.b64 [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]];
; CHECK-DAG: or.b64 [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]];
; CHECK-DAG: or.b64 [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]];
; CHECK-DAG: shl.b64 [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32;
; CHECK-DAG: or.b64 [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]];
; Values for outgoing bytes +10 .. +12 come from shr.u64 on P2; values for
; +14 .. +16 come from bfe.u64 on the upper half P2_or_5.
; CHECK-DAG: shr.u64 [[P2_shr_1:%rd[0-9]+]], [[P2]], 8;
; CHECK-DAG: shr.u64 [[P2_shr_2:%rd[0-9]+]], [[P2]], 16;
; CHECK-DAG: shr.u64 [[P2_shr_3:%rd[0-9]+]], [[P2]], 24;
; CHECK-DAG: bfe.u64 [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24;
; CHECK-DAG: bfe.u64 [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16;
; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
; Outgoing call argument.
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[32];
; CHECK-DAG: st.param.f64 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+9], [[P2]];
; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]];
; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]];
; CHECK-DAG: st.param.b8 [param0+12], [[P2_shr_3]];
; CHECK-DAG: st.param.b8 [param0+13], [[P2_or_5]];
; CHECK-DAG: st.param.b8 [param0+14], [[P2_bfe_4]];
; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
; CHECK: .param .align 8 .b8 retval0[32];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_s_i8f64p,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; Call result.
; CHECK-DAG: ld.param.f64 [[R0:%fd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+12];
; CHECK-DAG: ld.param.b8 [[R2_4:%rs[0-9]+]], [retval0+13];
; CHECK-DAG: ld.param.b8 [[R2_5:%rs[0-9]+]], [retval0+14];
; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15];
; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16];
; CHECK: } // callseq
; Function return value (addresses only for the byte stores).
; CHECK-DAG: st.param.f64 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+9],
; CHECK-DAG: st.param.b8 [func_retval0+10],
; CHECK-DAG: st.param.b8 [func_retval0+11],
; CHECK-DAG: st.param.b8 [func_retval0+12],
; CHECK-DAG: st.param.b8 [func_retval0+13],
; CHECK-DAG: st.param.b8 [func_retval0+14],
; CHECK-DAG: st.param.b8 [func_retval0+15],
; CHECK-DAG: st.param.b8 [func_retval0+16],
; CHECK: ret;

; %a is passed through unchanged and the callee's result is returned as-is.
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
  %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
  ret %s_i8f64p %r
}