; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s

; --------------------------------------------------------------------
; llvm.amdgcn.rcp
; --------------------------------------------------------------------
; Constant-operand calls are expected to fold to a literal (an undef operand
; yields a quiet NaN). The strictfp function is expected to keep the call.

declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone

define float @test_constant_fold_rcp_f32_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_rcp_f32_undef(
; CHECK-NEXT:    ret float 0x7FF8000000000000
;
  %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
  ret float %val
}

define float @test_constant_fold_rcp_f32_1() nounwind {
; CHECK-LABEL: @test_constant_fold_rcp_f32_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
  ret float %val
}

define double @test_constant_fold_rcp_f64_1() nounwind {
; CHECK-LABEL: @test_constant_fold_rcp_f64_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
  ret double %val
}

define float @test_constant_fold_rcp_f32_half() nounwind {
; CHECK-LABEL: @test_constant_fold_rcp_f32_half(
; CHECK-NEXT:    ret float 2.000000e+00
;
  %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
  ret float %val
}

define double @test_constant_fold_rcp_f64_half() nounwind {
; CHECK-LABEL: @test_constant_fold_rcp_f64_half(
; CHECK-NEXT:    ret double 2.000000e+00
;
  %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
  ret double %val
}

define float @test_constant_fold_rcp_f32_43() nounwind {
; CHECK-LABEL: @test_constant_fold_rcp_f32_43(
; CHECK-NEXT:    ret float 0x3F97D05F40000000
;
  %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
  ret float %val
}

define double @test_constant_fold_rcp_f64_43() nounwind {
; CHECK-LABEL: @test_constant_fold_rcp_f64_43(
; CHECK-NEXT:    ret double 0x3F97D05F417D05F4
;
  %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
  ret double %val
}

define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
  ret float %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.sqrt
; --------------------------------------------------------------------
; f16: undef, 0.0 and -0.0 operands are expected to fold, and calls with a
; non-constant operand are expected to become llvm.sqrt.f16 (fast-math flags
; kept). f32/f64: only the undef operand folds; every other call below is
; expected to be left as the target intrinsic.

declare half @llvm.amdgcn.sqrt.f16(half) nounwind readnone
declare float @llvm.amdgcn.sqrt.f32(float) nounwind readnone
declare double @llvm.amdgcn.sqrt.f64(double) nounwind readnone

define half @test_constant_fold_sqrt_f16_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_undef(
; CHECK-NEXT:    ret half 0xH7E00
;
  %val = call half @llvm.amdgcn.sqrt.f16(half undef) nounwind readnone
  ret half %val
}

define float @test_constant_fold_sqrt_f32_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_undef(
; CHECK-NEXT:    ret float 0x7FF8000000000000
;
  %val = call float @llvm.amdgcn.sqrt.f32(float undef) nounwind readnone
  ret float %val
}

define double @test_constant_fold_sqrt_f64_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f64_undef(
; CHECK-NEXT:    ret double 0x7FF8000000000000
;
  %val = call double @llvm.amdgcn.sqrt.f64(double undef) nounwind readnone
  ret double %val
}

define half @test_constant_fold_sqrt_f16_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_0(
; CHECK-NEXT:    ret half 0xH0000
;
  %val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone
  ret half %val
}

define float @test_constant_fold_sqrt_f32_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15:[0-9]+]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
  ret float %val
}

define double @test_constant_fold_sqrt_f64_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f64_0(
; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR15]]
; CHECK-NEXT:    ret double [[VAL]]
;
  %val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone
  ret double %val
}

define half @test_constant_fold_sqrt_f16_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0(
; CHECK-NEXT:    ret half 0xH8000
;
  %val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone
  ret half %val
}

define float @test_constant_fold_sqrt_f32_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR15]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone
  ret float %val
}

define double @test_constant_fold_sqrt_f64_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0(
; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR15]]
; CHECK-NEXT:    ret double [[VAL]]
;
  %val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone
  ret double %val
}

define double @test_constant_fold_sqrt_snan_f64() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_snan_f64(
; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0x7FF0000000000001)
; CHECK-NEXT:    ret double [[VAL]]
;
  %val = call double @llvm.amdgcn.sqrt.f64(double 0x7FF0000000000001)
  ret double %val
}

define double @test_constant_fold_sqrt_qnan_f64() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_qnan_f64(
; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0x7FF8000000000000)
; CHECK-NEXT:    ret double [[VAL]]
;
  %val = call double @llvm.amdgcn.sqrt.f64(double 0x7FF8000000000000)
  ret double %val
}

define double @test_constant_fold_sqrt_neg1() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_neg1(
; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -1.000000e+00)
; CHECK-NEXT:    ret double [[VAL]]
;
  %val = call double @llvm.amdgcn.sqrt.f64(double -1.0)
  ret double %val
}

define half @test_amdgcn_sqrt_f16(half %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f16(
; CHECK-NEXT:    [[VAL:%.*]] = call half @llvm.sqrt.f16(half [[ARG:%.*]])
; CHECK-NEXT:    ret half [[VAL]]
;
  %val = call half @llvm.amdgcn.sqrt.f16(half %arg)
  ret half %val
}

define half @test_amdgcn_sqrt_f16_flags(half %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f16_flags(
; CHECK-NEXT:    [[VAL:%.*]] = call nnan half @llvm.sqrt.f16(half [[ARG:%.*]])
; CHECK-NEXT:    ret half [[VAL]]
;
  %val = call nnan half @llvm.amdgcn.sqrt.f16(half %arg)
  ret half %val
}

define float @test_amdgcn_sqrt_f32(float %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f32(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[ARG:%.*]])
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.sqrt.f32(float %arg)
  ret float %val
}

define double @test_amdgcn_sqrt_f64(double %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f64(
; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double [[ARG:%.*]])
; CHECK-NEXT:    ret double [[VAL]]
;
  %val = call double @llvm.amdgcn.sqrt.f64(double %arg)
  ret double %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.rsq
; --------------------------------------------------------------------
; An undef operand is expected to fold to a quiet NaN.

declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone

define float @test_constant_fold_rsq_f32_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_rsq_f32_undef(
; CHECK-NEXT:    ret float 0x7FF8000000000000
;
  %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
  ret float %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.frexp.mant
; --------------------------------------------------------------------
; Expected folds: undef stays undef; zeros, NaNs and infinities come back
; unchanged; the other finite inputs below yield a value of the same sign
; with magnitude in [0.5, 1.0).

declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone

define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
; CHECK-NEXT:    ret float undef
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
; CHECK-NEXT:    ret double undef
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
; CHECK-NEXT:    ret float 0.000000e+00
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
; CHECK-NEXT:    ret float -0.000000e+00
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
; CHECK-NEXT:    ret double -0.000000e+00
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_1() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
; CHECK-NEXT:    ret float 5.000000e-01
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_1() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
; CHECK-NEXT:    ret double 5.000000e-01
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
; CHECK-NEXT:    ret float -5.000000e-01
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
; CHECK-NEXT:    ret double -5.000000e-01
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
; CHECK-NEXT:    ret float 0x7FF8000000000000
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
; CHECK-NEXT:    ret double 0x7FF8000000000000
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
; CHECK-NEXT:    ret float 0x7FF0000000000000
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
; CHECK-NEXT:    ret double 0x7FF0000000000000
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
; CHECK-NEXT:    ret float 0xFFF0000000000000
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
; CHECK-NEXT:    ret double 0xFFF0000000000000
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFF
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
  ret double %val
}

define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
; CHECK-NEXT:    ret float 5.000000e-01
;
  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
  ret float %val
}

define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
; CHECK-NEXT:    ret double 5.000000e-01
;
  %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
  ret double %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.frexp.exp
; --------------------------------------------------------------------
; Expected folds: undef stays undef; zeros, NaNs and infinities yield 0; the
; other inputs yield an exponent consistent with a mantissa in [0.5, 1.0)
; (for example 1024.0 -> 11 and 1/1024 -> -9).

declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone

define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
; CHECK-NEXT:    ret i32 undef
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
; CHECK-NEXT:    ret i32 undef
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
; CHECK-NEXT:    ret i32 11
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
; CHECK-NEXT:    ret i32 11
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
; CHECK-NEXT:    ret i32 11
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
; CHECK-NEXT:    ret i32 11
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
; CHECK-NEXT:    ret i32 -9
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
; CHECK-NEXT:    ret i32 -9
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
; CHECK-NEXT:    ret i32 0
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
; CHECK-NEXT:    ret i32 128
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
; CHECK-NEXT:    ret i32 1024
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
; CHECK-NEXT:    ret i32 -148
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
  ret i32 %val
}

define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
; CHECK-NEXT:    ret i32 -1073
;
  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
  ret i32 %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.class
; --------------------------------------------------------------------
; Mask bits as exercised by the constant-operand tests below:
;     1 sNaN         2 qNaN         4 -inf       8 -normal   16 -subnormal
;    32 -0          64 +0         128 +subnormal 256 +normal 512 +inf
; Bits above 1023 are expected to be dropped (a mask of 1025 is treated as 1).

declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone

define i1 @test_class_undef_mask_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_undef_mask_f32(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
  ret i1 %val
}

define i1 @test_class_poison_poison_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_poison_poison_f32(
; CHECK-NEXT:    ret i1 poison
;
  %val = call i1 @llvm.amdgcn.class.f32(float poison, i32 poison)
  ret i1 %val
}

define i1 @test_class_val_poison_f32(float %arg) nounwind {
; CHECK-LABEL: @test_class_val_poison_f32(
; CHECK-NEXT:    ret i1 poison
;
  %val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 poison)
  ret i1 %val
}

define i1 @test_class_poison_val_f32(i32 %arg) nounwind {
; CHECK-LABEL: @test_class_poison_val_f32(
; CHECK-NEXT:    ret i1 poison
;
  %val = call i1 @llvm.amdgcn.class.f32(float poison, i32 %arg)
  ret i1 %val
}

define i1 @test_class_over_max_mask_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_over_max_mask_f32(
; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 1)
; CHECK-NEXT:    ret i1 [[VAL]]
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
  ret i1 %val
}

define i1 @test_class_no_mask_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_no_mask_f32(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0)
  ret i1 %val
}

define i1 @test_class_full_mask_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_full_mask_f32(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023)
  ret i1 %val
}

define i1 @test_class_undef_no_mask_f32() nounwind {
; CHECK-LABEL: @test_class_undef_no_mask_f32(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0)
  ret i1 %val
}

define i1 @test_class_undef_full_mask_f32() nounwind {
; CHECK-LABEL: @test_class_undef_full_mask_f32(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023)
  ret i1 %val
}

define i1 @test_class_undef_val_f32() nounwind {
; CHECK-LABEL: @test_class_undef_val_f32(
; CHECK-NEXT:    ret i1 undef
;
  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
  ret i1 %val
}

define i1 @test_class_undef_val_f32_var(i32 %arg) nounwind {
; CHECK-LABEL: @test_class_undef_val_f32_var(
; CHECK-NEXT:    [[VAL:%.*]] = icmp ne i32 [[ARG:%.*]], 0
; CHECK-NEXT:    ret i1 [[VAL]]
;
  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 %arg)
  ret i1 %val
}

define i1 @test_class_val_undef_f32(float %arg) nounwind {
; CHECK-LABEL: @test_class_val_undef_f32(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 undef)
  ret i1 %val
}

define i1 @test_class_undef_undef_f32() nounwind {
; CHECK-LABEL: @test_class_undef_undef_f32(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
  ret i1 %val
}

define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
; CHECK-LABEL: @test_class_var_mask_f32(
; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 [[MASK:%.*]])
; CHECK-NEXT:    ret i1 [[VAL]]
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
  ret i1 %val
}

define i1 @test_class_isnan_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_isnan_f32(
; CHECK-NEXT:    [[VAL:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[VAL]]
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
  ret i1 %val
}

define i1 @test_class_isnan_f32_strict(float %x) nounwind strictfp {
; CHECK-LABEL: @test_class_isnan_f32_strict(
; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR16:[0-9]+]]
; CHECK-NEXT:    ret i1 [[VAL]]
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp
  ret i1 %val
}

define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_is_p0_n0_f32(
; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[VAL]]
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96)
  ret i1 %val
}

define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind strictfp {
; CHECK-LABEL: @test_class_is_p0_n0_f32_strict(
; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR16]]
; CHECK-NEXT:    ret i1 [[VAL]]
;
  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp
  ret i1 %val
}

define i1 @test_constant_class_snan_test_snan_f64() nounwind {
; CHECK-LABEL: @test_constant_class_snan_test_snan_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1)
  ret i1 %val
}

define i1 @test_constant_class_qnan_test_qnan_f64() nounwind {
; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2)
  ret i1 %val
}

define i1 @test_constant_class_qnan_test_snan_f64() nounwind {
; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1)
  ret i1 %val
}

define i1 @test_constant_class_ninf_test_ninf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4)
  ret i1 %val
}

define i1 @test_constant_class_pinf_test_ninf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4)
  ret i1 %val
}

define i1 @test_constant_class_qnan_test_ninf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4)
  ret i1 %val
}

define i1 @test_constant_class_snan_test_ninf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4)
  ret i1 %val
}

define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8)
  ret i1 %val
}

define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8)
  ret i1 %val
}

define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16)
  ret i1 %val
}

define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16)
  ret i1 %val
}

define i1 @test_constant_class_nzero_test_nzero_f64() nounwind {
; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32)
  ret i1 %val
}

define i1 @test_constant_class_pzero_test_nzero_f64() nounwind {
; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32)
  ret i1 %val
}

define i1 @test_constant_class_pzero_test_pzero_f64() nounwind {
; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64)
  ret i1 %val
}

define i1 @test_constant_class_nzero_test_pzero_f64() nounwind {
; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64)
  ret i1 %val
}

define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128)
  ret i1 %val
}

define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128)
  ret i1 %val
}

define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256)
  ret i1 %val
}

define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind {
; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256)
  ret i1 %val
}

define i1 @test_constant_class_pinf_test_pinf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64(
; CHECK-NEXT:    ret i1 true
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512)
  ret i1 %val
}

define i1 @test_constant_class_ninf_test_pinf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512)
  ret i1 %val
}

define i1 @test_constant_class_qnan_test_pinf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512)
  ret i1 %val
}

define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64(
; CHECK-NEXT:    ret i1 false
;
  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
  ret i1 %val
}

define i1 @test_class_is_snan_nnan_src(float %x) {
; CHECK-LABEL: @test_class_is_snan_nnan_src(
; CHECK-NEXT:    ret i1 false
;
  %nnan = fadd nnan float %x, 1.0
  %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 1)
  ret i1 %class
}

define i1 @test_class_is_qnan_nnan_src(float %x) {
; CHECK-LABEL: @test_class_is_qnan_nnan_src(
; CHECK-NEXT:    ret i1 false
;
  %nnan = fadd nnan float %x, 1.0
  %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 2)
  ret i1 %class
}

define i1 @test_class_is_nan_nnan_src(float %x) {
; CHECK-LABEL: @test_class_is_nan_nnan_src(
; CHECK-NEXT:    ret i1 false
;
  %nnan = fadd nnan float %x, 1.0
  %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 3)
  ret i1 %class
}

define i1 @test_class_is_nan_other_nnan_src(float %x) {
; CHECK-LABEL: @test_class_is_nan_other_nnan_src(
; CHECK-NEXT:    [[NNAN:%.*]] = fadd nnan float [[X:%.*]], 1.000000e+00
; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[NNAN]], i32 264)
; CHECK-NEXT:    ret i1 [[CLASS]]
;
  %nnan = fadd nnan float %x, 1.0
  %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 267)
  ret i1 %class
}

; --------------------------------------------------------------------
; llvm.amdgcn.cos
; --------------------------------------------------------------------
; fneg and fabs applied to the operand are expected to be dropped.
declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

define float @cos_fneg_f32(float %x) {
; CHECK-LABEL: @cos_fneg_f32(
; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
; CHECK-NEXT:    ret float [[COS]]
;
  %x.fneg = fsub float -0.0, %x
  %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
  ret float %cos
}

define float @cos_unary_fneg_f32(float %x) {
; CHECK-LABEL: @cos_unary_fneg_f32(
; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
; CHECK-NEXT:    ret float [[COS]]
;
  %x.fneg = fneg float %x
  %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
  ret float %cos
}

define float @cos_fabs_f32(float %x) {
; CHECK-LABEL: @cos_fabs_f32(
; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
; CHECK-NEXT:    ret float [[COS]]
;
  %x.fabs = call float @llvm.fabs.f32(float %x)
  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
  ret float %cos
}

1004define float @cos_fabs_fneg_f32(float %x) { 1005; CHECK-LABEL: @cos_fabs_fneg_f32( 1006; CHECK-NEXT: 
[[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]]) 1007; CHECK-NEXT: ret float [[COS]] 1008; 1009 %x.fabs = call float @llvm.fabs.f32(float %x) 1010 %x.fabs.fneg = fsub float -0.0, %x.fabs 1011 %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg) 1012 ret float %cos 1013} 1014 1015define float @cos_fabs_unary_fneg_f32(float %x) { 1016; CHECK-LABEL: @cos_fabs_unary_fneg_f32( 1017; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]]) 1018; CHECK-NEXT: ret float [[COS]] 1019; 1020 %x.fabs = call float @llvm.fabs.f32(float %x) 1021 %x.fabs.fneg = fneg float %x.fabs 1022 %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg) 1023 ret float %cos 1024} 1025 1026 1027; -------------------------------------------------------------------- 1028; llvm.amdgcn.sin 1029; -------------------------------------------------------------------- 1030declare float @llvm.amdgcn.sin.f32(float) nounwind readnone 1031 1032define float @sin_fneg_f32(float %x) { 1033; CHECK-LABEL: @sin_fneg_f32( 1034; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sin.f32(float [[X:%.*]]) 1035; CHECK-NEXT: [[SIN:%.*]] = fneg float [[TMP1]] 1036; CHECK-NEXT: ret float [[SIN]] 1037; 1038 %x.fneg = fneg float %x 1039 %sin = call float @llvm.amdgcn.sin.f32(float %x.fneg) 1040 ret float %sin 1041} 1042 1043define float @sin_fabs_f32(float %x) { 1044; CHECK-LABEL: @sin_fabs_f32( 1045; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) 1046; CHECK-NEXT: [[SIN:%.*]] = call float @llvm.amdgcn.sin.f32(float [[X_FABS]]) 1047; CHECK-NEXT: ret float [[SIN]] 1048; 1049 %x.fabs = call float @llvm.fabs.f32(float %x) 1050 %sin = call float @llvm.amdgcn.sin.f32(float %x.fabs) 1051 ret float %sin 1052} 1053 1054define float @sin_fabs_fneg_f32(float %x) { 1055; CHECK-LABEL: @sin_fabs_fneg_f32( 1056; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) 1057; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sin.f32(float [[X_FABS]]) 1058; 
CHECK-NEXT: [[SIN:%.*]] = fneg float [[TMP1]] 1059; CHECK-NEXT: ret float [[SIN]] 1060; 1061 %x.fabs = call float @llvm.fabs.f32(float %x) 1062 %x.fabs.fneg = fneg float %x.fabs 1063 %sin = call float @llvm.amdgcn.sin.f32(float %x.fabs.fneg) 1064 ret float %sin 1065} 1066 1067define float @sin_fabs_fneg_fast_f32(float %x) { 1068; CHECK-LABEL: @sin_fabs_fneg_fast_f32( 1069; CHECK-NEXT: [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]]) 1070; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.amdgcn.sin.f32(float [[X_FABS]]) 1071; CHECK-NEXT: [[SIN:%.*]] = fneg fast float [[TMP1]] 1072; CHECK-NEXT: ret float [[SIN]] 1073; 1074 %x.fabs = call fast float @llvm.fabs.f32(float %x) 1075 %x.fabs.fneg = fneg float %x.fabs 1076 %sin = call fast float @llvm.amdgcn.sin.f32(float %x.fabs.fneg) 1077 ret float %sin 1078} 1079 1080; -------------------------------------------------------------------- 1081; llvm.amdgcn.cvt.pkrtz 1082; -------------------------------------------------------------------- 1083 1084declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone 1085 1086define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) { 1087; CHECK-LABEL: @vars_lhs_cvt_pkrtz( 1088; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float [[Y:%.*]]) 1089; CHECK-NEXT: ret <2 x half> [[CVT]] 1090; 1091 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y) 1092 ret <2 x half> %cvt 1093} 1094 1095define <2 x half> @constant_lhs_cvt_pkrtz(float %y) { 1096; CHECK-LABEL: @constant_lhs_cvt_pkrtz( 1097; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[Y:%.*]]) 1098; CHECK-NEXT: ret <2 x half> [[CVT]] 1099; 1100 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y) 1101 ret <2 x half> %cvt 1102} 1103 1104define <2 x half> @constant_rhs_cvt_pkrtz(float %x) { 1105; CHECK-LABEL: @constant_rhs_cvt_pkrtz( 1106; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> 
@llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float 0.000000e+00) 1107; CHECK-NEXT: ret <2 x half> [[CVT]] 1108; 1109 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0) 1110 ret <2 x half> %cvt 1111} 1112 1113define <2 x half> @undef_lhs_cvt_pkrtz(float %y) { 1114; CHECK-LABEL: @undef_lhs_cvt_pkrtz( 1115; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float [[Y:%.*]]) 1116; CHECK-NEXT: ret <2 x half> [[CVT]] 1117; 1118 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y) 1119 ret <2 x half> %cvt 1120} 1121 1122define <2 x half> @undef_rhs_cvt_pkrtz(float %x) { 1123; CHECK-LABEL: @undef_rhs_cvt_pkrtz( 1124; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float undef) 1125; CHECK-NEXT: ret <2 x half> [[CVT]] 1126; 1127 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef) 1128 ret <2 x half> %cvt 1129} 1130 1131define <2 x half> @undef_cvt_pkrtz() { 1132; CHECK-LABEL: @undef_cvt_pkrtz( 1133; CHECK-NEXT: ret <2 x half> undef 1134; 1135 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef) 1136 ret <2 x half> %cvt 1137} 1138 1139define <2 x half> @constant_splat0_cvt_pkrtz() { 1140; CHECK-LABEL: @constant_splat0_cvt_pkrtz( 1141; CHECK-NEXT: ret <2 x half> zeroinitializer 1142; 1143 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0) 1144 ret <2 x half> %cvt 1145} 1146 1147define <2 x half> @constant_cvt_pkrtz() { 1148; CHECK-LABEL: @constant_cvt_pkrtz( 1149; CHECK-NEXT: ret <2 x half> <half 0xH4000, half 0xH4400> 1150; 1151 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0) 1152 ret <2 x half> %cvt 1153} 1154 1155; Test constant values where rtz changes result 1156define <2 x half> @constant_rtz_pkrtz() { 1157; CHECK-LABEL: @constant_rtz_pkrtz( 1158; CHECK-NEXT: ret <2 x half> splat (half 0xH7BFF) 1159; 1160 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0) 1161 ret <2 x half> %cvt 1162} 

; First operand is an fpext from half, second is the constant 3.0: the call is
; expected to become an insertelement of %x into <poison, 0xH4200> at index 0.
define <2 x half> @fpext_const_cvt_pkrtz(half %x) {
; CHECK-LABEL: @fpext_const_cvt_pkrtz(
; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0
; CHECK-NEXT:    ret <2 x half> [[CVT]]
;
  %ext = fpext half %x to float
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0)
  ret <2 x half> %cvt
}

; Constant 5.0 first, fpext from half second: expected to become an
; insertelement of %y into <0xH4500, poison> at index 1.
define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
; CHECK-LABEL: @const_fpext_cvt_pkrtz(
; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
; CHECK-NEXT:    ret <2 x half> [[CVT]]
;
  %ext = fpext half %y to float
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
  ret <2 x half> %cvt
}

; One fpext feeds two cvt.pkrtz calls: both calls are still expected to become
; insertelement instructions and the fpext disappears.
define <2 x half> @const_fpext_multi_cvt_pkrtz(half %y) {
; CHECK-LABEL: @const_fpext_multi_cvt_pkrtz(
; CHECK-NEXT:    [[CVT1:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
; CHECK-NEXT:    [[CVT2:%.*]] = insertelement <2 x half> <half 0xH4200, half poison>, half [[Y]], i64 1
; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x half> [[CVT1]], [[CVT2]]
; CHECK-NEXT:    ret <2 x half> [[ADD]]
;
  %ext = fpext half %y to float
  %cvt1 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
  %cvt2 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 3.0, float %ext)
  %add = fadd <2 x half> %cvt1, %cvt2
  ret <2 x half> %add
}

; Both operands are fpext from half: expected to become two insertelement
; instructions building the vector from %x and %y.
define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1
; CHECK-NEXT:    ret <2 x half> [[CVT]]
;
  %extx = fpext half %x to float
  %exty = fpext half %y to float
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
  ret <2 x half> %cvt
}

; Both operands are fpext from bfloat rather than half: the fpexts and the
; call are expected to be left unchanged.
define <2 x half> @fpext_fpext_bf16_cvt_pkrtz(bfloat %x, bfloat %y) {
; CHECK-LABEL: @fpext_fpext_bf16_cvt_pkrtz(
; CHECK-NEXT:    [[EXTX:%.*]] = fpext bfloat [[X:%.*]] to float
; CHECK-NEXT:    [[EXTY:%.*]] = fpext bfloat [[Y:%.*]] to float
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[EXTX]], float [[EXTY]])
; CHECK-NEXT:    ret <2 x half> [[CVT]]
;
  %extx = fpext bfloat %x to float
  %exty = fpext bfloat %y to float
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
  ret <2 x half> %cvt
}

; Poison first operand, fpext from half second: expected to become an
; insertelement of %y into a poison vector at index 1.
define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
; CHECK-NEXT:    ret <2 x half> [[CVT]]
;
  %ext = fpext half %y to float
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext)
  ret <2 x half> %cvt
}

; fpext from half first, poison second: expected to become an insertelement
; of %x into a poison vector at index 0.
define <2 x half> @fpext_poison_cvt_pkrtz(half %x) {
; CHECK-LABEL: @fpext_poison_cvt_pkrtz(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
; CHECK-NEXT:    ret <2 x half> [[TMP1]]
;
  %ext = fpext half %x to float
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float poison)
  ret <2 x half> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone

; Only the first operand is undef: expected to be left unchanged.
define <2 x i16> @undef_lhs_cvt_pknorm_i16(float %y) {
; CHECK-LABEL: @undef_lhs_cvt_pknorm_i16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float [[Y:%.*]])
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
  ret <2 x i16> %cvt
}

; Only the second operand is undef: expected to be left unchanged.
define <2 x i16> @undef_rhs_cvt_pknorm_i16(float %x) {
; CHECK-LABEL: @undef_rhs_cvt_pknorm_i16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float [[X:%.*]], float undef)
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
  ret <2 x i16> %cvt
}

; Both operands undef: expected to fold to an undef vector.
define <2 x i16> @undef_cvt_pknorm_i16() {
; CHECK-LABEL: @undef_cvt_pknorm_i16(
; CHECK-NEXT:    ret <2 x i16> undef
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.u16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone

; Only the first operand is undef: expected to be left unchanged.
define <2 x i16> @undef_lhs_cvt_pknorm_u16(float %y) {
; CHECK-LABEL: @undef_lhs_cvt_pknorm_u16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float [[Y:%.*]])
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
  ret <2 x i16> %cvt
}

; Only the second operand is undef: expected to be left unchanged.
define <2 x i16> @undef_rhs_cvt_pknorm_u16(float %x) {
; CHECK-LABEL: @undef_rhs_cvt_pknorm_u16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float [[X:%.*]], float undef)
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
  ret <2 x i16> %cvt
}

; Both operands undef: expected to fold to an undef vector.
define <2 x i16> @undef_cvt_pknorm_u16() {
; CHECK-LABEL: @undef_cvt_pknorm_u16(
; CHECK-NEXT:    ret <2 x i16> undef
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pk.i16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone

; Only the first operand is undef: expected to be left unchanged.
define <2 x i16> @undef_lhs_cvt_pk_i16(i32 %y) {
; CHECK-LABEL: @undef_lhs_cvt_pk_i16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 [[Y:%.*]])
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
  ret <2 x i16> %cvt
}

; Only the second operand is undef: expected to be left unchanged.
define <2 x i16> @undef_rhs_cvt_pk_i16(i32 %x) {
; CHECK-LABEL: @undef_rhs_cvt_pk_i16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 [[X:%.*]], i32 undef)
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
  ret <2 x i16> %cvt
}

; Both operands undef: expected to fold to an undef vector.
define <2 x i16> @undef_cvt_pk_i16() {
; CHECK-LABEL: @undef_cvt_pk_i16(
; CHECK-NEXT:    ret <2 x i16> undef
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pk.u16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone

; Only the first operand is undef: expected to be left unchanged.
define <2 x i16> @undef_lhs_cvt_pk_u16(i32 %y) {
; CHECK-LABEL: @undef_lhs_cvt_pk_u16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 [[Y:%.*]])
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
  ret <2 x i16> %cvt
}

; Only the second operand is undef: expected to be left unchanged.
define <2 x i16> @undef_rhs_cvt_pk_u16(i32 %x) {
; CHECK-LABEL: @undef_rhs_cvt_pk_u16(
; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 [[X:%.*]], i32 undef)
; CHECK-NEXT:    ret <2 x i16> [[CVT]]
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
  ret <2 x i16> %cvt
}

; Both operands undef: expected to fold to an undef vector.
define <2 x i16> @undef_cvt_pk_u16() {
; CHECK-LABEL: @undef_cvt_pk_u16(
; CHECK-NEXT:    ret <2 x i16> undef
;
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.ubfe
; --------------------------------------------------------------------

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone

; Source, offset and width all variable: expected to be left unchanged.
define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
; CHECK-LABEL: @ubfe_var_i32(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
  ret i32 %bfe
}

; Constant offset 133 with variable width: the offset is expected to be
; rewritten to 5.
define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 5, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
  ret i32 %bfe
}

; Constant width 133 with variable offset: the width is expected to be
; rewritten to 5.
define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 5)
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
  ret i32 %bfe
}

; Constant width 0: expected to fold to 0.
define i32 @ubfe_width_0(i32 %src, i32 %offset) {
; CHECK-LABEL: @ubfe_width_0(
; CHECK-NEXT:    ret i32 0
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
  ret i32 %bfe
}

; Constant width 31 with variable offset: expected to be left unchanged.
define i32 @ubfe_width_31(i32 %src, i32 %offset) {
; CHECK-LABEL: @ubfe_width_31(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 31)
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
  ret i32 %bfe
}

; Constant width 32: expected to fold to 0, the same result as width 0.
define i32 @ubfe_width_32(i32 %src, i32 %offset) {
; CHECK-LABEL: @ubfe_width_32(
; CHECK-NEXT:    ret i32 0
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
  ret i32 %bfe
}

; Constant width 33: the width is expected to be rewritten to 1.
define i32 @ubfe_width_33(i32 %src, i32 %offset) {
; CHECK-LABEL: @ubfe_width_33(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 1)
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
  ret i32 %bfe
}

; Constant offset 33: the offset is expected to be rewritten to 1.
define i32 @ubfe_offset_33(i32 %src, i32 %width) {
; CHECK-LABEL: @ubfe_offset_33(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 1, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
  ret i32 %bfe
}

; Constant offset 0 with variable width: expected to be left unchanged.
define i32 @ubfe_offset_0(i32 %src, i32 %width) {
; CHECK-LABEL: @ubfe_offset_0(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
  ret i32 %bfe
}

; Constant offset 32: the offset is expected to be rewritten to 0.
define i32 @ubfe_offset_32(i32 %src, i32 %width) {
; CHECK-LABEL: @ubfe_offset_32(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
  ret i32 %bfe
}

; Constant offset 31 with variable width: expected to be left unchanged.
define i32 @ubfe_offset_31(i32 %src, i32 %width) {
; CHECK-LABEL: @ubfe_offset_31(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
  ret i32 %bfe
}

; Offset 0 and width 0: expected to fold to 0.
define i32 @ubfe_offset_0_width_0(i32 %src) {
; CHECK-LABEL: @ubfe_offset_0_width_0(
; CHECK-NEXT:    ret i32 0
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
  ret i32 %bfe
}

; Offset 0 and width 3: expected to become `and %src, 7`.
define i32 @ubfe_offset_0_width_3(i32 %src) {
; CHECK-LABEL: @ubfe_offset_0_width_3(
; CHECK-NEXT:    [[BFE:%.*]] = and i32 [[SRC:%.*]], 7
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
  ret i32 %bfe
}

; Offset 3 and width 1: expected to become `lshr 3` followed by `and 1`.
define i32 @ubfe_offset_3_width_1(i32 %src) {
; CHECK-LABEL: @ubfe_offset_3_width_1(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
; CHECK-NEXT:    [[BFE:%.*]] = and i32 [[TMP1]], 1
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
  ret i32 %bfe
}

; Offset 3 and width 4: expected to become `lshr 3` followed by `and 15`.
define i32 @ubfe_offset_3_width_4(i32 %src) {
; CHECK-LABEL: @ubfe_offset_3_width_4(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
; CHECK-NEXT:    [[BFE:%.*]] = and i32 [[TMP1]], 15
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
  ret i32 %bfe
}

; All three operands constant 0: expected to fold to 0.
define i32 @ubfe_0_0_0() {
; CHECK-LABEL: @ubfe_0_0_0(
; CHECK-NEXT:    ret i32 0
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  ret i32 %bfe
}

; Source -1, offset 5, width 7: expected to fold to 127.
define i32 @ubfe_neg1_5_7() {
; CHECK-LABEL: @ubfe_neg1_5_7(
; CHECK-NEXT:    ret i32 127
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
  ret i32 %bfe
}

; Undef source with variable offset and width: expected to fold to undef.
define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
; CHECK-LABEL: @ubfe_undef_src_i32(
; CHECK-NEXT:    ret i32 undef
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
  ret i32 %bfe
}

; Undef offset: expected to be left unchanged.
define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
; CHECK-LABEL: @ubfe_undef_offset_i32(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 undef, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
  ret i32 %bfe
}

; Undef width: expected to be left unchanged.
define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
; CHECK-LABEL: @ubfe_undef_width_i32(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 undef)
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
  ret i32 %bfe
}

; i64 variant with offset 33 and width 4: offset 33 is kept as-is here and the
; call is expected to become `lshr 33` followed by `and 15`.
define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[SRC:%.*]], 33
; CHECK-NEXT:    [[BFE:%.*]] = and i64 [[TMP1]], 15
; CHECK-NEXT:    ret i64 [[BFE]]
;
  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
  ret i64 %bfe
}

; i64 variant with offset 0 and variable width: expected to be left unchanged.
define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
; CHECK-LABEL: @ubfe_offset_0_i64(
; CHECK-NEXT:    [[BFE:%.*]] = call i64 @llvm.amdgcn.ubfe.i64(i64 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i64 [[BFE]]
;
  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
  ret i64 %bfe
}

; i64 variant with offset 32 and width 32: expected to become a single
; `lshr 32`.
define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
; CHECK-NEXT:    [[BFE:%.*]] = lshr i64 [[SRC:%.*]], 32
; CHECK-NEXT:    ret i64 [[BFE]]
;
  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
  ret i64 %bfe
}

; --------------------------------------------------------------------
; llvm.amdgcn.sbfe
; --------------------------------------------------------------------

declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone

; Constant offset 31 with variable width: expected to be left unchanged.
define i32 @sbfe_offset_31(i32 %src, i32 %width) {
; CHECK-LABEL: @sbfe_offset_31(
; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.sbfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
; CHECK-NEXT:    ret i32 [[BFE]]
;
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
  ret i32 %bfe
}

; Source -1, offset 5, width 7: expected to fold to -1 (the unsigned variant
; with the same operands folds to 127).
define i32 @sbfe_neg1_5_7() {
; CHECK-LABEL: @sbfe_neg1_5_7(
; CHECK-NEXT:    ret i32 -1
;
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
  ret i32 %bfe
}

; i64 variant with offset 32 and width 32: expected to become a single
; `ashr 32`.
define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
; CHECK-NEXT:    [[BFE:%.*]] = ashr i64 [[SRC:%.*]], 32
; CHECK-NEXT:    ret i64 [[BFE]]
;
  %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
  ret i64 %bfe
}

; --------------------------------------------------------------------
; llvm.amdgcn.exp
; --------------------------------------------------------------------

declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly




; Each call passes four float sources plus a second i32 immediate. In the
; expected output, float source i is kept only when bit i of that immediate is
; set (1, 2, 4, 8 keep one source each; 3, 5, 9 keep two; 15 keeps all; 0 keeps
; none); every other source is replaced with undef. The trailing i1 operands
; are expected to be preserved as written.
define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
  ; enable src0..src3 constants
; CHECK-LABEL: @exp_disabled_inputs_to_undef(
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float [[X:%.*]], float undef, float undef, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float [[Y:%.*]], float undef, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float [[Z:%.*]], float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float [[W:%.*]], i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)

  ; enable src0..src3 variables
  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)

  ; enable none
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)

  ; enable different source combinations
  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)

  ret void
}

; --------------------------------------------------------------------
; llvm.amdgcn.exp.compr
; --------------------------------------------------------------------

declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly



; Compressed variant with two <2 x half> sources. In the expected output the
; second i32 immediate decides which sources survive: 0 replaces both with
; undef; 1, 2 and 3 keep only the first vector; 12 keeps only the second;
; 15 keeps both.
define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> [[XY:%.*]], <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> [[ZW:%.*]], i1 true, i1 false)
; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[XY]], <2 x half> [[ZW]], i1 true, i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)

  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)

  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
  ret void
}

; --------------------------------------------------------------------
; llvm.amdgcn.fmed3
; --------------------------------------------------------------------

declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone

; All three operands variable: expected to be left unchanged.
define float @fmed3_f32(float %x, float %y, float %z) {
; CHECK-LABEL: @fmed3_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
  ret float %med3
}

; Operands (x, 0.0, 1.0), variable already first: expected to be left
; unchanged.
define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
  ret float %med3
}

; Operands (0.0, x, 1.0): expected to be reordered to (x, 0.0, 1.0).
define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
  ret float %med3
}

; Operands (0.0, 1.0, x): expected to be reordered to (x, 0.0, 1.0).
define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
  ret float %med3
}

; Operands (x, y, 1.0), constant already last: expected to be left unchanged.
define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
  ret float %med3
}

; Operands (x, 1.0, y): expected to be reordered to (x, y, 1.0).
define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
  ret float %med3
}

; Operands (1.0, x, y): expected to be reordered to (x, y, 1.0).
define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
  ret float %med3
}

; Undef first operand: expected to become llvm.minnum.f32 of the two
; remaining operands.
define float @fmed3_undef_x_y_f32(float %x, float %y) {
; CHECK-LABEL: @fmed3_undef_x_y_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
  ret float %med3
}

; Same as fmed3_undef_x_y_f32 with `nnan` on the call: the flag is expected to
; be carried over to the llvm.minnum.f32 call.
define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
; CHECK-NEXT:    ret float [[MED3]]
;
  %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
  ret float %med3
}

define float @fmed3_x_undef_y_f32(float %x, float %y) {
; CHECK-LABEL: @fmed3_x_undef_y_f32(
; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float
[[X:%.*]], float [[Y:%.*]]) 1792; CHECK-NEXT: ret float [[MED3]] 1793; 1794 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y) 1795 ret float %med3 1796} 1797 1798define float @fmed3_x_y_undef_f32(float %x, float %y) { 1799; CHECK-LABEL: @fmed3_x_y_undef_f32( 1800; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) 1801; CHECK-NEXT: ret float [[MED3]] 1802; 1803 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef) 1804 ret float %med3 1805} 1806 1807define float @fmed3_qnan0_x_y_f32(float %x, float %y) { 1808; CHECK-LABEL: @fmed3_qnan0_x_y_f32( 1809; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) 1810; CHECK-NEXT: ret float [[MED3]] 1811; 1812 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) 1813 ret float %med3 1814} 1815 1816define float @fmed3_x_qnan0_y_f32(float %x, float %y) { 1817; CHECK-LABEL: @fmed3_x_qnan0_y_f32( 1818; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) 1819; CHECK-NEXT: ret float [[MED3]] 1820; 1821 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) 1822 ret float %med3 1823} 1824 1825define float @fmed3_x_y_qnan0_f32(float %x, float %y) { 1826; CHECK-LABEL: @fmed3_x_y_qnan0_f32( 1827; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) 1828; CHECK-NEXT: ret float [[MED3]] 1829; 1830 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) 1831 ret float %med3 1832} 1833 1834define float @fmed3_qnan1_x_y_f32(float %x, float %y) { 1835; CHECK-LABEL: @fmed3_qnan1_x_y_f32( 1836; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) 1837; CHECK-NEXT: ret float [[MED3]] 1838; 1839 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y) 1840 ret float %med3 1841} 1842 1843; 
This can return any of the qnans. The expected fold below keeps the third operand's payload (0x7FF8030000000000). 1844define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) { 1845; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32( 1846; CHECK-NEXT: ret float 0x7FF8030000000000 1847; 1848 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000) 1849 ret float %med3 1850} 1851; All three operands are constants (-1.0, 0.5 and 4.0 in every order below), so each call is expected to fold to the median 0.5. 1852define float @fmed3_constant_src0_0_f32(float %x, float %y) { 1853; CHECK-LABEL: @fmed3_constant_src0_0_f32( 1854; CHECK-NEXT: ret float 5.000000e-01 1855; 1856 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0) 1857 ret float %med3 1858} 1859 1860define float @fmed3_constant_src0_1_f32(float %x, float %y) { 1861; CHECK-LABEL: @fmed3_constant_src0_1_f32( 1862; CHECK-NEXT: ret float 5.000000e-01 1863; 1864 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0) 1865 ret float %med3 1866} 1867 1868define float @fmed3_constant_src1_0_f32(float %x, float %y) { 1869; CHECK-LABEL: @fmed3_constant_src1_0_f32( 1870; CHECK-NEXT: ret float 5.000000e-01 1871; 1872 %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0) 1873 ret float %med3 1874} 1875 1876define float @fmed3_constant_src1_1_f32(float %x, float %y) { 1877; CHECK-LABEL: @fmed3_constant_src1_1_f32( 1878; CHECK-NEXT: ret float 5.000000e-01 1879; 1880 %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0) 1881 ret float %med3 1882} 1883 1884define float @fmed3_constant_src2_0_f32(float %x, float %y) { 1885; CHECK-LABEL: @fmed3_constant_src2_0_f32( 1886; CHECK-NEXT: ret float 5.000000e-01 1887; 1888 %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5) 1889 ret float %med3 1890} 1891 1892define float @fmed3_constant_src2_1_f32(float %x, float %y) { 1893; CHECK-LABEL: @fmed3_constant_src2_1_f32( 1894; CHECK-NEXT: ret float 5.000000e-01 1895; 1896 %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5) 1897 ret float %med3 1898} 1899
1900define float @fmed3_x_qnan0_qnan1_f32(float %x) { 1901; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32( 1902; CHECK-NEXT: ret float [[X:%.*]] 1903; 1904 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000) 1905 ret float %med3 1906} 1907 1908define float @fmed3_qnan0_x_qnan1_f32(float %x) { 1909; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32( 1910; CHECK-NEXT: ret float [[X:%.*]] 1911; 1912 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000) 1913 ret float %med3 1914} 1915 1916define float @fmed3_qnan0_qnan1_x_f32(float %x) { 1917; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32( 1918; CHECK-NEXT: ret float [[X:%.*]] 1919; 1920 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x) 1921 ret float %med3 1922} 1923 1924define float @fmed3_nan_0_1_f32() { 1925; CHECK-LABEL: @fmed3_nan_0_1_f32( 1926; CHECK-NEXT: ret float 0.000000e+00 1927; 1928 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0.0, float 1.0) 1929 ret float %med3 1930} 1931 1932define float @fmed3_0_nan_1_f32() { 1933; CHECK-LABEL: @fmed3_0_nan_1_f32( 1934; CHECK-NEXT: ret float 0.000000e+00 1935; 1936 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 0x7FF8001000000000, float 1.0) 1937 ret float %med 1938} 1939 1940define float @fmed3_0_1_nan_f32() { 1941; CHECK-LABEL: @fmed3_0_1_nan_f32( 1942; CHECK-NEXT: ret float 1.000000e+00 1943; 1944 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000) 1945 ret float %med 1946} 1947 1948define float @fmed3_undef_0_1_f32() { 1949; CHECK-LABEL: @fmed3_undef_0_1_f32( 1950; CHECK-NEXT: ret float 0.000000e+00 1951; 1952 %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float 0.0, float 1.0) 1953 ret float %med3 1954} 1955 1956define float @fmed3_0_undef_1_f32() { 1957; CHECK-LABEL: @fmed3_0_undef_1_f32( 1958; CHECK-NEXT: ret float 0.000000e+00 1959; 1960 %med = call 
float @llvm.amdgcn.fmed3.f32(float 0.0, float undef, float 1.0) 1961 ret float %med 1962} 1963 1964define float @fmed3_0_1_undef_f32() { 1965; CHECK-LABEL: @fmed3_0_1_undef_f32( 1966; CHECK-NEXT: ret float 1.000000e+00 1967; 1968 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef) 1969 ret float %med 1970} 1971 1972; -------------------------------------------------------------------- 1973; llvm.amdgcn.icmp 1974; -------------------------------------------------------------------- 1975; The trailing immarg operand is the predicate code. Codes appearing in these tests: 32 eq, 33 ne, 34 ugt, 36 ult, 38 sgt, 39 sge, 40 slt, 41 sle. 1976declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) nounwind readnone convergent 1977declare i64 @llvm.amdgcn.icmp.i64.i64(i64, i64, i32 immarg) nounwind readnone convergent 1978declare i64 @llvm.amdgcn.icmp.i64.i1(i1, i1, i32 immarg) nounwind readnone convergent 1979; Codes 31 and 42 lie outside the 32-41 range used elsewhere in this section; both calls are expected to be left unchanged. 1980define i64 @invalid_icmp_code(i32 %a, i32 %b) { 1981; CHECK-LABEL: @invalid_icmp_code( 1982; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31) 1983; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A]], i32 [[B]], i32 42) 1984; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]] 1985; CHECK-NEXT: ret i64 [[OR]] 1986; 1987 %under = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 31) 1988 %over = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 42) 1989 %or = or i64 %under, %over 1990 ret i64 %or 1991} 1992; Constant inputs where the comparison (9 eq 8) is false: the call is expected to fold to 0. 1993define i64 @icmp_constant_inputs_false() { 1994; CHECK-LABEL: @icmp_constant_inputs_false( 1995; CHECK-NEXT: ret i64 0 1996; 1997 %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32) 1998 ret i64 %result 1999} 2000; Constant inputs where the comparison (9 ugt 8) is true: the call is expected to become llvm.read_register. NOTE(review): the register is named by metadata not shown in the CHECK line, presumably exec - confirm. 2001define i64 @icmp_constant_inputs_true() { 2002; CHECK-LABEL: @icmp_constant_inputs_true( 2003; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR17:[0-9]+]] 2004; CHECK-NEXT: ret i64 [[RESULT]] 2005; 2006 %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34) 2007 ret i64 %result 2008} 2009; A constant LHS is expected to move to the RHS with the predicate swapped: code 40 (slt) becomes code 38 (sgt). 2010define i64
@icmp_constant_to_rhs_slt(i32 %x) { 2011; CHECK-LABEL: @icmp_constant_to_rhs_slt( 2012; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[X:%.*]], i32 9, i32 38) 2013; CHECK-NEXT: ret i64 [[RESULT]] 2014; 2015 %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 %x, i32 40) 2016 ret i64 %result 2017} 2018 2019define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) { 2020; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32( 2021; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) 2022; CHECK-NEXT: ret i64 [[MASK]] 2023; 2024 %cmp = icmp eq i32 %a, %b 2025 %zext.cmp = zext i1 %cmp to i32 2026 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2027 ret i64 %mask 2028} 2029 2030define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) { 2031; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32( 2032; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) 2033; CHECK-NEXT: ret i64 [[MASK]] 2034; 2035 %cmp = icmp ne i32 %a, %b 2036 %zext.cmp = zext i1 %cmp to i32 2037 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2038 ret i64 %mask 2039} 2040 2041define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { 2042; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32( 2043; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41) 2044; CHECK-NEXT: ret i64 [[MASK]] 2045; 2046 %cmp = icmp sle i32 %a, %b 2047 %zext.cmp = zext i1 %cmp to i32 2048 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2049 ret i64 %mask 2050} 2051 2052define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) { 2053; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64( 2054; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34) 2055; CHECK-NEXT: ret i64 [[MASK]] 2056; 2057 %cmp = icmp ugt i64 %a, %b 2058 %zext.cmp = zext i1 %cmp to i32 
2059 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2060 ret i64 %mask 2061} 2062; Same ne-0 fold with the intrinsic operands swapped (0 on the LHS, zext on the RHS). NOTE(review): the name says ult, but the body compares with ugt and the expected predicate code is 34. 2063define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) { 2064; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64( 2065; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34) 2066; CHECK-NEXT: ret i64 [[MASK]] 2067; 2068 %cmp = icmp ugt i64 %a, %b 2069 %zext.cmp = zext i1 %cmp to i32 2070 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 0, i32 %zext.cmp, i32 33) 2071 ret i64 %mask 2072} 2073; zext(fcmp) != 0 is expected to fold to llvm.amdgcn.fcmp carrying the fcmp predicate code (oeq gives 1). 2074define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) { 2075; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32( 2076; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 1) 2077; CHECK-NEXT: ret i64 [[MASK]] 2078; 2079 %cmp = fcmp oeq float %a, %b 2080 %zext.cmp = zext i1 %cmp to i32 2081 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2082 ret i64 %mask 2083} 2084; Same fold for an unordered predicate (une gives 14). 2085define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) { 2086; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32( 2087; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14) 2088; CHECK-NEXT: ret i64 [[MASK]] 2089; 2090 %cmp = fcmp une float %a, %b 2091 %zext.cmp = zext i1 %cmp to i32 2092 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2093 ret i64 %mask 2094} 2095; Same fold with double operands (olt gives 4, using the f64 overload). 2096define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) { 2097; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64( 2098; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f64(double [[A:%.*]], double [[B:%.*]], i32 4) 2099; CHECK-NEXT: ret i64 [[MASK]] 2100; 2101 %cmp = fcmp olt double %a, %b 2102 %zext.cmp = zext i1 %cmp to i32 2103 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2104 ret i64 %mask 2105} 2106; A sign-extended compare result tested against 0 with ne is expected to fold like the zext form. 2107define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) { 2108;
CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32( 2109; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) 2110; CHECK-NEXT: ret i64 [[MASK]] 2111; 2112 %cmp = icmp eq i32 %a, %b 2113 %sext.cmp = sext i1 %cmp to i32 2114 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 0, i32 33) 2115 ret i64 %mask 2116} 2117 2118define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) { 2119; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32( 2120; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) 2121; CHECK-NEXT: ret i64 [[MASK]] 2122; 2123 %cmp = icmp eq i32 %a, %b 2124 %zext.cmp = zext i1 %cmp to i32 2125 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) 2126 ret i64 %mask 2127} 2128 2129define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) { 2130; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32( 2131; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) 2132; CHECK-NEXT: ret i64 [[MASK]] 2133; 2134 %cmp = icmp slt i32 %a, %b 2135 %zext.cmp = zext i1 %cmp to i32 2136 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) 2137 ret i64 %mask 2138} 2139 2140define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) { 2141; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32( 2142; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14) 2143; CHECK-NEXT: ret i64 [[MASK]] 2144; 2145 %cmp = fcmp oeq float %a, %b 2146 %zext.cmp = zext i1 %cmp to i32 2147 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) 2148 ret i64 %mask 2149} 2150 2151define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) { 2152; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32( 2153; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 2) 2154; CHECK-NEXT: ret i64 [[MASK]] 2155; 2156 %cmp = 
fcmp ule float %a, %b 2157 %zext.cmp = zext i1 %cmp to i32 2158 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) 2159 ret i64 %mask 2160} 2161 2162define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) { 2163; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32( 2164; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 13) 2165; CHECK-NEXT: ret i64 [[MASK]] 2166; 2167 %cmp = fcmp ogt float %a, %b 2168 %zext.cmp = zext i1 %cmp to i32 2169 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) 2170 ret i64 %mask 2171} 2172 2173define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) { 2174; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32( 2175; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) 2176; CHECK-NEXT: ret i64 [[MASK]] 2177; 2178 %cmp = icmp eq i32 %a, %b 2179 %zext.cmp = zext i1 %cmp to i32 2180 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 1, i32 32) 2181 ret i64 %mask 2182} 2183 2184define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) { 2185; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32( 2186; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32 2187; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 0, i32 33) 2188; CHECK-NEXT: ret i64 [[MASK]] 2189; 2190 %zext.cond = zext i1 %cond to i32 2191 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 1, i32 32) 2192 ret i64 %mask 2193} 2194 2195define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) { 2196; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32( 2197; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32 2198; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 -1, i32 32) 2199; CHECK-NEXT: ret i64 [[MASK]] 2200; 2201 %zext.cond = zext i1 %cond to i32 2202 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 -1, i32 32) 2203 ret i64 %mask 2204} 2205 
2206define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) { 2207; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32( 2208; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32 2209; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 1, i32 32) 2210; CHECK-NEXT: ret i64 [[MASK]] 2211; 2212 %sext.cond = sext i1 %cond to i32 2213 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 1, i32 32) 2214 ret i64 %mask 2215} 2216; sext(i1) == -1 is expected to be rewritten as sext(i1) != 0 (code 32 against -1 becomes code 33 against 0). 2217define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) { 2218; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32( 2219; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32 2220; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 0, i32 33) 2221; CHECK-NEXT: ret i64 [[MASK]] 2222; 2223 %sext.cond = sext i1 %cond to i32 2224 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 -1, i32 32) 2225 ret i64 %mask 2226} 2227; Same rewrite for an i64 sext and the i64 overload of the intrinsic. 2228define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) { 2229; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64( 2230; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i64 2231; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[SEXT_COND]], i64 0, i32 33) 2232; CHECK-NEXT: ret i64 [[MASK]] 2233; 2234 %sext.cond = sext i1 %cond to i64 2235 %mask = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %sext.cond, i64 -1, i32 32) 2236 ret i64 %mask 2237} 2238 2239; TODO: Should be able to fold to false, since a sign-extended i1 is either 0 or -1 and can never equal 1. 2240define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) { 2241; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32( 2242; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] 2243; CHECK-NEXT: [[SEXT_CMP:%.*]] = sext i1 [[CMP]] to i32 2244; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_CMP]], i32 1, i32 32) 2245; CHECK-NEXT: ret i64 [[MASK]] 2246; 2247 %cmp = icmp eq i32 %a, %b 2248 %sext.cmp = sext i1 %cmp to i32 2249 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 1, i32 32) 2250 ret i64
%mask 2251} 2252 2253define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) { 2254; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32( 2255; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) 2256; CHECK-NEXT: ret i64 [[MASK]] 2257; 2258 %cmp = icmp eq i32 %a, %b 2259 %sext.cmp = sext i1 %cmp to i32 2260 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32) 2261 ret i64 %mask 2262} 2263 2264define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) { 2265; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32( 2266; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) 2267; CHECK-NEXT: ret i64 [[MASK]] 2268; 2269 %cmp = icmp sge i32 %a, %b 2270 %sext.cmp = sext i1 %cmp to i32 2271 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32) 2272 ret i64 %mask 2273} 2274 2275define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { 2276; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32( 2277; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38) 2278; CHECK-NEXT: ret i64 [[MASK]] 2279; 2280 %cmp = icmp sle i32 %a, %b 2281 %not = xor i1 %cmp, true 2282 %zext.cmp = zext i1 %not to i32 2283 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2284 ret i64 %mask 2285} 2286 2287define i64 @fold_icmp_ne_0_zext_icmp_eq_i4(i4 %a, i4 %b) { 2288; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4( 2289; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16 2290; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16 2291; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) 2292; CHECK-NEXT: ret i64 [[MASK]] 2293; 2294 %cmp = icmp eq i4 %a, %b 2295 %zext.cmp = zext i1 %cmp to i32 2296 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2297 ret i64 %mask 2298} 2299 2300define i64 @fold_icmp_ne_0_zext_icmp_eq_i8(i8 %a, i8 
%b) { 2301; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8( 2302; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16 2303; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16 2304; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) 2305; CHECK-NEXT: ret i64 [[MASK]] 2306; 2307 %cmp = icmp eq i8 %a, %b 2308 %zext.cmp = zext i1 %cmp to i32 2309 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2310 ret i64 %mask 2311} 2312 2313define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) { 2314; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16( 2315; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32) 2316; CHECK-NEXT: ret i64 [[MASK]] 2317; 2318 %cmp = icmp eq i16 %a, %b 2319 %zext.cmp = zext i1 %cmp to i32 2320 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2321 ret i64 %mask 2322} 2323 2324define i64 @fold_icmp_ne_0_zext_icmp_eq_i36(i36 %a, i36 %b) { 2325; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36( 2326; CHECK-NEXT: [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64 2327; CHECK-NEXT: [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64 2328; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32) 2329; CHECK-NEXT: ret i64 [[MASK]] 2330; 2331 %cmp = icmp eq i36 %a, %b 2332 %zext.cmp = zext i1 %cmp to i32 2333 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2334 ret i64 %mask 2335} 2336 2337define i64 @fold_icmp_ne_0_zext_icmp_eq_i128(i128 %a, i128 %b) { 2338; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128( 2339; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]] 2340; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32 2341; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33) 2342; CHECK-NEXT: ret i64 [[MASK]] 2343; 2344 %cmp = icmp eq i128 %a, %b 2345 %zext.cmp = zext i1 %cmp to i32 2346 %mask = call i64 
@llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2347 ret i64 %mask 2348} 2349 2350define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) { 2351; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16( 2352; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f16(half [[A:%.*]], half [[B:%.*]], i32 1) 2353; CHECK-NEXT: ret i64 [[MASK]] 2354; 2355 %cmp = fcmp oeq half %a, %b 2356 %zext.cmp = zext i1 %cmp to i32 2357 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2358 ret i64 %mask 2359} 2360 2361define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) { 2362; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128( 2363; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]] 2364; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32 2365; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33) 2366; CHECK-NEXT: ret i64 [[MASK]] 2367; 2368 %cmp = fcmp oeq fp128 %a, %b 2369 %zext.cmp = zext i1 %cmp to i32 2370 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2371 ret i64 %mask 2372} 2373 2374define i64 @fold_icmp_ne_0_zext_icmp_slt_i4(i4 %a, i4 %b) { 2375; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4( 2376; CHECK-NEXT: [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16 2377; CHECK-NEXT: [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16 2378; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40) 2379; CHECK-NEXT: ret i64 [[MASK]] 2380; 2381 %cmp = icmp slt i4 %a, %b 2382 %zext.cmp = zext i1 %cmp to i32 2383 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2384 ret i64 %mask 2385} 2386 2387define i64 @fold_icmp_ne_0_zext_icmp_slt_i8(i8 %a, i8 %b) { 2388; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8( 2389; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16 2390; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16 2391; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 
40) 2392; CHECK-NEXT: ret i64 [[MASK]] 2393; 2394 %cmp = icmp slt i8 %a, %b 2395 %zext.cmp = zext i1 %cmp to i32 2396 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2397 ret i64 %mask 2398} 2399 2400define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) { 2401; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16( 2402; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40) 2403; CHECK-NEXT: ret i64 [[MASK]] 2404; 2405 %cmp = icmp slt i16 %a, %b 2406 %zext.cmp = zext i1 %cmp to i32 2407 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2408 ret i64 %mask 2409} 2410 2411define i64 @fold_icmp_ne_0_zext_icmp_ult_i4(i4 %a, i4 %b) { 2412; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4( 2413; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16 2414; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16 2415; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36) 2416; CHECK-NEXT: ret i64 [[MASK]] 2417; 2418 %cmp = icmp ult i4 %a, %b 2419 %zext.cmp = zext i1 %cmp to i32 2420 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2421 ret i64 %mask 2422} 2423 2424define i64 @fold_icmp_ne_0_zext_icmp_ult_i8(i8 %a, i8 %b) { 2425; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8( 2426; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16 2427; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16 2428; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36) 2429; CHECK-NEXT: ret i64 [[MASK]] 2430; 2431 %cmp = icmp ult i8 %a, %b 2432 %zext.cmp = zext i1 %cmp to i32 2433 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2434 ret i64 %mask 2435} 2436 2437define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) { 2438; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16( 2439; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 
36) 2440; CHECK-NEXT: ret i64 [[MASK]] 2441; 2442 %cmp = icmp ult i16 %a, %b 2443 %zext.cmp = zext i1 %cmp to i32 2444 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) 2445 ret i64 %mask 2446} 2447 2448; 1-bit NE comparisons: the i1 compare result is passed to the i1 overload directly (no zext or sext); the checks expect both the compare and the i1 call to remain. 2449 2450define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) { 2451; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1( 2452; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] 2453; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2454; CHECK-NEXT: ret i64 [[MASK]] 2455; 2456 %cmp = icmp eq i32 %a, %b 2457 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2458 ret i64 %mask 2459} 2460 2461define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) { 2462; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1( 2463; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]] 2464; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2465; CHECK-NEXT: ret i64 [[MASK]] 2466; 2467 %cmp = icmp ne i32 %a, %b 2468 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2469 ret i64 %mask 2470} 2471 2472define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) { 2473; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1( 2474; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]] 2475; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2476; CHECK-NEXT: ret i64 [[MASK]] 2477; 2478 %cmp = icmp sle i32 %a, %b 2479 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2480 ret i64 %mask 2481} 2482 2483define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) { 2484; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64( 2485; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] 2486; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2487; CHECK-NEXT: ret i64 [[MASK]] 2488; 2489 %cmp = icmp ugt i64 %a, %b 2490 %mask = call i64
@llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2491 ret i64 %mask 2492} 2493 2494define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) { 2495; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64( 2496; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] 2497; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2498; CHECK-NEXT: ret i64 [[MASK]] 2499; 2500 %cmp = icmp ugt i64 %a, %b 2501 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 false, i1 %cmp, i32 33) 2502 ret i64 %mask 2503} 2504 2505define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) { 2506; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32( 2507; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[A:%.*]], [[B:%.*]] 2508; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2509; CHECK-NEXT: ret i64 [[MASK]] 2510; 2511 %cmp = fcmp oeq float %a, %b 2512 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2513 ret i64 %mask 2514} 2515 2516define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) { 2517; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32( 2518; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[A:%.*]], [[B:%.*]] 2519; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2520; CHECK-NEXT: ret i64 [[MASK]] 2521; 2522 %cmp = fcmp une float %a, %b 2523 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2524 ret i64 %mask 2525} 2526 2527define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) { 2528; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64( 2529; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] 2530; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2531; CHECK-NEXT: ret i64 [[MASK]] 2532; 2533 %cmp = fcmp olt double %a, %b 2534 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2535 ret i64 %mask 2536} 2537 2538define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, 
i4 %b) { 2539; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4( 2540; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[A:%.*]], [[B:%.*]] 2541; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2542; CHECK-NEXT: ret i64 [[MASK]] 2543; 2544 %cmp = icmp eq i4 %a, %b 2545 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2546 ret i64 %mask 2547} 2548 2549define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) { 2550; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8( 2551; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]] 2552; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2553; CHECK-NEXT: ret i64 [[MASK]] 2554; 2555 %cmp = icmp eq i8 %a, %b 2556 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2557 ret i64 %mask 2558} 2559 2560define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) { 2561; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16( 2562; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]] 2563; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2564; CHECK-NEXT: ret i64 [[MASK]] 2565; 2566 %cmp = icmp eq i16 %a, %b 2567 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2568 ret i64 %mask 2569} 2570 2571define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) { 2572; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36( 2573; CHECK-NEXT: [[CMP:%.*]] = icmp eq i36 [[A:%.*]], [[B:%.*]] 2574; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2575; CHECK-NEXT: ret i64 [[MASK]] 2576; 2577 %cmp = icmp eq i36 %a, %b 2578 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2579 ret i64 %mask 2580} 2581 2582define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) { 2583; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128( 2584; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]] 2585; CHECK-NEXT: [[MASK:%.*]] = call i64 
@llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2586; CHECK-NEXT: ret i64 [[MASK]] 2587; 2588 %cmp = icmp eq i128 %a, %b 2589 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2590 ret i64 %mask 2591} 2592 2593define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) { 2594; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16( 2595; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq half [[A:%.*]], [[B:%.*]] 2596; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2597; CHECK-NEXT: ret i64 [[MASK]] 2598; 2599 %cmp = fcmp oeq half %a, %b 2600 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2601 ret i64 %mask 2602} 2603 2604define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) { 2605; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128( 2606; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]] 2607; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2608; CHECK-NEXT: ret i64 [[MASK]] 2609; 2610 %cmp = fcmp oeq fp128 %a, %b 2611 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2612 ret i64 %mask 2613} 2614 2615define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) { 2616; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4( 2617; CHECK-NEXT: [[CMP:%.*]] = icmp slt i4 [[A:%.*]], [[B:%.*]] 2618; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2619; CHECK-NEXT: ret i64 [[MASK]] 2620; 2621 %cmp = icmp slt i4 %a, %b 2622 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2623 ret i64 %mask 2624} 2625 2626define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) { 2627; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8( 2628; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]] 2629; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2630; CHECK-NEXT: ret i64 [[MASK]] 2631; 2632 %cmp = icmp slt i8 %a, %b 2633 %mask = call i64 
@llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2634 ret i64 %mask 2635} 2636 2637define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) { 2638; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16( 2639; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]] 2640; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2641; CHECK-NEXT: ret i64 [[MASK]] 2642; 2643 %cmp = icmp slt i16 %a, %b 2644 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2645 ret i64 %mask 2646} 2647 2648define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) { 2649; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4( 2650; CHECK-NEXT: [[CMP:%.*]] = icmp ult i4 [[A:%.*]], [[B:%.*]] 2651; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2652; CHECK-NEXT: ret i64 [[MASK]] 2653; 2654 %cmp = icmp ult i4 %a, %b 2655 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2656 ret i64 %mask 2657} 2658 2659define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) { 2660; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8( 2661; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]] 2662; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2663; CHECK-NEXT: ret i64 [[MASK]] 2664; 2665 %cmp = icmp ult i8 %a, %b 2666 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2667 ret i64 %mask 2668} 2669 2670define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) { 2671; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16( 2672; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]] 2673; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) 2674; CHECK-NEXT: ret i64 [[MASK]] 2675; 2676 %cmp = icmp ult i16 %a, %b 2677 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) 2678 ret i64 %mask 2679} 2680 2681; -------------------------------------------------------------------- 2682; llvm.amdgcn.fcmp 
; --------------------------------------------------------------------

declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg) nounwind readnone convergent

; Predicate codes -1 and 16 are expected to be left alone: both calls survive
; unchanged in the expected output.
define i64 @invalid_fcmp_code(float %a, float %b) {
; CHECK-LABEL: @invalid_fcmp_code(
; CHECK-NEXT:    [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
; CHECK-NEXT:    [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A]], float [[B]], i32 16)
; CHECK-NEXT:    [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
; CHECK-NEXT:    ret i64 [[OR]]
;
  %under = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 -1)
  %over = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 16)
  %or = or i64 %under, %over
  ret i64 %or
}

; Constant operands 2.0 and 4.0 with predicate code 1 are expected to fold to
; a zero mask.
define i64 @fcmp_constant_inputs_false() {
; CHECK-LABEL: @fcmp_constant_inputs_false(
; CHECK-NEXT:    ret i64 0
;
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)
  ret i64 %result
}

; Constant operands 2.0 and 4.0 with predicate code 4 are expected to become an
; llvm.read_register call. META0 and ATTR17 are only referenced here, so they
; must already be bound by an earlier test in this file.
define i64 @fcmp_constant_inputs_true() {
; CHECK-LABEL: @fcmp_constant_inputs_true(
; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]]
; CHECK-NEXT:    ret i64 [[RESULT]]
;
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
  ret i64 %result
}

; A constant left-hand operand is expected to move to the right-hand side,
; with the predicate code changing from 4 to 2.
define i64 @fcmp_constant_to_rhs_olt(float %x) {
; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
; CHECK-NEXT:    ret i64 [[RESULT]]
;
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 4.0, float %x, i32 4)
  ret i64 %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.ballot
; --------------------------------------------------------------------

declare i64 @llvm.amdgcn.ballot.i64(i1) nounwind readnone convergent
declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent

; An i64 ballot of a non-constant condition is expected to be rewritten as an
; i32 ballot followed by a zext.
define i64 @ballot_nocombine_64(i1 %i) {
; CHECK-LABEL: @ballot_nocombine_64(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
; CHECK-NEXT:    [[B:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[B]]
;
  %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
  ret i64 %b
}

; An i64 ballot of constant false is expected to fold to 0.
define i64 @ballot_zero_64() {
; CHECK-LABEL: @ballot_zero_64(
; CHECK-NEXT:    ret i64 0
;
  %b = call i64 @llvm.amdgcn.ballot.i64(i1 0)
  ret i64 %b
}

; An i64 ballot of constant true is not folded to a constant; it is expected to
; become a zero-extended i32 ballot.
define i64 @ballot_one_64() {
; CHECK-LABEL: @ballot_one_64(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CHECK-NEXT:    [[B:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[B]]
;
  %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
  ret i64 %b
}

; An i32 ballot of a non-constant condition is expected to stay as written.
define i32 @ballot_nocombine_32(i1 %i) {
; CHECK-LABEL: @ballot_nocombine_32(
; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
; CHECK-NEXT:    ret i32 [[B]]
;
  %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i)
  ret i32 %b
}

; An i32 ballot of constant false is expected to fold to 0.
define i32 @ballot_zero_32() {
; CHECK-LABEL: @ballot_zero_32(
; CHECK-NEXT:    ret i32 0
;
  %b = call i32 @llvm.amdgcn.ballot.i32(i1 0)
  ret i32 %b
}

; An i32 ballot of constant true is expected to stay as a call.
define i32 @ballot_one_32() {
; CHECK-LABEL: @ballot_one_32(
; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CHECK-NEXT:    ret i32 [[B]]
;
  %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
  ret i32 %b
}

; --------------------------------------------------------------------
; llvm.amdgcn.wqm.vote
; --------------------------------------------------------------------

declare i1 @llvm.amdgcn.wqm.vote(i1)

; wqm.vote of constant true is expected to fold, so the select yields 1.0.
define float @wqm_vote_true() {
; CHECK-LABEL: @wqm_vote_true(
; CHECK-NEXT:  main_body:
; CHECK-NEXT:    ret float 1.000000e+00
;
main_body:
  %w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
  %r = select i1 %w, float 1.0, float 0.0
  ret float %r
}

; wqm.vote of constant false is expected to fold, so the select yields 0.0.
define float @wqm_vote_false() {
; CHECK-LABEL: @wqm_vote_false(
; CHECK-NEXT:    main_body:
; CHECK-NEXT:    ret float 0.000000e+00
;
main_body:
  %w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
  %r = select i1 %w, float 1.0, float 0.0
  ret float %r
}

; wqm.vote of undef is expected to fold such that the select yields 0.0.
define float @wqm_vote_undef() {
; CHECK-LABEL: @wqm_vote_undef(
; CHECK-NEXT:  main_body:
; CHECK-NEXT:    ret float 0.000000e+00
;
main_body:
  %w = call i1 @llvm.amdgcn.wqm.vote(i1 undef)
  %r = select i1 %w, float 1.0, float 0.0
  ret float %r
}

; --------------------------------------------------------------------
; llvm.amdgcn.kill
; --------------------------------------------------------------------

declare void @llvm.amdgcn.kill(i1)

; kill with a constant true argument is expected to be removed entirely.
define void @kill_true() {
; CHECK-LABEL: @kill_true(
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}

; --------------------------------------------------------------------
; llvm.amdgcn.readfirstlane
; --------------------------------------------------------------------

declare i32 @llvm.amdgcn.readfirstlane(i32)

@gv = constant i32 0

; readfirstlane of a constant (0, 123, a constant expression, undef) is
; expected to fold to that constant; only the call on %arg remains. The
; volatile stores keep every result observable.
define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
; CHECK-LABEL: @readfirstlane_constant(
; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
; CHECK-NEXT:    store volatile i32 [[VAR]], ptr undef, align 4
; CHECK-NEXT:    store volatile i32 0, ptr undef, align 4
; CHECK-NEXT:    store volatile i32 123, ptr undef, align 4
; CHECK-NEXT:    store volatile i32 ptrtoint (ptr @gv to i32), ptr undef, align 4
; CHECK-NEXT:    store volatile i32 undef, ptr undef, align 4
; CHECK-NEXT:    ret void
;
  %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
  %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0)
  %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123)
  %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (ptr @gv to i32))
  %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
  store volatile i32 %var, ptr undef
  store volatile i32 %zero, ptr undef
  store volatile i32 %imm, ptr undef
  store volatile i32 %constexpr, ptr undef
  store volatile i32 %undef, ptr undef
  ret void
}

; A chain of three readfirstlane calls in one block is expected to collapse to
; the first call.
define i32 @readfirstlane_idempotent(i32 %arg) {
; CHECK-LABEL: @readfirstlane_idempotent(
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
; CHECK-NEXT:    ret i32 [[READ0]]
;
  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
  %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
  ret i32 %read2
}

; readlane applied to a readfirstlane result in the same block is expected to
; be dropped, leaving only the readfirstlane.
define i32 @readfirstlane_readlane(i32 %arg) {
; CHECK-LABEL: @readfirstlane_readlane(
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
; CHECK-NEXT:    ret i32 [[READ0]]
;
  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
  ret i32 %read1
}

; When the two readfirstlane calls sit in different basic blocks, both are
; expected to remain.
define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
; CHECK-NEXT:  bb0:
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[READ0]])
; CHECK-NEXT:    ret i32 [[READ1]]
;
bb0:
  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
  br label %bb1

bb1:
  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
  ret i32 %read1
}

; A readfirstlane whose operand is a readlane from a different basic block is
; expected to remain; both calls appear in the expected output.
define i32 @readfirstlane_readlane_different_block(i32 %arg) {
; CHECK-LABEL: @readfirstlane_readlane_different_block(
; CHECK-NEXT:  bb0:
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 0)
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[READ0]])
; CHECK-NEXT:    ret i32 [[READ1]]
;
bb0:
  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
  br label %bb1

bb1:
  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
  ret i32 %read1
}

; --------------------------------------------------------------------
; llvm.amdgcn.readlane
; --------------------------------------------------------------------

declare i32 @llvm.amdgcn.readlane(i32, i32)

; readlane of a constant value (0, 123, a constant expression, undef) is
; expected to fold to that constant whatever the lane operand; only the call
; on %arg remains. The volatile stores keep every result observable.
define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
; CHECK-LABEL: @readlane_constant(
; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 7)
; CHECK-NEXT:    store volatile i32 [[VAR]], ptr undef, align 4
; CHECK-NEXT:    store volatile i32 0, ptr undef, align 4
; CHECK-NEXT:    store volatile i32 123, ptr undef, align 4
; CHECK-NEXT:    store volatile i32 ptrtoint (ptr @gv to i32), ptr undef, align 4
; CHECK-NEXT:    store volatile i32 undef, ptr undef, align 4
; CHECK-NEXT:    ret void
;
  %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
  %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane)
  %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane)
  %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (ptr @gv to i32), i32 %lane)
  %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane)
  store volatile i32 %var, ptr undef
  store volatile i32 %zero, ptr undef
  store volatile i32 %imm, ptr undef
  store volatile i32 %constexpr, ptr undef
  store volatile i32 %undef, ptr undef
  ret void
}

; readlane of a readlane with the same lane operand, in one block, is expected
; to collapse to the first call.
define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
; CHECK-LABEL: @readlane_idempotent(
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
; CHECK-NEXT:    ret i32 [[READ0]]
;
  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
  ret i32 %read1
}

; readlane of a readlane with a different lane operand, in one block, is also
; expected to collapse to the first call.
define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
; CHECK-LABEL: @readlane_idempotent_different_lanes(
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
; CHECK-NEXT:    ret i32 [[READ0]]
;
  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
  ret i32 %read1
}

; readlane applied to a readfirstlane result in the same block is expected to
; be dropped, leaving only the readfirstlane.
define i32 @readlane_readfirstlane(i32 %arg) {
; CHECK-LABEL: @readlane_readfirstlane(
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
; CHECK-NEXT:    ret i32 [[READ0]]
;
  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
  ret i32 %read1
}

; When the two readlane calls sit in different basic blocks, both are expected
; to remain even though the lane operand is the same.
define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
; CHECK-LABEL: @readlane_idempotent_different_block(
; CHECK-NEXT:  bb0:
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[READ0]], i32 [[LANE]])
; CHECK-NEXT:    ret i32 [[READ1]]
;
bb0:
  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
  br label %bb1

bb1:
  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
  ret i32 %read1
}


; A readlane whose operand is a readfirstlane from a different basic block is
; expected to remain.
define i32 @readlane_readfirstlane_different_block(i32 %arg) {
; CHECK-LABEL: @readlane_readfirstlane_different_block(
; CHECK-NEXT:  bb0:
; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[READ0]], i32 0)
; CHECK-NEXT:    ret i32 [[READ1]]
;
bb0:
  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
  br label %bb1

bb1:
  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
  ret i32 %read1
}

; --------------------------------------------------------------------
; llvm.amdgcn.update.dpp.i32
; --------------------------------------------------------------------

declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)

; With trailing operands (1, 1, 1, false) the call is expected to stay
; unchanged, including its first operand.
define amdgpu_kernel void @update_dpp_no_combine(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
; CHECK-LABEL: @update_dpp_no_combine(
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[IN1:%.*]], i32 [[IN2:%.*]], i32 1, i32 1, i32 1, i1 false)
; CHECK-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0)
  store i32 %tmp0, ptr addrspace(1) %out
  ret void
}

; With trailing operands (3, 15, 15, true) the first operand is expected to be
; replaced by undef.
define amdgpu_kernel void @update_dpp_drop_old(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
; CHECK-LABEL: @update_dpp_drop_old(
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN2:%.*]], i32 3, i32 15, i32 15, i1 true)
; CHECK-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1)
  store i32 %tmp0, ptr addrspace(1) %out
  ret void
}

; A first operand that is already undef is expected to pass through unchanged.
define amdgpu_kernel void @update_dpp_undef_old(ptr addrspace(1) %out, i32 %in1) {
; CHECK-LABEL: @update_dpp_undef_old(
; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN1:%.*]], i32 4, i32 15, i32 15, i1 true)
; CHECK-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1)
  store i32 %tmp0, ptr addrspace(1) %out
  ret void
}


; --------------------------------------------------------------------
; llvm.amdgcn.permlane16
; --------------------------------------------------------------------

declare i32 @llvm.amdgcn.permlane16.i32(i32, i32, i32, i32, i1 immarg, i1 immarg)

; With both i1 flags false the call is expected to stay unchanged, keeping the
; constant 12345 as its first operand.
define amdgpu_kernel void @permlane16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; CHECK-LABEL: @permlane16(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %res = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; With the last i1 flag true the first operand is expected to be replaced by
; undef.
define amdgpu_kernel void @permlane16_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; CHECK-LABEL: @permlane16_bound_ctrl(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %res = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; With both i1 flags true the first operand is likewise expected to be
; replaced by undef.
define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; CHECK-LABEL: @permlane16_fetch_invalid_bound_ctrl(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %res = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------
; llvm.amdgcn.permlanex16
; --------------------------------------------------------------------

declare i32 @llvm.amdgcn.permlanex16.i32(i32, i32, i32, i32, i1 immarg, i1 immarg)

; With both i1 flags false the call is expected to stay unchanged, keeping the
; constant 12345 as its first operand.
define amdgpu_kernel void @permlanex16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; CHECK-LABEL: @permlanex16(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %res = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; With the last i1 flag true the first operand is expected to be replaced by
; undef.
define amdgpu_kernel void @permlanex16_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; CHECK-LABEL: @permlanex16_bound_ctrl(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %res = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; Both trailing i1 operands of permlanex16 are true (presumably fetch-invalid
; and bound-ctrl, going by the test name - TODO confirm). The expected output
; replaces the constant first operand (12345) with undef and leaves the rest of
; the call untouched.
define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
; CHECK-LABEL: @permlanex16_fetch_invalid_bound_ctrl(
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
  %res = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------
; llvm.amdgcn.permlane64
; --------------------------------------------------------------------

; permlane64 applied to a readfirstlane result: the expected output stores the
; readfirstlane value directly and contains no permlane64 call. The input uses
; the unsuffixed intrinsic names; the expected output shows the .i32 suffix.
define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src0) {
; CHECK-LABEL: @permlane64_uniform(
; CHECK-NEXT: [[SRC1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[SRC0:%.*]])
; CHECK-NEXT: store i32 [[SRC1]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
  %src1 = call i32 @llvm.amdgcn.readfirstlane(i32 %src0)
  %res = call i32 @llvm.amdgcn.permlane64(i32 %src1)
  store i32 %res, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------
; llvm.amdgcn.image.sample a16
; --------------------------------------------------------------------

; Declarations of the f32-typed image.sample variants. The tests that follow
; call these with operands produced by fpext from half and check whether the
; call is rewritten to an f16-typed variant.

; Plain sample, one declaration per dimension.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; .c / .cl / .c.cl variants.
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; .b variants; these carry two overloaded float types in the mangled name.
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; .d variants.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; .cd variants; operand lists mirror the .d declarations above.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; .l variants.
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; .lz variants.
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; .c.d.o 2darray variants returning float and <2 x float>; both take two
; leading i32 operands instead of one.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; 1d: the single coordinate is an fpext from half. The expected output calls
; the f16 variant on the half value directly and the fpext is gone.
define amdgpu_kernel void @image_sample_a16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; CHECK-LABEL: @image_sample_a16_1d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; 2d: both coordinates are extended halves; expected to narrow to the f16
; variant.
define amdgpu_kernel void @image_sample_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_2d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; 3d: all three coordinates are extended halves; expected to narrow to the f16
; variant.
define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; CHECK-LABEL: @image_sample_a16_3d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %r32 = fpext half %r to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; cube: s, t and face are all extended halves; expected to narrow to the f16
; variant.
define amdgpu_kernel void @image_sample_a16_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
;
; CHECK-LABEL: @image_sample_a16_cube(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %face32 = fpext half %face to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; 1darray: s and slice are extended halves; expected to narrow to the f16
; variant.
define amdgpu_kernel void @image_sample_a16_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; CHECK-LABEL: @image_sample_a16_1darray(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %slice32 = fpext half %slice to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; 2darray: s, t and slice are extended halves; expected to narrow to the f16
; variant.
define amdgpu_kernel void @image_sample_a16_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; CHECK-LABEL: @image_sample_a16_2darray(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %slice32 = fpext half %slice to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; c.1d: %zcompare is a genuine float and stays float in the expected output;
; only the extended-half coordinate narrows.
define amdgpu_kernel void @image_sample_a16_c_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; CHECK-LABEL: @image_sample_a16_c_1d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; c.2d: float %zcompare is kept; both extended-half coordinates narrow.
define amdgpu_kernel void @image_sample_a16_c_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_c_2d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; cl.1d: the coordinate and the clamp are both extended halves; both narrow.
define amdgpu_kernel void @image_sample_a16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
; CHECK-LABEL: @image_sample_a16_cl_1d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; cl.2d: s, t and clamp are extended halves; all narrow.
define amdgpu_kernel void @image_sample_a16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; CHECK-LABEL: @image_sample_a16_cl_2d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; c.cl.1d: float %zcompare is kept; the coordinate and clamp narrow.
define amdgpu_kernel void @image_sample_a16_c_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
; CHECK-LABEL: @image_sample_a16_c_cl_1d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; c.cl.2d: float %zcompare is kept; s, t and clamp narrow.
define amdgpu_kernel void @image_sample_a16_c_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; CHECK-LABEL: @image_sample_a16_c_cl_2d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; b.1d, half bias: bias and coordinate are both extended halves, so both
; overloaded types in the mangled name become f16 in the expected output.
define amdgpu_kernel void @image_sample_a16_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
; CHECK-LABEL: @image_sample_a16_b16_1d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %bias32 = fpext half %bias to float
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; b.1d, float bias: negative case. With a genuine float bias the expected
; output keeps the fpext and the f32.f32 call unchanged.
define amdgpu_kernel void @image_sample_a16_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
; CHECK-LABEL: @image_sample_a16_b32_1d(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; b.2d, half bias: bias, s and t are extended halves; expected to narrow to the
; f16.f16 variant.
define amdgpu_kernel void @image_sample_a16_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_b16_2d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %bias32 = fpext half %bias to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; b.2d, float bias: negative case. Both fpexts and the f32.f32 call are
; expected to remain.
define amdgpu_kernel void @image_sample_a16_b32_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_b32_2d(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; c.b.1d, half bias: bias and coordinate narrow to half; %zcompare between them
; stays float.
define amdgpu_kernel void @image_sample_a16_c_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
; CHECK-LABEL: @image_sample_a16_c_b16_1d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %bias32 = fpext half %bias to float
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; c.b.1d, float bias: negative case. The fpext and the f32.f32 call are
; expected to remain.
define amdgpu_kernel void @image_sample_a16_c_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
; CHECK-LABEL: @image_sample_a16_c_b32_1d(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; c.b.2d, half bias: bias, s and t narrow to half; %zcompare stays float.
define amdgpu_kernel void @image_sample_a16_c_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_c_b16_2d(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
  %bias32 = fpext half %bias to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

3472define amdgpu_kernel void @image_sample_a16_c_b32_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { 3473; CHECK-LABEL: @image_sample_a16_c_b32_2d( 3474; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float 3475; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float 3476; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3477; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3478; CHECK-NEXT: ret void 3479; 3480 %s32 = fpext half %s to float 3481 %t32 = fpext half %t to float 3482 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3483 store <4 x float> %res, ptr addrspace(1) %out 3484 ret void 3485} 3486 3487define amdgpu_kernel void @image_sample_a16_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) { 3488; CHECK-LABEL: @image_sample_a16_b16_cl_1d( 3489; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3490; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3491; CHECK-NEXT: ret void 3492; 3493 %bias32 = fpext half %bias to float 3494 %s32 = fpext half %s to float 3495 %clamp32 = fpext half %clamp to float 3496 %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3497 store <4 x float> %res, ptr 
addrspace(1) %out 3498 ret void 3499} 3500 3501define amdgpu_kernel void @image_sample_a16_b32_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { 3502; CHECK-LABEL: @image_sample_a16_b32_cl_1d( 3503; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float 3504; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float 3505; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3506; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3507; CHECK-NEXT: ret void 3508; 3509 %s32 = fpext half %s to float 3510 %clamp32 = fpext half %clamp to float 3511 %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3512 store <4 x float> %res, ptr addrspace(1) %out 3513 ret void 3514} 3515 3516define amdgpu_kernel void @image_sample_a16_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { 3517; CHECK-LABEL: @image_sample_a16_b16_cl_2d( 3518; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3519; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3520; CHECK-NEXT: ret void 3521; 3522 %bias32 = fpext half %bias to float 3523 %s32 = fpext half %s to float 3524 %t32 = fpext half %t to float 3525 %clamp32 = fpext half %clamp to float 3526 %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, float %t32, float %clamp32, <8 
x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3527 store <4 x float> %res, ptr addrspace(1) %out 3528 ret void 3529} 3530 3531define amdgpu_kernel void @image_sample_a16_b32_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { 3532; CHECK-LABEL: @image_sample_a16_b32_cl_2d( 3533; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float 3534; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float 3535; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float 3536; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3537; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3538; CHECK-NEXT: ret void 3539; 3540 %s32 = fpext half %s to float 3541 %t32 = fpext half %t to float 3542 %clamp32 = fpext half %clamp to float 3543 %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3544 store <4 x float> %res, ptr addrspace(1) %out 3545 ret void 3546} 3547 3548define amdgpu_kernel void @image_sample_a16_c_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) { 3549; CHECK-LABEL: @image_sample_a16_c_b16_cl_1d( 3550; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3551; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3552; CHECK-NEXT: ret void 3553; 3554 %bias32 = fpext half %bias to float 3555 %s32 = fpext half %s to 
float 3556 %clamp32 = fpext half %clamp to float 3557 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3558 store <4 x float> %res, ptr addrspace(1) %out 3559 ret void 3560} 3561 3562define amdgpu_kernel void @image_sample_a16_c_b32_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { 3563; CHECK-LABEL: @image_sample_a16_c_b32_cl_1d( 3564; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float 3565; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float 3566; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3567; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3568; CHECK-NEXT: ret void 3569; 3570 %s32 = fpext half %s to float 3571 %clamp32 = fpext half %clamp to float 3572 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3573 store <4 x float> %res, ptr addrspace(1) %out 3574 ret void 3575} 3576 3577define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { 3578; CHECK-LABEL: @image_sample_a16_c_b16_cl_2d( 3579; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3580; CHECK-NEXT: 
store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3581; CHECK-NEXT: ret void 3582; 3583 %bias32 = fpext half %bias to float 3584 %s32 = fpext half %s to float 3585 %t32 = fpext half %t to float 3586 %clamp32 = fpext half %clamp to float 3587 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3588 store <4 x float> %res, ptr addrspace(1) %out 3589 ret void 3590} 3591 3592define amdgpu_kernel void @image_sample_a16_c_b32_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { 3593; CHECK-LABEL: @image_sample_a16_c_b32_cl_2d( 3594; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float 3595; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float 3596; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float 3597; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3598; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3599; CHECK-NEXT: ret void 3600; 3601 %s32 = fpext half %s to float 3602 %t32 = fpext half %t to float 3603 %clamp32 = fpext half %clamp to float 3604 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3605 store <4 x float> %res, ptr addrspace(1) %out 3606 ret void 3607} 3608 3609define amdgpu_kernel void @image_sample_a16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { 3610; CHECK-LABEL: 
@image_sample_a16_d_1d( 3611; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3612; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3613; CHECK-NEXT: ret void 3614; 3615 %dsdh32 = fpext half %dsdh to float 3616 %dsdv32 = fpext half %dsdv to float 3617 %s32 = fpext half %s to float 3618 %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3619 store <4 x float> %res, ptr addrspace(1) %out 3620 ret void 3621} 3622 3623define amdgpu_kernel void @image_sample_a16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { 3624; CHECK-LABEL: @image_sample_a16_d_2d( 3625; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3626; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3627; CHECK-NEXT: ret void 3628; 3629 %dsdh32 = fpext half %dsdh to float 3630 %dtdh32 = fpext half %dtdh to float 3631 %dsdv32 = fpext half %dsdv to float 3632 %dtdv32 = fpext half %dtdv to float 3633 %s32 = fpext half %s to float 3634 %t32 = fpext half %t to float 3635 %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3636 store <4 x float> %res, ptr addrspace(1) %out 3637 ret void 3638} 3639 3640define amdgpu_kernel void 
@image_sample_a16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { 3641; CHECK-LABEL: @image_sample_a16_d_3d( 3642; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3643; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3644; CHECK-NEXT: ret void 3645; 3646 %dsdh32 = fpext half %dsdh to float 3647 %dtdh32 = fpext half %dtdh to float 3648 %drdh32 = fpext half %drdh to float 3649 %dsdv32 = fpext half %dsdv to float 3650 %dtdv32 = fpext half %dtdv to float 3651 %drdv32 = fpext half %drdv to float 3652 %s32 = fpext half %s to float 3653 %t32 = fpext half %t to float 3654 %r32 = fpext half %r to float 3655 %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3656 store <4 x float> %res, ptr addrspace(1) %out 3657 ret void 3658} 3659 3660define amdgpu_kernel void @image_sample_a16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { 3661; CHECK-LABEL: @image_sample_a16_c_d_1d( 3662; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3663; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3664; CHECK-NEXT: ret void 
3665; 3666 %dsdh32 = fpext half %dsdh to float 3667 %dsdv32 = fpext half %dsdv to float 3668 %s32 = fpext half %s to float 3669 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3670 store <4 x float> %res, ptr addrspace(1) %out 3671 ret void 3672} 3673 3674define amdgpu_kernel void @image_sample_a16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { 3675; CHECK-LABEL: @image_sample_a16_c_d_2d( 3676; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3677; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3678; CHECK-NEXT: ret void 3679; 3680 %dsdh32 = fpext half %dsdh to float 3681 %dtdh32 = fpext half %dtdh to float 3682 %dsdv32 = fpext half %dsdv to float 3683 %dtdv32 = fpext half %dtdv to float 3684 %s32 = fpext half %s to float 3685 %t32 = fpext half %t to float 3686 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3687 store <4 x float> %res, ptr addrspace(1) %out 3688 ret void 3689} 3690 3691define amdgpu_kernel void @image_sample_a16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { 3692; CHECK-LABEL: @image_sample_a16_d_cl_1d( 3693; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, 
half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3694; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3695; CHECK-NEXT: ret void 3696; 3697 %dsdh32 = fpext half %dsdh to float 3698 %dsdv32 = fpext half %dsdv to float 3699 %s32 = fpext half %s to float 3700 %clamp32 = fpext half %clamp to float 3701 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3702 store <4 x float> %res, ptr addrspace(1) %out 3703 ret void 3704} 3705 3706define amdgpu_kernel void @image_sample_a16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { 3707; CHECK-LABEL: @image_sample_a16_d_cl_2d( 3708; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 3709; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 3710; CHECK-NEXT: ret void 3711; 3712 %dsdh32 = fpext half %dsdh to float 3713 %dtdh32 = fpext half %dtdh to float 3714 %dsdv32 = fpext half %dsdv to float 3715 %dtdv32 = fpext half %dtdv to float 3716 %s32 = fpext half %s to float 3717 %t32 = fpext half %t to float 3718 %clamp32 = fpext half %clamp to float 3719 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 3720 store <4 x float> %res, ptr addrspace(1) %out 3721 ret void 3722} 3723 
; Each a16 test below feeds the f32 form of an image intrinsic with operands
; that were widened from 16-bit values; the assertions record which overload
; instcombine is expected to produce.

; sample.c.d.cl.1d: gradients, coordinate and clamp are all fpext'd from half;
; zcompare is a plain float. Expected output is the .f16.f16 overload taking
; the half values directly, with zcompare still float and no fpext left.
define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; CHECK-LABEL: @image_sample_a16_c_d_cl_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dsdv32 = fpext half %dsdv to float
  %s32 = fpext half %s to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.d.cl.2d: same as the 1d case with four gradients and two
; coordinates; every fpext'd operand is expected to become half.
define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; CHECK-LABEL: @image_sample_a16_c_d_cl_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.cd.1d: both gradients and the coordinate come from half; expected
; output is the .f16.f16 overload with no fpext.
define amdgpu_kernel void @image_sample_a16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; CHECK-LABEL: @image_sample_a16_cd_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dsdv32 = fpext half %dsdv to float
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.cd.2d: four gradients and two coordinates, all from half; expected
; output is the .f16.f16 overload.
define amdgpu_kernel void @image_sample_a16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_cd_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.cd.1d: as cd.1d plus a float zcompare operand, which is expected to
; remain float while the fpext'd operands become half.
define amdgpu_kernel void @image_sample_a16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; CHECK-LABEL: @image_sample_a16_c_cd_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dsdv32 = fpext half %dsdv to float
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.cd.2d: as cd.2d plus a float zcompare operand that stays float.
define amdgpu_kernel void @image_sample_a16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_c_cd_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.cd.cl.1d: gradients, coordinate and clamp all come from half;
; expected output is the .f16.f16 overload.
define amdgpu_kernel void @image_sample_a16_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; CHECK-LABEL: @image_sample_a16_cd_cl_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dsdv32 = fpext half %dsdv to float
  %s32 = fpext half %s to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.cd.cl.2d: 2d variant of the case above; all seven fpext'd operands
; are expected to become half.
define amdgpu_kernel void @image_sample_a16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; CHECK-LABEL: @image_sample_a16_cd_cl_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.cd.cl.1d: as cd.cl.1d plus a float zcompare operand that is
; expected to stay float.
define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dsdv32 = fpext half %dsdv to float
  %s32 = fpext half %s to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.cd.cl.2d: as cd.cl.2d plus a float zcompare operand that is
; expected to stay float.
define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %clamp32 = fpext half %clamp to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.l.1d: coordinate and lod both come from half; expected output is the
; .f16 overload taking them as half.
define amdgpu_kernel void @image_sample_a16_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
; CHECK-LABEL: @image_sample_a16_l_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %lod32 = fpext half %lod to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.l.2d: two coordinates and lod from half; expected output is the .f16
; overload.
define amdgpu_kernel void @image_sample_a16_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; CHECK-LABEL: @image_sample_a16_l_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %lod32 = fpext half %lod to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.l.1d: as l.1d plus a float zcompare operand that is expected to
; stay float.
define amdgpu_kernel void @image_sample_a16_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
; CHECK-LABEL: @image_sample_a16_c_l_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %lod32 = fpext half %lod to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.l.2d: as l.2d plus a float zcompare operand that is expected to
; stay float.
define amdgpu_kernel void @image_sample_a16_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; CHECK-LABEL: @image_sample_a16_c_l_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %lod32 = fpext half %lod to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.lz.1d: the single coordinate comes from half; expected output is the
; .f16 overload.
define amdgpu_kernel void @image_sample_a16_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; CHECK-LABEL: @image_sample_a16_lz_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.lz.2d: both coordinates come from half; expected output is the .f16
; overload.
define amdgpu_kernel void @image_sample_a16_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_lz_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.lz.1d: as lz.1d plus a float zcompare operand that is expected to
; stay float.
define amdgpu_kernel void @image_sample_a16_c_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; CHECK-LABEL: @image_sample_a16_c_lz_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.lz.2d: as lz.2d plus a float zcompare operand that is expected to
; stay float.
define amdgpu_kernel void @image_sample_a16_c_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_c_lz_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; sample.c.d.o.2darray returning a single float (first argument i32 4): the
; i32 offset and float zcompare are passed through unchanged, while the
; gradients, coordinates and slice (all fpext'd from half) are expected to
; become half.
define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1(
; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16.v8i32.v4i32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %slice32 = fpext half %slice to float
  %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32.v8i32.v4i32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store float %res, ptr addrspace(1) %out
  ret void
}

; Same operands as the V1 case but returning <2 x float> (first argument
; i32 6); the same half conversion is expected.
define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %t32 = fpext half %t to float
  %slice32 = fpext half %slice to float
  %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <2 x float> %res, ptr addrspace(1) %out
  ret void
}

; The t coordinate is the float constant 0.25 rather than an fpext. The
; expected output still uses the .f16.f16 overload, with the constant
; rewritten as half 0xH3400.
define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) {
; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half 0xH3400, half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %slice32 = fpext half %slice to float
  %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 0.25, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <2 x float> %res, ptr addrspace(1) %out
  ret void
}

; Negative case: the t coordinate is the float constant 1.0e+10. The expected
; output keeps s, t and slice as float (fpexts of s and slice retained) and
; only the gradients become half, i.e. the .f16.f32 overload.
; NOTE(review): presumably 1.0e+10 is rejected because it cannot be converted
; to half without loss -- confirm against the instcombine implementation.
define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) {
; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const_noopt(
; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S32]], float 1.000000e+10, float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT:    store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %dsdh32 = fpext half %dsdh to float
  %dtdh32 = fpext half %dtdh to float
  %dsdv32 = fpext half %dsdv to float
  %dtdv32 = fpext half %dtdv to float
  %s32 = fpext half %s to float
  %slice32 = fpext half %slice to float
  %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 1.0e+10, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <2 x float> %res, ptr addrspace(1) %out
  ret void
}

; load.mip.1d with a zext'd i16 coordinate and a constant mip level of 0.
; Expected output is the non-mip image.load.1d in its i16 overload, with the
; mip operand and the zext both gone.
define amdgpu_kernel void @image_load_a16_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) {
; CHECK-LABEL: @image_load_a16_mip_1d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = zext i16 %s to i32
  %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; Negative case: the coordinate is sign-extended instead of zero-extended.
; The mip operand is still dropped, but the expected output keeps the sext
; and the i32 overload.
define amdgpu_kernel void @image_load_a16_mip_1d_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) {
; CHECK-LABEL: @image_load_a16_mip_1d_noopt(
; CHECK-NEXT:    [[S32:%.*]] = sext i16 [[S:%.*]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[S32]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = sext i16 %s to i32
  %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; load.mip.2d with two zext'd i16 coordinates and mip level 0; expected output
; is image.load.2d in its i16 overload.
define amdgpu_kernel void @image_load_a16_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s, i16 %t) {
; CHECK-LABEL: @image_load_a16_mip_2d(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16.v8i32(i32 15, i16 [[S:%.*]], i16 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = zext i16 %s to i32
  %t32 = zext i16 %t to i32
  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 %t32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; The t coordinate is the constant i32 65535. The expected output uses the
; i16 overload with the constant written as i16 -1 (the same 16-bit pattern,
; 0xFFFF).
define amdgpu_kernel void @image_load_a16_mip_2d_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) {
; CHECK-LABEL: @image_load_a16_mip_2d_const(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16.v8i32(i32 15, i16 [[S:%.*]], i16 -1, <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = zext i16 %s to i32
  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 65535, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

; Negative case: the t coordinate is the constant i32 65536. The mip operand
; is still dropped, but the expected output keeps the zext of s and the i32
; overload.
; NOTE(review): presumably rejected because 65536 needs more than 16 bits --
; confirm against the instcombine implementation.
define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) {
; CHECK-LABEL: @image_load_a16_mip_2d_const_noopt(
; CHECK-NEXT:    [[S32:%.*]] = zext i16 [[S:%.*]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[S32]], i32 65536, <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s32 = zext i16 %s to i32
  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 65536, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
  store <4 x float> %res, ptr addrspace(1) %out
  ret void
}

;
-------------------------------------------------------------------- 4125; llvm.amdgcn.image.sample g16 4126; -------------------------------------------------------------------- 4127 4128define amdgpu_kernel void @image_sample_g16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { 4129; CHECK-LABEL: @image_sample_g16_d_1d( 4130; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4131; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4132; CHECK-NEXT: ret void 4133; 4134 %dsdh32 = fpext half %dsdh to float 4135 %dsdv32 = fpext half %dsdv to float 4136 %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4137 store <4 x float> %res, ptr addrspace(1) %out 4138 ret void 4139} 4140 4141define amdgpu_kernel void @image_sample_g16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 4142; CHECK-LABEL: @image_sample_g16_d_2d( 4143; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4144; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4145; CHECK-NEXT: ret void 4146; 4147 %dsdh32 = fpext half %dsdh to float 4148 %dtdh32 = fpext half %dtdh to float 4149 %dsdv32 = fpext half %dsdv to float 4150 %dtdv32 = fpext half %dtdv to float 4151 %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, 
float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4152 store <4 x float> %res, ptr addrspace(1) %out 4153 ret void 4154} 4155 4156define amdgpu_kernel void @image_sample_g16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { 4157; CHECK-LABEL: @image_sample_g16_d_3d( 4158; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4159; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4160; CHECK-NEXT: ret void 4161; 4162 %dsdh32 = fpext half %dsdh to float 4163 %dtdh32 = fpext half %dtdh to float 4164 %drdh32 = fpext half %drdh to float 4165 %dsdv32 = fpext half %dsdv to float 4166 %dtdv32 = fpext half %dtdv to float 4167 %drdv32 = fpext half %drdv to float 4168 %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4169 store <4 x float> %res, ptr addrspace(1) %out 4170 ret void 4171} 4172 4173define amdgpu_kernel void @image_sample_g16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { 4174; CHECK-LABEL: @image_sample_g16_c_d_1d( 4175; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> 
[[SAMP:%.*]], i1 false, i32 0, i32 0) 4176; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4177; CHECK-NEXT: ret void 4178; 4179 %dsdh32 = fpext half %dsdh to float 4180 %dsdv32 = fpext half %dsdv to float 4181 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4182 store <4 x float> %res, ptr addrspace(1) %out 4183 ret void 4184} 4185 4186define amdgpu_kernel void @image_sample_g16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 4187; CHECK-LABEL: @image_sample_g16_c_d_2d( 4188; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4189; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4190; CHECK-NEXT: ret void 4191; 4192 %dsdh32 = fpext half %dsdh to float 4193 %dtdh32 = fpext half %dtdh to float 4194 %dsdv32 = fpext half %dsdv to float 4195 %dtdv32 = fpext half %dtdv to float 4196 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4197 store <4 x float> %res, ptr addrspace(1) %out 4198 ret void 4199} 4200 4201define amdgpu_kernel void @image_sample_g16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { 4202; CHECK-LABEL: @image_sample_g16_d_cl_1d( 4203; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4204; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4205; CHECK-NEXT: ret void 4206; 4207 %dsdh32 = fpext half %dsdh to float 4208 %dsdv32 = fpext half %dsdv to float 4209 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4210 store <4 x float> %res, ptr addrspace(1) %out 4211 ret void 4212} 4213 4214define amdgpu_kernel void @image_sample_g16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 4215; CHECK-LABEL: @image_sample_g16_d_cl_2d( 4216; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4217; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4218; CHECK-NEXT: ret void 4219; 4220 %dsdh32 = fpext half %dsdh to float 4221 %dtdh32 = fpext half %dtdh to float 4222 %dsdv32 = fpext half %dsdv to float 4223 %dtdv32 = fpext half %dtdv to float 4224 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4225 store <4 x float> %res, ptr addrspace(1) %out 4226 ret void 4227} 4228 4229define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, 
float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { 4230; CHECK-LABEL: @image_sample_g16_c_d_cl_1d( 4231; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4232; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4233; CHECK-NEXT: ret void 4234; 4235 %dsdh32 = fpext half %dsdh to float 4236 %dsdv32 = fpext half %dsdv to float 4237 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4238 store <4 x float> %res, ptr addrspace(1) %out 4239 ret void 4240} 4241 4242define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 4243; CHECK-LABEL: @image_sample_g16_c_d_cl_2d( 4244; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4245; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4246; CHECK-NEXT: ret void 4247; 4248 %dsdh32 = fpext half %dsdh to float 4249 %dtdh32 = fpext half %dtdh to float 4250 %dsdv32 = fpext half %dsdv to float 4251 %dtdv32 = fpext half %dtdv to float 4252 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float 
%t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4253 store <4 x float> %res, ptr addrspace(1) %out 4254 ret void 4255} 4256 4257define amdgpu_kernel void @image_sample_g16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { 4258; CHECK-LABEL: @image_sample_g16_cd_1d( 4259; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4260; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4261; CHECK-NEXT: ret void 4262; 4263 %dsdh32 = fpext half %dsdh to float 4264 %dsdv32 = fpext half %dsdv to float 4265 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4266 store <4 x float> %res, ptr addrspace(1) %out 4267 ret void 4268} 4269 4270define amdgpu_kernel void @image_sample_g16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 4271; CHECK-LABEL: @image_sample_g16_cd_2d( 4272; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4273; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4274; CHECK-NEXT: ret void 4275; 4276 %dsdh32 = fpext half %dsdh to float 4277 %dtdh32 = fpext half %dtdh to float 4278 %dsdv32 = fpext half %dsdv to float 4279 %dtdv32 = fpext half %dtdv to float 4280 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, 
float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4281 store <4 x float> %res, ptr addrspace(1) %out 4282 ret void 4283} 4284 4285define amdgpu_kernel void @image_sample_g16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { 4286; CHECK-LABEL: @image_sample_g16_c_cd_1d( 4287; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4288; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4289; CHECK-NEXT: ret void 4290; 4291 %dsdh32 = fpext half %dsdh to float 4292 %dsdv32 = fpext half %dsdv to float 4293 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4294 store <4 x float> %res, ptr addrspace(1) %out 4295 ret void 4296} 4297 4298define amdgpu_kernel void @image_sample_g16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 4299; CHECK-LABEL: @image_sample_g16_c_cd_2d( 4300; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4301; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4302; CHECK-NEXT: ret void 4303; 4304 %dsdh32 = fpext half %dsdh to float 4305 %dtdh32 = fpext half %dtdh to float 4306 %dsdv32 = fpext half %dsdv to float 4307 %dtdv32 = fpext 
half %dtdv to float 4308 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4309 store <4 x float> %res, ptr addrspace(1) %out 4310 ret void 4311} 4312 4313define amdgpu_kernel void @image_sample_g16_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { 4314; CHECK-LABEL: @image_sample_g16_cd_cl_1d( 4315; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4316; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4317; CHECK-NEXT: ret void 4318; 4319 %dsdh32 = fpext half %dsdh to float 4320 %dsdv32 = fpext half %dsdv to float 4321 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4322 store <4 x float> %res, ptr addrspace(1) %out 4323 ret void 4324} 4325 4326define amdgpu_kernel void @image_sample_g16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 4327; CHECK-LABEL: @image_sample_g16_cd_cl_2d( 4328; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4329; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4330; CHECK-NEXT: ret 
void 4331; 4332 %dsdh32 = fpext half %dsdh to float 4333 %dtdh32 = fpext half %dtdh to float 4334 %dsdv32 = fpext half %dsdv to float 4335 %dtdv32 = fpext half %dtdv to float 4336 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4337 store <4 x float> %res, ptr addrspace(1) %out 4338 ret void 4339} 4340 4341define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { 4342; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d( 4343; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4344; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4345; CHECK-NEXT: ret void 4346; 4347 %dsdh32 = fpext half %dsdh to float 4348 %dsdv32 = fpext half %dsdv to float 4349 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4350 store <4 x float> %res, ptr addrspace(1) %out 4351 ret void 4352} 4353 4354define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 4355; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d( 4356; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half 
[[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4357; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4358; CHECK-NEXT: ret void 4359; 4360 %dsdh32 = fpext half %dsdh to float 4361 %dtdh32 = fpext half %dtdh to float 4362 %dsdv32 = fpext half %dsdv to float 4363 %dtdv32 = fpext half %dtdv to float 4364 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4365 store <4 x float> %res, ptr addrspace(1) %out 4366 ret void 4367} 4368 4369define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { 4370; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1( 4371; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32.v8i32.v4i32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4372; CHECK-NEXT: store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 4373; CHECK-NEXT: ret void 4374; 4375 %dsdh32 = fpext half %dsdh to float 4376 %dtdh32 = fpext half %dtdh to float 4377 %dsdv32 = fpext half %dsdv to float 4378 %dtdv32 = fpext half %dtdv to float 4379 %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32.v8i32.v4i32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4380 
store float %res, ptr addrspace(1) %out 4381 ret void 4382} 4383 4384define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { 4385; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2( 4386; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4387; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 4388; CHECK-NEXT: ret void 4389; 4390 %dsdh32 = fpext half %dsdh to float 4391 %dtdh32 = fpext half %dtdh to float 4392 %dsdv32 = fpext half %dsdv to float 4393 %dtdv32 = fpext half %dtdv to float 4394 %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4395 store <2 x float> %res, ptr addrspace(1) %out 4396 ret void 4397} 4398 4399; -------------------------------------------------------------------- 4400; llvm.amdgcn.image.sample a16 preserve fast-math flags (the nnan / ninf / nsz / fast flags written on the f32 call are expected to reappear on the rewritten f16 call) 4401; -------------------------------------------------------------------- 4402 4403define amdgpu_kernel void @image_sample_a16_1d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { 4404; CHECK-LABEL: @image_sample_a16_1d_nnan( 4405; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4406; CHECK-NEXT: store <4 x 
float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4407; CHECK-NEXT: ret void 4408; 4409 %s32 = fpext half %s to float 4410 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4411 store <4 x float> %res, ptr addrspace(1) %out 4412 ret void 4413} 4414 4415define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { 4416; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz( 4417; CHECK-NEXT: [[RES:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4418; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4419; CHECK-NEXT: ret void 4420; 4421 %s32 = fpext half %s to float 4422 %res = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4423 store <4 x float> %res, ptr addrspace(1) %out 4424 ret void 4425} 4426 4427define amdgpu_kernel void @image_sample_a16_1d_fast(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { 4428; CHECK-LABEL: @image_sample_a16_1d_fast( 4429; CHECK-NEXT: [[RES:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4430; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4431; CHECK-NEXT: ret void 4432; 4433 %s32 = fpext half %s to float 4434 %res = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4435 store <4 x float> %res, ptr addrspace(1) %out 4436 ret void 4437} 4438 4439define amdgpu_kernel void @image_sample_a16_2d_nnan(ptr 
addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { 4440; CHECK-LABEL: @image_sample_a16_2d_nnan( 4441; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4442; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4443; CHECK-NEXT: ret void 4444; 4445 %s32 = fpext half %s to float 4446 %t32 = fpext half %t to float 4447 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4448 store <4 x float> %res, ptr addrspace(1) %out 4449 ret void 4450} 4451 4452define amdgpu_kernel void @image_sample_a16_3d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { 4453; CHECK-LABEL: @image_sample_a16_3d_nnan( 4454; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4455; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4456; CHECK-NEXT: ret void 4457; 4458 %s32 = fpext half %s to float 4459 %t32 = fpext half %t to float 4460 %r32 = fpext half %r to float 4461 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4462 store <4 x float> %res, ptr addrspace(1) %out 4463 ret void 4464} 4465 4466define amdgpu_kernel void @image_sample_a16_cube_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { 4467; 4468; CHECK-LABEL: @image_sample_a16_cube_nnan( 4469; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> 
@llvm.amdgcn.image.sample.cube.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4470; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4471; CHECK-NEXT: ret void 4472; 4473 %s32 = fpext half %s to float 4474 %t32 = fpext half %t to float 4475 %face32 = fpext half %face to float 4476 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4477 store <4 x float> %res, ptr addrspace(1) %out 4478 ret void 4479} 4480 4481define amdgpu_kernel void @image_sample_a16_1darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { 4482; CHECK-LABEL: @image_sample_a16_1darray_nnan( 4483; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4484; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4485; CHECK-NEXT: ret void 4486; 4487 %s32 = fpext half %s to float 4488 %slice32 = fpext half %slice to float 4489 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4490 store <4 x float> %res, ptr addrspace(1) %out 4491 ret void 4492} 4493 4494define amdgpu_kernel void @image_sample_a16_2darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { 4495; CHECK-LABEL: @image_sample_a16_2darray_nnan( 4496; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, 
i32 0, i32 0) 4497; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 4498; CHECK-NEXT: ret void 4499; 4500 %s32 = fpext half %s to float 4501 %t32 = fpext half %t to float 4502 %slice32 = fpext half %slice to float 4503 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4504 store <4 x float> %res, ptr addrspace(1) %out 4505 ret void 4506} 4507 4508; -------------------------------------------------------------------- 4509; llvm.amdgcn.image.sample l to lz (a constant lod operand such as 0.0, -0.0 or -2.0 is expected to be dropped, selecting the matching lz intrinsic) 4510; -------------------------------------------------------------------- 4511 4512declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4513declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4514declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4515declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4516 4517declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4518declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4519declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4520declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4521declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32.v8i32(i32, i32, float, float, 
float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4522 4523define amdgpu_kernel void @sample_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { 4524; CHECK-LABEL: @sample_l_1d( 4525; CHECK-NEXT: main_body: 4526; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4527; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4528; CHECK-NEXT: ret void 4529; 4530main_body: 4531 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4532 store <4 x float> %v, ptr addrspace(1) %out 4533 ret void 4534} 4535 4536define amdgpu_kernel void @sample_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { 4537; CHECK-LABEL: @sample_l_2d( 4538; CHECK-NEXT: main_body: 4539; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4540; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4541; CHECK-NEXT: ret void 4542; 4543main_body: 4544 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4545 store <4 x float> %v, ptr addrspace(1) %out 4546 ret void 4547} 4548 4549define amdgpu_kernel void @sample_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { 4550; CHECK-LABEL: @sample_c_l_1d( 4551; CHECK-NEXT: main_body: 4552; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], 
<8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4553; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4554; CHECK-NEXT: ret void 4555; 4556main_body: 4557 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4558 store <4 x float> %v, ptr addrspace(1) %out 4559 ret void 4560} 4561 4562define amdgpu_kernel void @sample_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { 4563; CHECK-LABEL: @sample_c_l_2d( 4564; CHECK-NEXT: main_body: 4565; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4566; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4567; CHECK-NEXT: ret void 4568; 4569main_body: 4570 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4571 store <4 x float> %v, ptr addrspace(1) %out 4572 ret void 4573} 4574 4575define amdgpu_kernel void @sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { 4576; CHECK-LABEL: @sample_l_o_1d( 4577; CHECK-NEXT: main_body: 4578; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4579; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4580; CHECK-NEXT: ret void 4581; 4582main_body: 4583 %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 
%s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4584 store <4 x float> %v, ptr addrspace(1) %out 4585 ret void 4586} 4587 4588define amdgpu_kernel void @sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { 4589; CHECK-LABEL: @sample_l_o_2d( 4590; CHECK-NEXT: main_body: 4591; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4592; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4593; CHECK-NEXT: ret void 4594; 4595main_body: 4596 %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4597 store <4 x float> %v, ptr addrspace(1) %out 4598 ret void 4599} 4600 4601define amdgpu_kernel void @sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { 4602; CHECK-LABEL: @sample_c_l_o_1d( 4603; CHECK-NEXT: main_body: 4604; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4605; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4606; CHECK-NEXT: ret void 4607; 4608main_body: 4609 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4610 store <4 x float> %v, ptr addrspace(1) %out 4611 ret void 4612} 4613 4614define amdgpu_kernel void @sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 
%offset, float %zcompare, float %s, float %t, float %lod) { 4615; CHECK-LABEL: @sample_c_l_o_2d( 4616; CHECK-NEXT: main_body: 4617; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4618; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4619; CHECK-NEXT: ret void 4620; 4621main_body: 4622 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4623 store <4 x float> %v, ptr addrspace(1) %out 4624 ret void 4625} 4626 4627define amdgpu_kernel void @gather4_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { 4628; CHECK-LABEL: @gather4_l_2d( 4629; CHECK-NEXT: main_body: 4630; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4631; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4632; CHECK-NEXT: ret void 4633; 4634main_body: 4635 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4636 store <4 x float> %v, ptr addrspace(1) %out 4637 ret void 4638} 4639 4640define amdgpu_kernel void @gather4_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { 4641; CHECK-LABEL: @gather4_c_l_2d( 4642; CHECK-NEXT: main_body: 4643; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], 
<8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4644; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4645; CHECK-NEXT: ret void 4646; 4647main_body: 4648 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32.v8i32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4649 store <4 x float> %v, ptr addrspace(1) %out 4650 ret void 4651} 4652 4653define amdgpu_kernel void @gather4_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { 4654; CHECK-LABEL: @gather4_l_o_2d( 4655; CHECK-NEXT: main_body: 4656; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4657; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4658; CHECK-NEXT: ret void 4659; 4660main_body: 4661 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4662 store <4 x float> %v, ptr addrspace(1) %out 4663 ret void 4664} 4665 4666define amdgpu_kernel void @gather4_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { 4667; CHECK-LABEL: @gather4_c_l_o_2d( 4668; CHECK-NEXT: main_body: 4669; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4670; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4671; CHECK-NEXT: ret void 4672; 4673main_body: 4674 %v = call <4 x float> 
@llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4675 store <4 x float> %v, ptr addrspace(1) %out 4676 ret void 4677} 4678 4679define amdgpu_kernel void @gather4_c_l_o_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) { 4680; CHECK-LABEL: @gather4_c_l_o_2darray( 4681; CHECK-NEXT: main_body: 4682; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4683; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4684; CHECK-NEXT: ret void 4685; 4686main_body: 4687 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32.v8i32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4688 store <4 x float> %v, ptr addrspace(1) %out 4689 ret void 4690} 4691 4692; -------------------------------------------------------------------- 4693; llvm.amdgcn.image.load.mip / llvm.amdgcn.image.store.mip mipmap zero 4694; -------------------------------------------------------------------- 4695 4696define amdgpu_kernel void @load_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s) { 4697; CHECK-LABEL: @load_mip_1d( 4698; CHECK-NEXT: main_body: 4699; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4700; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4701; CHECK-NEXT: ret void 4702; 4703main_body: 4704 %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, 
i32 0) 4705 store <4 x float> %v, ptr addrspace(1) %out 4706 ret void 4707} 4708 4709define amdgpu_kernel void @load_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { 4710; CHECK-LABEL: @load_mip_2d( 4711; CHECK-NEXT: main_body: 4712; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4713; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4714; CHECK-NEXT: ret void 4715; 4716main_body: 4717 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4718 store <4 x float> %v, ptr addrspace(1) %out 4719 ret void 4720} 4721 4722define amdgpu_kernel void @load_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { 4723; CHECK-LABEL: @load_mip_3d( 4724; CHECK-NEXT: main_body: 4725; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4726; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4727; CHECK-NEXT: ret void 4728; 4729main_body: 4730 %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4731 store <4 x float> %v, ptr addrspace(1) %out 4732 ret void 4733} 4734 4735define amdgpu_kernel void @load_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { 4736; CHECK-LABEL: @load_mip_1darray( 4737; CHECK-NEXT: main_body: 4738; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4739; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4740; CHECK-NEXT: ret void 4741; 4742main_body: 4743 %v = call <4 x float> 
@llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4744 store <4 x float> %v, ptr addrspace(1) %out 4745 ret void 4746} 4747 4748define amdgpu_kernel void @load_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { 4749; CHECK-LABEL: @load_mip_2darray( 4750; CHECK-NEXT: main_body: 4751; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4752; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4753; CHECK-NEXT: ret void 4754; 4755main_body: 4756 %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4757 store <4 x float> %v, ptr addrspace(1) %out 4758 ret void 4759} 4760 4761define amdgpu_kernel void @load_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { 4762; CHECK-LABEL: @load_mip_cube( 4763; CHECK-NEXT: main_body: 4764; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4765; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4766; CHECK-NEXT: ret void 4767; 4768main_body: 4769 %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4770 store <4 x float> %v, ptr addrspace(1) %out 4771 ret void 4772} 4773 4774 4775define amdgpu_kernel void @store_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { 4776; CHECK-LABEL: @store_mip_1d( 4777; CHECK-NEXT: main_body: 4778; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4779; CHECK-NEXT: ret 
void 4780; 4781main_body: 4782 call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4783 ret void 4784} 4785 4786define amdgpu_kernel void @store_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { 4787; CHECK-LABEL: @store_mip_2d( 4788; CHECK-NEXT: main_body: 4789; CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4790; CHECK-NEXT: ret void 4791; 4792main_body: 4793 call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4794 ret void 4795} 4796 4797define amdgpu_kernel void @store_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { 4798; CHECK-LABEL: @store_mip_3d( 4799; CHECK-NEXT: main_body: 4800; CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4801; CHECK-NEXT: ret void 4802; 4803main_body: 4804 call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4805 ret void 4806} 4807 4808define amdgpu_kernel void @store_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { 4809; CHECK-LABEL: @store_mip_1darray( 4810; CHECK-NEXT: main_body: 4811; CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4812; CHECK-NEXT: ret void 4813; 4814main_body: 4815 call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4816 ret void 4817} 4818 4819define amdgpu_kernel void 
@store_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { 4820; CHECK-LABEL: @store_mip_2darray( 4821; CHECK-NEXT: main_body: 4822; CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4823; CHECK-NEXT: ret void 4824; 4825main_body: 4826 call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4827 ret void 4828} 4829 4830define amdgpu_kernel void @store_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { 4831; CHECK-LABEL: @store_mip_cube( 4832; CHECK-NEXT: main_body: 4833; CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) 4834; CHECK-NEXT: ret void 4835; 4836main_body: 4837 call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) 4838 ret void 4839} 4840 4841declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1 4842declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 4843declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 4844declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 4845declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 4846declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 4847 4848 4849declare void 
@llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 4850declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 4851declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 4852declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 4853declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 4854declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 4855 4856; -------------------------------------------------------------------- 4857; llvm.amdgcn.image.sample bias zero 4858; -------------------------------------------------------------------- 4859 4860declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4861declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4862declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4863declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4864declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4865declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4866declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4867declare <4 x float> 
@llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4868declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16.v8i32.v4i32(i32, i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4869 4870declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4871declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4872declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4873declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 4874 4875define amdgpu_kernel void @sample_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 4876; CHECK-LABEL: @sample_b_1d( 4877; CHECK-NEXT: main_body: 4878; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4879; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4880; CHECK-NEXT: ret void 4881; 4882main_body: 4883 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4884 store <4 x float> %v, ptr addrspace(1) %out 4885 ret void 4886} 4887 4888define amdgpu_kernel void @sample_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 4889; CHECK-LABEL: @sample_b_2d( 4890; CHECK-NEXT: main_body: 4891; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> 
[[SAMP:%.*]], i1 false, i32 0, i32 0) 4892; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4893; CHECK-NEXT: ret void 4894; 4895main_body: 4896 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.v8i32.v4i32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4897 store <4 x float> %v, ptr addrspace(1) %out 4898 ret void 4899} 4900 4901define amdgpu_kernel void @sample_c_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { 4902; CHECK-LABEL: @sample_c_b_1d( 4903; CHECK-NEXT: main_body: 4904; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4905; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4906; CHECK-NEXT: ret void 4907; 4908main_body: 4909 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4910 store <4 x float> %v, ptr addrspace(1) %out 4911 ret void 4912} 4913 4914define amdgpu_kernel void @sample_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 4915; CHECK-LABEL: @sample_c_b_2d( 4916; CHECK-NEXT: main_body: 4917; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4918; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4919; CHECK-NEXT: ret void 4920; 4921main_body: 4922 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.v8i32.v4i32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 
0, i32 0) 4923 store <4 x float> %v, ptr addrspace(1) %out 4924 ret void 4925} 4926 4927define amdgpu_kernel void @sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { 4928; CHECK-LABEL: @sample_b_o_1d( 4929; CHECK-NEXT: main_body: 4930; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4931; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4932; CHECK-NEXT: ret void 4933; 4934main_body: 4935 %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4936 store <4 x float> %v, ptr addrspace(1) %out 4937 ret void 4938} 4939 4940define amdgpu_kernel void @sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { 4941; CHECK-LABEL: @sample_b_o_2d( 4942; CHECK-NEXT: main_body: 4943; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4944; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4945; CHECK-NEXT: ret void 4946; 4947main_body: 4948 %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4949 store <4 x float> %v, ptr addrspace(1) %out 4950 ret void 4951} 4952 4953define amdgpu_kernel void @sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { 4954; CHECK-LABEL: @sample_c_b_o_1d( 4955; CHECK-NEXT: main_body: 4956; CHECK-NEXT: [[V:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4957; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4958; CHECK-NEXT: ret void 4959; 4960main_body: 4961 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4962 store <4 x float> %v, ptr addrspace(1) %out 4963 ret void 4964} 4965 4966define amdgpu_kernel void @sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { 4967; CHECK-LABEL: @sample_c_b_o_2d( 4968; CHECK-NEXT: main_body: 4969; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4970; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4971; CHECK-NEXT: ret void 4972; 4973main_body: 4974 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4975 store <4 x float> %v, ptr addrspace(1) %out 4976 ret void 4977} 4978 4979define amdgpu_kernel void @gather4_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 4980; CHECK-LABEL: @gather4_b_2d( 4981; CHECK-NEXT: main_body: 4982; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4983; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 
4984; CHECK-NEXT: ret void 4985; 4986main_body: 4987 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.v8i32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 4988 store <4 x float> %v, ptr addrspace(1) %out 4989 ret void 4990} 4991 4992define amdgpu_kernel void @gather4_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 4993; CHECK-LABEL: @gather4_c_b_2d( 4994; CHECK-NEXT: main_body: 4995; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 4996; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 4997; CHECK-NEXT: ret void 4998; 4999main_body: 5000 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.v8i32(i32 15, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5001 store <4 x float> %v, ptr addrspace(1) %out 5002 ret void 5003} 5004 5005define amdgpu_kernel void @gather4_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { 5006; CHECK-LABEL: @gather4_b_o_2d( 5007; CHECK-NEXT: main_body: 5008; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5009; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5010; CHECK-NEXT: ret void 5011; 5012main_body: 5013 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5014 store <4 x float> %v, ptr addrspace(1) %out 5015 ret void 5016} 5017 5018define amdgpu_kernel 
void @gather4_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { 5019; CHECK-LABEL: @gather4_c_b_o_2d( 5020; CHECK-NEXT: main_body: 5021; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5022; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5023; CHECK-NEXT: ret void 5024; 5025main_body: 5026 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5027 store <4 x float> %v, ptr addrspace(1) %out 5028 ret void 5029} 5030 5031define amdgpu_kernel void @sample_c_b_o_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) { 5032; CHECK-LABEL: @sample_c_b_o_a16_2d( 5033; CHECK-NEXT: main_body: 5034; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5035; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5036; CHECK-NEXT: ret void 5037; 5038main_body: 5039 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16.v8i32.v4i32(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5040 store <4 x float> %v, ptr addrspace(1) %out 5041 ret void 5042} 5043 5044; Check that bias is not optimized away if > 0 5045define amdgpu_kernel void @sample_b_1d_pos(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 5046; CHECK-LABEL: @sample_b_1d_pos( 
5047; CHECK-NEXT: main_body: 5048; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5049; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5050; CHECK-NEXT: ret void 5051; 5052main_body: 5053 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5054 store <4 x float> %v, ptr addrspace(1) %out 5055 ret void 5056} 5057 5058; Check that a negative (non-zero) bias is not optimized away: the sample.b form must be kept 5059define amdgpu_kernel void @sample_b_1d_neg(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 5060; CHECK-LABEL: @sample_b_1d_neg( 5061; CHECK-NEXT: main_body: 5062; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5063; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5064; CHECK-NEXT: ret void 5065; 5066main_body: 5067 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5068 store <4 x float> %v, ptr addrspace(1) %out 5069 ret void 5070} 5071 5072; Zero bias combined with A16: the -0.0 bias is dropped and the fpext'ed half coordinate is passed as half 5073define amdgpu_kernel void @sample_b_1d_a16(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { 5074; CHECK-LABEL: @sample_b_1d_a16( 5075; CHECK-NEXT: main_body: 5076; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5077; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5078; CHECK-NEXT: ret void 5079; 5080main_body: 5081 
%s32 = fpext half %s to float 5082 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5083 store <4 x float> %v, ptr addrspace(1) %out 5084 ret void 5085} 5086 5087; -------------------------------------------------------------------- 5088; llvm.amdgcn.image.sample offset zero 5089; -------------------------------------------------------------------- 5090 5091define amdgpu_kernel void @offset_sample_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 5092; CHECK-LABEL: @offset_sample_o_1d( 5093; CHECK-NEXT: main_body: 5094; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5095; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5096; CHECK-NEXT: ret void 5097; 5098main_body: 5099 %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5100 store <4 x float> %v, ptr addrspace(1) %out 5101 ret void 5102} 5103 5104define amdgpu_kernel void @offset_sample_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 5105; CHECK-LABEL: @offset_sample_o_2d( 5106; CHECK-NEXT: main_body: 5107; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) 5108; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 5109; CHECK-NEXT: ret void 5110; 5111main_body: 5112 %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 5113 store <4 x float> %v, ptr addrspace(1) %out 
ret void
}

; Zero offset on image.sample.c.o.1d: expect image.sample.c.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; CHECK-LABEL: @offset_sample_c_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.o.2d: expect image.sample.c.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; CHECK-LABEL: @offset_sample_c_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.cl.o.1d: expect image.sample.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.cl.o.2d: expect image.sample.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.cl.o.1d: expect image.sample.c.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_c_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.cl.o.2d: expect image.sample.c.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_c_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.b.o.1d: expect image.sample.b.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
; CHECK-LABEL: @offset_sample_b_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.b.o.2d: expect image.sample.b.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; CHECK-LABEL: @offset_sample_b_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.b.o.1d: expect image.sample.c.b.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
; CHECK-LABEL: @offset_sample_c_b_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.b.o.2d: expect image.sample.c.b.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
; CHECK-LABEL: @offset_sample_c_b_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.b.cl.o.1d: expect image.sample.b.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_b_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.b.cl.o.2d: expect image.sample.b.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_b_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.b.cl.o.1d: expect image.sample.c.b.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_c_b_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.b.cl.o.2d: expect image.sample.c.b.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_c_b_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.d.o.1d: expect image.sample.d.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; CHECK-LABEL: @offset_sample_d_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.d.o.2d: expect image.sample.d.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; CHECK-LABEL: @offset_sample_d_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.d.o.1d: expect image.sample.c.d.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; CHECK-LABEL: @offset_sample_c_d_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.d.o.2d: expect image.sample.c.d.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; CHECK-LABEL: @offset_sample_c_d_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.d.cl.o.1d: expect image.sample.d.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_d_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.d.cl.o.2d: expect image.sample.d.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_d_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.d.cl.o.1d: expect image.sample.c.d.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_c_d_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.d.cl.o.2d: expect image.sample.c.d.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_c_d_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.cd.o.1d: expect image.sample.cd.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; CHECK-LABEL: @offset_sample_cd_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.cd.o.2d: expect image.sample.cd.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; CHECK-LABEL: @offset_sample_cd_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.cd.o.1d: expect image.sample.c.cd.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; CHECK-LABEL: @offset_sample_c_cd_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.cd.o.2d: expect image.sample.c.cd.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; CHECK-LABEL: @offset_sample_c_cd_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.cd.cl.o.1d: expect image.sample.cd.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_cd_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.cd.cl.o.2d: expect image.sample.cd.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_cd_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.cd.cl.o.1d: expect image.sample.c.cd.cl.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
; CHECK-LABEL: @offset_sample_c_cd_cl_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.cd.cl.o.2d: expect image.sample.c.cd.cl.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; CHECK-LABEL: @offset_sample_c_cd_cl_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.l.o.1d: expect image.sample.l.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; CHECK-LABEL: @offset_sample_l_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.l.o.2d: expect image.sample.l.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; CHECK-LABEL: @offset_sample_l_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.l.o.1d: expect image.sample.c.l.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
; CHECK-LABEL: @offset_sample_c_l_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.l.o.2d: expect image.sample.c.l.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; CHECK-LABEL: @offset_sample_c_l_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.lz.o.1d: expect image.sample.lz.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; CHECK-LABEL: @offset_sample_lz_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.lz.o.2d: expect image.sample.lz.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; CHECK-LABEL: @offset_sample_lz_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.lz.o.1d: expect image.sample.c.lz.1d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; CHECK-LABEL: @offset_sample_c_lz_o_1d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Zero offset on image.sample.c.lz.o.2d: expect image.sample.c.lz.2d, offset operand dropped.
define amdgpu_kernel void @offset_sample_c_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; CHECK-LABEL: @offset_sample_c_lz_o_2d(
; CHECK-NEXT: main_body:
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) %out
  ret void
}

; Declarations of the offset (".o") image.sample intrinsics called by the
; zero-offset tests above. In each signature the second i32 parameter is the
; offset operand those tests pass as i32 0.
declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float>
@llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5632declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5633declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5634declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5635declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5636declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5637 5638declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5639declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5640declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5641declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5642declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5643declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, 
float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5644declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5645declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5646 5647declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5648declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5649declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5650declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 5651 5652; -------------------------------------------------------------------- 5653; llvm.amdgcn.is.shared 5654; -------------------------------------------------------------------- 5655 5656declare i1 @llvm.amdgcn.is.shared(ptr) nounwind readnone 5657 5658define i1 @test_is_shared_null() nounwind { 5659; CHECK-LABEL: @test_is_shared_null( 5660; CHECK-NEXT: ret i1 false 5661; 5662 %val = call i1 @llvm.amdgcn.is.shared(ptr null) 5663 ret i1 %val 5664} 5665 5666define i1 @test_is_shared_undef() nounwind { 5667; CHECK-LABEL: @test_is_shared_undef( 5668; CHECK-NEXT: ret i1 undef 5669; 5670 %val = call i1 @llvm.amdgcn.is.shared(ptr undef) 5671 ret i1 %val 5672} 5673 5674; -------------------------------------------------------------------- 5675; llvm.amdgcn.is.private 5676; -------------------------------------------------------------------- 5677 5678declare i1 @llvm.amdgcn.is.private(ptr) nounwind readnone 5679 5680define i1 @test_is_private_null() nounwind { 5681; 
CHECK-LABEL: @test_is_private_null( 5682; CHECK-NEXT: ret i1 false 5683; 5684 %val = call i1 @llvm.amdgcn.is.private(ptr null) 5685 ret i1 %val 5686} 5687 5688define i1 @test_is_private_undef() nounwind { 5689; CHECK-LABEL: @test_is_private_undef( 5690; CHECK-NEXT: ret i1 undef 5691; 5692 %val = call i1 @llvm.amdgcn.is.private(ptr undef) 5693 ret i1 %val 5694} 5695 5696; -------------------------------------------------------------------- 5697; llvm.amdgcn.trig.preop 5698; -------------------------------------------------------------------- 5699 5700declare double @llvm.amdgcn.trig.preop.f64(double, i32) 5701declare float @llvm.amdgcn.trig.preop.f32(float, i32) 5702 5703define double @trig_preop_constfold_variable_undef_arg(i32 %arg) { 5704; CHECK-LABEL: @trig_preop_constfold_variable_undef_arg( 5705; CHECK-NEXT: ret double 0x7FF8000000000000 5706; 5707 %val = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 %arg) 5708 ret double %val 5709} 5710 5711define double @trig_preop_constfold_variable_poison_arg(i32 %arg) { 5712; CHECK-LABEL: @trig_preop_constfold_variable_poison_arg( 5713; CHECK-NEXT: ret double poison 5714; 5715 %val = call double @llvm.amdgcn.trig.preop.f64(double poison, i32 %arg) 5716 ret double %val 5717} 5718 5719define double @trig_preop_constfold_variable_arg_undef(double %arg) { 5720; CHECK-LABEL: @trig_preop_constfold_variable_arg_undef( 5721; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG:%.*]], i32 undef) 5722; CHECK-NEXT: ret double [[VAL]] 5723; 5724 %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 undef) 5725 ret double %val 5726} 5727 5728define double @trig_preop_constfold_variable_arg_poison(double %arg) { 5729; CHECK-LABEL: @trig_preop_constfold_variable_arg_poison( 5730; CHECK-NEXT: ret double poison 5731; 5732 %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 poison) 5733 ret double %val 5734} 5735 5736define double @trig_preop_constfold_variable_int(i32 %arg) { 
5737; CHECK-LABEL: @trig_preop_constfold_variable_int( 5738; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 [[ARG:%.*]]) 5739; CHECK-NEXT: ret double [[VAL]] 5740; 5741 %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 %arg) 5742 ret double %val 5743} 5744 5745define double @trig_preop_qnan(i32 %arg) { 5746; CHECK-LABEL: @trig_preop_qnan( 5747; CHECK-NEXT: ret double 0x7FF8000000000000 5748; 5749 %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 %arg) 5750 ret double %val 5751} 5752 5753define double @trig_preop_snan(i32 %arg) { 5754; CHECK-LABEL: @trig_preop_snan( 5755; CHECK-NEXT: ret double 0x7FF8000000000001 5756; 5757 %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 %arg) 5758 ret double %val 5759} 5760 5761define double @trig_preop_inf_0() { 5762; CHECK-LABEL: @trig_preop_inf_0( 5763; CHECK-NEXT: ret double 0xB43DD63F5F2F8BD 5764; 5765 %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000000, i32 0) 5766 ret double %val 5767} 5768 5769define double @trig_preop_ninf_0() { 5770; CHECK-LABEL: @trig_preop_ninf_0( 5771; CHECK-NEXT: ret double 0xB43DD63F5F2F8BD 5772; 5773 %val = call double @llvm.amdgcn.trig.preop.f64(double 0xFFF0000000000000, i32 0) 5774 ret double %val 5775} 5776 5777define double @trig_preop_variable_fp(double %arg) { 5778; CHECK-LABEL: @trig_preop_variable_fp( 5779; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG:%.*]], i32 5) 5780; CHECK-NEXT: ret double [[VAL]] 5781; 5782 %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 5) 5783 ret double %val 5784} 5785 5786define double @trig_preop_variable_args(double %arg0, i32 %arg1) { 5787; CHECK-LABEL: @trig_preop_variable_args( 5788; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG0:%.*]], i32 [[ARG1:%.*]]) 5789; CHECK-NEXT: ret double [[VAL]] 5790; 5791 %val = call double 
@llvm.amdgcn.trig.preop.f64(double %arg0, i32 %arg1) 5792 ret double %val 5793} 5794 5795define double @trig_preop_constfold() { 5796; CHECK-LABEL: @trig_preop_constfold( 5797; CHECK-NEXT: ret double 0x394A6EE06DB14ACC 5798; 5799 %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 2) 5800 ret double %val 5801} 5802 5803; src1[4:0] <= 21 for segment to be inbound with this exponent of src0. 5804define double @trig_preop_constfold_outbound_segment() { 5805; CHECK-LABEL: @trig_preop_constfold_outbound_segment( 5806; CHECK-NEXT: ret double 0.000000e+00 5807; 5808 %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 22) 5809 ret double %val 5810} 5811 5812; Only use src1[4:0], so segment is actually 31 for -1. 5813define double @trig_preop_constfold_neg1_segment() { 5814; CHECK-LABEL: @trig_preop_constfold_neg1_segment( 5815; CHECK-NEXT: ret double 0.000000e+00 5816; 5817 %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 -1) 5818 ret double %val 5819} 5820 5821; Only use src1[4:0], so segment is actually 0 for -32. 
define double @trig_preop_constfold_neg32_segment() {
; CHECK-LABEL: @trig_preop_constfold_neg32_segment(
; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 -32)
  ret double %seg
}

; Both the function and the call site are strictfp. The expected output keeps
; the call (with its attribute group) instead of folding it to a constant.
define double @trig_preop_constfold_strictfp() strictfp {
; CHECK-LABEL: @trig_preop_constfold_strictfp(
; CHECK-NEXT:    [[SEG:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR16]]
; CHECK-NEXT:    ret double [[SEG]]
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp
  ret double %seg
}

; The exponent0 tests below pass a src0 whose exponent field is all zeros
; (0.0 or a subnormal bit pattern). For a given segment, the three mantissa
; variants (all zeros, all ones, arbitrary bits) expect the same folded value.

; Exponent field 0, mantissa all zeros, segment 0.
define double @trig_preop_constfold_exponent0_mantissa0__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa0__segment0(
; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 0)
  ret double %seg
}

; Exponent field 0, mantissa all ones, segment 0.
define double @trig_preop_constfold_exponent0_mantissa1__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa1__segment0(
; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x000FFFFFFFFFFFFF, i32 0)
  ret double %seg
}

; Exponent field 0, arbitrary mantissa bits, segment 0.
define double @trig_preop_constfold_exponent0_mantissaX__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissaX__segment0(
; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x0004A7F09D5F47D4, i32 0)
  ret double %seg
}

; Exponent field 0, mantissa all zeros, segment 2.
define double @trig_preop_constfold_exponent0_mantissa0__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa0__segment2(
; CHECK-NEXT:    ret double 0x394A6EE06DB14ACC
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 2)
  ret double %seg
}

; Exponent field 0, mantissa all ones, segment 2.
define double @trig_preop_constfold_exponent0_mantissa1__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa1__segment2(
; CHECK-NEXT:    ret double 0x394A6EE06DB14ACC
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x000FFFFFFFFFFFFF, i32 2)
  ret double %seg
}

; Exponent field 0, arbitrary mantissa bits, segment 2.
define double @trig_preop_constfold_exponent0_mantissaX__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissaX__segment2(
; CHECK-NEXT:    ret double 0x394A6EE06DB14ACC
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x00094A6EE06DB14A, i32 2)
  ret double %seg
}

; With this src0 exponent the segment is in bounds only while src1[4:0] <= 21;
; segment 22 is expected to fold to 0.0.
define double @trig_preop_constfold_exponent0_mantissa0__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa0__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 22)
  ret double %seg
}

; With this src0 exponent the segment is in bounds only while src1[4:0] <= 21;
; segment 22 is expected to fold to 0.0.
define double @trig_preop_constfold_exponent0_mantissa1__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa1__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x000FFFFFFFFFFFFF, i32 22)
  ret double %seg
}

; src1[4:0] <= 21 for segment to be inbound with this exponent of src0.
define double @trig_preop_constfold_exponent0_mantissaX__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissaX__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x000A6EE06DB14ACC, i32 22)
  ret double %seg
}

; The exponent1607 tests below pass a src0 whose top twelve bits are 0x647,
; i.e. a biased exponent field of 1607. For a given segment, the three
; mantissa variants (all zeros, all ones, arbitrary bits) expect the same
; folded value.

; 1607 = 1077 + 10 * 53
define double @trig_preop_constfold_exponent1607_mantissa0__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa0__segment0(
; CHECK-NEXT:    ret double 0x1EC8135A2FBF209C
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 0)
  ret double %seg
}

; 1607 = 1077 + 10 * 53
; NOTE(review): the name says segment1, but src1 is 0 and the expected value
; equals the mantissa0__segment0 result above - confirm which one is intended.
define double @trig_preop_constfold_exponent1607_mantissa1__segment1() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa1__segment1(
; CHECK-NEXT:    ret double 0x1EC8135A2FBF209C
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x647FFFFFFFFFFFFF, i32 0)
  ret double %seg
}

; 1607 = 1077 + 10 * 53
; NOTE(review): the name says segment1, but src1 is 0 and the expected value
; equals the mantissa0__segment0 result above - confirm which one is intended.
define double @trig_preop_constfold_exponent1607_mantissaX__segment1() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissaX__segment1(
; CHECK-NEXT:    ret double 0x1EC8135A2FBF209C
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x6471B791D6398353, i32 0)
  ret double %seg
}

; 1607 = 1077 + 10 * 53
define double @trig_preop_constfold_exponent1607_mantissa0__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa0__segment2(
; CHECK-NEXT:    ret double 0x181272117E2EF7E4
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 2)
  ret double %seg
}

; 1607 = 1077 + 10 * 53
define double @trig_preop_constfold_exponent1607_mantissa1__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa1__segment2(
; CHECK-NEXT:    ret double 0x181272117E2EF7E4
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x647FFFFFFFFFFFFF, i32 2)
  ret double %seg
}

; 1607 = 1077 + 10 * 53
define double @trig_preop_constfold_exponent1607_mantissaX__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissaX__segment2(
; CHECK-NEXT:    ret double 0x181272117E2EF7E4
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x647272117E2EF7E4, i32 2)
  ret double %seg
}

; With this src0 exponent the segment is in bounds only while src1[4:0] <= 11;
; segment 12 is expected to fold to 0.0.
define double @trig_preop_constfold_exponent1607_mantissa0__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa0__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 12)
  ret double %seg
}

; With this src0 exponent the segment is in bounds only while src1[4:0] <= 11;
; segment 12 is expected to fold to 0.0.
define double @trig_preop_constfold_exponent1607_mantissa1__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa1__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x647FFFFFFFFFFFFF, i32 12)
  ret double %seg
}

; src1[4:0] <= 11 for segment to be inbound with this exponent of src0.
define double @trig_preop_constfold_exponent1607_mantissaX__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissaX__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x647181272117E2EF, i32 12)
  ret double %seg
}

; The exponent1968 tests below pass a src0 whose top twelve bits are 0x7B0,
; i.e. a biased exponent field of 1968. For a given segment, the three
; mantissa variants (all zeros, all ones, arbitrary bits) expect the same
; folded value.

; Exponent field 1968, mantissa all zeros, segment 0.
define double @trig_preop_constfold_exponent1968_mantissa0__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa0__segment0(
; CHECK-NEXT:    ret double 0x10374F463F669E5F
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 0)
  ret double %seg
}

; Exponent field 1968, mantissa all ones, segment 0.
define double @trig_preop_constfold_exponent1968_mantissa1__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa1__segment0(
; CHECK-NEXT:    ret double 0x10374F463F669E5F
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0FFFFFFFFFFFFF, i32 0)
  ret double %seg
}

; Exponent field 1968, arbitrary mantissa bits, segment 0.
; NOTE(review): "mantissax" is lower-case here, unlike the "mantissaX"
; spelling used by the sibling tests.
define double @trig_preop_constfold_exponent1968_mantissax__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissax__segment0(
; CHECK-NEXT:    ret double 0x10374F463F669E5F
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B074F463F669E5F, i32 0)
  ret double %seg
}

; Exponent field 1968, mantissa all zeros, segment 2.
define double @trig_preop_constfold_exponent1968_mantissa0__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa0__segment2(
; CHECK-NEXT:    ret double 0x98F2F8BD9E839CE
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 2)
  ret double %seg
}

; Exponent field 1968, mantissa all ones, segment 2.
define double @trig_preop_constfold_exponent1968_mantissa1__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa1__segment2(
; CHECK-NEXT:    ret double 0x98F2F8BD9E839CE
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0FFFFFFFFFFFFF, i32 2)
  ret double %seg
}

; Exponent field 1968, arbitrary mantissa bits, segment 2.
define double @trig_preop_constfold_exponent1968_mantissaX__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissaX__segment2(
; CHECK-NEXT:    ret double 0x98F2F8BD9E839CE
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0A2F8BD9E839CE, i32 2)
  ret double %seg
}

; With this src0 exponent the segment is in bounds only while src1[4:0] <= 4;
; segment 5 is expected to fold to 0.0.
define double @trig_preop_constfold_exponent1968_mantissa0__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa0__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 5)
  ret double %seg
}

; With this src0 exponent the segment is in bounds only while src1[4:0] <= 4;
; segment 5 is expected to fold to 0.0.
define double @trig_preop_constfold_exponent1968_mantissa1__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa1__outbound_segment(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %seg = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0FFFFFFFFFFFFF, i32 5)
  ret double %seg
}

; src1[4:0] <= 4 for segment to be inbound with this exponent of src0.
6062define double @trig_preop_constfold_exponent1968_mantissaX__outbound_segment() { 6063; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissaX__outbound_segment( 6064; CHECK-NEXT: ret double 0.000000e+00 6065; 6066 %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0A98F2F8BD9E83, i32 5) 6067 ret double %val 6068} 6069 6070; -------------------------------------------------------------------- 6071; llvm.amdgcn.log 6072; -------------------------------------------------------------------- 6073 6074declare float @llvm.amdgcn.log.f32(float) nounwind readnone 6075declare half @llvm.amdgcn.log.f16(half) nounwind readnone 6076 6077define float @test_constant_fold_log_f32_undef() { 6078; CHECK-LABEL: @test_constant_fold_log_f32_undef( 6079; CHECK-NEXT: ret float 0x7FF8000000000000 6080; 6081 %val = call float @llvm.amdgcn.log.f32(float undef) 6082 ret float %val 6083} 6084 6085define float @test_constant_fold_log_f32_poison() { 6086; CHECK-LABEL: @test_constant_fold_log_f32_poison( 6087; CHECK-NEXT: ret float poison 6088; 6089 %val = call float @llvm.amdgcn.log.f32(float poison) 6090 ret float %val 6091} 6092 6093define float @test_constant_fold_log_f32_p0() { 6094; CHECK-LABEL: @test_constant_fold_log_f32_p0( 6095; CHECK-NEXT: ret float 0xFFF0000000000000 6096; 6097 %val = call float @llvm.amdgcn.log.f32(float 0.0) 6098 ret float %val 6099} 6100 6101define float @test_constant_fold_log_f32_n0() { 6102; CHECK-LABEL: @test_constant_fold_log_f32_n0( 6103; CHECK-NEXT: ret float 0xFFF0000000000000 6104; 6105 %val = call float @llvm.amdgcn.log.f32(float -0.0) 6106 ret float %val 6107} 6108 6109define float @test_constant_fold_log_f32_subnormal() { 6110; CHECK-LABEL: @test_constant_fold_log_f32_subnormal( 6111; CHECK-NEXT: ret float 0xFFF0000000000000 6112; 6113 %val = call float @llvm.amdgcn.log.f32(float 0x380FFFFFC0000000) 6114 ret float %val 6115} 6116 6117define float @test_constant_fold_log_f32_negsubnormal() { 6118; CHECK-LABEL: 
@test_constant_fold_log_f32_negsubnormal( 6119; CHECK-NEXT: ret float 0xFFF0000000000000 6120; 6121 %val = call float @llvm.amdgcn.log.f32(float 0xB80FFFFFC0000000) 6122 ret float %val 6123} 6124 6125define float @test_constant_fold_log_f32_pinf() { 6126; CHECK-LABEL: @test_constant_fold_log_f32_pinf( 6127; CHECK-NEXT: ret float 0x7FF0000000000000 6128; 6129 %val = call float @llvm.amdgcn.log.f32(float 0x7FF0000000000000) 6130 ret float %val 6131} 6132 6133define float @test_constant_fold_log_f32_ninf() { 6134; CHECK-LABEL: @test_constant_fold_log_f32_ninf( 6135; CHECK-NEXT: ret float 0x7FF8000000000000 6136; 6137 %val = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) 6138 ret float %val 6139} 6140 6141define float @test_constant_fold_log_f32_p1() { 6142; CHECK-LABEL: @test_constant_fold_log_f32_p1( 6143; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 1.000000e+00) 6144; CHECK-NEXT: ret float [[VAL]] 6145; 6146 %val = call float @llvm.amdgcn.log.f32(float 1.0) 6147 ret float %val 6148} 6149 6150define float @test_constant_fold_log_f32_p10() { 6151; CHECK-LABEL: @test_constant_fold_log_f32_p10( 6152; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 1.000000e+01) 6153; CHECK-NEXT: ret float [[VAL]] 6154; 6155 %val = call float @llvm.amdgcn.log.f32(float 10.0) 6156 ret float %val 6157} 6158 6159define float @test_constant_fold_log_f32_neg10() { 6160; CHECK-LABEL: @test_constant_fold_log_f32_neg10( 6161; CHECK-NEXT: ret float 0x7FF8000000000000 6162; 6163 %val = call float @llvm.amdgcn.log.f32(float -10.0) 6164 ret float %val 6165} 6166 6167define float @test_constant_fold_log_f32_qnan() { 6168; CHECK-LABEL: @test_constant_fold_log_f32_qnan( 6169; CHECK-NEXT: ret float 0x7FF8000000000000 6170; 6171 %val = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) 6172 ret float %val 6173} 6174 6175define float @test_constant_fold_log_f32_snan() { 6176; CHECK-LABEL: @test_constant_fold_log_f32_snan( 6177; CHECK-NEXT: ret float 
0x7FF8000020000000 6178; 6179 %val = call float @llvm.amdgcn.log.f32(float 0x7FF0000020000000) 6180 ret float %val 6181} 6182 6183define half @test_constant_fold_log_f16_p0() { 6184; CHECK-LABEL: @test_constant_fold_log_f16_p0( 6185; CHECK-NEXT: ret half 0xHFC00 6186; 6187 %val = call half @llvm.amdgcn.log.f16(half 0.0) 6188 ret half %val 6189} 6190 6191define half @test_constant_fold_log_f16_neg10() { 6192; CHECK-LABEL: @test_constant_fold_log_f16_neg10( 6193; CHECK-NEXT: ret half 0xH7E00 6194; 6195 %val = call half @llvm.amdgcn.log.f16(half -10.0) 6196 ret half %val 6197} 6198 6199define float @test_constant_fold_log_f32_qnan_strictfp() strictfp { 6200; CHECK-LABEL: @test_constant_fold_log_f32_qnan_strictfp( 6201; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR16]] 6202; CHECK-NEXT: ret float [[VAL]] 6203; 6204 %val = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) strictfp 6205 ret float %val 6206} 6207 6208define float @test_constant_fold_log_f32_0_strictfp() strictfp { 6209; CHECK-LABEL: @test_constant_fold_log_f32_0_strictfp( 6210; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR16]] 6211; CHECK-NEXT: ret float [[VAL]] 6212; 6213 %val = call float @llvm.amdgcn.log.f32(float 0.0) strictfp 6214 ret float %val 6215} 6216 6217define float @test_constant_fold_log_f32_neg0_strictfp() strictfp { 6218; CHECK-LABEL: @test_constant_fold_log_f32_neg0_strictfp( 6219; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR16]] 6220; CHECK-NEXT: ret float [[VAL]] 6221; 6222 %val = call float @llvm.amdgcn.log.f32(float -0.0) strictfp 6223 ret float %val 6224} 6225 6226define float @test_constant_fold_log_f32_neg_strictfp() strictfp { 6227; CHECK-LABEL: @test_constant_fold_log_f32_neg_strictfp( 6228; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR16]] 6229; CHECK-NEXT: ret float [[VAL]] 6230; 6231 %val = call float 
@llvm.amdgcn.log.f32(float -10.0) strictfp 6232 ret float %val 6233} 6234 6235define float @test_constant_fold_log_f32_pinf_strictfp() strictfp { 6236; CHECK-LABEL: @test_constant_fold_log_f32_pinf_strictfp( 6237; CHECK-NEXT: ret float 0x7FF0000000000000 6238; 6239 %val = call float @llvm.amdgcn.log.f32(float 0x7FF0000000000000) strictfp 6240 ret float %val 6241} 6242 6243define float @test_constant_fold_log_f32_ninf_strictfp() strictfp { 6244; CHECK-LABEL: @test_constant_fold_log_f32_ninf_strictfp( 6245; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR16]] 6246; CHECK-NEXT: ret float [[VAL]] 6247; 6248 %val = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) strictfp 6249 ret float %val 6250} 6251 6252define half @test_constant_fold_log_f16_denorm() { 6253; CHECK-LABEL: @test_constant_fold_log_f16_denorm( 6254; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.log.f16(half 0xH03FF) 6255; CHECK-NEXT: ret half [[VAL]] 6256; 6257 %val = call half @llvm.amdgcn.log.f16(half 0xH03ff) 6258 ret half %val 6259} 6260 6261define half @test_constant_fold_log_f16_neg_denorm() { 6262; CHECK-LABEL: @test_constant_fold_log_f16_neg_denorm( 6263; CHECK-NEXT: ret half 0xH7E00 6264; 6265 %val = call half @llvm.amdgcn.log.f16(half 0xH83ff) 6266 ret half %val 6267} 6268 6269; -------------------------------------------------------------------- 6270; llvm.amdgcn.exp2 6271; -------------------------------------------------------------------- 6272 6273declare float @llvm.amdgcn.exp2.f32(float) nounwind readnone 6274declare half @llvm.amdgcn.exp2.f16(half) nounwind readnone 6275 6276define float @test_constant_fold_exp2_f32_undef() { 6277; CHECK-LABEL: @test_constant_fold_exp2_f32_undef( 6278; CHECK-NEXT: ret float 0x7FF8000000000000 6279; 6280 %val = call float @llvm.amdgcn.exp2.f32(float undef) 6281 ret float %val 6282} 6283 6284define float @test_constant_fold_exp2_f32_poison() { 6285; CHECK-LABEL: @test_constant_fold_exp2_f32_poison( 
; CHECK-NEXT:    ret float poison
;
  %val = call float @llvm.amdgcn.exp2.f32(float poison)
  ret float %val
}

; exp2(+0.0) is folded to the constant 1.0.
define float @test_constant_fold_exp2_f32_p0() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_p0(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0.0)
  ret float %val
}

; exp2(-0.0) is folded to the constant 1.0, same as for +0.0.
define float @test_constant_fold_exp2_f32_n0() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_n0(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %val = call float @llvm.amdgcn.exp2.f32(float -0.0)
  ret float %val
}

; exp2(1.0) is expected to stay as a call; no constant is produced.
define float @test_constant_fold_exp2_f32_p1() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_p1(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 1.0)
  ret float %val
}

; exp2(-1.0) is expected to stay as a call.
define float @test_constant_fold_exp2_f32_n1() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_n1(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -1.0)
  ret float %val
}

; exp2(2.0) is expected to stay as a call.
define float @test_constant_fold_exp2_f32_p2() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_p2(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 2.0)
  ret float %val
}

; exp2(-2.0) is expected to stay as a call.
define float @test_constant_fold_exp2_f32_n2() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_n2(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -2.0)
  ret float %val
}

; exp2(4.0) is expected to stay as a call.
define float @test_constant_fold_exp2_f32_p4() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_p4(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 4.000000e+00)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 4.0)
  ret float %val
}

; exp2(-4.0) is expected to stay as a call.
define float @test_constant_fold_exp2_f32_n4() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_n4(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -4.000000e+00)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -4.0)
  ret float %val
}

; The positive "subnormal" input 0x380FFFFFC0000000 is folded to 1.0.
define float @test_constant_fold_exp2_f32_subnormal() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_subnormal(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0x380FFFFFC0000000)
  ret float %val
}

; The sign-flipped subnormal input 0xB80FFFFFC0000000 is also folded to 1.0.
define float @test_constant_fold_exp2_f32_negsubnormal() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_negsubnormal(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0xB80FFFFFC0000000)
  ret float %val
}

; exp2(+inf) is folded to +inf (0x7FF0000000000000).
define float @test_constant_fold_exp2_f32_pinf() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_pinf(
; CHECK-NEXT:    ret float 0x7FF0000000000000
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF0000000000000)
  ret float %val
}

; exp2(-inf) is folded to +0.0.
define float @test_constant_fold_exp2_f32_ninf() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_ninf(
; CHECK-NEXT:    ret float 0.000000e+00
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0xFFF0000000000000)
  ret float %val
}

; exp2(10.0) is expected to stay as a call.
define float @test_constant_fold_exp2_f32_p10() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_p10(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+01)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 10.0)
  ret float %val
}

; exp2(-10.0) is expected to stay as a call.
define float @test_constant_fold_exp2_f32_neg10() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg10(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01)
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -10.0)
  ret float %val
}

; A quiet NaN input (0x7FF8000000000000) is folded to the same quiet NaN.
define float @test_constant_fold_exp2_f32_qnan() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan(
; CHECK-NEXT:    ret float 0x7FF8000000000000
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000)
  ret float %val
}

; The "snan" input 0x7FF0000020000000 is folded to 0x7FF8000020000000: the
; same payload with the 0x0008000000000000 (quiet) bit set.
define float @test_constant_fold_exp2_f32_snan() {
; CHECK-LABEL: @test_constant_fold_exp2_f32_snan(
; CHECK-NEXT:    ret float 0x7FF8000020000000
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF0000020000000)
  ret float %val
}

; Half-precision variant: exp2(0.0) is folded to 0xH3C00 (half 1.0).
define half @test_constant_fold_exp2_f16_p0() {
; CHECK-LABEL: @test_constant_fold_exp2_f16_p0(
; CHECK-NEXT:    ret half 0xH3C00
;
  %val = call half @llvm.amdgcn.exp2.f16(half 0.0)
  ret half %val
}

; Half-precision variant: exp2(-10.0) is expected to stay as a call. The
; operand is printed in its hex form, 0xHC900.
define half @test_constant_fold_exp2_f16_neg10() {
; CHECK-LABEL: @test_constant_fold_exp2_f16_neg10(
; CHECK-NEXT:    [[VAL:%.*]] = call half @llvm.amdgcn.exp2.f16(half 0xHC900)
; CHECK-NEXT:    ret half [[VAL]]
;
  %val = call half @llvm.amdgcn.exp2.f16(half -10.0)
  ret half %val
}

; strictfp variants. In the tests below both the function and the call site
; carry strictfp.
; NOTE(review): ATTR16 is captured by a check line outside this excerpt;
; presumably it names the strictfp attribute group -- confirm.

; Under strictfp a quiet NaN input is not folded; the call is kept.
define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) strictfp
  ret float %val
}

; Under strictfp exp2(+0.0) is not folded, unlike the non-strictfp p0 test.
define float @test_constant_fold_exp2_f32_0_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_0_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0.0) strictfp
  ret float %val
}

; Under strictfp exp2(-0.0) is not folded.
define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg0_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -0.0) strictfp
  ret float %val
}

; Under strictfp exp2(1.0) is not folded.
define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_1_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 1.0) strictfp
  ret float %val
}

; Under strictfp exp2(-1.0) is not folded.
define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg1_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -1.0) strictfp
  ret float %val
}

; Under strictfp exp2(2.0) is not folded.
define float @test_constant_fold_exp2_f32_2_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_2_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float 2.0) strictfp
  ret float %val
}

; Under strictfp exp2(-2.0) is not folded.
define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg2_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -2.0) strictfp
  ret float %val
}

; Under strictfp exp2(-10.0) is not folded.
define float @test_constant_fold_exp2_f32_neg_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg_strictfp(
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR16]]
; CHECK-NEXT:    ret float [[VAL]]
;
  %val = call float @llvm.amdgcn.exp2.f32(float -10.0) strictfp
  ret float %val
}

; exp2(+inf) is still folded to +inf even though the call is strictfp.
define float @test_constant_fold_exp2_f32_pinf_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_pinf_strictfp(
; CHECK-NEXT:    ret float 0x7FF0000000000000
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF0000000000000) strictfp
  ret float %val
}

; exp2(-inf) is still folded to +0.0 even though the call is strictfp.
define float @test_constant_fold_exp2_f32_ninf_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_ninf_strictfp(
; CHECK-NEXT:    ret float 0.000000e+00
;
  %val = call float @llvm.amdgcn.exp2.f32(float 0xFFF0000000000000) strictfp
  ret float %val
}

; Half-precision "denorm" input 0xH03FF: the call is kept, unlike the f32
; subnormal test, which folds to 1.0.
define half @test_constant_fold_exp2_f16_denorm() {
; CHECK-LABEL: @test_constant_fold_exp2_f16_denorm(
; CHECK-NEXT:    [[VAL:%.*]] = call half @llvm.amdgcn.exp2.f16(half 0xH03FF)
; CHECK-NEXT:    ret half [[VAL]]
;
  %val = call half @llvm.amdgcn.exp2.f16(half 0xH03ff)
  ret half %val
}

; Sign-flipped half "denorm" input 0xH83FF: the call is kept as well.
define half @test_constant_fold_exp2_f16_neg_denorm() {
; CHECK-LABEL: @test_constant_fold_exp2_f16_neg_denorm(
; CHECK-NEXT:    [[VAL:%.*]] = call half @llvm.amdgcn.exp2.f16(half 0xH83FF)
; CHECK-NEXT:    ret half [[VAL]]
;
  %val = call half @llvm.amdgcn.exp2.f16(half 0xH83ff)
  ret half %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.prng
; --------------------------------------------------------------------
declare i32 @llvm.amdgcn.prng.b32(i32)

; prng.b32 of an undef operand is replaced by undef.
define i32 @prng_undef_i32() {
; CHECK-LABEL: @prng_undef_i32(
; CHECK-NEXT:    ret i32 undef
;
  %prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
  ret i32 %prng
}

; prng.b32 of a poison operand is replaced by poison.
define i32 @prng_poison_i32() {
; CHECK-LABEL: @prng_poison_i32(
; CHECK-NEXT:    ret i32 poison
;
  %prng = call i32 @llvm.amdgcn.prng.b32(i32 poison)
  ret i32 %prng
}