; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ
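; Prefix map (from the RUN lines above): CHECK is common to all runs; SSE is
; the baseline run; AVX1 covers corei7-avx and bdver1; AVX2 covers core-avx2;
; AVX512 is skylake-avx512 with 512-bit vectors enabled; AVX256DQ is
; skylake-avx512 restricted to 256-bit vectors via +prefer-256-bit.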

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [8 x i64] zeroinitializer, align 64
@src32 = common global [16 x i32] zeroinitializer, align 64
@src16 = common global [32 x i16] zeroinitializer, align 64
@src8 = common global [64 x i8] zeroinitializer, align 64

@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64

;
; UITOFP to vXf64
;
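; Each test loads N adjacent unsigned integers, converts them with scalar
; uitofp, and stores N adjacent doubles; the SLP vectorizer should merge each
; chain into vector conversions sized to the target's register width
; (<2 x double> on SSE, <4 x double> on AVX256, <8 x double> on AVX512).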

define void @uitofp_2i64_2f64() #0 {
; CHECK-LABEL: @uitofp_2i64_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @uitofp_4i64_4f64() #0 {
; SSE-LABEL: @uitofp_4i64_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i64_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i64_8f64() #0 {
; SSE-LABEL: @uitofp_8i64_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i64> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i64> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i64_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i64_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  %cvt4 = uitofp i64 %ld4 to double
  %cvt5 = uitofp i64 %ld5 to double
  %cvt6 = uitofp i64 %ld6 to double
  %cvt7 = uitofp i64 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

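; The 2 x i32 -> 2 x double case below is kept scalar by the cost model under
; the AVX1 prefix, while the SSE, AVX2, AVX512 and AVX256DQ runs vectorize it.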
define void @uitofp_2i32_2f64() #0 {
; SSE-LABEL: @uitofp_2i32_2f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @uitofp_2i32_2f64(
; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 64
; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; AVX1-NEXT:    store double [[CVT0]], ptr @dst64, align 64
; AVX1-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @uitofp_2i32_2f64(
; AVX2-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; AVX2-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX2-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_2i32_2f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX512-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_2i32_2f64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @uitofp_4i32_4f64() #0 {
; SSE-LABEL: @uitofp_4i32_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i32_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i32_8f64() #0 {
; SSE-LABEL: @uitofp_8i32_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i32> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i32> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i32_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i32_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  %cvt4 = uitofp i32 %ld4 to double
  %cvt5 = uitofp i32 %ld5 to double
  %cvt6 = uitofp i32 %ld6 to double
  %cvt7 = uitofp i32 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

define void @uitofp_2i16_2f64() #0 {
; CHECK-LABEL: @uitofp_2i16_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @uitofp_4i16_4f64() #0 {
; SSE-LABEL: @uitofp_4i16_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i16_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i16_8f64() #0 {
; SSE-LABEL: @uitofp_8i16_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i16> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i16> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i16_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i16_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  %cvt4 = uitofp i16 %ld4 to double
  %cvt5 = uitofp i16 %ld5 to double
  %cvt6 = uitofp i16 %ld6 to double
  %cvt7 = uitofp i16 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

define void @uitofp_2i8_2f64() #0 {
; CHECK-LABEL: @uitofp_2i8_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @uitofp_4i8_4f64() #0 {
; SSE-LABEL: @uitofp_4i8_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i8_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i8_8f64() #0 {
; SSE-LABEL: @uitofp_8i8_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i8> [[TMP5]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i8> [[TMP7]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i8_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i8_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  %cvt4 = uitofp i8 %ld4 to double
  %cvt5 = uitofp i8 %ld5 to double
  %cvt6 = uitofp i8 %ld6 to double
  %cvt7 = uitofp i8 %ld7 to double
  store double %cvt0, ptr @dst64, align 64
  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
  ret void
}

;
; UITOFP to vXf32
;
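; The 2 x i64 -> 2 x float case below stays scalar except under the AVX512 and
; AVX256DQ prefixes, presumably because skylake-avx512 provides direct
; qword-to-float conversions (AVX512DQ).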

define void @uitofp_2i64_2f32() #0 {
; SSE-LABEL: @uitofp_2i64_2f32(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; SSE-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; SSE-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @uitofp_2i64_2f32(
; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX1-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; AVX1-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @uitofp_2i64_2f32(
; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX2-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX2-NEXT:    store float [[CVT0]], ptr @dst32, align 64
; AVX2-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_2i64_2f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX512-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_2i64_2f32(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  ret void
}

define void @uitofp_4i64_4f32() #0 {
; CHECK-LABEL: @uitofp_4i64_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @uitofp_8i64_8f32() #0 {
; SSE-LABEL: @uitofp_8i64_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i64_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 64
  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  %cvt4 = uitofp i64 %ld4 to float
  %cvt5 = uitofp i64 %ld5 to float
  %cvt6 = uitofp i64 %ld6 to float
  %cvt7 = uitofp i64 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

define void @uitofp_4i32_4f32() #0 {
; CHECK-LABEL: @uitofp_4i32_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @uitofp_8i32_8f32() #0 {
; SSE-LABEL: @uitofp_8i32_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i32_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  %cvt4 = uitofp i32 %ld4 to float
  %cvt5 = uitofp i32 %ld5 to float
  %cvt6 = uitofp i32 %ld6 to float
  %cvt7 = uitofp i32 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

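; The 16-element cases below split along register width: four 128-bit
; operations on SSE, two 256-bit operations on AVX256, and a single 512-bit
; operation on AVX512.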
define void @uitofp_16i32_16f32() #0 {
; SSE-LABEL: @uitofp_16i32_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i32> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i32> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i32_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i32> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i32_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i32> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
  %ld8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
  %ld9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
  %ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
  %ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
  %ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
  %ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
  %ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
  %ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  %cvt4 = uitofp i32 %ld4 to float
  %cvt5 = uitofp i32 %ld5 to float
  %cvt6 = uitofp i32 %ld6 to float
  %cvt7 = uitofp i32 %ld7 to float
  %cvt8 = uitofp i32 %ld8 to float
  %cvt9 = uitofp i32 %ld9 to float
  %cvt10 = uitofp i32 %ld10 to float
  %cvt11 = uitofp i32 %ld11 to float
  %cvt12 = uitofp i32 %ld12 to float
  %cvt13 = uitofp i32 %ld13 to float
  %cvt14 = uitofp i32 %ld14 to float
  %cvt15 = uitofp i32 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}

define void @uitofp_4i16_4f32() #0 {
; CHECK-LABEL: @uitofp_4i16_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @uitofp_8i16_8f32() #0 {
; SSE-LABEL: @uitofp_8i16_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i16_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  %cvt4 = uitofp i16 %ld4 to float
  %cvt5 = uitofp i16 %ld5 to float
  %cvt6 = uitofp i16 %ld6 to float
  %cvt7 = uitofp i16 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

define void @uitofp_16i16_16f32() #0 {
; SSE-LABEL: @uitofp_16i16_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i16> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i16> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i16_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i16> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i16_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i16> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
  %ld8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
  %ld9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
  %ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
  %ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
  %ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  %cvt4 = uitofp i16 %ld4 to float
  %cvt5 = uitofp i16 %ld5 to float
  %cvt6 = uitofp i16 %ld6 to float
  %cvt7 = uitofp i16 %ld7 to float
  %cvt8 = uitofp i16 %ld8 to float
  %cvt9 = uitofp i16 %ld9 to float
  %cvt10 = uitofp i16 %ld10 to float
  %cvt11 = uitofp i16 %ld11 to float
  %cvt12 = uitofp i16 %ld12 to float
  %cvt13 = uitofp i16 %ld13 to float
  %cvt14 = uitofp i16 %ld14 to float
  %cvt15 = uitofp i16 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}

define void @uitofp_4i8_4f32() #0 {
; CHECK-LABEL: @uitofp_4i8_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @uitofp_8i8_8f32() #0 {
; SSE-LABEL: @uitofp_8i8_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i8_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  store float %cvt0, ptr @dst32, align 64
  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
  ret void
}

define void @uitofp_16i8_16f32() #0 {
; SSE-LABEL: @uitofp_16i8_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i8> [[TMP5]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i8> [[TMP7]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i8_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i8> [[TMP3]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i8_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i8> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
  %ld9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
  %ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
  %ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
  %ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  %cvt8 = uitofp i8 %ld8 to float
  %cvt9 = uitofp i8 %ld9 to float
  %cvt10 = uitofp i8 %ld10 to float
  %cvt11 = uitofp i8 %ld11 to float
  %cvt12 = uitofp i8 %ld12 to float
  %cvt13 = uitofp i8 %ld13 to float
  %cvt14 = uitofp i8 %ld14 to float
  %cvt15 = uitofp i8 %ld15 to float
  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
  ret void
}

attributes #0 = { nounwind }