MemorySanitizer/X86/mmx-intrinsics.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test1(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test88(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test87(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test86(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test85(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test84(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test83(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test82(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test81(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test80(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test79(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test78(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test77(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test76(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <4 x i16>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]])
; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP22]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test75(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <2 x i32>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = sext <2 x i1> [[TMP12]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]])
; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP22]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test74(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <4 x i16>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]])
; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP22]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test73(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> [[TMP1]], i32 3)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test72(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[TMP1]], i32 3)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test72_2(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test72_2(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[TMP1]], i32 0)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test71(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> [[TMP2]], i32 3)
; CHECK-NEXT:    [[TMP6:%.*]] = or <1 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP4]]
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test70(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[TMP1]], i32 3)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test70_2(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test70_2(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[TMP1]], i32 0)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test69(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> [[TMP1]], i32 3)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test68(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> [[TMP2]], i32 3)
; CHECK-NEXT:    [[TMP6:%.*]] = or <1 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP4]]
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test67(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> [[TMP1]], i32 3)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test66(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[TMP1]], i32 3)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test66_2(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test66_2(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[TMP1]], i32 0)
; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test65(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP17]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test64(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP17]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test63(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP13]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i64 [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> [[TMP3]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP16:%.*]] = or <1 x i64> [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP12]] to i64
; CHECK-NEXT:    store i64 [[TMP11]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP5]]
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test62(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP17]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test61(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP17]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test60(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP13]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i64 [[TMP10]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> [[TMP3]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP16:%.*]] = or <1 x i64> [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP12]] to i64
; CHECK-NEXT:    store i64 [[TMP11]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP5]]
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test59(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP17]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test58(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP17]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test56(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test55(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test54(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test53(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test52(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test51(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test50(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test49(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP13:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64>
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP18]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test48(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test47(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test46(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test45(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test44(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP3:%.*]] = or <1 x i64> [[TMP7]], [[TMP8]]
; CHECK-NEXT:    [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]])
; CHECK-NEXT:    [[_MSPROP2:%.*]] = bitcast <1 x i64> [[_MSPROP3]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone

declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test43(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test42(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test41(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test40(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test39(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test38(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test37(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test36(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP3:%.*]] = or <1 x i64> [[TMP7]], [[TMP8]]
; CHECK-NEXT:    [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]])
; CHECK-NEXT:    [[_MSPROP2:%.*]] = bitcast <1 x i64> [[_MSPROP3]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test35(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test34(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test33(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test32(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP12:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP16:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = lshr i64 [[TMP10]], 48
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64 [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP17]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP14]] to i64
; CHECK-NEXT:    store i64 [[TMP15]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test31(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test30(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test29(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test28(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test27(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test26(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(ptr, <1 x i64>) nounwind

define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 {
; CHECK-LABEL: define void @test25(
; CHECK-SAME: ptr [[P:%.*]], <1 x i64> [[A:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    store <1 x i64> [[TMP3]], ptr [[TMP6]], align 1
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    tail call void @llvm.x86.mmx.movnt.dq(ptr [[P]], <1 x i64> [[MMX_VAR_I]]) #[[ATTR2]]
; CHECK-NEXT:    ret void
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  tail call void @llvm.x86.mmx.movnt.dq(ptr %p, <1 x i64> %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) nounwind readnone

define i32 @test24(<1 x i64> %a) #0 {
; CHECK-LABEL: define i32 @test24(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> [[MMX_VAR_I]]) #[[ATTR2]]
; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to <1 x i64>
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 {
; CHECK-LABEL: define void @test23(
; CHECK-SAME: <1 x i64> [[D:%.*]], <1 x i64> [[N:%.*]], ptr [[P:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[N]] to <8 x i8>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[D]] to <8 x i8>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP9]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
; CHECK:       11:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       12:
; CHECK-NEXT:    tail call void @llvm.x86.mmx.maskmovq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]], ptr [[P]]) #[[ATTR2]]
; CHECK-NEXT:    ret void
;
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  tail call void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) nounwind
  ret void
}

declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test22(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test21(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP5]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) #0 {
; CHECK-LABEL: define i32 @test21_2(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP5]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test20(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP5]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP10:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[_MSPROP:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) #0 {
; CHECK-LABEL: define <2 x double> @test19(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP4]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP7]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> [[TMP8]]) #[[ATTR5]]
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[TMP2]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %1) nounwind readnone
  ret <2 x double> %2
}

declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) #0 {
; CHECK-LABEL: define i64 @test18(
; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> [[A]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
entry:
  %0 = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast <1 x i64> %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) #0 {
; CHECK-LABEL: define i64 @test17(
; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> [[A]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
entry:
  %0 = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast <1 x i64> %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test16(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP4]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP11]], 0
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP12]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP10:%.*]] = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]], i8 16)
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16)
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone

define i64 @test15(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test15(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> [[TMP1]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP6]] to <1 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP10]]
;
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %1) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone

define i64 @test14(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test14(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> [[TMP1]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP6]] to <1 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP10]]
;
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %1) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone

define i64 @test13(<1 x i64> %a) #0 {
; CHECK-LABEL: define i64 @test13(
; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> [[TMP1]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <8 x i8>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP10]]
;
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %1) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test12(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = bitcast <2 x i32> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test11(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test10(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i8> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = bitcast <8 x i8> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test9(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i8> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = bitcast <8 x i8> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test8(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test7(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64>
; CHECK-NEXT:    [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64>
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP20]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = bitcast <8 x i8> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test6(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test5(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = bitcast <2 x i32> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test4(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test3(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test2(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = bitcast <2 x i32> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

define <4 x float> @test89(<4 x float> %a, <1 x i64> %b) nounwind #0 {
; ALL-LABEL: test89:
; ALL:       # %bb.0:
; ALL-NEXT:    cvtpi2ps %mm0, %xmm0
; ALL-NEXT:    ret{{[l|q]}}
; CHECK-LABEL: define <4 x float> @test89(
; CHECK-SAME: <4 x float> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i64> [[TMP4]] to i64
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[C:%.*]] = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> [[A]], <1 x i64> [[B]])
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[C]]
;
  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b)
  ret <4 x float> %c
}

declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone

define void @test90() #0 {
; ALL-LABEL: test90:
; ALL:       # %bb.0:
; ALL-NEXT:    emms
; ALL-NEXT:    ret{{[l|q]}}
; CHECK-LABEL: define void @test90(
; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    call void @llvm.x86.mmx.emms()
; CHECK-NEXT:    ret void
;
  call void @llvm.x86.mmx.emms()
  ret void
}

declare void @llvm.x86.mmx.emms()

define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind #0 {
; CHECK-LABEL: define <1 x i64> @test_mm_insert_pi16(
; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]], i32 [[D:%.*]]) #[[ATTR4]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP3:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> [[A_COERCE]], i32 [[D]], i32 2)
; CHECK-NEXT:    store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP9]]
;
entry:
  %1 = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2)
  ret <1 x i64> %1
}

declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg)

define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 {
; CHECK-LABEL: define i32 @test_mm_extract_pi16(
; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]]) #[[ATTR4]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to i64
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> [[A_COERCE]], i32 2)
; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP1]]
;
entry:
  %1 = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2)
  ret i32 %1
}

declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg)

attributes #0 = { sanitize_memory }
;.
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
;.