; Vector signed division by per-lane power-of-two constants on ARM NEON,
; compiled for both armv7 (little-endian) and armebv7 (big-endian).
;
; RUN: llc -mtriple armv7-linux -mattr=+neon %s -o - | FileCheck %s --check-prefix=LE
; RUN: llc -mtriple armebv7-linux -mattr=+neon %s -o - | FileCheck %s --check-prefix=BE

; Key point: in the big-endian output, the last vrev64 (the one right before
; the store) must be vrev64.16 rather than vrev64.32.

; Loads an <8 x i16> vector from %y, divides each lane (signed) by the constant
; <1, 4, 2, 8, 16, 64, 32, 128>, and stores the quotient vector to %x.
; The expected code performs the division with a shift/add sequence
; (vshr, vshl, vadd, vshl) followed by a vbit select against a constant mask.
define void @sdiv_shl(ptr %x, ptr %y) nounwind {
; LE-LABEL: sdiv_shl:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    adr r2, .LCPI0_0
; LE-NEXT:    vld1.64 {d18, d19}, [r1]
; LE-NEXT:    adr r1, .LCPI0_1
; LE-NEXT:    vld1.64 {d16, d17}, [r2:128]
; LE-NEXT:    vshr.s16 q10, q9, #15
; LE-NEXT:    vneg.s16 q8, q8
; LE-NEXT:    vld1.64 {d22, d23}, [r1:128]
; LE-NEXT:    adr r1, .LCPI0_2
; LE-NEXT:    vshl.u16 q8, q10, q8
; LE-NEXT:    vneg.s16 q10, q11
; LE-NEXT:    vadd.i16 q8, q9, q8
; LE-NEXT:    vshl.s16 q8, q8, q10
; LE-NEXT:    vld1.64 {d20, d21}, [r1:128]
; LE-NEXT:    vbit q8, q9, q10
; LE-NEXT:    vst1.64 {d16, d17}, [r0]
; LE:       .LCPI0_0:
; LE-NEXT:    .short 16 @ 0x10
; LE-NEXT:    .short 14 @ 0xe
; LE-NEXT:    .short 15 @ 0xf
; LE-NEXT:    .short 13 @ 0xd
; LE-NEXT:    .short 12 @ 0xc
; LE-NEXT:    .short 10 @ 0xa
; LE-NEXT:    .short 11 @ 0xb
; LE-NEXT:    .short 9 @ 0x9
; LE-NEXT:  .LCPI0_1:
; LE-NEXT:    .short 0 @ 0x0
; LE-NEXT:    .short 2 @ 0x2
; LE-NEXT:    .short 1 @ 0x1
; LE-NEXT:    .short 3 @ 0x3
; LE-NEXT:    .short 4 @ 0x4
; LE-NEXT:    .short 6 @ 0x6
; LE-NEXT:    .short 5 @ 0x5
; LE-NEXT:    .short 7 @ 0x7
; LE-NEXT:  .LCPI0_2:
; LE-NEXT:    .short 65535 @ 0xffff
; LE-NEXT:    .short 0 @ 0x0
; LE-NEXT:    .short 0 @ 0x0
; LE-NEXT:    .short 0 @ 0x0
; LE-NEXT:    .short 0 @ 0x0
; LE-NEXT:    .short 0 @ 0x0
; LE-NEXT:    .short 0 @ 0x0
; LE-NEXT:    .short 0 @ 0x0
;
; BE-LABEL: sdiv_shl:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    adr r2, .LCPI0_0
; BE-NEXT:    vld1.64 {d18, d19}, [r1]
; BE-NEXT:    adr r1, .LCPI0_1
; BE-NEXT:    vld1.64 {d16, d17}, [r2:128]
; BE-NEXT:    vrev64.16 q8, q8
; BE-NEXT:    vrev64.16 q9, q9
; BE-NEXT:    vneg.s16 q8, q8
; BE-NEXT:    vld1.64 {d20, d21}, [r1:128]
; BE-NEXT:    adr r1, .LCPI0_2
; BE-NEXT:    vshr.s16 q11, q9, #15
; BE-NEXT:    vrev64.16 q10, q10
; BE-NEXT:    vshl.u16 q8, q11, q8
; BE-NEXT:    vld1.64 {d22, d23}, [r1:128]
; BE-NEXT:    vneg.s16 q10, q10
; BE-NEXT:    vrev64.16 q11, q11
; BE-NEXT:    vadd.i16 q8, q9, q8
; BE-NEXT:    vshl.s16 q8, q8, q10
; BE-NEXT:    vbit q8, q9, q11
; BE-NEXT:    vrev64.16 q8, q8
; BE-NEXT:    vst1.64 {d16, d17}, [r0]
; BE:       .LCPI0_0:
; BE-NEXT:    .short 16 @ 0x10
; BE-NEXT:    .short 14 @ 0xe
; BE-NEXT:    .short 15 @ 0xf
; BE-NEXT:    .short 13 @ 0xd
; BE-NEXT:    .short 12 @ 0xc
; BE-NEXT:    .short 10 @ 0xa
; BE-NEXT:    .short 11 @ 0xb
; BE-NEXT:    .short 9 @ 0x9
; BE-NEXT:  .LCPI0_1:
; BE-NEXT:    .short 0 @ 0x0
; BE-NEXT:    .short 2 @ 0x2
; BE-NEXT:    .short 1 @ 0x1
; BE-NEXT:    .short 3 @ 0x3
; BE-NEXT:    .short 4 @ 0x4
; BE-NEXT:    .short 6 @ 0x6
; BE-NEXT:    .short 5 @ 0x5
; BE-NEXT:    .short 7 @ 0x7
; BE-NEXT:  .LCPI0_2:
; BE-NEXT:    .short 65535 @ 0xffff
; BE-NEXT:    .short 0 @ 0x0
; BE-NEXT:    .short 0 @ 0x0
; BE-NEXT:    .short 0 @ 0x0
; BE-NEXT:    .short 0 @ 0x0
; BE-NEXT:    .short 0 @ 0x0
; BE-NEXT:    .short 0 @ 0x0
; BE-NEXT:    .short 0 @ 0x0
entry:
  ; The block label must stay "entry": the expected output above matches the
  ; "@ %entry" annotation that llc prints next to %bb.0.
  %dividend = load <8 x i16>, ptr %y, align 8
  ; Every divisor lane is a power of two (1, 4, 2, 8, 16, 64, 32, 128).
  %quotient = sdiv <8 x i16> %dividend,
                   <i16 1, i16 4, i16 2, i16 8, i16 16, i16 64, i16 32, i16 128>
  store <8 x i16> %quotient, ptr %x, align 8
  ret void
}