# mach: bfin

// The assembly program uses two instructions to speed up the decoder inner
// loop (spelled VIT_MAX and SIGN in the code below):
//   R6= VMAX/VMAX (R5, R4) A0>>2;
//   R2 =H+L (SGN(R0)*R1);
// VMAX is a 2-way parallel comparison of four updated path metrics, resulting
// in 2 new path metrics as well as a 2 bit field indicating the selection
// results. This 2 bit field is shifted into accumulator A0. This instruction
// implements the selections of a complete butterfly for a rate 1/n system.
// The H+L(SGN) instruction is used to compute the branch metric used by each
// butterfly. It takes as input a pair of values representing the received
// symbol, and another pair of values which are +1 or -1. The latter come
// from a pre-computed table that holds all the branch metric information for
// a specific set of polynomials. As all symbols are assumed to be binary,
// distance metrics between a received symbol and a branch metric are computed
// by adding and subtracting the values of the symbol according to the
// transition of a branch.
18 19.include "testutils.inc" 20 start 21 22 // 16 in bytes for M2 23 // A few pointer initializations 24 // P2 points to decision history, where outputs are stored 25 loadsym P2, DecisionHistory 26 27 // P4 holds address of APMFrom 28 loadsym P4, APMFrom; 29 30 // P5 holds address of APMTo 31 loadsym P5, APMTo; 32 33 // I0 points to precomputed d's 34 loadsym I0, BranchStorage; 35 36 M2.L = 32; 37 38 loadsym P0, InputData; 39 40 // storage for all precomputed branch metrics 41 loadsym P1, BranchStorage; 42 43 R6 = 0; R0 = 0; // inits 44 45 R0.L = 0x0001; 46 R0.H = 0x0001; 47 [ P1 + 0 ] = R0; 48 R0.L = 0xffff; 49 R0.H = 0xffff; 50 [ P1 + 4 ] = R0; 51 R0.L = 0xffff; 52 R0.H = 0x0001; 53 [ P1 + 8 ] = R0; 54 R0.L = 0x0001; 55 R0.H = 0xffff; 56 [ P1 + 12 ] = R0; 57 R0.L = 0xffff; 58 R0.H = 0x0001; 59 [ P1 + 16 ] = R0; 60 R0.L = 0x0001; 61 R0.H = 0xffff; 62 [ P1 + 20 ] = R0; 63 R0.L = 0x0001; 64 R0.H = 0x0001; 65 [ P1 + 24 ] = R0; 66 R0.L = 0xffff; 67 R0.H = 0xffff; 68 [ P1 + 28 ] = R0; 69 R0.L = 0x0001; 70 R0.H = 0xffff; 71 [ P1 + 32 ] = R0; 72 R0.L = 0xffff; 73 R0.H = 0x0001; 74 [ P1 + 36 ] = R0; 75 R0.L = 0xffff; 76 R0.H = 0xffff; 77 [ P1 + 40 ] = R0; 78 R0.L = 0x0001; 79 R0.H = 0x0001; 80 [ P1 + 44 ] = R0; 81 R0.L = 0xffff; 82 R0.H = 0xffff; 83 [ P1 + 48 ] = R0; 84 R0.L = 0x0001; 85 R0.H = 0x0001; 86 [ P1 + 52 ] = R0; 87 R0.L = 0x0001; 88 R0.H = 0xffff; 89 [ P1 + 56 ] = R0; 90 R0.L = 0xffff; 91 R0.H = 0x0001; 92 [ P1 + 60 ] = R0; 93 94 P1 = 18; 95 LSETUP ( L$0 , L$0end ) LC0 = P1; // SymNo loop start 96 97L$0: 98 99 // Get a symbol and leave it resident in R1 100 R1 = [ P0 ]; // R1=(InputData[SymNo*2+1] InputData[SymNo*2]) 101 P0 += 4; 102 103 A0 = 0; 104 105 // I0 points to precomputed D1, D0 106 loadsym I0, BranchStorage; 107 108 I1 = P4; // I1 points to APM[From] 109 I2 = P4; 110 I2 += M2; // I2 points to APM[From+16] 111 I3 = P5; // I3 points to APM[To] 112 113 P1 = 16; 114 P1 += -1; 115 LSETUP ( L$1 , L$1end ) LC1 = P1; 116 117 // APMFrom and APMTo are in 
alternate 118 // memory banks. 119 120 R0 = [ I0 ++ ]; // load R0 = (D1 D0) 121 R3.L = W [ I1 ++ ]; // load RL3 = PM0 122 // (R1 holds current symbol) 123 124 R2.H = R2.L = SIGN(R0.H) * R1.H + SIGN(R0.L) * R1.L; // apply sum-on-sign instruction 125 R3.H = W [ I2 ++ ]; // now, R3 = (PM1 PM0) 126 127L$1: 128 R5 = R3 +|- R2 , R4 = R3 -|+ R2 || R0 = [ I0 ++ ] || NOP; 129 // R5 = (PM11 PM01) R4 = (PM10 PM00) 130 // and load next (D1 D0) 131 132 R6 = VIT_MAX( R5 , R4 ) (ASR) || R3.L = W [ I1 ++ ] || NOP; 133 // do 2 ACS in parallel 134 // R6 = (nPM1 nPM0) and update to A0 135 136L$1end: 137 138 R2.H = R2.L = SIGN(R0.H) * R1.H + SIGN(R0.L) * R1.L || R3.H = W [ I2 ++ ] || [ I3 ++ ] = R6; 139 // store new path metrics in 140 // two consecutive locations 141 142 R5 = R3 +|- R2 , R4 = R3 -|+ R2; 143 144 R6 = VIT_MAX( R5 , R4 ) (ASR); 145 146 [ I3 ++ ] = R6; 147 148 R7 = A0.w; 149 [ P2 ] = R7; 150 P2 += 4; // store history 151 152 FP = P4; // swap pointers From <--> To 153 P4 = P5; 154L$0end: 155 P5 = FP; 156 157 // check results 158 loadsym I0, DecisionHistory 159 160 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6ff2 ); 161 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0xf99f ); 162 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x9909 ); 163 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x6666 ); 164 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x0096 ); 165 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x6996 ); 166 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x9309 ); 167 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x0000 ); 168 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0xffff ); 169 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0xffff ); 170 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0xf0ff ); 171 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0xcf00 ); 172 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x9009 ); 173 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x07f6 ); 174 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6004 ); 175 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x6996 ); 176 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x8338 ); 177 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x3443 ); 178 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6bd6 ); 179 R0.H = W [ I0 
++ ]; DBGA ( R0.H , 0x6197 ); 180 R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6c26 ); 181 R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x0990 ); 182 183 pass 184 185 .data 186 .align 8 187InputData: 188 .dw 0x0001 189 .dw 0x0001 190 .dw 0xffff 191 .dw 0xfffb 192 .dw 0x0005 193 .dw 0x0001 194 .dw 0xfffd 195 .dw 0xfffd 196 .dw 0x0005 197 .dw 0x0001 198 .dw 0x0001 199 .dw 0x0001 200 .dw 0xffff 201 .dw 0xfffb 202 .dw 0x0005 203 .dw 0x0001 204 .dw 0xfffd 205 .dw 0xfffd 206 .dw 0x0005 207 .dw 0x0001 208 209 .align 8 210APMFrom: 211 .dw 0xc000 212 .dw 0x0 213 .dw 0xc000 214 .dw 0xc000 215 .dw 0xc000 216 .dw 0xc000 217 .dw 0xc000 218 .dw 0xc000 219 .dw 0xc000 220 .dw 0xc000 221 .dw 0xc000 222 .dw 0xc000 223 .dw 0xc000 224 .dw 0xc000 225 .dw 0xc000 226 .dw 0xc000 227 .dw 0xc000 228 .dw 0xc000 229 .dw 0xc000 230 .dw 0xc000 231 .dw 0xc000 232 .dw 0xc000 233 .dw 0xc000 234 .dw 0xc000 235 .dw 0xc000 236 .dw 0xc000 237 .dw 0xc000 238 .dw 0xc000 239 .dw 0xc000 240 .dw 0xc000 241 .dw 0xc000 242 .dw 0xc000 243 244 .align 8 245APMTo: 246 .space (32*8) 247 248 .align 8 249BranchStorage: 250 .space (32*8) 251 252 .align 8 253DecisionHistory: 254 .space (18*4) 255