#!/usr/bin/env python
"""Rewrite GCC-style x86 ``__builtin_ia32_*`` calls to ``_mm_*`` intrinsics.

The files named on the command line are edited in place (via ``fileinput``).
One diagnostic is written to stderr for every builtin that was replaced on a
line, and for every builtin listed in ``unhandled`` that was seen.
"""

import fileinput
import re
import sys

# Exit status of the script. Nothing currently sets it to a non-zero value.
err = 0

# Giant associative set of builtin->intrinsic mappings where clang
# doesn't implement the builtin. (Either because the vector operation
# works without a builtin, or for other reasons.)

repl_map = {
    "__builtin_ia32_addps": "_mm_add_ps",
    "__builtin_ia32_addsd": "_mm_add_sd",
    "__builtin_ia32_addpd": "_mm_add_pd",
    "__builtin_ia32_addss": "_mm_add_ss",
    "__builtin_ia32_paddb128": "_mm_add_epi8",
    "__builtin_ia32_paddw128": "_mm_add_epi16",
    "__builtin_ia32_paddd128": "_mm_add_epi32",
    "__builtin_ia32_paddq128": "_mm_add_epi64",
    "__builtin_ia32_subps": "_mm_sub_ps",
    "__builtin_ia32_subsd": "_mm_sub_sd",
    "__builtin_ia32_subpd": "_mm_sub_pd",
    "__builtin_ia32_subss": "_mm_sub_ss",
    "__builtin_ia32_psubb128": "_mm_sub_epi8",
    "__builtin_ia32_psubw128": "_mm_sub_epi16",
    "__builtin_ia32_psubd128": "_mm_sub_epi32",
    "__builtin_ia32_psubq128": "_mm_sub_epi64",
    "__builtin_ia32_mulsd": "_mm_mul_sd",
    "__builtin_ia32_mulpd": "_mm_mul_pd",
    "__builtin_ia32_mulps": "_mm_mul_ps",
    "__builtin_ia32_mulss": "_mm_mul_ss",
    "__builtin_ia32_pmullw128": "_mm_mullo_epi16",
    "__builtin_ia32_divsd": "_mm_div_sd",
    "__builtin_ia32_divpd": "_mm_div_pd",
    "__builtin_ia32_divps": "_mm_div_ps",
    # This key used to be a second "__builtin_ia32_subss", which silently
    # overrode the subtract mapping above and left divss unhandled.
    "__builtin_ia32_divss": "_mm_div_ss",
    "__builtin_ia32_andpd": "_mm_and_pd",
    "__builtin_ia32_andps": "_mm_and_ps",
    "__builtin_ia32_pand128": "_mm_and_si128",
    "__builtin_ia32_andnpd": "_mm_andnot_pd",
    "__builtin_ia32_andnps": "_mm_andnot_ps",
    "__builtin_ia32_pandn128": "_mm_andnot_si128",
    "__builtin_ia32_orpd": "_mm_or_pd",
    "__builtin_ia32_orps": "_mm_or_ps",
    "__builtin_ia32_por128": "_mm_or_si128",
    "__builtin_ia32_xorpd": "_mm_xor_pd",
    "__builtin_ia32_xorps": "_mm_xor_ps",
    "__builtin_ia32_pxor128": "_mm_xor_si128",
    "__builtin_ia32_cvtps2dq": "_mm_cvtps_epi32",
    "__builtin_ia32_cvtsd2ss": "_mm_cvtsd_ss",
    "__builtin_ia32_cvtsi2sd": "_mm_cvtsi32_sd",
    "__builtin_ia32_cvtss2sd": "_mm_cvtss_sd",
    "__builtin_ia32_cvttsd2si": "_mm_cvttsd_si32",
    "__builtin_ia32_vec_ext_v2df": "_mm_cvtsd_f64",
    "__builtin_ia32_loadhpd": "_mm_loadh_pd",
    "__builtin_ia32_loadlpd": "_mm_loadl_pd",
    "__builtin_ia32_loadlv4si": "_mm_loadl_epi64",
    "__builtin_ia32_cmpeqps": "_mm_cmpeq_ps",
    "__builtin_ia32_cmpltps": "_mm_cmplt_ps",
    "__builtin_ia32_cmpleps": "_mm_cmple_ps",
    "__builtin_ia32_cmpgtps": "_mm_cmpgt_ps",
    "__builtin_ia32_cmpgeps": "_mm_cmpge_ps",
    "__builtin_ia32_cmpunordps": "_mm_cmpunord_ps",
    "__builtin_ia32_cmpneqps": "_mm_cmpneq_ps",
    "__builtin_ia32_cmpnltps": "_mm_cmpnlt_ps",
    "__builtin_ia32_cmpnleps": "_mm_cmpnle_ps",
    "__builtin_ia32_cmpngtps": "_mm_cmpngt_ps",
    "__builtin_ia32_cmpordps": "_mm_cmpord_ps",
    "__builtin_ia32_cmpeqss": "_mm_cmpeq_ss",
    "__builtin_ia32_cmpltss": "_mm_cmplt_ss",
    "__builtin_ia32_cmpless": "_mm_cmple_ss",
    "__builtin_ia32_cmpunordss": "_mm_cmpunord_ss",
    "__builtin_ia32_cmpneqss": "_mm_cmpneq_ss",
    "__builtin_ia32_cmpnltss": "_mm_cmpnlt_ss",
    "__builtin_ia32_cmpnless": "_mm_cmpnle_ss",
    "__builtin_ia32_cmpngtss": "_mm_cmpngt_ss",
    "__builtin_ia32_cmpngess": "_mm_cmpnge_ss",
    "__builtin_ia32_cmpordss": "_mm_cmpord_ss",
    "__builtin_ia32_movss": "_mm_move_ss",
    "__builtin_ia32_movsd": "_mm_move_sd",
    "__builtin_ia32_movhlps": "_mm_movehl_ps",
    "__builtin_ia32_movlhps": "_mm_movelh_ps",
    "__builtin_ia32_movqv4si": "_mm_move_epi64",
    "__builtin_ia32_unpckhps": "_mm_unpackhi_ps",
    "__builtin_ia32_unpckhpd": "_mm_unpackhi_pd",
    "__builtin_ia32_punpckhbw128": "_mm_unpackhi_epi8",
    "__builtin_ia32_punpckhwd128": "_mm_unpackhi_epi16",
    "__builtin_ia32_punpckhdq128": "_mm_unpackhi_epi32",
    "__builtin_ia32_punpckhqdq128": "_mm_unpackhi_epi64",
    "__builtin_ia32_unpcklps": "_mm_unpacklo_ps",
    "__builtin_ia32_unpcklpd": "_mm_unpacklo_pd",
    "__builtin_ia32_punpcklbw128": "_mm_unpacklo_epi8",
    "__builtin_ia32_punpcklwd128": "_mm_unpacklo_epi16",
    "__builtin_ia32_punpckldq128": "_mm_unpacklo_epi32",
    "__builtin_ia32_punpcklqdq128": "_mm_unpacklo_epi64",
    "__builtin_ia32_cmpeqpd": "_mm_cmpeq_pd",
    "__builtin_ia32_cmpltpd": "_mm_cmplt_pd",
    "__builtin_ia32_cmplepd": "_mm_cmple_pd",
    "__builtin_ia32_cmpgtpd": "_mm_cmpgt_pd",
    "__builtin_ia32_cmpgepd": "_mm_cmpge_pd",
    "__builtin_ia32_cmpunordpd": "_mm_cmpunord_pd",
    "__builtin_ia32_cmpneqpd": "_mm_cmpneq_pd",
    "__builtin_ia32_cmpnltpd": "_mm_cmpnlt_pd",
    "__builtin_ia32_cmpnlepd": "_mm_cmpnle_pd",
    "__builtin_ia32_cmpngtpd": "_mm_cmpngt_pd",
    "__builtin_ia32_cmpngepd": "_mm_cmpnge_pd",
    "__builtin_ia32_cmpordpd": "_mm_cmpord_pd",
    "__builtin_ia32_cmpeqsd": "_mm_cmpeq_sd",
    "__builtin_ia32_cmpltsd": "_mm_cmplt_sd",
    "__builtin_ia32_cmplesd": "_mm_cmple_sd",
    "__builtin_ia32_cmpunordsd": "_mm_cmpunord_sd",
    "__builtin_ia32_cmpneqsd": "_mm_cmpneq_sd",
    "__builtin_ia32_cmpnltsd": "_mm_cmpnlt_sd",
    "__builtin_ia32_cmpnlesd": "_mm_cmpnle_sd",
    "__builtin_ia32_cmpordsd": "_mm_cmpord_sd",
    "__builtin_ia32_cvtsi642ss": "_mm_cvtsi64_ss",
    # cvttss2si64 truncates, so it maps to the "cvtt" intrinsic; the
    # previous target (_mm_cvtss_si64) is the rounding conversion.
    "__builtin_ia32_cvttss2si64": "_mm_cvttss_si64",
    "__builtin_ia32_shufps": "_mm_shuffle_ps",
    "__builtin_ia32_shufpd": "_mm_shuffle_pd",
    "__builtin_ia32_pshufhw": "_mm_shufflehi_epi16",
    "__builtin_ia32_pshuflw": "_mm_shufflelo_epi16",
    "__builtin_ia32_pshufd": "_mm_shuffle_epi32",
    "__builtin_ia32_movshdup": "_mm_movehdup_ps",
    "__builtin_ia32_movsldup": "_mm_moveldup_ps",
    "__builtin_ia32_maxps": "_mm_max_ps",
    "__builtin_ia32_pslldi128": "_mm_slli_epi32",
    "__builtin_ia32_vec_set_v16qi": "_mm_insert_epi8",
    "__builtin_ia32_vec_set_v8hi": "_mm_insert_epi16",
    "__builtin_ia32_vec_set_v4si": "_mm_insert_epi32",
    "__builtin_ia32_vec_set_v2di": "_mm_insert_epi64",
    "__builtin_ia32_vec_set_v4hi": "_mm_insert_pi16",
    "__builtin_ia32_vec_ext_v16qi": "_mm_extract_epi8",
    "__builtin_ia32_vec_ext_v8hi": "_mm_extract_epi16",
    "__builtin_ia32_vec_ext_v4si": "_mm_extract_epi32",
    "__builtin_ia32_vec_ext_v2di": "_mm_extract_epi64",
    "__builtin_ia32_vec_ext_v4hi": "_mm_extract_pi16",
    "__builtin_ia32_vec_ext_v4sf": "_mm_extract_ps",
    # Removed MMX builtins
    "__builtin_ia32_paddb": "_mm_add_pi8",
    "__builtin_ia32_paddw": "_mm_add_pi16",
    "__builtin_ia32_paddd": "_mm_add_pi32",
    "__builtin_ia32_paddsb": "_mm_adds_pi8",
    "__builtin_ia32_paddsw": "_mm_adds_pi16",
    "__builtin_ia32_paddusb": "_mm_adds_pu8",
    "__builtin_ia32_paddusw": "_mm_adds_pu16",
    "__builtin_ia32_psubb": "_mm_sub_pi8",
    "__builtin_ia32_psubw": "_mm_sub_pi16",
    "__builtin_ia32_psubd": "_mm_sub_pi32",
    "__builtin_ia32_psubsb": "_mm_subs_pi8",
    "__builtin_ia32_psubsw": "_mm_subs_pi16",
    "__builtin_ia32_psubusb": "_mm_subs_pu8",
    "__builtin_ia32_psubusw": "_mm_subs_pu16",
    "__builtin_ia32_pmulhw": "_mm_mulhi_pi16",
    "__builtin_ia32_pmullw": "_mm_mullo_pi16",
    "__builtin_ia32_pmaddwd": "_mm_madd_pi16",
    "__builtin_ia32_pand": "_mm_and_si64",
    "__builtin_ia32_pandn": "_mm_andnot_si64",
    "__builtin_ia32_por": "_mm_or_si64",
    "__builtin_ia32_pxor": "_mm_xor_si64",
    "__builtin_ia32_psllw": "_mm_sll_pi16",
    "__builtin_ia32_pslld": "_mm_sll_pi32",
    "__builtin_ia32_psllq": "_mm_sll_si64",
    "__builtin_ia32_psrlw": "_mm_srl_pi16",
    "__builtin_ia32_psrld": "_mm_srl_pi32",
    "__builtin_ia32_psrlq": "_mm_srl_si64",
    "__builtin_ia32_psraw": "_mm_sra_pi16",
    "__builtin_ia32_psrad": "_mm_sra_pi32",
    "__builtin_ia32_psllwi": "_mm_slli_pi16",
    "__builtin_ia32_pslldi": "_mm_slli_pi32",
    "__builtin_ia32_psllqi": "_mm_slli_si64",
    "__builtin_ia32_psrlwi": "_mm_srli_pi16",
    "__builtin_ia32_psrldi": "_mm_srli_pi32",
    "__builtin_ia32_psrlqi": "_mm_srli_si64",
    "__builtin_ia32_psrawi": "_mm_srai_pi16",
    "__builtin_ia32_psradi": "_mm_srai_pi32",
    "__builtin_ia32_packsswb": "_mm_packs_pi16",
    "__builtin_ia32_packssdw": "_mm_packs_pi32",
    "__builtin_ia32_packuswb": "_mm_packs_pu16",
    "__builtin_ia32_punpckhbw": "_mm_unpackhi_pi8",
    "__builtin_ia32_punpckhwd": "_mm_unpackhi_pi16",
    "__builtin_ia32_punpckhdq": "_mm_unpackhi_pi32",
    "__builtin_ia32_punpcklbw": "_mm_unpacklo_pi8",
    "__builtin_ia32_punpcklwd": "_mm_unpacklo_pi16",
    "__builtin_ia32_punpckldq": "_mm_unpacklo_pi32",
    "__builtin_ia32_pcmpeqb": "_mm_cmpeq_pi8",
    "__builtin_ia32_pcmpeqw": "_mm_cmpeq_pi16",
    "__builtin_ia32_pcmpeqd": "_mm_cmpeq_pi32",
    "__builtin_ia32_pcmpgtb": "_mm_cmpgt_pi8",
    "__builtin_ia32_pcmpgtw": "_mm_cmpgt_pi16",
    "__builtin_ia32_pcmpgtd": "_mm_cmpgt_pi32",
    "__builtin_ia32_maskmovq": "_mm_maskmove_si64",
    "__builtin_ia32_movntq": "_mm_stream_pi",
    "__builtin_ia32_vec_init_v2si": "_mm_setr_pi32",
    "__builtin_ia32_vec_init_v4hi": "_mm_setr_pi16",
    "__builtin_ia32_vec_init_v8qi": "_mm_setr_pi8",
    "__builtin_ia32_cvtpi2ps": "_mm_cvtpi32_ps",
    "__builtin_ia32_cvtps2pi": "_mm_cvtps_pi32",
    "__builtin_ia32_cvttps2pi": "_mm_cvttps_pi32",
    "__builtin_ia32_pavgb": "_mm_avg_pu8",
    "__builtin_ia32_pavgw": "_mm_avg_pu16",
    "__builtin_ia32_pmaxsw": "_mm_max_pi16",
    "__builtin_ia32_pmaxub": "_mm_max_pu8",
    "__builtin_ia32_pminsw": "_mm_min_pi16",
    "__builtin_ia32_pminub": "_mm_min_pu8",
    "__builtin_ia32_pmovmskb": "_mm_movemask_pi8",
    "__builtin_ia32_pmulhuw": "_mm_mulhi_pu16",
    "__builtin_ia32_psadbw": "_mm_sad_pu8",
    "__builtin_ia32_pshufw": "_mm_shuffle_pi16",
    "__builtin_ia32_cvtpd2pi": "_mm_cvtpd_pi32",
    "__builtin_ia32_cvtpi2pd": "_mm_cvtpi32_pd",
    "__builtin_ia32_cvttpd2pi": "_mm_cvttpd_pi32",
    "__builtin_ia32_paddq": "_mm_add_si64",
    "__builtin_ia32_pmuludq": "_mm_mul_su32",
    "__builtin_ia32_psubq": "_mm_sub_si64",
    "__builtin_ia32_pabsb": "_mm_abs_pi8",
    "__builtin_ia32_pabsd": "_mm_abs_pi32",
    "__builtin_ia32_pabsw": "_mm_abs_pi16",
    "__builtin_ia32_palignr": "_mm_alignr_pi8",
    "__builtin_ia32_phaddd": "_mm_hadd_pi32",
    "__builtin_ia32_phaddsw": "_mm_hadds_pi16",
    "__builtin_ia32_phaddw": "_mm_hadd_pi16",
    "__builtin_ia32_phsubd": "_mm_hsub_pi32",
    "__builtin_ia32_phsubsw": "_mm_hsubs_pi16",
    "__builtin_ia32_phsubw": "_mm_hsub_pi16",
    "__builtin_ia32_pmaddubsw": "_mm_maddubs_pi16",
    "__builtin_ia32_pmulhrsw": "_mm_mulhrs_pi16",
    "__builtin_ia32_pshufb": "_mm_shuffle_pi8",
    "__builtin_ia32_psignw": "_mm_sign_pi16",
    "__builtin_ia32_psignb": "_mm_sign_pi8",
    "__builtin_ia32_psignd": "_mm_sign_pi32",
}

# Special unhandled cases:
#   __builtin_ia32_vec_ext_*(__P, idx) -> _mm_store_sd/_mm_storeh_pd
#     depending on index. No abstract insert/extract for these oddly.
# NOTE(review): __builtin_ia32_vec_ext_v2df is also present in repl_map, so
# it is both rewritten and reported as needing manual attention.
unhandled = [
    "__builtin_ia32_vec_ext_v2df",
    "__builtin_ia32_vec_ext_v2si",
]


def _whole_word_pattern(names):
    """Compile a regex matching any of *names* as a complete identifier."""
    # The \b anchors stop a short builtin name from matching inside a longer
    # one (e.g. "..._pand" inside "..._pandn" or "..._addps" inside
    # "..._addps256"), which plain substring replacement would corrupt.
    alternatives = "|".join(re.escape(name) for name in names)
    return re.compile(r"\b(?:%s)\b" % alternatives)


_REPL_RE = _whole_word_pattern(repl_map)
_UNHANDLED_RE = _whole_word_pattern(unhandled)


def rewrite_line(line):
    """Rewrite every known builtin in *line* to its intrinsic.

    Returns a ``(new_line, replaced, cant)`` tuple where ``new_line`` is the
    rewritten text, ``replaced`` is a list of ``(builtin, intrinsic)`` pairs
    (one per distinct builtin, in order of first appearance) and ``cant`` is
    the list of distinct ``unhandled`` builtins found in the original text.
    """
    replaced = []

    def substitute(match):
        builtin = match.group(0)
        pair = (builtin, repl_map[builtin])
        if pair not in replaced:
            replaced.append(pair)
        return pair[1]

    # Scan the original text: after substitution an unhandled builtin that
    # also has a repl_map entry would no longer be visible.
    cant = []
    for builtin in _UNHANDLED_RE.findall(line):
        if builtin not in cant:
            cant.append(builtin)

    return _REPL_RE.sub(substitute, line), replaced, cant


def report_repl(builtin, repl):
    """Log to stderr that *builtin* was replaced with *repl* on this line.

    Must be called while a ``fileinput`` iteration is active, because the
    file name and line number are taken from it.
    """
    sys.stderr.write(
        "%s:%d: x86 builtin %s used, replaced with %s\n"
        % (fileinput.filename(), fileinput.filelineno(), builtin, repl)
    )


def report_cant(builtin):
    """Log to stderr that *builtin* was seen but has no single replacement.

    Must be called while a ``fileinput`` iteration is active, because the
    file name and line number are taken from it.
    """
    sys.stderr.write(
        "%s:%d: x86 builtin %s used, too many replacements\n"
        % (fileinput.filename(), fileinput.filelineno(), builtin)
    )


def main():
    """Rewrite the files named in ``sys.argv`` in place; return exit status."""
    # With inplace enabled, fileinput redirects stdout into the file being
    # processed, so writing each line back is what performs the edit.
    for line in fileinput.input(inplace=True):
        new_line, replaced, cant = rewrite_line(line)
        for builtin, repl in replaced:
            report_repl(builtin, repl)
        for builtin in cant:
            report_cant(builtin)
        sys.stdout.write(new_line)
    return err


if __name__ == "__main__":
    sys.exit(main())