1*8e3e3a7aSWarner Losh /* 2*8e3e3a7aSWarner Losh ** $Id: lstrlib.c,v 1.254 2016/12/22 13:08:50 roberto Exp $ 3*8e3e3a7aSWarner Losh ** Standard library for string operations and pattern-matching 4*8e3e3a7aSWarner Losh ** See Copyright Notice in lua.h 5*8e3e3a7aSWarner Losh */ 6*8e3e3a7aSWarner Losh 7*8e3e3a7aSWarner Losh #define lstrlib_c 8*8e3e3a7aSWarner Losh #define LUA_LIB 9*8e3e3a7aSWarner Losh 10*8e3e3a7aSWarner Losh #include "lprefix.h" 11*8e3e3a7aSWarner Losh 12*8e3e3a7aSWarner Losh 13*8e3e3a7aSWarner Losh #include <ctype.h> 14*8e3e3a7aSWarner Losh #include <float.h> 15*8e3e3a7aSWarner Losh #include <limits.h> 16*8e3e3a7aSWarner Losh #include <locale.h> 17*8e3e3a7aSWarner Losh #include <stddef.h> 18*8e3e3a7aSWarner Losh #include <stdio.h> 19*8e3e3a7aSWarner Losh #include <stdlib.h> 20*8e3e3a7aSWarner Losh #include <string.h> 21*8e3e3a7aSWarner Losh 22*8e3e3a7aSWarner Losh #include "lua.h" 23*8e3e3a7aSWarner Losh 24*8e3e3a7aSWarner Losh #include "lauxlib.h" 25*8e3e3a7aSWarner Losh #include "lualib.h" 26*8e3e3a7aSWarner Losh 27*8e3e3a7aSWarner Losh 28*8e3e3a7aSWarner Losh /* 29*8e3e3a7aSWarner Losh ** maximum number of captures that a pattern can do during 30*8e3e3a7aSWarner Losh ** pattern-matching. This limit is arbitrary, but must fit in 31*8e3e3a7aSWarner Losh ** an unsigned char. 32*8e3e3a7aSWarner Losh */ 33*8e3e3a7aSWarner Losh #if !defined(LUA_MAXCAPTURES) 34*8e3e3a7aSWarner Losh #define LUA_MAXCAPTURES 32 35*8e3e3a7aSWarner Losh #endif 36*8e3e3a7aSWarner Losh 37*8e3e3a7aSWarner Losh 38*8e3e3a7aSWarner Losh /* macro to 'unsign' a character */ 39*8e3e3a7aSWarner Losh #define uchar(c) ((unsigned char)(c)) 40*8e3e3a7aSWarner Losh 41*8e3e3a7aSWarner Losh 42*8e3e3a7aSWarner Losh /* 43*8e3e3a7aSWarner Losh ** Some sizes are better limited to fit in 'int', but must also fit in 44*8e3e3a7aSWarner Losh ** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) 45*8e3e3a7aSWarner Losh */ 46*8e3e3a7aSWarner Losh #define MAX_SIZET ((size_t)(~(size_t)0)) 47*8e3e3a7aSWarner Losh 48*8e3e3a7aSWarner Losh #define MAXSIZE \ 49*8e3e3a7aSWarner Losh (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX)) 50*8e3e3a7aSWarner Losh 51*8e3e3a7aSWarner Losh 52*8e3e3a7aSWarner Losh 53*8e3e3a7aSWarner Losh 54*8e3e3a7aSWarner Losh static int str_len (lua_State *L) { 55*8e3e3a7aSWarner Losh size_t l; 56*8e3e3a7aSWarner Losh luaL_checklstring(L, 1, &l); 57*8e3e3a7aSWarner Losh lua_pushinteger(L, (lua_Integer)l); 58*8e3e3a7aSWarner Losh return 1; 59*8e3e3a7aSWarner Losh } 60*8e3e3a7aSWarner Losh 61*8e3e3a7aSWarner Losh 62*8e3e3a7aSWarner Losh /* translate a relative string position: negative means back from end */ 63*8e3e3a7aSWarner Losh static lua_Integer posrelat (lua_Integer pos, size_t len) { 64*8e3e3a7aSWarner Losh if (pos >= 0) return pos; 65*8e3e3a7aSWarner Losh else if (0u - (size_t)pos > len) return 0; 66*8e3e3a7aSWarner Losh else return (lua_Integer)len + pos + 1; 67*8e3e3a7aSWarner Losh } 68*8e3e3a7aSWarner Losh 69*8e3e3a7aSWarner Losh 70*8e3e3a7aSWarner Losh static int str_sub (lua_State *L) { 71*8e3e3a7aSWarner Losh size_t l; 72*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &l); 73*8e3e3a7aSWarner Losh lua_Integer start = posrelat(luaL_checkinteger(L, 2), l); 74*8e3e3a7aSWarner Losh lua_Integer end = posrelat(luaL_optinteger(L, 3, -1), l); 75*8e3e3a7aSWarner Losh if (start < 1) start = 1; 76*8e3e3a7aSWarner Losh if (end > (lua_Integer)l) end = l; 77*8e3e3a7aSWarner Losh if (start <= end) 78*8e3e3a7aSWarner Losh lua_pushlstring(L, s + start - 1, (size_t)(end - start) + 1); 79*8e3e3a7aSWarner Losh else lua_pushliteral(L, ""); 80*8e3e3a7aSWarner Losh return 1; 81*8e3e3a7aSWarner Losh } 82*8e3e3a7aSWarner Losh 83*8e3e3a7aSWarner Losh 84*8e3e3a7aSWarner Losh static int str_reverse (lua_State *L) { 85*8e3e3a7aSWarner Losh size_t l, i; 86*8e3e3a7aSWarner Losh luaL_Buffer b; 87*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &l); 88*8e3e3a7aSWarner Losh char *p = luaL_buffinitsize(L, &b, l); 89*8e3e3a7aSWarner Losh for (i = 0; i < l; i++) 90*8e3e3a7aSWarner Losh p[i] = s[l - i - 1]; 91*8e3e3a7aSWarner Losh luaL_pushresultsize(&b, l); 92*8e3e3a7aSWarner Losh return 1; 93*8e3e3a7aSWarner Losh } 94*8e3e3a7aSWarner Losh 95*8e3e3a7aSWarner Losh 96*8e3e3a7aSWarner Losh static int str_lower (lua_State *L) { 97*8e3e3a7aSWarner Losh size_t l; 98*8e3e3a7aSWarner Losh size_t i; 99*8e3e3a7aSWarner Losh luaL_Buffer b; 100*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &l); 101*8e3e3a7aSWarner Losh char *p = luaL_buffinitsize(L, &b, l); 102*8e3e3a7aSWarner Losh for (i=0; i<l; i++) 103*8e3e3a7aSWarner Losh p[i] = tolower(uchar(s[i])); 104*8e3e3a7aSWarner Losh luaL_pushresultsize(&b, l); 105*8e3e3a7aSWarner Losh return 1; 106*8e3e3a7aSWarner Losh } 107*8e3e3a7aSWarner Losh 108*8e3e3a7aSWarner Losh 109*8e3e3a7aSWarner Losh static int str_upper (lua_State *L) { 110*8e3e3a7aSWarner Losh size_t l; 111*8e3e3a7aSWarner Losh size_t i; 112*8e3e3a7aSWarner Losh luaL_Buffer b; 113*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &l); 114*8e3e3a7aSWarner Losh char *p = luaL_buffinitsize(L, &b, l); 115*8e3e3a7aSWarner Losh for (i=0; i<l; i++) 116*8e3e3a7aSWarner Losh p[i] = toupper(uchar(s[i])); 117*8e3e3a7aSWarner Losh luaL_pushresultsize(&b, l); 118*8e3e3a7aSWarner Losh return 1; 119*8e3e3a7aSWarner Losh } 120*8e3e3a7aSWarner Losh 121*8e3e3a7aSWarner Losh 122*8e3e3a7aSWarner Losh static int str_rep (lua_State *L) { 123*8e3e3a7aSWarner Losh size_t l, lsep; 124*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &l); 125*8e3e3a7aSWarner Losh lua_Integer n = luaL_checkinteger(L, 2); 126*8e3e3a7aSWarner Losh const char *sep = luaL_optlstring(L, 3, "", &lsep); 127*8e3e3a7aSWarner Losh if (n <= 0) lua_pushliteral(L, ""); 128*8e3e3a7aSWarner Losh else if (l + lsep < l || l + lsep > MAXSIZE / n) /* may overflow? */ 129*8e3e3a7aSWarner Losh return luaL_error(L, "resulting string too large"); 130*8e3e3a7aSWarner Losh else { 131*8e3e3a7aSWarner Losh size_t totallen = (size_t)n * l + (size_t)(n - 1) * lsep; 132*8e3e3a7aSWarner Losh luaL_Buffer b; 133*8e3e3a7aSWarner Losh char *p = luaL_buffinitsize(L, &b, totallen); 134*8e3e3a7aSWarner Losh while (n-- > 1) { /* first n-1 copies (followed by separator) */ 135*8e3e3a7aSWarner Losh memcpy(p, s, l * sizeof(char)); p += l; 136*8e3e3a7aSWarner Losh if (lsep > 0) { /* empty 'memcpy' is not that cheap */ 137*8e3e3a7aSWarner Losh memcpy(p, sep, lsep * sizeof(char)); 138*8e3e3a7aSWarner Losh p += lsep; 139*8e3e3a7aSWarner Losh } 140*8e3e3a7aSWarner Losh } 141*8e3e3a7aSWarner Losh memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ 142*8e3e3a7aSWarner Losh luaL_pushresultsize(&b, totallen); 143*8e3e3a7aSWarner Losh } 144*8e3e3a7aSWarner Losh return 1; 145*8e3e3a7aSWarner Losh } 146*8e3e3a7aSWarner Losh 147*8e3e3a7aSWarner Losh 148*8e3e3a7aSWarner Losh static int str_byte (lua_State *L) { 149*8e3e3a7aSWarner Losh size_t l; 150*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &l); 151*8e3e3a7aSWarner Losh lua_Integer posi = posrelat(luaL_optinteger(L, 2, 1), l); 152*8e3e3a7aSWarner Losh lua_Integer pose = posrelat(luaL_optinteger(L, 3, posi), l); 153*8e3e3a7aSWarner Losh int n, i; 154*8e3e3a7aSWarner Losh if (posi < 1) posi = 1; 155*8e3e3a7aSWarner Losh if (pose > (lua_Integer)l) pose = l; 156*8e3e3a7aSWarner Losh if (posi > pose) return 0; /* empty interval; return no values */ 157*8e3e3a7aSWarner Losh if (pose - posi >= INT_MAX) /* arithmetic overflow? */ 158*8e3e3a7aSWarner Losh return luaL_error(L, "string slice too long"); 159*8e3e3a7aSWarner Losh n = (int)(pose - posi) + 1; 160*8e3e3a7aSWarner Losh luaL_checkstack(L, n, "string slice too long"); 161*8e3e3a7aSWarner Losh for (i=0; i<n; i++) 162*8e3e3a7aSWarner Losh lua_pushinteger(L, uchar(s[posi+i-1])); 163*8e3e3a7aSWarner Losh return n; 164*8e3e3a7aSWarner Losh } 165*8e3e3a7aSWarner Losh 166*8e3e3a7aSWarner Losh 167*8e3e3a7aSWarner Losh static int str_char (lua_State *L) { 168*8e3e3a7aSWarner Losh int n = lua_gettop(L); /* number of arguments */ 169*8e3e3a7aSWarner Losh int i; 170*8e3e3a7aSWarner Losh luaL_Buffer b; 171*8e3e3a7aSWarner Losh char *p = luaL_buffinitsize(L, &b, n); 172*8e3e3a7aSWarner Losh for (i=1; i<=n; i++) { 173*8e3e3a7aSWarner Losh lua_Integer c = luaL_checkinteger(L, i); 174*8e3e3a7aSWarner Losh luaL_argcheck(L, uchar(c) == c, i, "value out of range"); 175*8e3e3a7aSWarner Losh p[i - 1] = uchar(c); 176*8e3e3a7aSWarner Losh } 177*8e3e3a7aSWarner Losh luaL_pushresultsize(&b, n); 178*8e3e3a7aSWarner Losh return 1; 179*8e3e3a7aSWarner Losh } 180*8e3e3a7aSWarner Losh 181*8e3e3a7aSWarner Losh 182*8e3e3a7aSWarner Losh static int writer (lua_State *L, const void *b, size_t size, void *B) { 183*8e3e3a7aSWarner Losh (void)L; 184*8e3e3a7aSWarner Losh luaL_addlstring((luaL_Buffer *) B, (const char *)b, size); 185*8e3e3a7aSWarner Losh return 0; 186*8e3e3a7aSWarner Losh } 187*8e3e3a7aSWarner Losh 188*8e3e3a7aSWarner Losh 189*8e3e3a7aSWarner Losh static int str_dump (lua_State *L) { 190*8e3e3a7aSWarner Losh luaL_Buffer b; 191*8e3e3a7aSWarner Losh int strip = lua_toboolean(L, 2); 192*8e3e3a7aSWarner Losh luaL_checktype(L, 1, LUA_TFUNCTION); 193*8e3e3a7aSWarner Losh lua_settop(L, 1); 194*8e3e3a7aSWarner Losh luaL_buffinit(L,&b); 195*8e3e3a7aSWarner Losh if (lua_dump(L, writer, &b, strip) != 0) 196*8e3e3a7aSWarner Losh return luaL_error(L, "unable to dump given function"); 197*8e3e3a7aSWarner Losh luaL_pushresult(&b); 198*8e3e3a7aSWarner Losh return 1; 199*8e3e3a7aSWarner Losh } 200*8e3e3a7aSWarner Losh 201*8e3e3a7aSWarner Losh 202*8e3e3a7aSWarner Losh 203*8e3e3a7aSWarner Losh /* 204*8e3e3a7aSWarner Losh ** {====================================================== 205*8e3e3a7aSWarner Losh ** PATTERN MATCHING 206*8e3e3a7aSWarner Losh ** ======================================================= 207*8e3e3a7aSWarner Losh */ 208*8e3e3a7aSWarner Losh 209*8e3e3a7aSWarner Losh 210*8e3e3a7aSWarner Losh #define CAP_UNFINISHED (-1) 211*8e3e3a7aSWarner Losh #define CAP_POSITION (-2) 212*8e3e3a7aSWarner Losh 213*8e3e3a7aSWarner Losh 214*8e3e3a7aSWarner Losh typedef struct MatchState { 215*8e3e3a7aSWarner Losh const char *src_init; /* init of source string */ 216*8e3e3a7aSWarner Losh const char *src_end; /* end ('\0') of source string */ 217*8e3e3a7aSWarner Losh const char *p_end; /* end ('\0') of pattern */ 218*8e3e3a7aSWarner Losh lua_State *L; 219*8e3e3a7aSWarner Losh int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ 220*8e3e3a7aSWarner Losh unsigned char level; /* total number of captures (finished or unfinished) */ 221*8e3e3a7aSWarner Losh struct { 222*8e3e3a7aSWarner Losh const char *init; 223*8e3e3a7aSWarner Losh ptrdiff_t len; 224*8e3e3a7aSWarner Losh } capture[LUA_MAXCAPTURES]; 225*8e3e3a7aSWarner Losh } MatchState; 226*8e3e3a7aSWarner Losh 227*8e3e3a7aSWarner Losh 228*8e3e3a7aSWarner Losh /* recursive function */ 229*8e3e3a7aSWarner Losh static const char *match (MatchState *ms, const char *s, const char *p); 230*8e3e3a7aSWarner Losh 231*8e3e3a7aSWarner Losh 232*8e3e3a7aSWarner Losh /* maximum recursion depth for 'match' */ 233*8e3e3a7aSWarner Losh #if !defined(MAXCCALLS) 234*8e3e3a7aSWarner Losh #define MAXCCALLS 200 235*8e3e3a7aSWarner Losh #endif 236*8e3e3a7aSWarner Losh 237*8e3e3a7aSWarner Losh 238*8e3e3a7aSWarner Losh #define L_ESC '%' 239*8e3e3a7aSWarner Losh #define SPECIALS "^$*+?.([%-" 240*8e3e3a7aSWarner Losh 241*8e3e3a7aSWarner Losh 242*8e3e3a7aSWarner Losh static int check_capture (MatchState *ms, int l) { 243*8e3e3a7aSWarner Losh l -= '1'; 244*8e3e3a7aSWarner Losh if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) 245*8e3e3a7aSWarner Losh return luaL_error(ms->L, "invalid capture index %%%d", l + 1); 246*8e3e3a7aSWarner Losh return l; 247*8e3e3a7aSWarner Losh } 248*8e3e3a7aSWarner Losh 249*8e3e3a7aSWarner Losh 250*8e3e3a7aSWarner Losh static int capture_to_close (MatchState *ms) { 251*8e3e3a7aSWarner Losh int level = ms->level; 252*8e3e3a7aSWarner Losh for (level--; level>=0; level--) 253*8e3e3a7aSWarner Losh if (ms->capture[level].len == CAP_UNFINISHED) return level; 254*8e3e3a7aSWarner Losh return luaL_error(ms->L, "invalid pattern capture"); 255*8e3e3a7aSWarner Losh } 256*8e3e3a7aSWarner Losh 257*8e3e3a7aSWarner Losh 258*8e3e3a7aSWarner Losh static const char *classend (MatchState *ms, const char *p) { 259*8e3e3a7aSWarner Losh switch (*p++) { 260*8e3e3a7aSWarner Losh case L_ESC: { 261*8e3e3a7aSWarner Losh if (p == ms->p_end) 262*8e3e3a7aSWarner Losh luaL_error(ms->L, "malformed pattern (ends with '%%')"); 263*8e3e3a7aSWarner Losh return p+1; 264*8e3e3a7aSWarner Losh } 265*8e3e3a7aSWarner Losh case '[': { 266*8e3e3a7aSWarner Losh if (*p == '^') p++; 267*8e3e3a7aSWarner Losh do { /* look for a ']' */ 268*8e3e3a7aSWarner Losh if (p == ms->p_end) 269*8e3e3a7aSWarner Losh luaL_error(ms->L, "malformed pattern (missing ']')"); 270*8e3e3a7aSWarner Losh if (*(p++) == L_ESC && p < ms->p_end) 271*8e3e3a7aSWarner Losh p++; /* skip escapes (e.g. '%]') */ 272*8e3e3a7aSWarner Losh } while (*p != ']'); 273*8e3e3a7aSWarner Losh return p+1; 274*8e3e3a7aSWarner Losh } 275*8e3e3a7aSWarner Losh default: { 276*8e3e3a7aSWarner Losh return p; 277*8e3e3a7aSWarner Losh } 278*8e3e3a7aSWarner Losh } 279*8e3e3a7aSWarner Losh } 280*8e3e3a7aSWarner Losh 281*8e3e3a7aSWarner Losh 282*8e3e3a7aSWarner Losh static int match_class (int c, int cl) { 283*8e3e3a7aSWarner Losh int res; 284*8e3e3a7aSWarner Losh switch (tolower(cl)) { 285*8e3e3a7aSWarner Losh case 'a' : res = isalpha(c); break; 286*8e3e3a7aSWarner Losh case 'c' : res = iscntrl(c); break; 287*8e3e3a7aSWarner Losh case 'd' : res = isdigit(c); break; 288*8e3e3a7aSWarner Losh case 'g' : res = isgraph(c); break; 289*8e3e3a7aSWarner Losh case 'l' : res = islower(c); break; 290*8e3e3a7aSWarner Losh case 'p' : res = ispunct(c); break; 291*8e3e3a7aSWarner Losh case 's' : res = isspace(c); break; 292*8e3e3a7aSWarner Losh case 'u' : res = isupper(c); break; 293*8e3e3a7aSWarner Losh case 'w' : res = isalnum(c); break; 294*8e3e3a7aSWarner Losh case 'x' : res = isxdigit(c); break; 295*8e3e3a7aSWarner Losh case 'z' : res = (c == 0); break; /* deprecated option */ 296*8e3e3a7aSWarner Losh default: return (cl == c); 297*8e3e3a7aSWarner Losh } 298*8e3e3a7aSWarner Losh return (islower(cl) ? res : !res); 299*8e3e3a7aSWarner Losh } 300*8e3e3a7aSWarner Losh 301*8e3e3a7aSWarner Losh 302*8e3e3a7aSWarner Losh static int matchbracketclass (int c, const char *p, const char *ec) { 303*8e3e3a7aSWarner Losh int sig = 1; 304*8e3e3a7aSWarner Losh if (*(p+1) == '^') { 305*8e3e3a7aSWarner Losh sig = 0; 306*8e3e3a7aSWarner Losh p++; /* skip the '^' */ 307*8e3e3a7aSWarner Losh } 308*8e3e3a7aSWarner Losh while (++p < ec) { 309*8e3e3a7aSWarner Losh if (*p == L_ESC) { 310*8e3e3a7aSWarner Losh p++; 311*8e3e3a7aSWarner Losh if (match_class(c, uchar(*p))) 312*8e3e3a7aSWarner Losh return sig; 313*8e3e3a7aSWarner Losh } 314*8e3e3a7aSWarner Losh else if ((*(p+1) == '-') && (p+2 < ec)) { 315*8e3e3a7aSWarner Losh p+=2; 316*8e3e3a7aSWarner Losh if (uchar(*(p-2)) <= c && c <= uchar(*p)) 317*8e3e3a7aSWarner Losh return sig; 318*8e3e3a7aSWarner Losh } 319*8e3e3a7aSWarner Losh else if (uchar(*p) == c) return sig; 320*8e3e3a7aSWarner Losh } 321*8e3e3a7aSWarner Losh return !sig; 322*8e3e3a7aSWarner Losh } 323*8e3e3a7aSWarner Losh 324*8e3e3a7aSWarner Losh 325*8e3e3a7aSWarner Losh static int singlematch (MatchState *ms, const char *s, const char *p, 326*8e3e3a7aSWarner Losh const char *ep) { 327*8e3e3a7aSWarner Losh if (s >= ms->src_end) 328*8e3e3a7aSWarner Losh return 0; 329*8e3e3a7aSWarner Losh else { 330*8e3e3a7aSWarner Losh int c = uchar(*s); 331*8e3e3a7aSWarner Losh switch (*p) { 332*8e3e3a7aSWarner Losh case '.': return 1; /* matches any char */ 333*8e3e3a7aSWarner Losh case L_ESC: return match_class(c, uchar(*(p+1))); 334*8e3e3a7aSWarner Losh case '[': return matchbracketclass(c, p, ep-1); 335*8e3e3a7aSWarner Losh default: return (uchar(*p) == c); 336*8e3e3a7aSWarner Losh } 337*8e3e3a7aSWarner Losh } 338*8e3e3a7aSWarner Losh } 339*8e3e3a7aSWarner Losh 340*8e3e3a7aSWarner Losh 341*8e3e3a7aSWarner Losh static const char *matchbalance (MatchState *ms, const char *s, 342*8e3e3a7aSWarner Losh const char *p) { 343*8e3e3a7aSWarner Losh if (p >= ms->p_end - 1) 344*8e3e3a7aSWarner Losh luaL_error(ms->L, "malformed pattern (missing arguments to '%%b')"); 345*8e3e3a7aSWarner Losh if (*s != *p) return NULL; 346*8e3e3a7aSWarner Losh else { 347*8e3e3a7aSWarner Losh int b = *p; 348*8e3e3a7aSWarner Losh int e = *(p+1); 349*8e3e3a7aSWarner Losh int cont = 1; 350*8e3e3a7aSWarner Losh while (++s < ms->src_end) { 351*8e3e3a7aSWarner Losh if (*s == e) { 352*8e3e3a7aSWarner Losh if (--cont == 0) return s+1; 353*8e3e3a7aSWarner Losh } 354*8e3e3a7aSWarner Losh else if (*s == b) cont++; 355*8e3e3a7aSWarner Losh } 356*8e3e3a7aSWarner Losh } 357*8e3e3a7aSWarner Losh return NULL; /* string ends out of balance */ 358*8e3e3a7aSWarner Losh } 359*8e3e3a7aSWarner Losh 360*8e3e3a7aSWarner Losh 361*8e3e3a7aSWarner Losh static const char *max_expand (MatchState *ms, const char *s, 362*8e3e3a7aSWarner Losh const char *p, const char *ep) { 363*8e3e3a7aSWarner Losh ptrdiff_t i = 0; /* counts maximum expand for item */ 364*8e3e3a7aSWarner Losh while (singlematch(ms, s + i, p, ep)) 365*8e3e3a7aSWarner Losh i++; 366*8e3e3a7aSWarner Losh /* keeps trying to match with the maximum repetitions */ 367*8e3e3a7aSWarner Losh while (i>=0) { 368*8e3e3a7aSWarner Losh const char *res = match(ms, (s+i), ep+1); 369*8e3e3a7aSWarner Losh if (res) return res; 370*8e3e3a7aSWarner Losh i--; /* else didn't match; reduce 1 repetition to try again */ 371*8e3e3a7aSWarner Losh } 372*8e3e3a7aSWarner Losh return NULL; 373*8e3e3a7aSWarner Losh } 374*8e3e3a7aSWarner Losh 375*8e3e3a7aSWarner Losh 376*8e3e3a7aSWarner Losh static const char *min_expand (MatchState *ms, const char *s, 377*8e3e3a7aSWarner Losh const char *p, const char *ep) { 378*8e3e3a7aSWarner Losh for (;;) { 379*8e3e3a7aSWarner Losh const char *res = match(ms, s, ep+1); 380*8e3e3a7aSWarner Losh if (res != NULL) 381*8e3e3a7aSWarner Losh return res; 382*8e3e3a7aSWarner Losh else if (singlematch(ms, s, p, ep)) 383*8e3e3a7aSWarner Losh s++; /* try with one more repetition */ 384*8e3e3a7aSWarner Losh else return NULL; 385*8e3e3a7aSWarner Losh } 386*8e3e3a7aSWarner Losh } 387*8e3e3a7aSWarner Losh 388*8e3e3a7aSWarner Losh 389*8e3e3a7aSWarner Losh static const char *start_capture (MatchState *ms, const char *s, 390*8e3e3a7aSWarner Losh const char *p, int what) { 391*8e3e3a7aSWarner Losh const char *res; 392*8e3e3a7aSWarner Losh int level = ms->level; 393*8e3e3a7aSWarner Losh if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); 394*8e3e3a7aSWarner Losh ms->capture[level].init = s; 395*8e3e3a7aSWarner Losh ms->capture[level].len = what; 396*8e3e3a7aSWarner Losh ms->level = level+1; 397*8e3e3a7aSWarner Losh if ((res=match(ms, s, p)) == NULL) /* match failed? */ 398*8e3e3a7aSWarner Losh ms->level--; /* undo capture */ 399*8e3e3a7aSWarner Losh return res; 400*8e3e3a7aSWarner Losh } 401*8e3e3a7aSWarner Losh 402*8e3e3a7aSWarner Losh 403*8e3e3a7aSWarner Losh static const char *end_capture (MatchState *ms, const char *s, 404*8e3e3a7aSWarner Losh const char *p) { 405*8e3e3a7aSWarner Losh int l = capture_to_close(ms); 406*8e3e3a7aSWarner Losh const char *res; 407*8e3e3a7aSWarner Losh ms->capture[l].len = s - ms->capture[l].init; /* close capture */ 408*8e3e3a7aSWarner Losh if ((res = match(ms, s, p)) == NULL) /* match failed? */ 409*8e3e3a7aSWarner Losh ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ 410*8e3e3a7aSWarner Losh return res; 411*8e3e3a7aSWarner Losh } 412*8e3e3a7aSWarner Losh 413*8e3e3a7aSWarner Losh 414*8e3e3a7aSWarner Losh static const char *match_capture (MatchState *ms, const char *s, int l) { 415*8e3e3a7aSWarner Losh size_t len; 416*8e3e3a7aSWarner Losh l = check_capture(ms, l); 417*8e3e3a7aSWarner Losh len = ms->capture[l].len; 418*8e3e3a7aSWarner Losh if ((size_t)(ms->src_end-s) >= len && 419*8e3e3a7aSWarner Losh memcmp(ms->capture[l].init, s, len) == 0) 420*8e3e3a7aSWarner Losh return s+len; 421*8e3e3a7aSWarner Losh else return NULL; 422*8e3e3a7aSWarner Losh } 423*8e3e3a7aSWarner Losh 424*8e3e3a7aSWarner Losh 425*8e3e3a7aSWarner Losh static const char *match (MatchState *ms, const char *s, const char *p) { 426*8e3e3a7aSWarner Losh if (ms->matchdepth-- == 0) 427*8e3e3a7aSWarner Losh luaL_error(ms->L, "pattern too complex"); 428*8e3e3a7aSWarner Losh init: /* using goto's to optimize tail recursion */ 429*8e3e3a7aSWarner Losh if (p != ms->p_end) { /* end of pattern? */ 430*8e3e3a7aSWarner Losh switch (*p) { 431*8e3e3a7aSWarner Losh case '(': { /* start capture */ 432*8e3e3a7aSWarner Losh if (*(p + 1) == ')') /* position capture? */ 433*8e3e3a7aSWarner Losh s = start_capture(ms, s, p + 2, CAP_POSITION); 434*8e3e3a7aSWarner Losh else 435*8e3e3a7aSWarner Losh s = start_capture(ms, s, p + 1, CAP_UNFINISHED); 436*8e3e3a7aSWarner Losh break; 437*8e3e3a7aSWarner Losh } 438*8e3e3a7aSWarner Losh case ')': { /* end capture */ 439*8e3e3a7aSWarner Losh s = end_capture(ms, s, p + 1); 440*8e3e3a7aSWarner Losh break; 441*8e3e3a7aSWarner Losh } 442*8e3e3a7aSWarner Losh case '$': { 443*8e3e3a7aSWarner Losh if ((p + 1) != ms->p_end) /* is the '$' the last char in pattern? */ 444*8e3e3a7aSWarner Losh goto dflt; /* no; go to default */ 445*8e3e3a7aSWarner Losh s = (s == ms->src_end) ? s : NULL; /* check end of string */ 446*8e3e3a7aSWarner Losh break; 447*8e3e3a7aSWarner Losh } 448*8e3e3a7aSWarner Losh case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ 449*8e3e3a7aSWarner Losh switch (*(p + 1)) { 450*8e3e3a7aSWarner Losh case 'b': { /* balanced string? */ 451*8e3e3a7aSWarner Losh s = matchbalance(ms, s, p + 2); 452*8e3e3a7aSWarner Losh if (s != NULL) { 453*8e3e3a7aSWarner Losh p += 4; goto init; /* return match(ms, s, p + 4); */ 454*8e3e3a7aSWarner Losh } /* else fail (s == NULL) */ 455*8e3e3a7aSWarner Losh break; 456*8e3e3a7aSWarner Losh } 457*8e3e3a7aSWarner Losh case 'f': { /* frontier? */ 458*8e3e3a7aSWarner Losh const char *ep; char previous; 459*8e3e3a7aSWarner Losh p += 2; 460*8e3e3a7aSWarner Losh if (*p != '[') 461*8e3e3a7aSWarner Losh luaL_error(ms->L, "missing '[' after '%%f' in pattern"); 462*8e3e3a7aSWarner Losh ep = classend(ms, p); /* points to what is next */ 463*8e3e3a7aSWarner Losh previous = (s == ms->src_init) ? '\0' : *(s - 1); 464*8e3e3a7aSWarner Losh if (!matchbracketclass(uchar(previous), p, ep - 1) && 465*8e3e3a7aSWarner Losh matchbracketclass(uchar(*s), p, ep - 1)) { 466*8e3e3a7aSWarner Losh p = ep; goto init; /* return match(ms, s, ep); */ 467*8e3e3a7aSWarner Losh } 468*8e3e3a7aSWarner Losh s = NULL; /* match failed */ 469*8e3e3a7aSWarner Losh break; 470*8e3e3a7aSWarner Losh } 471*8e3e3a7aSWarner Losh case '0': case '1': case '2': case '3': 472*8e3e3a7aSWarner Losh case '4': case '5': case '6': case '7': 473*8e3e3a7aSWarner Losh case '8': case '9': { /* capture results (%0-%9)? */ 474*8e3e3a7aSWarner Losh s = match_capture(ms, s, uchar(*(p + 1))); 475*8e3e3a7aSWarner Losh if (s != NULL) { 476*8e3e3a7aSWarner Losh p += 2; goto init; /* return match(ms, s, p + 2) */ 477*8e3e3a7aSWarner Losh } 478*8e3e3a7aSWarner Losh break; 479*8e3e3a7aSWarner Losh } 480*8e3e3a7aSWarner Losh default: goto dflt; 481*8e3e3a7aSWarner Losh } 482*8e3e3a7aSWarner Losh break; 483*8e3e3a7aSWarner Losh } 484*8e3e3a7aSWarner Losh default: dflt: { /* pattern class plus optional suffix */ 485*8e3e3a7aSWarner Losh const char *ep = classend(ms, p); /* points to optional suffix */ 486*8e3e3a7aSWarner Losh /* does not match at least once? */ 487*8e3e3a7aSWarner Losh if (!singlematch(ms, s, p, ep)) { 488*8e3e3a7aSWarner Losh if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ 489*8e3e3a7aSWarner Losh p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ 490*8e3e3a7aSWarner Losh } 491*8e3e3a7aSWarner Losh else /* '+' or no suffix */ 492*8e3e3a7aSWarner Losh s = NULL; /* fail */ 493*8e3e3a7aSWarner Losh } 494*8e3e3a7aSWarner Losh else { /* matched once */ 495*8e3e3a7aSWarner Losh switch (*ep) { /* handle optional suffix */ 496*8e3e3a7aSWarner Losh case '?': { /* optional */ 497*8e3e3a7aSWarner Losh const char *res; 498*8e3e3a7aSWarner Losh if ((res = match(ms, s + 1, ep + 1)) != NULL) 499*8e3e3a7aSWarner Losh s = res; 500*8e3e3a7aSWarner Losh else { 501*8e3e3a7aSWarner Losh p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ 502*8e3e3a7aSWarner Losh } 503*8e3e3a7aSWarner Losh break; 504*8e3e3a7aSWarner Losh } 505*8e3e3a7aSWarner Losh case '+': /* 1 or more repetitions */ 506*8e3e3a7aSWarner Losh s++; /* 1 match already done */ 507*8e3e3a7aSWarner Losh /* FALLTHROUGH */ 508*8e3e3a7aSWarner Losh case '*': /* 0 or more repetitions */ 509*8e3e3a7aSWarner Losh s = max_expand(ms, s, p, ep); 510*8e3e3a7aSWarner Losh break; 511*8e3e3a7aSWarner Losh case '-': /* 0 or more repetitions (minimum) */ 512*8e3e3a7aSWarner Losh s = min_expand(ms, s, p, ep); 513*8e3e3a7aSWarner Losh break; 514*8e3e3a7aSWarner Losh default: /* no suffix */ 515*8e3e3a7aSWarner Losh s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ 516*8e3e3a7aSWarner Losh } 517*8e3e3a7aSWarner Losh } 518*8e3e3a7aSWarner Losh break; 519*8e3e3a7aSWarner Losh } 520*8e3e3a7aSWarner Losh } 521*8e3e3a7aSWarner Losh } 522*8e3e3a7aSWarner Losh ms->matchdepth++; 523*8e3e3a7aSWarner Losh return s; 524*8e3e3a7aSWarner Losh } 525*8e3e3a7aSWarner Losh 526*8e3e3a7aSWarner Losh 527*8e3e3a7aSWarner Losh 528*8e3e3a7aSWarner Losh static const char *lmemfind (const char *s1, size_t l1, 529*8e3e3a7aSWarner Losh const char *s2, size_t l2) { 530*8e3e3a7aSWarner Losh if (l2 == 0) return s1; /* empty strings are everywhere */ 531*8e3e3a7aSWarner Losh else if (l2 > l1) return NULL; /* avoids a negative 'l1' */ 532*8e3e3a7aSWarner Losh else { 533*8e3e3a7aSWarner Losh const char *init; /* to search for a '*s2' inside 's1' */ 534*8e3e3a7aSWarner Losh l2--; /* 1st char will be checked by 'memchr' */ 535*8e3e3a7aSWarner Losh l1 = l1-l2; /* 's2' cannot be found after that */ 536*8e3e3a7aSWarner Losh while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { 537*8e3e3a7aSWarner Losh init++; /* 1st char is already checked */ 538*8e3e3a7aSWarner Losh if (memcmp(init, s2+1, l2) == 0) 539*8e3e3a7aSWarner Losh return init-1; 540*8e3e3a7aSWarner Losh else { /* correct 'l1' and 's1' to try again */ 541*8e3e3a7aSWarner Losh l1 -= init-s1; 542*8e3e3a7aSWarner Losh s1 = init; 543*8e3e3a7aSWarner Losh } 544*8e3e3a7aSWarner Losh } 545*8e3e3a7aSWarner Losh return NULL; /* not found */ 546*8e3e3a7aSWarner Losh } 547*8e3e3a7aSWarner Losh } 548*8e3e3a7aSWarner Losh 549*8e3e3a7aSWarner Losh 550*8e3e3a7aSWarner Losh static void push_onecapture (MatchState *ms, int i, const char *s, 551*8e3e3a7aSWarner Losh const char *e) { 552*8e3e3a7aSWarner Losh if (i >= ms->level) { 553*8e3e3a7aSWarner Losh if (i == 0) /* ms->level == 0, too */ 554*8e3e3a7aSWarner Losh lua_pushlstring(ms->L, s, e - s); /* add whole match */ 555*8e3e3a7aSWarner Losh else 556*8e3e3a7aSWarner Losh luaL_error(ms->L, "invalid capture index %%%d", i + 1); 557*8e3e3a7aSWarner Losh } 558*8e3e3a7aSWarner Losh else { 559*8e3e3a7aSWarner Losh ptrdiff_t l = ms->capture[i].len; 560*8e3e3a7aSWarner Losh if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); 561*8e3e3a7aSWarner Losh if (l == CAP_POSITION) 562*8e3e3a7aSWarner Losh lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1); 563*8e3e3a7aSWarner Losh else 564*8e3e3a7aSWarner Losh lua_pushlstring(ms->L, ms->capture[i].init, l); 565*8e3e3a7aSWarner Losh } 566*8e3e3a7aSWarner Losh } 567*8e3e3a7aSWarner Losh 568*8e3e3a7aSWarner Losh 569*8e3e3a7aSWarner Losh static int push_captures (MatchState *ms, const char *s, const char *e) { 570*8e3e3a7aSWarner Losh int i; 571*8e3e3a7aSWarner Losh int nlevels = (ms->level == 0 && s) ? 1 : ms->level; 572*8e3e3a7aSWarner Losh luaL_checkstack(ms->L, nlevels, "too many captures"); 573*8e3e3a7aSWarner Losh for (i = 0; i < nlevels; i++) 574*8e3e3a7aSWarner Losh push_onecapture(ms, i, s, e); 575*8e3e3a7aSWarner Losh return nlevels; /* number of strings pushed */ 576*8e3e3a7aSWarner Losh } 577*8e3e3a7aSWarner Losh 578*8e3e3a7aSWarner Losh 579*8e3e3a7aSWarner Losh /* check whether pattern has no special characters */ 580*8e3e3a7aSWarner Losh static int nospecials (const char *p, size_t l) { 581*8e3e3a7aSWarner Losh size_t upto = 0; 582*8e3e3a7aSWarner Losh do { 583*8e3e3a7aSWarner Losh if (strpbrk(p + upto, SPECIALS)) 584*8e3e3a7aSWarner Losh return 0; /* pattern has a special character */ 585*8e3e3a7aSWarner Losh upto += strlen(p + upto) + 1; /* may have more after \0 */ 586*8e3e3a7aSWarner Losh } while (upto <= l); 587*8e3e3a7aSWarner Losh return 1; /* no special chars found */ 588*8e3e3a7aSWarner Losh } 589*8e3e3a7aSWarner Losh 590*8e3e3a7aSWarner Losh 591*8e3e3a7aSWarner Losh static void prepstate (MatchState *ms, lua_State *L, 592*8e3e3a7aSWarner Losh const char *s, size_t ls, const char *p, size_t lp) { 593*8e3e3a7aSWarner Losh ms->L = L; 594*8e3e3a7aSWarner Losh ms->matchdepth = MAXCCALLS; 595*8e3e3a7aSWarner Losh ms->src_init = s; 596*8e3e3a7aSWarner Losh ms->src_end = s + ls; 597*8e3e3a7aSWarner Losh ms->p_end = p + lp; 598*8e3e3a7aSWarner Losh } 599*8e3e3a7aSWarner Losh 600*8e3e3a7aSWarner Losh 601*8e3e3a7aSWarner Losh static void reprepstate (MatchState *ms) { 602*8e3e3a7aSWarner Losh ms->level = 0; 603*8e3e3a7aSWarner Losh lua_assert(ms->matchdepth == MAXCCALLS); 604*8e3e3a7aSWarner Losh } 605*8e3e3a7aSWarner Losh 606*8e3e3a7aSWarner Losh 607*8e3e3a7aSWarner Losh static int str_find_aux (lua_State *L, int find) { 608*8e3e3a7aSWarner Losh size_t ls, lp; 609*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &ls); 610*8e3e3a7aSWarner Losh const char *p = luaL_checklstring(L, 2, &lp); 611*8e3e3a7aSWarner Losh lua_Integer init = posrelat(luaL_optinteger(L, 3, 1), ls); 612*8e3e3a7aSWarner Losh if (init < 1) init = 1; 613*8e3e3a7aSWarner Losh else if (init > (lua_Integer)ls + 1) { /* start after string's end? */ 614*8e3e3a7aSWarner Losh lua_pushnil(L); /* cannot find anything */ 615*8e3e3a7aSWarner Losh return 1; 616*8e3e3a7aSWarner Losh } 617*8e3e3a7aSWarner Losh /* explicit request or no special characters? */ 618*8e3e3a7aSWarner Losh if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 619*8e3e3a7aSWarner Losh /* do a plain search */ 620*8e3e3a7aSWarner Losh const char *s2 = lmemfind(s + init - 1, ls - (size_t)init + 1, p, lp); 621*8e3e3a7aSWarner Losh if (s2) { 622*8e3e3a7aSWarner Losh lua_pushinteger(L, (s2 - s) + 1); 623*8e3e3a7aSWarner Losh lua_pushinteger(L, (s2 - s) + lp); 624*8e3e3a7aSWarner Losh return 2; 625*8e3e3a7aSWarner Losh } 626*8e3e3a7aSWarner Losh } 627*8e3e3a7aSWarner Losh else { 628*8e3e3a7aSWarner Losh MatchState ms; 629*8e3e3a7aSWarner Losh const char *s1 = s + init - 1; 630*8e3e3a7aSWarner Losh int anchor = (*p == '^'); 631*8e3e3a7aSWarner Losh if (anchor) { 632*8e3e3a7aSWarner Losh p++; lp--; /* skip anchor character */ 633*8e3e3a7aSWarner Losh } 634*8e3e3a7aSWarner Losh prepstate(&ms, L, s, ls, p, lp); 635*8e3e3a7aSWarner Losh do { 636*8e3e3a7aSWarner Losh const char *res; 637*8e3e3a7aSWarner Losh reprepstate(&ms); 638*8e3e3a7aSWarner Losh if ((res=match(&ms, s1, p)) != NULL) { 639*8e3e3a7aSWarner Losh if (find) { 640*8e3e3a7aSWarner Losh lua_pushinteger(L, (s1 - s) + 1); /* start */ 641*8e3e3a7aSWarner Losh lua_pushinteger(L, res - s); /* end */ 642*8e3e3a7aSWarner Losh return push_captures(&ms, NULL, 0) + 2; 643*8e3e3a7aSWarner Losh } 644*8e3e3a7aSWarner Losh else 645*8e3e3a7aSWarner Losh return push_captures(&ms, s1, res); 646*8e3e3a7aSWarner Losh } 647*8e3e3a7aSWarner Losh } while (s1++ < ms.src_end && !anchor); 648*8e3e3a7aSWarner Losh } 649*8e3e3a7aSWarner Losh lua_pushnil(L); /* not found */ 650*8e3e3a7aSWarner Losh return 1; 651*8e3e3a7aSWarner Losh } 652*8e3e3a7aSWarner Losh 653*8e3e3a7aSWarner Losh 654*8e3e3a7aSWarner Losh static int str_find (lua_State *L) { 655*8e3e3a7aSWarner Losh return str_find_aux(L, 1); 656*8e3e3a7aSWarner Losh } 657*8e3e3a7aSWarner Losh 658*8e3e3a7aSWarner Losh 659*8e3e3a7aSWarner Losh static int str_match (lua_State *L) { 660*8e3e3a7aSWarner Losh return str_find_aux(L, 0); 661*8e3e3a7aSWarner Losh } 662*8e3e3a7aSWarner Losh 663*8e3e3a7aSWarner Losh 664*8e3e3a7aSWarner Losh /* state for 'gmatch' */ 665*8e3e3a7aSWarner Losh typedef struct GMatchState { 666*8e3e3a7aSWarner Losh const char *src; /* current position */ 667*8e3e3a7aSWarner Losh const char *p; /* pattern */ 668*8e3e3a7aSWarner Losh const char *lastmatch; /* end of last match */ 669*8e3e3a7aSWarner Losh MatchState ms; /* match state */ 670*8e3e3a7aSWarner Losh } GMatchState; 671*8e3e3a7aSWarner Losh 672*8e3e3a7aSWarner Losh 673*8e3e3a7aSWarner Losh static int gmatch_aux (lua_State *L) { 674*8e3e3a7aSWarner Losh GMatchState *gm = (GMatchState *)lua_touserdata(L, lua_upvalueindex(3)); 675*8e3e3a7aSWarner Losh const char *src; 676*8e3e3a7aSWarner Losh gm->ms.L = L; 677*8e3e3a7aSWarner Losh for (src = gm->src; src <= gm->ms.src_end; src++) { 678*8e3e3a7aSWarner Losh const char *e; 679*8e3e3a7aSWarner Losh reprepstate(&gm->ms); 680*8e3e3a7aSWarner Losh if ((e = match(&gm->ms, src, gm->p)) != NULL && e != gm->lastmatch) { 681*8e3e3a7aSWarner Losh gm->src = gm->lastmatch = e; 682*8e3e3a7aSWarner Losh return push_captures(&gm->ms, src, e); 683*8e3e3a7aSWarner Losh } 684*8e3e3a7aSWarner Losh } 685*8e3e3a7aSWarner Losh return 0; /* not found */ 686*8e3e3a7aSWarner Losh } 687*8e3e3a7aSWarner Losh 688*8e3e3a7aSWarner Losh 689*8e3e3a7aSWarner Losh static int gmatch (lua_State *L) { 690*8e3e3a7aSWarner Losh size_t ls, lp; 691*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, 1, &ls); 692*8e3e3a7aSWarner Losh const char *p = luaL_checklstring(L, 2, &lp); 693*8e3e3a7aSWarner Losh GMatchState *gm; 694*8e3e3a7aSWarner Losh lua_settop(L, 2); /* keep them on closure to avoid being collected */ 695*8e3e3a7aSWarner Losh gm = (GMatchState *)lua_newuserdata(L, sizeof(GMatchState)); 696*8e3e3a7aSWarner Losh prepstate(&gm->ms, L, s, ls, p, lp); 697*8e3e3a7aSWarner Losh gm->src = s; gm->p = p; gm->lastmatch = NULL; 698*8e3e3a7aSWarner Losh lua_pushcclosure(L, gmatch_aux, 3); 699*8e3e3a7aSWarner Losh return 1; 700*8e3e3a7aSWarner Losh } 701*8e3e3a7aSWarner Losh 702*8e3e3a7aSWarner Losh 703*8e3e3a7aSWarner Losh static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, 704*8e3e3a7aSWarner Losh const char *e) { 705*8e3e3a7aSWarner Losh size_t l, i; 706*8e3e3a7aSWarner Losh lua_State *L = ms->L; 707*8e3e3a7aSWarner Losh const char *news = lua_tolstring(L, 3, &l); 708*8e3e3a7aSWarner Losh for (i = 0; i < l; i++) { 709*8e3e3a7aSWarner Losh if (news[i] != L_ESC) 710*8e3e3a7aSWarner Losh luaL_addchar(b, news[i]); 711*8e3e3a7aSWarner Losh else { 712*8e3e3a7aSWarner Losh i++; /* skip ESC */ 713*8e3e3a7aSWarner Losh if (!isdigit(uchar(news[i]))) { 714*8e3e3a7aSWarner Losh if (news[i] != L_ESC) 715*8e3e3a7aSWarner Losh luaL_error(L, "invalid use of '%c' in replacement string", L_ESC); 716*8e3e3a7aSWarner Losh luaL_addchar(b, news[i]); 717*8e3e3a7aSWarner Losh } 718*8e3e3a7aSWarner Losh else if (news[i] == '0') 719*8e3e3a7aSWarner Losh luaL_addlstring(b, s, e - s); 720*8e3e3a7aSWarner Losh else { 721*8e3e3a7aSWarner Losh push_onecapture(ms, news[i] - '1', s, e); 722*8e3e3a7aSWarner Losh luaL_tolstring(L, -1, NULL); /* if number, convert it to string */ 723*8e3e3a7aSWarner Losh lua_remove(L, -2); /* remove original value */ 724*8e3e3a7aSWarner Losh luaL_addvalue(b); /* add capture to accumulated result */ 725*8e3e3a7aSWarner Losh } 726*8e3e3a7aSWarner Losh } 727*8e3e3a7aSWarner Losh } 728*8e3e3a7aSWarner Losh } 729*8e3e3a7aSWarner Losh 730*8e3e3a7aSWarner Losh 731*8e3e3a7aSWarner Losh static void add_value (MatchState *ms, luaL_Buffer *b, const char *s, 732*8e3e3a7aSWarner Losh const char *e, int tr) { 733*8e3e3a7aSWarner Losh lua_State *L = ms->L; 734*8e3e3a7aSWarner Losh switch (tr) { 735*8e3e3a7aSWarner Losh case LUA_TFUNCTION: { 736*8e3e3a7aSWarner Losh int n; 737*8e3e3a7aSWarner Losh lua_pushvalue(L, 3); 738*8e3e3a7aSWarner Losh n = push_captures(ms, s, e); 739*8e3e3a7aSWarner Losh lua_call(L, n, 1); 740*8e3e3a7aSWarner Losh break; 741*8e3e3a7aSWarner Losh } 742*8e3e3a7aSWarner Losh case LUA_TTABLE: { 743*8e3e3a7aSWarner Losh push_onecapture(ms, 0, s, e); 744*8e3e3a7aSWarner Losh lua_gettable(L, 3); 745*8e3e3a7aSWarner Losh break; 746*8e3e3a7aSWarner Losh } 747*8e3e3a7aSWarner Losh default: { /* LUA_TNUMBER or LUA_TSTRING */ 748*8e3e3a7aSWarner Losh add_s(ms, b, s, e); 749*8e3e3a7aSWarner Losh return; 750*8e3e3a7aSWarner Losh } 751*8e3e3a7aSWarner Losh } 752*8e3e3a7aSWarner Losh if (!lua_toboolean(L, -1)) { /* nil or false? */ 753*8e3e3a7aSWarner Losh lua_pop(L, 1); 754*8e3e3a7aSWarner Losh lua_pushlstring(L, s, e - s); /* keep original text */ 755*8e3e3a7aSWarner Losh } 756*8e3e3a7aSWarner Losh else if (!lua_isstring(L, -1)) 757*8e3e3a7aSWarner Losh luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1)); 758*8e3e3a7aSWarner Losh luaL_addvalue(b); /* add result to accumulator */ 759*8e3e3a7aSWarner Losh } 760*8e3e3a7aSWarner Losh 761*8e3e3a7aSWarner Losh 762*8e3e3a7aSWarner Losh static int str_gsub (lua_State *L) { 763*8e3e3a7aSWarner Losh size_t srcl, lp; 764*8e3e3a7aSWarner Losh const char *src = luaL_checklstring(L, 1, &srcl); /* subject */ 765*8e3e3a7aSWarner Losh const char *p = luaL_checklstring(L, 2, &lp); /* pattern */ 766*8e3e3a7aSWarner Losh const char *lastmatch = NULL; /* end of last match */ 767*8e3e3a7aSWarner Losh int tr = lua_type(L, 3); /* replacement type */ 768*8e3e3a7aSWarner Losh lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); /* max replacements */ 769*8e3e3a7aSWarner Losh int anchor = (*p == '^'); 770*8e3e3a7aSWarner Losh lua_Integer n = 0; /* replacement count */ 771*8e3e3a7aSWarner Losh MatchState ms; 772*8e3e3a7aSWarner Losh luaL_Buffer b; 773*8e3e3a7aSWarner Losh luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || 774*8e3e3a7aSWarner Losh tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 775*8e3e3a7aSWarner Losh "string/function/table expected"); 776*8e3e3a7aSWarner Losh luaL_buffinit(L, &b); 777*8e3e3a7aSWarner Losh if (anchor) { 778*8e3e3a7aSWarner Losh p++; lp--; /* skip anchor character */ 779*8e3e3a7aSWarner Losh } 780*8e3e3a7aSWarner Losh prepstate(&ms, L, src, srcl, p, lp); 781*8e3e3a7aSWarner Losh while (n < max_s) { 782*8e3e3a7aSWarner Losh const char *e; 783*8e3e3a7aSWarner Losh reprepstate(&ms); /* (re)prepare state for new match */ 784*8e3e3a7aSWarner Losh if ((e = match(&ms, src, p)) != NULL && e != lastmatch) { /* match? */ 785*8e3e3a7aSWarner Losh n++; 786*8e3e3a7aSWarner Losh add_value(&ms, &b, src, e, tr); /* add replacement to buffer */ 787*8e3e3a7aSWarner Losh src = lastmatch = e; 788*8e3e3a7aSWarner Losh } 789*8e3e3a7aSWarner Losh else if (src < ms.src_end) /* otherwise, skip one character */ 790*8e3e3a7aSWarner Losh luaL_addchar(&b, *src++); 791*8e3e3a7aSWarner Losh else break; /* end of subject */ 792*8e3e3a7aSWarner Losh if (anchor) break; 793*8e3e3a7aSWarner Losh } 794*8e3e3a7aSWarner Losh luaL_addlstring(&b, src, ms.src_end-src); 795*8e3e3a7aSWarner Losh luaL_pushresult(&b); 796*8e3e3a7aSWarner Losh lua_pushinteger(L, n); /* number of substitutions */ 797*8e3e3a7aSWarner Losh return 2; 798*8e3e3a7aSWarner Losh } 799*8e3e3a7aSWarner Losh 800*8e3e3a7aSWarner Losh /* }====================================================== */ 801*8e3e3a7aSWarner Losh 802*8e3e3a7aSWarner Losh 803*8e3e3a7aSWarner Losh 804*8e3e3a7aSWarner Losh /* 805*8e3e3a7aSWarner Losh ** {====================================================== 806*8e3e3a7aSWarner Losh ** STRING FORMAT 807*8e3e3a7aSWarner Losh ** ======================================================= 808*8e3e3a7aSWarner Losh */ 809*8e3e3a7aSWarner Losh 810*8e3e3a7aSWarner Losh #if !defined(lua_number2strx) /* { */ 811*8e3e3a7aSWarner Losh 812*8e3e3a7aSWarner Losh /* 813*8e3e3a7aSWarner Losh ** Hexadecimal floating-point formatter 814*8e3e3a7aSWarner Losh */ 815*8e3e3a7aSWarner Losh 816*8e3e3a7aSWarner Losh #include <math.h> 817*8e3e3a7aSWarner Losh 818*8e3e3a7aSWarner Losh #define SIZELENMOD (sizeof(LUA_NUMBER_FRMLEN)/sizeof(char)) 819*8e3e3a7aSWarner Losh 820*8e3e3a7aSWarner Losh 821*8e3e3a7aSWarner Losh /* 822*8e3e3a7aSWarner Losh ** Number of bits that goes into the first digit. It can be any value 823*8e3e3a7aSWarner Losh ** between 1 and 4; the following definition tries to align the number 824*8e3e3a7aSWarner Losh ** to nibble boundaries by making what is left after that first digit a 825*8e3e3a7aSWarner Losh ** multiple of 4. 826*8e3e3a7aSWarner Losh */ 827*8e3e3a7aSWarner Losh #define L_NBFD ((l_mathlim(MANT_DIG) - 1)%4 + 1) 828*8e3e3a7aSWarner Losh 829*8e3e3a7aSWarner Losh 830*8e3e3a7aSWarner Losh /* 831*8e3e3a7aSWarner Losh ** Add integer part of 'x' to buffer and return new 'x' 832*8e3e3a7aSWarner Losh */ 833*8e3e3a7aSWarner Losh static lua_Number adddigit (char *buff, int n, lua_Number x) { 834*8e3e3a7aSWarner Losh lua_Number dd = l_mathop(floor)(x); /* get integer part from 'x' */ 835*8e3e3a7aSWarner Losh int d = (int)dd; 836*8e3e3a7aSWarner Losh buff[n] = (d < 10 ? d + '0' : d - 10 + 'a'); /* add to buffer */ 837*8e3e3a7aSWarner Losh return x - dd; /* return what is left */ 838*8e3e3a7aSWarner Losh } 839*8e3e3a7aSWarner Losh 840*8e3e3a7aSWarner Losh 841*8e3e3a7aSWarner Losh static int num2straux (char *buff, int sz, lua_Number x) { 842*8e3e3a7aSWarner Losh /* if 'inf' or 'NaN', format it like '%g' */ 843*8e3e3a7aSWarner Losh if (x != x || x == (lua_Number)HUGE_VAL || x == -(lua_Number)HUGE_VAL) 844*8e3e3a7aSWarner Losh return l_sprintf(buff, sz, LUA_NUMBER_FMT, (LUAI_UACNUMBER)x); 845*8e3e3a7aSWarner Losh else if (x == 0) { /* can be -0... */ 846*8e3e3a7aSWarner Losh /* create "0" or "-0" followed by exponent */ 847*8e3e3a7aSWarner Losh return l_sprintf(buff, sz, LUA_NUMBER_FMT "x0p+0", (LUAI_UACNUMBER)x); 848*8e3e3a7aSWarner Losh } 849*8e3e3a7aSWarner Losh else { 850*8e3e3a7aSWarner Losh int e; 851*8e3e3a7aSWarner Losh lua_Number m = l_mathop(frexp)(x, &e); /* 'x' fraction and exponent */ 852*8e3e3a7aSWarner Losh int n = 0; /* character count */ 853*8e3e3a7aSWarner Losh if (m < 0) { /* is number negative? */ 854*8e3e3a7aSWarner Losh buff[n++] = '-'; /* add signal */ 855*8e3e3a7aSWarner Losh m = -m; /* make it positive */ 856*8e3e3a7aSWarner Losh } 857*8e3e3a7aSWarner Losh buff[n++] = '0'; buff[n++] = 'x'; /* add "0x" */ 858*8e3e3a7aSWarner Losh m = adddigit(buff, n++, m * (1 << L_NBFD)); /* add first digit */ 859*8e3e3a7aSWarner Losh e -= L_NBFD; /* this digit goes before the radix point */ 860*8e3e3a7aSWarner Losh if (m > 0) { /* more digits? */ 861*8e3e3a7aSWarner Losh buff[n++] = lua_getlocaledecpoint(); /* add radix point */ 862*8e3e3a7aSWarner Losh do { /* add as many digits as needed */ 863*8e3e3a7aSWarner Losh m = adddigit(buff, n++, m * 16); 864*8e3e3a7aSWarner Losh } while (m > 0); 865*8e3e3a7aSWarner Losh } 866*8e3e3a7aSWarner Losh n += l_sprintf(buff + n, sz - n, "p%+d", e); /* add exponent */ 867*8e3e3a7aSWarner Losh lua_assert(n < sz); 868*8e3e3a7aSWarner Losh return n; 869*8e3e3a7aSWarner Losh } 870*8e3e3a7aSWarner Losh } 871*8e3e3a7aSWarner Losh 872*8e3e3a7aSWarner Losh 873*8e3e3a7aSWarner Losh static int lua_number2strx (lua_State *L, char *buff, int sz, 874*8e3e3a7aSWarner Losh const char *fmt, lua_Number x) { 875*8e3e3a7aSWarner Losh int n = num2straux(buff, sz, x); 876*8e3e3a7aSWarner Losh if (fmt[SIZELENMOD] == 'A') { 877*8e3e3a7aSWarner Losh int i; 878*8e3e3a7aSWarner Losh for (i = 0; i < n; i++) 879*8e3e3a7aSWarner Losh buff[i] = toupper(uchar(buff[i])); 880*8e3e3a7aSWarner Losh } 881*8e3e3a7aSWarner Losh else if (fmt[SIZELENMOD] != 'a') 882*8e3e3a7aSWarner Losh luaL_error(L, "modifiers for format '%%a'/'%%A' not implemented"); 883*8e3e3a7aSWarner Losh return n; 884*8e3e3a7aSWarner Losh } 885*8e3e3a7aSWarner Losh 886*8e3e3a7aSWarner Losh #endif /* } */ 887*8e3e3a7aSWarner Losh 888*8e3e3a7aSWarner Losh 889*8e3e3a7aSWarner Losh /* 890*8e3e3a7aSWarner Losh ** Maximum size of each formatted item. This maximum size is produced 891*8e3e3a7aSWarner Losh ** by format('%.99f', -maxfloat), and is equal to 99 + 3 ('-', '.', 892*8e3e3a7aSWarner Losh ** and '\0') + number of decimal digits to represent maxfloat (which 893*8e3e3a7aSWarner Losh ** is maximum exponent + 1). (99+3+1 then rounded to 120 for "extra 894*8e3e3a7aSWarner Losh ** expenses", such as locale-dependent stuff) 895*8e3e3a7aSWarner Losh */ 896*8e3e3a7aSWarner Losh #define MAX_ITEM (120 + l_mathlim(MAX_10_EXP)) 897*8e3e3a7aSWarner Losh 898*8e3e3a7aSWarner Losh 899*8e3e3a7aSWarner Losh /* valid flags in a format specification */ 900*8e3e3a7aSWarner Losh #define FLAGS "-+ #0" 901*8e3e3a7aSWarner Losh 902*8e3e3a7aSWarner Losh /* 903*8e3e3a7aSWarner Losh ** maximum size of each format specification (such as "%-099.99d") 904*8e3e3a7aSWarner Losh */ 905*8e3e3a7aSWarner Losh #define MAX_FORMAT 32 906*8e3e3a7aSWarner Losh 907*8e3e3a7aSWarner Losh 908*8e3e3a7aSWarner Losh static void addquoted (luaL_Buffer *b, const char *s, size_t len) { 909*8e3e3a7aSWarner Losh luaL_addchar(b, '"'); 910*8e3e3a7aSWarner Losh while (len--) { 911*8e3e3a7aSWarner Losh if (*s == '"' || *s == '\\' || *s == '\n') { 912*8e3e3a7aSWarner Losh luaL_addchar(b, '\\'); 913*8e3e3a7aSWarner Losh luaL_addchar(b, *s); 914*8e3e3a7aSWarner Losh } 915*8e3e3a7aSWarner Losh else if (iscntrl(uchar(*s))) { 916*8e3e3a7aSWarner Losh char buff[10]; 917*8e3e3a7aSWarner Losh if (!isdigit(uchar(*(s+1)))) 918*8e3e3a7aSWarner Losh l_sprintf(buff, sizeof(buff), "\\%d", (int)uchar(*s)); 919*8e3e3a7aSWarner Losh else 920*8e3e3a7aSWarner Losh l_sprintf(buff, sizeof(buff), "\\%03d", (int)uchar(*s)); 921*8e3e3a7aSWarner Losh luaL_addstring(b, buff); 922*8e3e3a7aSWarner Losh } 923*8e3e3a7aSWarner Losh else 924*8e3e3a7aSWarner Losh luaL_addchar(b, *s); 925*8e3e3a7aSWarner Losh s++; 926*8e3e3a7aSWarner Losh } 927*8e3e3a7aSWarner Losh luaL_addchar(b, '"'); 928*8e3e3a7aSWarner Losh } 929*8e3e3a7aSWarner Losh 930*8e3e3a7aSWarner Losh 931*8e3e3a7aSWarner Losh /* 932*8e3e3a7aSWarner Losh ** Ensures the 'buff' string uses a dot as the radix character. 933*8e3e3a7aSWarner Losh */ 934*8e3e3a7aSWarner Losh static void checkdp (char *buff, int nb) { 935*8e3e3a7aSWarner Losh if (memchr(buff, '.', nb) == NULL) { /* no dot? */ 936*8e3e3a7aSWarner Losh char point = lua_getlocaledecpoint(); /* try locale point */ 937*8e3e3a7aSWarner Losh char *ppoint = (char *)memchr(buff, point, nb); 938*8e3e3a7aSWarner Losh if (ppoint) *ppoint = '.'; /* change it to a dot */ 939*8e3e3a7aSWarner Losh } 940*8e3e3a7aSWarner Losh } 941*8e3e3a7aSWarner Losh 942*8e3e3a7aSWarner Losh 943*8e3e3a7aSWarner Losh static void addliteral (lua_State *L, luaL_Buffer *b, int arg) { 944*8e3e3a7aSWarner Losh switch (lua_type(L, arg)) { 945*8e3e3a7aSWarner Losh case LUA_TSTRING: { 946*8e3e3a7aSWarner Losh size_t len; 947*8e3e3a7aSWarner Losh const char *s = lua_tolstring(L, arg, &len); 948*8e3e3a7aSWarner Losh addquoted(b, s, len); 949*8e3e3a7aSWarner Losh break; 950*8e3e3a7aSWarner Losh } 951*8e3e3a7aSWarner Losh case LUA_TNUMBER: { 952*8e3e3a7aSWarner Losh char *buff = luaL_prepbuffsize(b, MAX_ITEM); 953*8e3e3a7aSWarner Losh int nb; 954*8e3e3a7aSWarner Losh if (!lua_isinteger(L, arg)) { /* float? */ 955*8e3e3a7aSWarner Losh lua_Number n = lua_tonumber(L, arg); /* write as hexa ('%a') */ 956*8e3e3a7aSWarner Losh nb = lua_number2strx(L, buff, MAX_ITEM, "%" LUA_NUMBER_FRMLEN "a", n); 957*8e3e3a7aSWarner Losh checkdp(buff, nb); /* ensure it uses a dot */ 958*8e3e3a7aSWarner Losh } 959*8e3e3a7aSWarner Losh else { /* integers */ 960*8e3e3a7aSWarner Losh lua_Integer n = lua_tointeger(L, arg); 961*8e3e3a7aSWarner Losh const char *format = (n == LUA_MININTEGER) /* corner case? */ 962*8e3e3a7aSWarner Losh ? "0x%" LUA_INTEGER_FRMLEN "x" /* use hexa */ 963*8e3e3a7aSWarner Losh : LUA_INTEGER_FMT; /* else use default format */ 964*8e3e3a7aSWarner Losh nb = l_sprintf(buff, MAX_ITEM, format, (LUAI_UACINT)n); 965*8e3e3a7aSWarner Losh } 966*8e3e3a7aSWarner Losh luaL_addsize(b, nb); 967*8e3e3a7aSWarner Losh break; 968*8e3e3a7aSWarner Losh } 969*8e3e3a7aSWarner Losh case LUA_TNIL: case LUA_TBOOLEAN: { 970*8e3e3a7aSWarner Losh luaL_tolstring(L, arg, NULL); 971*8e3e3a7aSWarner Losh luaL_addvalue(b); 972*8e3e3a7aSWarner Losh break; 973*8e3e3a7aSWarner Losh } 974*8e3e3a7aSWarner Losh default: { 975*8e3e3a7aSWarner Losh luaL_argerror(L, arg, "value has no literal form"); 976*8e3e3a7aSWarner Losh } 977*8e3e3a7aSWarner Losh } 978*8e3e3a7aSWarner Losh } 979*8e3e3a7aSWarner Losh 980*8e3e3a7aSWarner Losh 981*8e3e3a7aSWarner Losh static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { 982*8e3e3a7aSWarner Losh const char *p = strfrmt; 983*8e3e3a7aSWarner Losh while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++; /* skip flags */ 984*8e3e3a7aSWarner Losh if ((size_t)(p - strfrmt) >= sizeof(FLAGS)/sizeof(char)) 985*8e3e3a7aSWarner Losh luaL_error(L, "invalid format (repeated flags)"); 986*8e3e3a7aSWarner Losh if (isdigit(uchar(*p))) p++; /* skip width */ 987*8e3e3a7aSWarner Losh if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 988*8e3e3a7aSWarner Losh if (*p == '.') { 989*8e3e3a7aSWarner Losh p++; 990*8e3e3a7aSWarner Losh if (isdigit(uchar(*p))) p++; /* skip precision */ 991*8e3e3a7aSWarner Losh if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 992*8e3e3a7aSWarner Losh } 993*8e3e3a7aSWarner Losh if (isdigit(uchar(*p))) 994*8e3e3a7aSWarner Losh luaL_error(L, "invalid format (width or precision too long)"); 995*8e3e3a7aSWarner Losh *(form++) = '%'; 996*8e3e3a7aSWarner Losh memcpy(form, strfrmt, ((p - strfrmt) + 1) * sizeof(char)); 997*8e3e3a7aSWarner Losh form += (p - strfrmt) + 1; 998*8e3e3a7aSWarner Losh *form = '\0'; 999*8e3e3a7aSWarner Losh return p; 1000*8e3e3a7aSWarner Losh } 1001*8e3e3a7aSWarner Losh 1002*8e3e3a7aSWarner Losh 1003*8e3e3a7aSWarner Losh /* 1004*8e3e3a7aSWarner Losh ** add length modifier into formats 1005*8e3e3a7aSWarner Losh */ 1006*8e3e3a7aSWarner Losh static void addlenmod (char *form, const char *lenmod) { 1007*8e3e3a7aSWarner Losh size_t l = strlen(form); 1008*8e3e3a7aSWarner Losh size_t lm = strlen(lenmod); 1009*8e3e3a7aSWarner Losh char spec = form[l - 1]; 1010*8e3e3a7aSWarner Losh strcpy(form + l - 1, lenmod); 1011*8e3e3a7aSWarner Losh form[l + lm - 1] = spec; 1012*8e3e3a7aSWarner Losh form[l + lm] = '\0'; 1013*8e3e3a7aSWarner Losh } 1014*8e3e3a7aSWarner Losh 1015*8e3e3a7aSWarner Losh 1016*8e3e3a7aSWarner Losh static int str_format (lua_State *L) { 1017*8e3e3a7aSWarner Losh int top = lua_gettop(L); 1018*8e3e3a7aSWarner Losh int arg = 1; 1019*8e3e3a7aSWarner Losh size_t sfl; 1020*8e3e3a7aSWarner Losh const char *strfrmt = luaL_checklstring(L, arg, &sfl); 1021*8e3e3a7aSWarner Losh const char *strfrmt_end = strfrmt+sfl; 1022*8e3e3a7aSWarner Losh luaL_Buffer b; 1023*8e3e3a7aSWarner Losh luaL_buffinit(L, &b); 1024*8e3e3a7aSWarner Losh while (strfrmt < strfrmt_end) { 1025*8e3e3a7aSWarner Losh if (*strfrmt != L_ESC) 1026*8e3e3a7aSWarner Losh luaL_addchar(&b, *strfrmt++); 1027*8e3e3a7aSWarner Losh else if (*++strfrmt == L_ESC) 1028*8e3e3a7aSWarner Losh luaL_addchar(&b, *strfrmt++); /* %% */ 1029*8e3e3a7aSWarner Losh else { /* format item */ 1030*8e3e3a7aSWarner Losh char form[MAX_FORMAT]; /* to store the format ('%...') */ 1031*8e3e3a7aSWarner Losh char *buff = luaL_prepbuffsize(&b, MAX_ITEM); /* to put formatted item */ 1032*8e3e3a7aSWarner Losh int nb = 0; /* number of bytes in added item */ 1033*8e3e3a7aSWarner Losh if (++arg > top) 1034*8e3e3a7aSWarner Losh luaL_argerror(L, arg, "no value"); 1035*8e3e3a7aSWarner Losh strfrmt = scanformat(L, strfrmt, form); 1036*8e3e3a7aSWarner Losh switch (*strfrmt++) { 1037*8e3e3a7aSWarner Losh case 'c': { 1038*8e3e3a7aSWarner Losh nb = l_sprintf(buff, MAX_ITEM, form, (int)luaL_checkinteger(L, arg)); 1039*8e3e3a7aSWarner Losh break; 1040*8e3e3a7aSWarner Losh } 1041*8e3e3a7aSWarner Losh case 'd': case 'i': 1042*8e3e3a7aSWarner Losh case 'o': case 'u': case 'x': case 'X': { 1043*8e3e3a7aSWarner Losh lua_Integer n = luaL_checkinteger(L, arg); 1044*8e3e3a7aSWarner Losh addlenmod(form, LUA_INTEGER_FRMLEN); 1045*8e3e3a7aSWarner Losh nb = l_sprintf(buff, MAX_ITEM, form, (LUAI_UACINT)n); 1046*8e3e3a7aSWarner Losh break; 1047*8e3e3a7aSWarner Losh } 1048*8e3e3a7aSWarner Losh case 'a': case 'A': 1049*8e3e3a7aSWarner Losh addlenmod(form, LUA_NUMBER_FRMLEN); 1050*8e3e3a7aSWarner Losh nb = lua_number2strx(L, buff, MAX_ITEM, form, 1051*8e3e3a7aSWarner Losh luaL_checknumber(L, arg)); 1052*8e3e3a7aSWarner Losh break; 1053*8e3e3a7aSWarner Losh case 'e': case 'E': case 'f': 1054*8e3e3a7aSWarner Losh case 'g': case 'G': { 1055*8e3e3a7aSWarner Losh lua_Number n = luaL_checknumber(L, arg); 1056*8e3e3a7aSWarner Losh addlenmod(form, LUA_NUMBER_FRMLEN); 1057*8e3e3a7aSWarner Losh nb = l_sprintf(buff, MAX_ITEM, form, (LUAI_UACNUMBER)n); 1058*8e3e3a7aSWarner Losh break; 1059*8e3e3a7aSWarner Losh } 1060*8e3e3a7aSWarner Losh case 'q': { 1061*8e3e3a7aSWarner Losh addliteral(L, &b, arg); 1062*8e3e3a7aSWarner Losh break; 1063*8e3e3a7aSWarner Losh } 1064*8e3e3a7aSWarner Losh case 's': { 1065*8e3e3a7aSWarner Losh size_t l; 1066*8e3e3a7aSWarner Losh const char *s = luaL_tolstring(L, arg, &l); 1067*8e3e3a7aSWarner Losh if (form[2] == '\0') /* no modifiers? */ 1068*8e3e3a7aSWarner Losh luaL_addvalue(&b); /* keep entire string */ 1069*8e3e3a7aSWarner Losh else { 1070*8e3e3a7aSWarner Losh luaL_argcheck(L, l == strlen(s), arg, "string contains zeros"); 1071*8e3e3a7aSWarner Losh if (!strchr(form, '.') && l >= 100) { 1072*8e3e3a7aSWarner Losh /* no precision and string is too long to be formatted */ 1073*8e3e3a7aSWarner Losh luaL_addvalue(&b); /* keep entire string */ 1074*8e3e3a7aSWarner Losh } 1075*8e3e3a7aSWarner Losh else { /* format the string into 'buff' */ 1076*8e3e3a7aSWarner Losh nb = l_sprintf(buff, MAX_ITEM, form, s); 1077*8e3e3a7aSWarner Losh lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ 1078*8e3e3a7aSWarner Losh } 1079*8e3e3a7aSWarner Losh } 1080*8e3e3a7aSWarner Losh break; 1081*8e3e3a7aSWarner Losh } 1082*8e3e3a7aSWarner Losh default: { /* also treat cases 'pnLlh' */ 1083*8e3e3a7aSWarner Losh return luaL_error(L, "invalid option '%%%c' to 'format'", 1084*8e3e3a7aSWarner Losh *(strfrmt - 1)); 1085*8e3e3a7aSWarner Losh } 1086*8e3e3a7aSWarner Losh } 1087*8e3e3a7aSWarner Losh lua_assert(nb < MAX_ITEM); 1088*8e3e3a7aSWarner Losh luaL_addsize(&b, nb); 1089*8e3e3a7aSWarner Losh } 1090*8e3e3a7aSWarner Losh } 1091*8e3e3a7aSWarner Losh luaL_pushresult(&b); 1092*8e3e3a7aSWarner Losh return 1; 1093*8e3e3a7aSWarner Losh } 1094*8e3e3a7aSWarner Losh 1095*8e3e3a7aSWarner Losh /* }====================================================== */ 1096*8e3e3a7aSWarner Losh 1097*8e3e3a7aSWarner Losh 1098*8e3e3a7aSWarner Losh /* 1099*8e3e3a7aSWarner Losh ** {====================================================== 1100*8e3e3a7aSWarner Losh ** PACK/UNPACK 1101*8e3e3a7aSWarner Losh ** ======================================================= 1102*8e3e3a7aSWarner Losh */ 1103*8e3e3a7aSWarner Losh 1104*8e3e3a7aSWarner Losh 1105*8e3e3a7aSWarner Losh /* value used for padding */ 1106*8e3e3a7aSWarner Losh #if !defined(LUAL_PACKPADBYTE) 1107*8e3e3a7aSWarner Losh #define LUAL_PACKPADBYTE 0x00 1108*8e3e3a7aSWarner Losh #endif 1109*8e3e3a7aSWarner Losh 1110*8e3e3a7aSWarner Losh /* maximum size for the binary representation of an integer */ 1111*8e3e3a7aSWarner Losh #define MAXINTSIZE 16 1112*8e3e3a7aSWarner Losh 1113*8e3e3a7aSWarner Losh /* number of bits in a character */ 1114*8e3e3a7aSWarner Losh #define NB CHAR_BIT 1115*8e3e3a7aSWarner Losh 1116*8e3e3a7aSWarner Losh /* mask for one character (NB 1's) */ 1117*8e3e3a7aSWarner Losh #define MC ((1 << NB) - 1) 1118*8e3e3a7aSWarner Losh 1119*8e3e3a7aSWarner Losh /* size of a lua_Integer */ 1120*8e3e3a7aSWarner Losh #define SZINT ((int)sizeof(lua_Integer)) 1121*8e3e3a7aSWarner Losh 1122*8e3e3a7aSWarner Losh 1123*8e3e3a7aSWarner Losh /* dummy union to get native endianness */ 1124*8e3e3a7aSWarner Losh static const union { 1125*8e3e3a7aSWarner Losh int dummy; 1126*8e3e3a7aSWarner Losh char little; /* true iff machine is little endian */ 1127*8e3e3a7aSWarner Losh } nativeendian = {1}; 1128*8e3e3a7aSWarner Losh 1129*8e3e3a7aSWarner Losh 1130*8e3e3a7aSWarner Losh /* dummy structure to get native alignment requirements */ 1131*8e3e3a7aSWarner Losh struct cD { 1132*8e3e3a7aSWarner Losh char c; 1133*8e3e3a7aSWarner Losh union { double d; void *p; lua_Integer i; lua_Number n; } u; 1134*8e3e3a7aSWarner Losh }; 1135*8e3e3a7aSWarner Losh 1136*8e3e3a7aSWarner Losh #define MAXALIGN (offsetof(struct cD, u)) 1137*8e3e3a7aSWarner Losh 1138*8e3e3a7aSWarner Losh 1139*8e3e3a7aSWarner Losh /* 1140*8e3e3a7aSWarner Losh ** Union for serializing floats 1141*8e3e3a7aSWarner Losh */ 1142*8e3e3a7aSWarner Losh typedef union Ftypes { 1143*8e3e3a7aSWarner Losh float f; 1144*8e3e3a7aSWarner Losh double d; 1145*8e3e3a7aSWarner Losh lua_Number n; 1146*8e3e3a7aSWarner Losh char buff[5 * sizeof(lua_Number)]; /* enough for any float type */ 1147*8e3e3a7aSWarner Losh } Ftypes; 1148*8e3e3a7aSWarner Losh 1149*8e3e3a7aSWarner Losh 1150*8e3e3a7aSWarner Losh /* 1151*8e3e3a7aSWarner Losh ** information to pack/unpack stuff 1152*8e3e3a7aSWarner Losh */ 1153*8e3e3a7aSWarner Losh typedef struct Header { 1154*8e3e3a7aSWarner Losh lua_State *L; 1155*8e3e3a7aSWarner Losh int islittle; 1156*8e3e3a7aSWarner Losh int maxalign; 1157*8e3e3a7aSWarner Losh } Header; 1158*8e3e3a7aSWarner Losh 1159*8e3e3a7aSWarner Losh 1160*8e3e3a7aSWarner Losh /* 1161*8e3e3a7aSWarner Losh ** options for pack/unpack 1162*8e3e3a7aSWarner Losh */ 1163*8e3e3a7aSWarner Losh typedef enum KOption { 1164*8e3e3a7aSWarner Losh Kint, /* signed integers */ 1165*8e3e3a7aSWarner Losh Kuint, /* unsigned integers */ 1166*8e3e3a7aSWarner Losh Kfloat, /* floating-point numbers */ 1167*8e3e3a7aSWarner Losh Kchar, /* fixed-length strings */ 1168*8e3e3a7aSWarner Losh Kstring, /* strings with prefixed length */ 1169*8e3e3a7aSWarner Losh Kzstr, /* zero-terminated strings */ 1170*8e3e3a7aSWarner Losh Kpadding, /* padding */ 1171*8e3e3a7aSWarner Losh Kpaddalign, /* padding for alignment */ 1172*8e3e3a7aSWarner Losh Knop /* no-op (configuration or spaces) */ 1173*8e3e3a7aSWarner Losh } KOption; 1174*8e3e3a7aSWarner Losh 1175*8e3e3a7aSWarner Losh 1176*8e3e3a7aSWarner Losh /* 1177*8e3e3a7aSWarner Losh ** Read an integer numeral from string 'fmt' or return 'df' if 1178*8e3e3a7aSWarner Losh ** there is no numeral 1179*8e3e3a7aSWarner Losh */ 1180*8e3e3a7aSWarner Losh static int digit (int c) { return '0' <= c && c <= '9'; } 1181*8e3e3a7aSWarner Losh 1182*8e3e3a7aSWarner Losh static int getnum (const char **fmt, int df) { 1183*8e3e3a7aSWarner Losh if (!digit(**fmt)) /* no number? */ 1184*8e3e3a7aSWarner Losh return df; /* return default value */ 1185*8e3e3a7aSWarner Losh else { 1186*8e3e3a7aSWarner Losh int a = 0; 1187*8e3e3a7aSWarner Losh do { 1188*8e3e3a7aSWarner Losh a = a*10 + (*((*fmt)++) - '0'); 1189*8e3e3a7aSWarner Losh } while (digit(**fmt) && a <= ((int)MAXSIZE - 9)/10); 1190*8e3e3a7aSWarner Losh return a; 1191*8e3e3a7aSWarner Losh } 1192*8e3e3a7aSWarner Losh } 1193*8e3e3a7aSWarner Losh 1194*8e3e3a7aSWarner Losh 1195*8e3e3a7aSWarner Losh /* 1196*8e3e3a7aSWarner Losh ** Read an integer numeral and raises an error if it is larger 1197*8e3e3a7aSWarner Losh ** than the maximum size for integers. 1198*8e3e3a7aSWarner Losh */ 1199*8e3e3a7aSWarner Losh static int getnumlimit (Header *h, const char **fmt, int df) { 1200*8e3e3a7aSWarner Losh int sz = getnum(fmt, df); 1201*8e3e3a7aSWarner Losh if (sz > MAXINTSIZE || sz <= 0) 1202*8e3e3a7aSWarner Losh luaL_error(h->L, "integral size (%d) out of limits [1,%d]", 1203*8e3e3a7aSWarner Losh sz, MAXINTSIZE); 1204*8e3e3a7aSWarner Losh return sz; 1205*8e3e3a7aSWarner Losh } 1206*8e3e3a7aSWarner Losh 1207*8e3e3a7aSWarner Losh 1208*8e3e3a7aSWarner Losh /* 1209*8e3e3a7aSWarner Losh ** Initialize Header 1210*8e3e3a7aSWarner Losh */ 1211*8e3e3a7aSWarner Losh static void initheader (lua_State *L, Header *h) { 1212*8e3e3a7aSWarner Losh h->L = L; 1213*8e3e3a7aSWarner Losh h->islittle = nativeendian.little; 1214*8e3e3a7aSWarner Losh h->maxalign = 1; 1215*8e3e3a7aSWarner Losh } 1216*8e3e3a7aSWarner Losh 1217*8e3e3a7aSWarner Losh 1218*8e3e3a7aSWarner Losh /* 1219*8e3e3a7aSWarner Losh ** Read and classify next option. 'size' is filled with option's size. 1220*8e3e3a7aSWarner Losh */ 1221*8e3e3a7aSWarner Losh static KOption getoption (Header *h, const char **fmt, int *size) { 1222*8e3e3a7aSWarner Losh int opt = *((*fmt)++); 1223*8e3e3a7aSWarner Losh *size = 0; /* default */ 1224*8e3e3a7aSWarner Losh switch (opt) { 1225*8e3e3a7aSWarner Losh case 'b': *size = sizeof(char); return Kint; 1226*8e3e3a7aSWarner Losh case 'B': *size = sizeof(char); return Kuint; 1227*8e3e3a7aSWarner Losh case 'h': *size = sizeof(short); return Kint; 1228*8e3e3a7aSWarner Losh case 'H': *size = sizeof(short); return Kuint; 1229*8e3e3a7aSWarner Losh case 'l': *size = sizeof(long); return Kint; 1230*8e3e3a7aSWarner Losh case 'L': *size = sizeof(long); return Kuint; 1231*8e3e3a7aSWarner Losh case 'j': *size = sizeof(lua_Integer); return Kint; 1232*8e3e3a7aSWarner Losh case 'J': *size = sizeof(lua_Integer); return Kuint; 1233*8e3e3a7aSWarner Losh case 'T': *size = sizeof(size_t); return Kuint; 1234*8e3e3a7aSWarner Losh case 'f': *size = sizeof(float); return Kfloat; 1235*8e3e3a7aSWarner Losh case 'd': *size = sizeof(double); return Kfloat; 1236*8e3e3a7aSWarner Losh case 'n': *size = sizeof(lua_Number); return Kfloat; 1237*8e3e3a7aSWarner Losh case 'i': *size = getnumlimit(h, fmt, sizeof(int)); return Kint; 1238*8e3e3a7aSWarner Losh case 'I': *size = getnumlimit(h, fmt, sizeof(int)); return Kuint; 1239*8e3e3a7aSWarner Losh case 's': *size = getnumlimit(h, fmt, sizeof(size_t)); return Kstring; 1240*8e3e3a7aSWarner Losh case 'c': 1241*8e3e3a7aSWarner Losh *size = getnum(fmt, -1); 1242*8e3e3a7aSWarner Losh if (*size == -1) 1243*8e3e3a7aSWarner Losh luaL_error(h->L, "missing size for format option 'c'"); 1244*8e3e3a7aSWarner Losh return Kchar; 1245*8e3e3a7aSWarner Losh case 'z': return Kzstr; 1246*8e3e3a7aSWarner Losh case 'x': *size = 1; return Kpadding; 1247*8e3e3a7aSWarner Losh case 'X': return Kpaddalign; 1248*8e3e3a7aSWarner Losh case ' ': break; 1249*8e3e3a7aSWarner Losh case '<': h->islittle = 1; break; 1250*8e3e3a7aSWarner Losh case '>': h->islittle = 0; break; 1251*8e3e3a7aSWarner Losh case '=': h->islittle = nativeendian.little; break; 1252*8e3e3a7aSWarner Losh case '!': h->maxalign = getnumlimit(h, fmt, MAXALIGN); break; 1253*8e3e3a7aSWarner Losh default: luaL_error(h->L, "invalid format option '%c'", opt); 1254*8e3e3a7aSWarner Losh } 1255*8e3e3a7aSWarner Losh return Knop; 1256*8e3e3a7aSWarner Losh } 1257*8e3e3a7aSWarner Losh 1258*8e3e3a7aSWarner Losh 1259*8e3e3a7aSWarner Losh /* 1260*8e3e3a7aSWarner Losh ** Read, classify, and fill other details about the next option. 1261*8e3e3a7aSWarner Losh ** 'psize' is filled with option's size, 'notoalign' with its 1262*8e3e3a7aSWarner Losh ** alignment requirements. 1263*8e3e3a7aSWarner Losh ** Local variable 'size' gets the size to be aligned. (Kpadal option 1264*8e3e3a7aSWarner Losh ** always gets its full alignment, other options are limited by 1265*8e3e3a7aSWarner Losh ** the maximum alignment ('maxalign'). Kchar option needs no alignment 1266*8e3e3a7aSWarner Losh ** despite its size. 1267*8e3e3a7aSWarner Losh */ 1268*8e3e3a7aSWarner Losh static KOption getdetails (Header *h, size_t totalsize, 1269*8e3e3a7aSWarner Losh const char **fmt, int *psize, int *ntoalign) { 1270*8e3e3a7aSWarner Losh KOption opt = getoption(h, fmt, psize); 1271*8e3e3a7aSWarner Losh int align = *psize; /* usually, alignment follows size */ 1272*8e3e3a7aSWarner Losh if (opt == Kpaddalign) { /* 'X' gets alignment from following option */ 1273*8e3e3a7aSWarner Losh if (**fmt == '\0' || getoption(h, fmt, &align) == Kchar || align == 0) 1274*8e3e3a7aSWarner Losh luaL_argerror(h->L, 1, "invalid next option for option 'X'"); 1275*8e3e3a7aSWarner Losh } 1276*8e3e3a7aSWarner Losh if (align <= 1 || opt == Kchar) /* need no alignment? */ 1277*8e3e3a7aSWarner Losh *ntoalign = 0; 1278*8e3e3a7aSWarner Losh else { 1279*8e3e3a7aSWarner Losh if (align > h->maxalign) /* enforce maximum alignment */ 1280*8e3e3a7aSWarner Losh align = h->maxalign; 1281*8e3e3a7aSWarner Losh if ((align & (align - 1)) != 0) /* is 'align' not a power of 2? */ 1282*8e3e3a7aSWarner Losh luaL_argerror(h->L, 1, "format asks for alignment not power of 2"); 1283*8e3e3a7aSWarner Losh *ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1); 1284*8e3e3a7aSWarner Losh } 1285*8e3e3a7aSWarner Losh return opt; 1286*8e3e3a7aSWarner Losh } 1287*8e3e3a7aSWarner Losh 1288*8e3e3a7aSWarner Losh 1289*8e3e3a7aSWarner Losh /* 1290*8e3e3a7aSWarner Losh ** Pack integer 'n' with 'size' bytes and 'islittle' endianness. 1291*8e3e3a7aSWarner Losh ** The final 'if' handles the case when 'size' is larger than 1292*8e3e3a7aSWarner Losh ** the size of a Lua integer, correcting the extra sign-extension 1293*8e3e3a7aSWarner Losh ** bytes if necessary (by default they would be zeros). 1294*8e3e3a7aSWarner Losh */ 1295*8e3e3a7aSWarner Losh static void packint (luaL_Buffer *b, lua_Unsigned n, 1296*8e3e3a7aSWarner Losh int islittle, int size, int neg) { 1297*8e3e3a7aSWarner Losh char *buff = luaL_prepbuffsize(b, size); 1298*8e3e3a7aSWarner Losh int i; 1299*8e3e3a7aSWarner Losh buff[islittle ? 0 : size - 1] = (char)(n & MC); /* first byte */ 1300*8e3e3a7aSWarner Losh for (i = 1; i < size; i++) { 1301*8e3e3a7aSWarner Losh n >>= NB; 1302*8e3e3a7aSWarner Losh buff[islittle ? i : size - 1 - i] = (char)(n & MC); 1303*8e3e3a7aSWarner Losh } 1304*8e3e3a7aSWarner Losh if (neg && size > SZINT) { /* negative number need sign extension? */ 1305*8e3e3a7aSWarner Losh for (i = SZINT; i < size; i++) /* correct extra bytes */ 1306*8e3e3a7aSWarner Losh buff[islittle ? i : size - 1 - i] = (char)MC; 1307*8e3e3a7aSWarner Losh } 1308*8e3e3a7aSWarner Losh luaL_addsize(b, size); /* add result to buffer */ 1309*8e3e3a7aSWarner Losh } 1310*8e3e3a7aSWarner Losh 1311*8e3e3a7aSWarner Losh 1312*8e3e3a7aSWarner Losh /* 1313*8e3e3a7aSWarner Losh ** Copy 'size' bytes from 'src' to 'dest', correcting endianness if 1314*8e3e3a7aSWarner Losh ** given 'islittle' is different from native endianness. 1315*8e3e3a7aSWarner Losh */ 1316*8e3e3a7aSWarner Losh static void copywithendian (volatile char *dest, volatile const char *src, 1317*8e3e3a7aSWarner Losh int size, int islittle) { 1318*8e3e3a7aSWarner Losh if (islittle == nativeendian.little) { 1319*8e3e3a7aSWarner Losh while (size-- != 0) 1320*8e3e3a7aSWarner Losh *(dest++) = *(src++); 1321*8e3e3a7aSWarner Losh } 1322*8e3e3a7aSWarner Losh else { 1323*8e3e3a7aSWarner Losh dest += size - 1; 1324*8e3e3a7aSWarner Losh while (size-- != 0) 1325*8e3e3a7aSWarner Losh *(dest--) = *(src++); 1326*8e3e3a7aSWarner Losh } 1327*8e3e3a7aSWarner Losh } 1328*8e3e3a7aSWarner Losh 1329*8e3e3a7aSWarner Losh 1330*8e3e3a7aSWarner Losh static int str_pack (lua_State *L) { 1331*8e3e3a7aSWarner Losh luaL_Buffer b; 1332*8e3e3a7aSWarner Losh Header h; 1333*8e3e3a7aSWarner Losh const char *fmt = luaL_checkstring(L, 1); /* format string */ 1334*8e3e3a7aSWarner Losh int arg = 1; /* current argument to pack */ 1335*8e3e3a7aSWarner Losh size_t totalsize = 0; /* accumulate total size of result */ 1336*8e3e3a7aSWarner Losh initheader(L, &h); 1337*8e3e3a7aSWarner Losh lua_pushnil(L); /* mark to separate arguments from string buffer */ 1338*8e3e3a7aSWarner Losh luaL_buffinit(L, &b); 1339*8e3e3a7aSWarner Losh while (*fmt != '\0') { 1340*8e3e3a7aSWarner Losh int size, ntoalign; 1341*8e3e3a7aSWarner Losh KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); 1342*8e3e3a7aSWarner Losh totalsize += ntoalign + size; 1343*8e3e3a7aSWarner Losh while (ntoalign-- > 0) 1344*8e3e3a7aSWarner Losh luaL_addchar(&b, LUAL_PACKPADBYTE); /* fill alignment */ 1345*8e3e3a7aSWarner Losh arg++; 1346*8e3e3a7aSWarner Losh switch (opt) { 1347*8e3e3a7aSWarner Losh case Kint: { /* signed integers */ 1348*8e3e3a7aSWarner Losh lua_Integer n = luaL_checkinteger(L, arg); 1349*8e3e3a7aSWarner Losh if (size < SZINT) { /* need overflow check? */ 1350*8e3e3a7aSWarner Losh lua_Integer lim = (lua_Integer)1 << ((size * NB) - 1); 1351*8e3e3a7aSWarner Losh luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow"); 1352*8e3e3a7aSWarner Losh } 1353*8e3e3a7aSWarner Losh packint(&b, (lua_Unsigned)n, h.islittle, size, (n < 0)); 1354*8e3e3a7aSWarner Losh break; 1355*8e3e3a7aSWarner Losh } 1356*8e3e3a7aSWarner Losh case Kuint: { /* unsigned integers */ 1357*8e3e3a7aSWarner Losh lua_Integer n = luaL_checkinteger(L, arg); 1358*8e3e3a7aSWarner Losh if (size < SZINT) /* need overflow check? */ 1359*8e3e3a7aSWarner Losh luaL_argcheck(L, (lua_Unsigned)n < ((lua_Unsigned)1 << (size * NB)), 1360*8e3e3a7aSWarner Losh arg, "unsigned overflow"); 1361*8e3e3a7aSWarner Losh packint(&b, (lua_Unsigned)n, h.islittle, size, 0); 1362*8e3e3a7aSWarner Losh break; 1363*8e3e3a7aSWarner Losh } 1364*8e3e3a7aSWarner Losh case Kfloat: { /* floating-point options */ 1365*8e3e3a7aSWarner Losh volatile Ftypes u; 1366*8e3e3a7aSWarner Losh char *buff = luaL_prepbuffsize(&b, size); 1367*8e3e3a7aSWarner Losh lua_Number n = luaL_checknumber(L, arg); /* get argument */ 1368*8e3e3a7aSWarner Losh if (size == sizeof(u.f)) u.f = (float)n; /* copy it into 'u' */ 1369*8e3e3a7aSWarner Losh else if (size == sizeof(u.d)) u.d = (double)n; 1370*8e3e3a7aSWarner Losh else u.n = n; 1371*8e3e3a7aSWarner Losh /* move 'u' to final result, correcting endianness if needed */ 1372*8e3e3a7aSWarner Losh copywithendian(buff, u.buff, size, h.islittle); 1373*8e3e3a7aSWarner Losh luaL_addsize(&b, size); 1374*8e3e3a7aSWarner Losh break; 1375*8e3e3a7aSWarner Losh } 1376*8e3e3a7aSWarner Losh case Kchar: { /* fixed-size string */ 1377*8e3e3a7aSWarner Losh size_t len; 1378*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, arg, &len); 1379*8e3e3a7aSWarner Losh luaL_argcheck(L, len <= (size_t)size, arg, 1380*8e3e3a7aSWarner Losh "string longer than given size"); 1381*8e3e3a7aSWarner Losh luaL_addlstring(&b, s, len); /* add string */ 1382*8e3e3a7aSWarner Losh while (len++ < (size_t)size) /* pad extra space */ 1383*8e3e3a7aSWarner Losh luaL_addchar(&b, LUAL_PACKPADBYTE); 1384*8e3e3a7aSWarner Losh break; 1385*8e3e3a7aSWarner Losh } 1386*8e3e3a7aSWarner Losh case Kstring: { /* strings with length count */ 1387*8e3e3a7aSWarner Losh size_t len; 1388*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, arg, &len); 1389*8e3e3a7aSWarner Losh luaL_argcheck(L, size >= (int)sizeof(size_t) || 1390*8e3e3a7aSWarner Losh len < ((size_t)1 << (size * NB)), 1391*8e3e3a7aSWarner Losh arg, "string length does not fit in given size"); 1392*8e3e3a7aSWarner Losh packint(&b, (lua_Unsigned)len, h.islittle, size, 0); /* pack length */ 1393*8e3e3a7aSWarner Losh luaL_addlstring(&b, s, len); 1394*8e3e3a7aSWarner Losh totalsize += len; 1395*8e3e3a7aSWarner Losh break; 1396*8e3e3a7aSWarner Losh } 1397*8e3e3a7aSWarner Losh case Kzstr: { /* zero-terminated string */ 1398*8e3e3a7aSWarner Losh size_t len; 1399*8e3e3a7aSWarner Losh const char *s = luaL_checklstring(L, arg, &len); 1400*8e3e3a7aSWarner Losh luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros"); 1401*8e3e3a7aSWarner Losh luaL_addlstring(&b, s, len); 1402*8e3e3a7aSWarner Losh luaL_addchar(&b, '\0'); /* add zero at the end */ 1403*8e3e3a7aSWarner Losh totalsize += len + 1; 1404*8e3e3a7aSWarner Losh break; 1405*8e3e3a7aSWarner Losh } 1406*8e3e3a7aSWarner Losh case Kpadding: luaL_addchar(&b, LUAL_PACKPADBYTE); /* FALLTHROUGH */ 1407*8e3e3a7aSWarner Losh case Kpaddalign: case Knop: 1408*8e3e3a7aSWarner Losh arg--; /* undo increment */ 1409*8e3e3a7aSWarner Losh break; 1410*8e3e3a7aSWarner Losh } 1411*8e3e3a7aSWarner Losh } 1412*8e3e3a7aSWarner Losh luaL_pushresult(&b); 1413*8e3e3a7aSWarner Losh return 1; 1414*8e3e3a7aSWarner Losh } 1415*8e3e3a7aSWarner Losh 1416*8e3e3a7aSWarner Losh 1417*8e3e3a7aSWarner Losh static int str_packsize (lua_State *L) { 1418*8e3e3a7aSWarner Losh Header h; 1419*8e3e3a7aSWarner Losh const char *fmt = luaL_checkstring(L, 1); /* format string */ 1420*8e3e3a7aSWarner Losh size_t totalsize = 0; /* accumulate total size of result */ 1421*8e3e3a7aSWarner Losh initheader(L, &h); 1422*8e3e3a7aSWarner Losh while (*fmt != '\0') { 1423*8e3e3a7aSWarner Losh int size, ntoalign; 1424*8e3e3a7aSWarner Losh KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); 1425*8e3e3a7aSWarner Losh size += ntoalign; /* total space used by option */ 1426*8e3e3a7aSWarner Losh luaL_argcheck(L, totalsize <= MAXSIZE - size, 1, 1427*8e3e3a7aSWarner Losh "format result too large"); 1428*8e3e3a7aSWarner Losh totalsize += size; 1429*8e3e3a7aSWarner Losh switch (opt) { 1430*8e3e3a7aSWarner Losh case Kstring: /* strings with length count */ 1431*8e3e3a7aSWarner Losh case Kzstr: /* zero-terminated string */ 1432*8e3e3a7aSWarner Losh luaL_argerror(L, 1, "variable-length format"); 1433*8e3e3a7aSWarner Losh /* call never return, but to avoid warnings: *//* FALLTHROUGH */ 1434*8e3e3a7aSWarner Losh default: break; 1435*8e3e3a7aSWarner Losh } 1436*8e3e3a7aSWarner Losh } 1437*8e3e3a7aSWarner Losh lua_pushinteger(L, (lua_Integer)totalsize); 1438*8e3e3a7aSWarner Losh return 1; 1439*8e3e3a7aSWarner Losh } 1440*8e3e3a7aSWarner Losh 1441*8e3e3a7aSWarner Losh 1442*8e3e3a7aSWarner Losh /* 1443*8e3e3a7aSWarner Losh ** Unpack an integer with 'size' bytes and 'islittle' endianness. 1444*8e3e3a7aSWarner Losh ** If size is smaller than the size of a Lua integer and integer 1445*8e3e3a7aSWarner Losh ** is signed, must do sign extension (propagating the sign to the 1446*8e3e3a7aSWarner Losh ** higher bits); if size is larger than the size of a Lua integer, 1447*8e3e3a7aSWarner Losh ** it must check the unread bytes to see whether they do not cause an 1448*8e3e3a7aSWarner Losh ** overflow. 1449*8e3e3a7aSWarner Losh */ 1450*8e3e3a7aSWarner Losh static lua_Integer unpackint (lua_State *L, const char *str, 1451*8e3e3a7aSWarner Losh int islittle, int size, int issigned) { 1452*8e3e3a7aSWarner Losh lua_Unsigned res = 0; 1453*8e3e3a7aSWarner Losh int i; 1454*8e3e3a7aSWarner Losh int limit = (size <= SZINT) ? size : SZINT; 1455*8e3e3a7aSWarner Losh for (i = limit - 1; i >= 0; i--) { 1456*8e3e3a7aSWarner Losh res <<= NB; 1457*8e3e3a7aSWarner Losh res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i]; 1458*8e3e3a7aSWarner Losh } 1459*8e3e3a7aSWarner Losh if (size < SZINT) { /* real size smaller than lua_Integer? */ 1460*8e3e3a7aSWarner Losh if (issigned) { /* needs sign extension? */ 1461*8e3e3a7aSWarner Losh lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1); 1462*8e3e3a7aSWarner Losh res = ((res ^ mask) - mask); /* do sign extension */ 1463*8e3e3a7aSWarner Losh } 1464*8e3e3a7aSWarner Losh } 1465*8e3e3a7aSWarner Losh else if (size > SZINT) { /* must check unread bytes */ 1466*8e3e3a7aSWarner Losh int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : MC; 1467*8e3e3a7aSWarner Losh for (i = limit; i < size; i++) { 1468*8e3e3a7aSWarner Losh if ((unsigned char)str[islittle ? i : size - 1 - i] != mask) 1469*8e3e3a7aSWarner Losh luaL_error(L, "%d-byte integer does not fit into Lua Integer", size); 1470*8e3e3a7aSWarner Losh } 1471*8e3e3a7aSWarner Losh } 1472*8e3e3a7aSWarner Losh return (lua_Integer)res; 1473*8e3e3a7aSWarner Losh } 1474*8e3e3a7aSWarner Losh 1475*8e3e3a7aSWarner Losh 1476*8e3e3a7aSWarner Losh static int str_unpack (lua_State *L) { 1477*8e3e3a7aSWarner Losh Header h; 1478*8e3e3a7aSWarner Losh const char *fmt = luaL_checkstring(L, 1); 1479*8e3e3a7aSWarner Losh size_t ld; 1480*8e3e3a7aSWarner Losh const char *data = luaL_checklstring(L, 2, &ld); 1481*8e3e3a7aSWarner Losh size_t pos = (size_t)posrelat(luaL_optinteger(L, 3, 1), ld) - 1; 1482*8e3e3a7aSWarner Losh int n = 0; /* number of results */ 1483*8e3e3a7aSWarner Losh luaL_argcheck(L, pos <= ld, 3, "initial position out of string"); 1484*8e3e3a7aSWarner Losh initheader(L, &h); 1485*8e3e3a7aSWarner Losh while (*fmt != '\0') { 1486*8e3e3a7aSWarner Losh int size, ntoalign; 1487*8e3e3a7aSWarner Losh KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign); 1488*8e3e3a7aSWarner Losh if ((size_t)ntoalign + size > ~pos || pos + ntoalign + size > ld) 1489*8e3e3a7aSWarner Losh luaL_argerror(L, 2, "data string too short"); 1490*8e3e3a7aSWarner Losh pos += ntoalign; /* skip alignment */ 1491*8e3e3a7aSWarner Losh /* stack space for item + next position */ 1492*8e3e3a7aSWarner Losh luaL_checkstack(L, 2, "too many results"); 1493*8e3e3a7aSWarner Losh n++; 1494*8e3e3a7aSWarner Losh switch (opt) { 1495*8e3e3a7aSWarner Losh case Kint: 1496*8e3e3a7aSWarner Losh case Kuint: { 1497*8e3e3a7aSWarner Losh lua_Integer res = unpackint(L, data + pos, h.islittle, size, 1498*8e3e3a7aSWarner Losh (opt == Kint)); 1499*8e3e3a7aSWarner Losh lua_pushinteger(L, res); 1500*8e3e3a7aSWarner Losh break; 1501*8e3e3a7aSWarner Losh } 1502*8e3e3a7aSWarner Losh case Kfloat: { 1503*8e3e3a7aSWarner Losh volatile Ftypes u; 1504*8e3e3a7aSWarner Losh lua_Number num; 1505*8e3e3a7aSWarner Losh copywithendian(u.buff, data + pos, size, h.islittle); 1506*8e3e3a7aSWarner Losh if (size == sizeof(u.f)) num = (lua_Number)u.f; 1507*8e3e3a7aSWarner Losh else if (size == sizeof(u.d)) num = (lua_Number)u.d; 1508*8e3e3a7aSWarner Losh else num = u.n; 1509*8e3e3a7aSWarner Losh lua_pushnumber(L, num); 1510*8e3e3a7aSWarner Losh break; 1511*8e3e3a7aSWarner Losh } 1512*8e3e3a7aSWarner Losh case Kchar: { 1513*8e3e3a7aSWarner Losh lua_pushlstring(L, data + pos, size); 1514*8e3e3a7aSWarner Losh break; 1515*8e3e3a7aSWarner Losh } 1516*8e3e3a7aSWarner Losh case Kstring: { 1517*8e3e3a7aSWarner Losh size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0); 1518*8e3e3a7aSWarner Losh luaL_argcheck(L, pos + len + size <= ld, 2, "data string too short"); 1519*8e3e3a7aSWarner Losh lua_pushlstring(L, data + pos + size, len); 1520*8e3e3a7aSWarner Losh pos += len; /* skip string */ 1521*8e3e3a7aSWarner Losh break; 1522*8e3e3a7aSWarner Losh } 1523*8e3e3a7aSWarner Losh case Kzstr: { 1524*8e3e3a7aSWarner Losh size_t len = (int)strlen(data + pos); 1525*8e3e3a7aSWarner Losh lua_pushlstring(L, data + pos, len); 1526*8e3e3a7aSWarner Losh pos += len + 1; /* skip string plus final '\0' */ 1527*8e3e3a7aSWarner Losh break; 1528*8e3e3a7aSWarner Losh } 1529*8e3e3a7aSWarner Losh case Kpaddalign: case Kpadding: case Knop: 1530*8e3e3a7aSWarner Losh n--; /* undo increment */ 1531*8e3e3a7aSWarner Losh break; 1532*8e3e3a7aSWarner Losh } 1533*8e3e3a7aSWarner Losh pos += size; 1534*8e3e3a7aSWarner Losh } 1535*8e3e3a7aSWarner Losh lua_pushinteger(L, pos + 1); /* next position */ 1536*8e3e3a7aSWarner Losh return n + 1; 1537*8e3e3a7aSWarner Losh } 1538*8e3e3a7aSWarner Losh 1539*8e3e3a7aSWarner Losh /* }====================================================== */ 1540*8e3e3a7aSWarner Losh 1541*8e3e3a7aSWarner Losh 1542*8e3e3a7aSWarner Losh static const luaL_Reg strlib[] = { 1543*8e3e3a7aSWarner Losh {"byte", str_byte}, 1544*8e3e3a7aSWarner Losh {"char", str_char}, 1545*8e3e3a7aSWarner Losh {"dump", str_dump}, 1546*8e3e3a7aSWarner Losh {"find", str_find}, 1547*8e3e3a7aSWarner Losh {"format", str_format}, 1548*8e3e3a7aSWarner Losh {"gmatch", gmatch}, 1549*8e3e3a7aSWarner Losh {"gsub", str_gsub}, 1550*8e3e3a7aSWarner Losh {"len", str_len}, 1551*8e3e3a7aSWarner Losh {"lower", str_lower}, 1552*8e3e3a7aSWarner Losh {"match", str_match}, 1553*8e3e3a7aSWarner Losh {"rep", str_rep}, 1554*8e3e3a7aSWarner Losh {"reverse", str_reverse}, 1555*8e3e3a7aSWarner Losh {"sub", str_sub}, 1556*8e3e3a7aSWarner Losh {"upper", str_upper}, 1557*8e3e3a7aSWarner Losh {"pack", str_pack}, 1558*8e3e3a7aSWarner Losh {"packsize", str_packsize}, 1559*8e3e3a7aSWarner Losh {"unpack", str_unpack}, 1560*8e3e3a7aSWarner Losh {NULL, NULL} 1561*8e3e3a7aSWarner Losh }; 1562*8e3e3a7aSWarner Losh 1563*8e3e3a7aSWarner Losh 1564*8e3e3a7aSWarner Losh static void createmetatable (lua_State *L) { 1565*8e3e3a7aSWarner Losh lua_createtable(L, 0, 1); /* table to be metatable for strings */ 1566*8e3e3a7aSWarner Losh lua_pushliteral(L, ""); /* dummy string */ 1567*8e3e3a7aSWarner Losh lua_pushvalue(L, -2); /* copy table */ 1568*8e3e3a7aSWarner Losh lua_setmetatable(L, -2); /* set table as metatable for strings */ 1569*8e3e3a7aSWarner Losh lua_pop(L, 1); /* pop dummy string */ 1570*8e3e3a7aSWarner Losh lua_pushvalue(L, -2); /* get string library */ 1571*8e3e3a7aSWarner Losh lua_setfield(L, -2, "__index"); /* metatable.__index = string */ 1572*8e3e3a7aSWarner Losh lua_pop(L, 1); /* pop metatable */ 1573*8e3e3a7aSWarner Losh } 1574*8e3e3a7aSWarner Losh 1575*8e3e3a7aSWarner Losh 1576*8e3e3a7aSWarner Losh /* 1577*8e3e3a7aSWarner Losh ** Open string library 1578*8e3e3a7aSWarner Losh */ 1579*8e3e3a7aSWarner Losh LUAMOD_API int luaopen_string (lua_State *L) { 1580*8e3e3a7aSWarner Losh luaL_newlib(L, strlib); 1581*8e3e3a7aSWarner Losh createmetatable(L); 1582*8e3e3a7aSWarner Losh return 1; 1583*8e3e3a7aSWarner Losh } 1584*8e3e3a7aSWarner Losh 1585