157698Sbostic /*- 257698Sbostic * Copyright (c) 1992 The Regents of the University of California. 357698Sbostic * All rights reserved. 457698Sbostic * 557698Sbostic * This code is derived from software contributed to Berkeley by 657698Sbostic * Rodney Ruddock of the University of Guelph. 757698Sbostic * 857698Sbostic * %sccs.include.redist.c% 957698Sbostic */ 1057698Sbostic 1157698Sbostic #ifndef lint 12*59475Sbostic static char sccsid[] = "@(#)re.c 5.4 (Berkeley) 04/28/93"; 1357698Sbostic #endif /* not lint */ 1457698Sbostic 1557710Sbostic #include <sys/types.h> 1657710Sbostic 1757710Sbostic #include <regex.h> 1857710Sbostic #include <setjmp.h> 1957710Sbostic #include <stdio.h> 2057710Sbostic #include <stdlib.h> 2157710Sbostic #include <string.h> 2257710Sbostic 2358315Sbostic #ifdef DBI 2458315Sbostic #include <db.h> 2558315Sbostic #endif 2658315Sbostic 2757698Sbostic #include "ed.h" 2857710Sbostic #include "extern.h" 2957698Sbostic 3057698Sbostic /* 3157698Sbostic * This finds the n-th occurrence of an RE in a line. If '^' was at the start 3257710Sbostic * of the RE then look once (in case n=1). There is no standard RE interface 3357710Sbostic * to do this. Returns 0 for success. NOTE: the #ifdef REG_STARTEND is if 3457710Sbostic * the regex package has the BSD extensions to it. 3557698Sbostic */ 3657698Sbostic int 3757698Sbostic #ifdef REG_STARTEND 3857698Sbostic regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, len, pass) 3957698Sbostic #else 4057698Sbostic regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, offset, pass) 4157698Sbostic #endif 4257710Sbostic regex_t *reprecomp; 4357710Sbostic char *strg; 4457710Sbostic size_t num_subexp; 4557710Sbostic regmatch_t reprematch[]; 4657710Sbostic int flags, n; 4757698Sbostic #ifdef REG_STARTEND 4857710Sbostic size_t len; 4957698Sbostic #else 5057710Sbostic size_t *offset; 5157698Sbostic #endif 5257710Sbostic int pass; /* if pass == 0 .rm_so user set, else set default */ 5357698Sbostic { 54*59475Sbostic int l_cnt, l_flag=0; 5557698Sbostic #ifndef REG_STARTEND 5657710Sbostic char *l_offset = strg; 5757698Sbostic #endif 5857698Sbostic 5957710Sbostic if (n <= 0) 6057710Sbostic return (REG_NOMATCH); 6157698Sbostic #ifdef REG_STARTEND 6257710Sbostic flags = (flags | REG_STARTEND); 6357710Sbostic if (pass) 6457710Sbostic reprematch[0].rm_so = 0; 6557710Sbostic reprematch[0].rm_eo = len; 66*59475Sbostic if (!reprematch[0].rm_so) 67*59475Sbostic l_flag = 1; 6857698Sbostic #else 6957710Sbostic strg = &strg[offset]; 70*59475Sbostic if (!offset) 71*59475Sbostic l_flag = 1; 7257698Sbostic #endif 7357710Sbostic for (l_cnt = 0;;) { 7457710Sbostic if (regexec(reprecomp, 7557710Sbostic strg, num_subexp, reprematch, flags) == 0) 7657710Sbostic l_cnt++; 7757710Sbostic else 7857710Sbostic return (REG_NOMATCH); 79*59475Sbostic /* to skip over null RE matchings */ 80*59475Sbostic if (l_flag) 81*59475Sbostic l_flag = 0; 82*59475Sbostic else 83*59475Sbostic if (reprematch[0].rm_so == reprematch[0].rm_eo) { 84*59475Sbostic l_cnt--; 85*59475Sbostic if ((++reprematch[0].rm_eo) > len) 86*59475Sbostic return (REG_NOMATCH); 87*59475Sbostic } 8857710Sbostic if (l_cnt >= n) 8957710Sbostic break; 9057698Sbostic #ifdef REG_STARTEND 9157710Sbostic reprematch[0].rm_so = reprematch[0].rm_eo; 9257710Sbostic reprematch[0].rm_eo = len; 9357698Sbostic #else 9457710Sbostic strg = &strg[reprematch[0].rm_eo]; 9557698Sbostic #endif 9657710Sbostic /* if a "^" started the current RE we only loop once */ 9757710Sbostic if (RE_sol) 9857710Sbostic return (REG_NOMATCH); 9957710Sbostic } 10057698Sbostic #ifndef REG_STARTEND 10157710Sbostic *offset = (size_t) (strg - l_offset); 10257698Sbostic #endif 10357710Sbostic return (0); /* success */ 10457710Sbostic } 10557698Sbostic 10657698Sbostic /* 10757698Sbostic * Replace in the line specified at the found locations with the 10857698Sbostic * specified replacement. There is no standard RE interface to do 10957698Sbostic * this. 11057698Sbostic */ 11157710Sbostic char * 11257698Sbostic #ifdef REG_STARTEND 11357710Sbostic re_replace(line, num_subexp, repmatch, replacer) 11457698Sbostic #else 11557710Sbostic re_replace(line, num_subexp, repmatch, replacer, offset) 11657698Sbostic #endif 11757710Sbostic char *line; 11857710Sbostic size_t num_subexp; 11957710Sbostic regmatch_t repmatch[]; 12057710Sbostic char *replacer; 12157698Sbostic #ifndef REG_STARTEND 12257710Sbostic size_t offset; 12357698Sbostic #endif 12457698Sbostic { 12557710Sbostic static char *l_prev_r = NULL; 12657710Sbostic static int l_prev_r_flag = 0; 12757710Sbostic regoff_t l_len_before, l_len_whole, l_slen[RE_SEC]; 12857710Sbostic int l_cnt, l_len_new = 0, l_new_rm_eo = 0; 12957710Sbostic char *l_string, *l_head; 13057698Sbostic 13157710Sbostic if (l_prev_r_flag == 0) { 13257710Sbostic l_prev_r_flag = 1; 13357710Sbostic l_prev_r = NULL; 13457710Sbostic } 13557710Sbostic l_head = replacer; 13657710Sbostic /* Length of what stays the same before. */ 13757710Sbostic l_len_before = (repmatch[0].rm_so); 13857710Sbostic l_len_whole = strlen(line); 13957710Sbostic if (num_subexp > RE_SEC - 1) 14057710Sbostic num_subexp = RE_SEC - 1; 14157710Sbostic for (l_cnt = 0; l_cnt <= num_subexp; l_cnt++) 14257710Sbostic l_slen[l_cnt] = 14357710Sbostic (repmatch[l_cnt].rm_eo) - (repmatch[l_cnt].rm_so); 14457698Sbostic 14557710Sbostic /* 14657710Sbostic * l_slen[0] == len of what is to be replaced. 14757710Sbostic * l_slen[1-9] == len of each backref. 14857710Sbostic */ 14957710Sbostic if ((*replacer == '%') && (replacer[1] == 1)) { 15057710Sbostic l_string = calloc(l_len_whole - l_slen[0] + 15157710Sbostic (strlen(l_prev_r)) + 2, sizeof(char)); 15257710Sbostic if (l_string == NULL) { 15357710Sbostic /* *errnum = -1; */ 15457710Sbostic strcpy(help_msg, "out of memory error"); 15557710Sbostic return (NULL); 15657710Sbostic } 15757698Sbostic #ifdef REG_STARTEND 15857710Sbostic bcopy(line, l_string, (int) l_len_before); 15957698Sbostic #else 16057710Sbostic bcopy(line, l_string, (int) l_len_before + offset); 16157698Sbostic #endif 16257698Sbostic #ifdef REG_STARTEND 16357710Sbostic l_string[l_len_before] = '\0'; 16457698Sbostic #else 16557710Sbostic l_string[l_len_before + offset] = '\0'; 16657698Sbostic #endif 16757710Sbostic strcat(l_string, l_prev_r); 16857698Sbostic #ifdef REG_STARTEND 16957710Sbostic strcat(l_string, &line[repmatch[0].rm_eo]); 17057698Sbostic #else 17157710Sbostic strcat(l_string, &line[repmatch[0].rm_eo + offset]); 17257698Sbostic #endif 17357710Sbostic return (l_string); 17457710Sbostic } 17557698Sbostic 17657710Sbostic /* Figure out length of new line first. */ 17757710Sbostic while (*replacer != '\0') { 17857710Sbostic /* Add in the length of the RE match. */ 17957710Sbostic if (*replacer == '&') 18057710Sbostic l_len_new = l_len_new + l_slen[0]; 18157710Sbostic /* Add in the length of a backref. */ 18257710Sbostic else if (*replacer == '\\') { 18357710Sbostic replacer++; 18457710Sbostic if ((*replacer > '0') && 18557710Sbostic (*replacer < ('9' + 1)) && 18657710Sbostic (repmatch[*replacer - '0'].rm_so > -1)) 18757710Sbostic /* -1 - -1 = 0 */ 18857710Sbostic l_len_new = l_len_new + l_slen[*replacer - '0']; 18957710Sbostic else 19057710Sbostic l_len_new++; 19157710Sbostic } else 19257710Sbostic l_len_new++; 19357710Sbostic replacer++; 19457710Sbostic } 19557698Sbostic 19657710Sbostic /* Create the line of an appropriate length. */ 19757710Sbostic l_string = 19857710Sbostic calloc(l_len_whole - l_slen[0] + l_len_new + 2, sizeof(char)); 19957710Sbostic if (l_string == NULL) { 20057710Sbostic strcpy(help_msg, "out of memory error"); 20157710Sbostic return (NULL); 20257710Sbostic } 20357710Sbostic if (l_prev_r != NULL) 20457710Sbostic free(l_prev_r); 20557710Sbostic l_prev_r = calloc(l_len_new + 2, sizeof(char)); 20657710Sbostic if (l_prev_r == NULL) { 20757710Sbostic strcpy(help_msg, "out of memory error"); 20857710Sbostic return (NULL); 20957710Sbostic } 21057710Sbostic /* Copy over what doesn't change before the chars to be replaced. */ 21157698Sbostic #ifdef REG_STARTEND 21257710Sbostic bcopy(line, l_string, (int) l_len_before); 21357698Sbostic #else 21457710Sbostic bcopy(line, l_string, l_len_before + offset); 21557698Sbostic #endif 21657698Sbostic #ifdef REG_STARTEND 21757710Sbostic l_string[l_len_before] = '\0'; 21857698Sbostic #else 21957710Sbostic l_string[l_len_before + offset] = '\0'; 22057698Sbostic #endif 22157710Sbostic l_prev_r[0] = '\0'; 22257698Sbostic 22357710Sbostic /* Make the replacement. */ 22457710Sbostic replacer = l_head; 22557710Sbostic while (*replacer != '\0') { 22657710Sbostic /* Put what matched the RE into the replacement. */ 22757710Sbostic if (*replacer == '&') { 22857698Sbostic #ifdef REG_STARTEND 22957710Sbostic strncat(l_string, 23057710Sbostic &line[repmatch[0].rm_so], (int)l_slen[0]); 23157710Sbostic strncat(l_prev_r, 23257710Sbostic &line[repmatch[0].rm_so], (int) l_slen[0]); 23357698Sbostic #else 23457710Sbostic strncat(l_string, 23557710Sbostic &line[repmatch[0].rm_so + offset], (int) l_slen[0]); 23657710Sbostic strncat(l_prev_r, 23757710Sbostic &line[repmatch[0].rm_so + offset], (int) l_slen[0]); 23857698Sbostic #endif 23957710Sbostic } else if (*replacer == '\\') { 24057710Sbostic /* Likely a backref to be included. */ 24157710Sbostic replacer++; 24257710Sbostic if ((*replacer > '0') && (*replacer < ('9' + 1)) && 24357710Sbostic (repmatch[*replacer - '0'].rm_so > -1)) { 24457698Sbostic #ifdef REG_STARTEND 24557710Sbostic strncat(l_string, 24657710Sbostic &line[repmatch[*replacer - '0'].rm_so], 24757710Sbostic (int) l_slen[*replacer - '0']); 24857710Sbostic strncat(l_prev_r, 24957710Sbostic &line[repmatch[*replacer - '0'].rm_so], 25057710Sbostic (int) l_slen[*replacer - '0']); 25157698Sbostic #else 25257710Sbostic strncat(l_string, 25357710Sbostic &line[repmatch[*replacer - '0'].rm_so + 25457710Sbostic offset], (int) l_slen[*replacer - '0']); 25557710Sbostic strncat(l_prev_r, 25657710Sbostic &line[repmatch[*replacer - '0'].rm_so + 25757710Sbostic offset], (int) l_slen[*replacer - '0']); 25857698Sbostic #endif 25957710Sbostic } 26057710Sbostic /* Put the replacement in. */ 26157710Sbostic else { 26257710Sbostic strncat(l_string, replacer, 1); 26357710Sbostic strncat(l_prev_r, replacer, 1); 26457710Sbostic } 26557710Sbostic } 26657710Sbostic /* Put the replacement in. */ 26757710Sbostic else { 26857710Sbostic strncat(l_string, replacer, 1); 26957710Sbostic strncat(l_prev_r, replacer, 1); 27057710Sbostic } 27157710Sbostic replacer++; 27257710Sbostic } 27357698Sbostic 27457710Sbostic l_new_rm_eo = strlen(l_string); 27557698Sbostic 27657710Sbostic /* Copy over what was after the chars to be replaced to the new line. */ 27757698Sbostic #ifdef REG_STARTEND 27857710Sbostic strcat(l_string, &line[repmatch[0].rm_eo]); 27957698Sbostic #else 28057710Sbostic strcat(l_string, &line[repmatch[0].rm_eo + offset]); 28157698Sbostic #endif 28257698Sbostic 28357710Sbostic repmatch[0].rm_eo = l_new_rm_eo; /* Update rm_eo. */ 28457698Sbostic #ifndef REG_STARTEND 28557710Sbostic offset += l_new_rm_eo; /* Update offset. */ 28657698Sbostic #endif 28757710Sbostic return (l_string); /* Return the new line. */ 28857710Sbostic } 289