00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #if defined(LIBC_SCCS) && !defined(lint)
00041 static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
00042 #endif
00043
00044 #include <sys/types.h>
00045 #include <stdio.h>
00046 #include <string.h>
00047 #include <ctype.h>
00048 #include <limits.h>
00049 #include <stdlib.h>
00050 #include "regex.h"
00051
00052 #include "utils.h"
00053 #include "regex2.h"
00054
00055 #include "cclass.h"
00056 #include "cname.h"
00057
00058
00059
00060
00061
00062 struct parse {
00063 char *next;
00064 char *end;
00065 int error;
00066 sop *strip;
00067 sopno ssize;
00068 sopno slen;
00069 int ncsalloc;
00070 struct re_guts *g;
00071 # define NPAREN 10
00072 sopno pbegin[NPAREN];
00073 sopno pend[NPAREN];
00074 };
00075
00076
00077 #ifdef __cplusplus
00078 extern "C" {
00079 #endif
00080
00081
00082 static void p_ere __P((struct parse *p, int stop));
00083 static void p_ere_exp __P((struct parse *p));
00084 static void p_str __P((struct parse *p));
00085 static void p_bre __P((struct parse *p, int end1, int end2));
00086 static int p_simp_re __P((struct parse *p, int starordinary));
00087 static int p_count __P((struct parse *p));
00088 static void p_bracket __P((struct parse *p));
00089 static void p_b_term __P((struct parse *p, cset *cs));
00090 static void p_b_cclass __P((struct parse *p, cset *cs));
00091 static void p_b_eclass __P((struct parse *p, cset *cs));
00092 static char p_b_symbol __P((struct parse *p));
00093 static char p_b_coll_elem __P((struct parse *p, int endc));
00094 static char othercase __P((int ch));
00095 static void bothcases __P((struct parse *p, int ch));
00096 static void ordinary __P((struct parse *p, int ch));
00097 static void nonnewline __P((struct parse *p));
00098 static void repeat __P((struct parse *p, sopno start, int from, int to));
00099 static int seterr __P((struct parse *p, int e));
00100 static cset *allocset __P((struct parse *p));
00101 static void freeset __P((struct parse *p, cset *cs));
00102 static int freezeset __P((struct parse *p, cset *cs));
00103 static int firstch __P((struct parse *p, cset *cs));
00104 static int nch __P((struct parse *p, cset *cs));
00105 static void mcadd __P((struct parse *p, cset *cs, char *cp));
00106 static void mcsub __P((cset *cs, char *cp));
00107 static int mcin __P((cset *cs, char *cp));
00108 static char *mcfind __P((cset *cs, char *cp));
00109 static void mcinvert __P((struct parse *p, cset *cs));
00110 static void mccase __P((struct parse *p, cset *cs));
00111 static int isinsets __P((struct re_guts *g, int c));
00112 static int samesets __P((struct re_guts *g, int c1, int c2));
00113 static void categorize __P((struct parse *p, struct re_guts *g));
00114 static sopno dupl __P((struct parse *p, sopno start, sopno finish));
00115 static void doemit __P((struct parse *p, sop op, size_t opnd));
00116 static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
00117 static void dofwd __P((struct parse *p, sopno pos, sop value));
00118 static void enlarge __P((struct parse *p, sopno size));
00119 static void stripsnug __P((struct parse *p, struct re_guts *g));
00120 static void findmust __P((struct parse *p, struct re_guts *g));
00121 static sopno pluscount __P((struct parse *p, struct re_guts *g));
00122
00123 #ifdef __cplusplus
00124 }
00125 #endif
00126
00127
00128 static char nuls[10];
00129
00130
00131
00132
00133
00134 #define PEEK() (*p->next)
00135 #define PEEK2() (*(p->next+1))
00136 #define MORE() (p->next < p->end)
00137 #define MORE2() (p->next+1 < p->end)
00138 #define SEE(c) (MORE() && PEEK() == (c))
00139 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
00140 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
00141 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
00142 #define NEXT() (p->next++)
00143 #define NEXT2() (p->next += 2)
00144 #define NEXTn(n) (p->next += (n))
00145 #define GETNEXT() (*p->next++)
00146 #define SETERROR(e) seterr(p, (e))
00147 #define REQUIRE(co, e) ((co) || SETERROR(e))
00148 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
00149 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
00150 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
00151 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
00152 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
00153 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
00154 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
00155 #define HERE() (p->slen)
00156 #define THERE() (p->slen - 1)
00157 #define THERETHERE() (p->slen - 2)
00158 #define DROP(n) (p->slen -= (n))
00159
00160 #ifndef NDEBUG
00161 static int never = 0;
00162 #else
00163 #define never 0
00164 #endif
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178 int __stdcall
00179 regcomp(preg, pattern, cflags)
00180 regex_t *preg;
00181 const char *pattern;
00182 int cflags;
00183 {
00184 struct parse pa;
00185 register struct re_guts *g;
00186 register struct parse *p = &pa;
00187 register int i;
00188 register size_t len;
00189 #ifdef REDEBUG
00190 # define GOODFLAGS(f) (f)
00191 #else
00192 # define GOODFLAGS(f) ((f)&~REG_DUMP)
00193 #endif
00194
00195 cflags = GOODFLAGS(cflags);
00196 if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
00197 return(REG_INVARG);
00198
00199 if (cflags®_PEND) {
00200 if (preg->re_endp < pattern)
00201 return(REG_INVARG);
00202 len = preg->re_endp - pattern;
00203 } else
00204 len = strlen((char *)pattern);
00205
00206
00207 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
00208 (NC-1)*sizeof(cat_t));
00209 if (g == NULL)
00210 return(REG_ESPACE);
00211 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;
00212 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
00213 p->slen = 0;
00214 if (p->strip == NULL) {
00215 free((char *)g);
00216 return(REG_ESPACE);
00217 }
00218
00219
00220 p->g = g;
00221 p->next = (char *)pattern;
00222 p->end = p->next + len;
00223 p->error = 0;
00224 p->ncsalloc = 0;
00225 for (i = 0; i < NPAREN; i++) {
00226 p->pbegin[i] = 0;
00227 p->pend[i] = 0;
00228 }
00229 g->csetsize = NC;
00230 g->sets = NULL;
00231 g->setbits = NULL;
00232 g->ncsets = 0;
00233 g->cflags = cflags;
00234 g->iflags = 0;
00235 g->nbol = 0;
00236 g->neol = 0;
00237 g->must = NULL;
00238 g->mlen = 0;
00239 g->nsub = 0;
00240 g->ncategories = 1;
00241 g->categories = &g->catspace[-(CHAR_MIN)];
00242 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
00243 g->backrefs = 0;
00244
00245
00246 EMIT(OEND, 0);
00247 g->firststate = THERE();
00248 if (cflags®_EXTENDED)
00249 p_ere(p, OUT);
00250 else if (cflags®_NOSPEC)
00251 p_str(p);
00252 else
00253 p_bre(p, OUT, OUT);
00254 EMIT(OEND, 0);
00255 g->laststate = THERE();
00256
00257
00258 categorize(p, g);
00259 stripsnug(p, g);
00260 findmust(p, g);
00261 g->nplus = pluscount(p, g);
00262 g->magic = MAGIC2;
00263 preg->re_nsub = g->nsub;
00264 preg->re_g = g;
00265 preg->re_magic = MAGIC1;
00266 #ifndef REDEBUG
00267
00268 if (g->iflags&BAD)
00269 SETERROR(REG_ASSERT);
00270 #endif
00271
00272
00273 if (p->error != 0)
00274 regfree(preg);
00275 return(p->error);
00276 }
00277
00278
00279
00280
00281
00282 static void
00283 p_ere(p, stop)
00284 register struct parse *p;
00285 int stop;
00286 {
00287 register char c;
00288 register sopno prevback;
00289 register sopno prevfwd;
00290 register sopno conc;
00291 register int first = 1;
00292
00293 for (;;) {
00294
00295 conc = HERE();
00296 while (MORE() && (c = PEEK()) != '|' && c != stop)
00297 p_ere_exp(p);
00298 REQUIRE(HERE() != conc, REG_EMPTY);
00299
00300 if (!EAT('|'))
00301 break;
00302
00303 if (first) {
00304 INSERT(OCH_, conc);
00305 prevfwd = conc;
00306 prevback = conc;
00307 first = 0;
00308 }
00309 ASTERN(OOR1, prevback);
00310 prevback = THERE();
00311 AHEAD(prevfwd);
00312 prevfwd = HERE();
00313 EMIT(OOR2, 0);
00314 }
00315
00316 if (!first) {
00317 AHEAD(prevfwd);
00318 ASTERN(O_CH, prevback);
00319 }
00320
00321 assert(!MORE() || SEE(stop));
00322 }
00323
00324
00325
00326
00327
00328 static void
00329 p_ere_exp(p)
00330 register struct parse *p;
00331 {
00332 register char c;
00333 register sopno pos;
00334 register int count;
00335 register int count2;
00336 register sopno subno;
00337 int wascaret = 0;
00338
00339 assert(MORE());
00340 c = GETNEXT();
00341
00342 pos = HERE();
00343 switch (c) {
00344 case '(':
00345 REQUIRE(MORE(), REG_EPAREN);
00346 p->g->nsub++;
00347 subno = p->g->nsub;
00348 if (subno < NPAREN)
00349 p->pbegin[subno] = HERE();
00350 EMIT(OLPAREN, subno);
00351 if (!SEE(')'))
00352 p_ere(p, ')');
00353 if (subno < NPAREN) {
00354 p->pend[subno] = HERE();
00355 assert(p->pend[subno] != 0);
00356 }
00357 EMIT(ORPAREN, subno);
00358 MUSTEAT(')', REG_EPAREN);
00359 break;
00360 #ifndef POSIX_MISTAKE
00361 case ')':
00362
00363
00364
00365
00366
00367
00368
00369 SETERROR(REG_EPAREN);
00370 break;
00371 #endif
00372 case '^':
00373 EMIT(OBOL, 0);
00374 p->g->iflags |= USEBOL;
00375 p->g->nbol++;
00376 wascaret = 1;
00377 break;
00378 case '$':
00379 EMIT(OEOL, 0);
00380 p->g->iflags |= USEEOL;
00381 p->g->neol++;
00382 break;
00383 case '|':
00384 SETERROR(REG_EMPTY);
00385 break;
00386 case '*':
00387 case '+':
00388 case '?':
00389 SETERROR(REG_BADRPT);
00390 break;
00391 case '.':
00392 if (p->g->cflags®_NEWLINE)
00393 nonnewline(p);
00394 else
00395 EMIT(OANY, 0);
00396 break;
00397 case '[':
00398 p_bracket(p);
00399 break;
00400 case '\\':
00401 REQUIRE(MORE(), REG_EESCAPE);
00402 c = GETNEXT();
00403 ordinary(p, c);
00404 break;
00405 case '{':
00406 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
00407
00408 default:
00409 ordinary(p, c);
00410 break;
00411 }
00412
00413 if (!MORE())
00414 return;
00415 c = PEEK();
00416
00417 if (!( c == '*' || c == '+' || c == '?' ||
00418 (c == '{' && MORE2() && isdigit(PEEK2())) ))
00419 return;
00420 NEXT();
00421
00422 REQUIRE(!wascaret, REG_BADRPT);
00423 switch (c) {
00424 case '*':
00425
00426 INSERT(OPLUS_, pos);
00427 ASTERN(O_PLUS, pos);
00428 INSERT(OQUEST_, pos);
00429 ASTERN(O_QUEST, pos);
00430 break;
00431 case '+':
00432 INSERT(OPLUS_, pos);
00433 ASTERN(O_PLUS, pos);
00434 break;
00435 case '?':
00436
00437 INSERT(OCH_, pos);
00438 ASTERN(OOR1, pos);
00439 AHEAD(pos);
00440 EMIT(OOR2, 0);
00441 AHEAD(THERE());
00442 ASTERN(O_CH, THERETHERE());
00443 break;
00444 case '{':
00445 count = p_count(p);
00446 if (EAT(',')) {
00447 if (isdigit(PEEK())) {
00448 count2 = p_count(p);
00449 REQUIRE(count <= count2, REG_BADBR);
00450 } else
00451 count2 = INFINITY;
00452 } else
00453 count2 = count;
00454 repeat(p, pos, count, count2);
00455 if (!EAT('}')) {
00456 while (MORE() && PEEK() != '}')
00457 NEXT();
00458 REQUIRE(MORE(), REG_EBRACE);
00459 SETERROR(REG_BADBR);
00460 }
00461 break;
00462 }
00463
00464 if (!MORE())
00465 return;
00466 c = PEEK();
00467 if (!( c == '*' || c == '+' || c == '?' ||
00468 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
00469 return;
00470 SETERROR(REG_BADRPT);
00471 }
00472
00473
00474
00475
00476
00477 static void
00478 p_str(p)
00479 register struct parse *p;
00480 {
00481 REQUIRE(MORE(), REG_EMPTY);
00482 while (MORE())
00483 ordinary(p, GETNEXT());
00484 }
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498 static void
00499 p_bre(p, end1, end2)
00500 register struct parse *p;
00501 register int end1;
00502 register int end2;
00503 {
00504 register sopno start = HERE();
00505 register int first = 1;
00506 register int wasdollar = 0;
00507
00508 if (EAT('^')) {
00509 EMIT(OBOL, 0);
00510 p->g->iflags |= USEBOL;
00511 p->g->nbol++;
00512 }
00513 while (MORE() && !SEETWO(end1, end2)) {
00514 wasdollar = p_simp_re(p, first);
00515 first = 0;
00516 }
00517 if (wasdollar) {
00518 DROP(1);
00519 EMIT(OEOL, 0);
00520 p->g->iflags |= USEEOL;
00521 p->g->neol++;
00522 }
00523
00524 REQUIRE(HERE() != start, REG_EMPTY);
00525 }
00526
00527
00528
00529
00530
00531 static int
00532 p_simp_re(p, starordinary)
00533 register struct parse *p;
00534 int starordinary;
00535 {
00536 register int c;
00537 register int count;
00538 register int count2;
00539 register sopno pos;
00540 register int i;
00541 register sopno subno;
00542 # define BACKSL (1<<CHAR_BIT)
00543
00544 pos = HERE();
00545
00546 assert(MORE());
00547 c = GETNEXT();
00548 if (c == '\\') {
00549 REQUIRE(MORE(), REG_EESCAPE);
00550 c = BACKSL | (unsigned char)GETNEXT();
00551 }
00552 switch (c) {
00553 case '.':
00554 if (p->g->cflags®_NEWLINE)
00555 nonnewline(p);
00556 else
00557 EMIT(OANY, 0);
00558 break;
00559 case '[':
00560 p_bracket(p);
00561 break;
00562 case BACKSL|'{':
00563 SETERROR(REG_BADRPT);
00564 break;
00565 case BACKSL|'(':
00566 p->g->nsub++;
00567 subno = p->g->nsub;
00568 if (subno < NPAREN)
00569 p->pbegin[subno] = HERE();
00570 EMIT(OLPAREN, subno);
00571
00572 if (MORE() && !SEETWO('\\', ')'))
00573 p_bre(p, '\\', ')');
00574 if (subno < NPAREN) {
00575 p->pend[subno] = HERE();
00576 assert(p->pend[subno] != 0);
00577 }
00578 EMIT(ORPAREN, subno);
00579 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
00580 break;
00581 case BACKSL|')':
00582 case BACKSL|'}':
00583 SETERROR(REG_EPAREN);
00584 break;
00585 case BACKSL|'1':
00586 case BACKSL|'2':
00587 case BACKSL|'3':
00588 case BACKSL|'4':
00589 case BACKSL|'5':
00590 case BACKSL|'6':
00591 case BACKSL|'7':
00592 case BACKSL|'8':
00593 case BACKSL|'9':
00594 i = (c&~BACKSL) - '0';
00595 assert(i < NPAREN);
00596 if (p->pend[i] != 0) {
00597 assert(i <= p->g->nsub);
00598 EMIT(OBACK_, i);
00599 assert(p->pbegin[i] != 0);
00600 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
00601 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
00602 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
00603 EMIT(O_BACK, i);
00604 } else
00605 SETERROR(REG_ESUBREG);
00606 p->g->backrefs = 1;
00607 break;
00608 case '*':
00609 REQUIRE(starordinary, REG_BADRPT);
00610
00611 default:
00612 ordinary(p, c &~ BACKSL);
00613 break;
00614 }
00615
00616 if (EAT('*')) {
00617
00618 INSERT(OPLUS_, pos);
00619 ASTERN(O_PLUS, pos);
00620 INSERT(OQUEST_, pos);
00621 ASTERN(O_QUEST, pos);
00622 } else if (EATTWO('\\', '{')) {
00623 count = p_count(p);
00624 if (EAT(',')) {
00625 if (MORE() && isdigit(PEEK())) {
00626 count2 = p_count(p);
00627 REQUIRE(count <= count2, REG_BADBR);
00628 } else
00629 count2 = INFINITY;
00630 } else
00631 count2 = count;
00632 repeat(p, pos, count, count2);
00633 if (!EATTWO('\\', '}')) {
00634 while (MORE() && !SEETWO('\\', '}'))
00635 NEXT();
00636 REQUIRE(MORE(), REG_EBRACE);
00637 SETERROR(REG_BADBR);
00638 }
00639 } else if (c == (unsigned char)'$')
00640 return(1);
00641
00642 return(0);
00643 }
00644
00645
00646
00647
00648
00649 static int
00650 p_count(p)
00651 register struct parse *p;
00652 {
00653 register int count = 0;
00654 register int ndigits = 0;
00655
00656 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
00657 count = count*10 + (GETNEXT() - '0');
00658 ndigits++;
00659 }
00660
00661 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
00662 return(count);
00663 }
00664
00665
00666
00667
00668
00669
00670
00671
00672 static void
00673 p_bracket(p)
00674 register struct parse *p;
00675 {
00676 register char c;
00677 register cset *cs = allocset(p);
00678 register int invert = 0;
00679
00680
00681 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
00682 EMIT(OBOW, 0);
00683 NEXTn(6);
00684 return;
00685 }
00686 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
00687 EMIT(OEOW, 0);
00688 NEXTn(6);
00689 return;
00690 }
00691
00692 if (EAT('^'))
00693 invert++;
00694 if (EAT(']'))
00695 CHadd(cs, ']');
00696 else if (EAT('-'))
00697 CHadd(cs, '-');
00698 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
00699 p_b_term(p, cs);
00700 if (EAT('-'))
00701 CHadd(cs, '-');
00702 MUSTEAT(']', REG_EBRACK);
00703
00704 if (p->error != 0)
00705 return;
00706
00707 if (p->g->cflags®_ICASE) {
00708 register int i;
00709 register int ci;
00710
00711 for (i = p->g->csetsize - 1; i >= 0; i--)
00712 if (CHIN(cs, i) && isalpha(i)) {
00713 ci = othercase(i);
00714 if (ci != i)
00715 CHadd(cs, ci);
00716 }
00717 if (cs->multis != NULL)
00718 mccase(p, cs);
00719 }
00720 if (invert) {
00721 register int i;
00722
00723 for (i = p->g->csetsize - 1; i >= 0; i--)
00724 if (CHIN(cs, i))
00725 CHsub(cs, i);
00726 else
00727 CHadd(cs, i);
00728 if (p->g->cflags®_NEWLINE)
00729 CHsub(cs, '\n');
00730 if (cs->multis != NULL)
00731 mcinvert(p, cs);
00732 }
00733
00734 assert(cs->multis == NULL);
00735
00736 if (nch(p, cs) == 1) {
00737 ordinary(p, firstch(p, cs));
00738 freeset(p, cs);
00739 } else
00740 EMIT(OANYOF, freezeset(p, cs));
00741 }
00742
00743
00744
00745
00746
00747 static void
00748 p_b_term(p, cs)
00749 register struct parse *p;
00750 register cset *cs;
00751 {
00752 register char c;
00753 register char start, finish;
00754 register int i;
00755
00756
00757 switch ((MORE()) ? PEEK() : '\0') {
00758 case '[':
00759 c = (MORE2()) ? PEEK2() : '\0';
00760 break;
00761 case '-':
00762 SETERROR(REG_ERANGE);
00763 return;
00764 break;
00765 default:
00766 c = '\0';
00767 break;
00768 }
00769
00770 switch (c) {
00771 case ':':
00772 NEXT2();
00773 REQUIRE(MORE(), REG_EBRACK);
00774 c = PEEK();
00775 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
00776 p_b_cclass(p, cs);
00777 REQUIRE(MORE(), REG_EBRACK);
00778 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
00779 break;
00780 case '=':
00781 NEXT2();
00782 REQUIRE(MORE(), REG_EBRACK);
00783 c = PEEK();
00784 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
00785 p_b_eclass(p, cs);
00786 REQUIRE(MORE(), REG_EBRACK);
00787 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
00788 break;
00789 default:
00790
00791 start = p_b_symbol(p);
00792 if (SEE('-') && MORE2() && PEEK2() != ']') {
00793
00794 NEXT();
00795 if (EAT('-'))
00796 finish = '-';
00797 else
00798 finish = p_b_symbol(p);
00799 } else
00800 finish = start;
00801
00802 REQUIRE(start <= finish, REG_ERANGE);
00803 for (i = start; i <= finish; i++)
00804 CHadd(cs, i);
00805 break;
00806 }
00807 }
00808
00809
00810
00811
00812
00813 static void
00814 p_b_cclass(p, cs)
00815 register struct parse *p;
00816 register cset *cs;
00817 {
00818 register char *sp = p->next;
00819 register struct cclass *cp;
00820 register size_t len;
00821 register char *u;
00822 register char c;
00823
00824 while (MORE() && isalpha(PEEK()))
00825 NEXT();
00826 len = p->next - sp;
00827 for (cp = cclasses; cp->name != NULL; cp++)
00828 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
00829 break;
00830 if (cp->name == NULL) {
00831
00832 SETERROR(REG_ECTYPE);
00833 return;
00834 }
00835
00836 u = cp->chars;
00837 while ((c = *u++) != '\0')
00838 CHadd(cs, c);
00839 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
00840 MCadd(p, cs, u);
00841 }
00842
00843
00844
00845
00846
00847
00848
00849 static void
00850 p_b_eclass(p, cs)
00851 register struct parse *p;
00852 register cset *cs;
00853 {
00854 register char c;
00855
00856 c = p_b_coll_elem(p, '=');
00857 CHadd(cs, c);
00858 }
00859
00860
00861
00862
00863
00864 static char
00865 p_b_symbol(p)
00866 register struct parse *p;
00867 {
00868 register char value;
00869
00870 REQUIRE(MORE(), REG_EBRACK);
00871 if (!EATTWO('[', '.'))
00872 return(GETNEXT());
00873
00874
00875 value = p_b_coll_elem(p, '.');
00876 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
00877 return(value);
00878 }
00879
00880
00881
00882
00883
00884 static char
00885 p_b_coll_elem(p, endc)
00886 register struct parse *p;
00887 int endc;
00888 {
00889 register char *sp = p->next;
00890 register struct cname *cp;
00891 register int len;
00892 register char c;
00893
00894 while (MORE() && !SEETWO(endc, ']'))
00895 NEXT();
00896 if (!MORE()) {
00897 SETERROR(REG_EBRACK);
00898 return(0);
00899 }
00900 len = p->next - sp;
00901 for (cp = cnames; cp->name != NULL; cp++)
00902 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
00903 return(cp->code);
00904 if (len == 1)
00905 return(*sp);
00906 SETERROR(REG_ECOLLATE);
00907 return(0);
00908 }
00909
00910
00911
00912
00913
00914 static char
00915 othercase(ch)
00916 int ch;
00917 {
00918 assert(isalpha(ch));
00919 if (isupper(ch))
00920 return(tolower(ch));
00921 else if (islower(ch))
00922 return(toupper(ch));
00923 else
00924 return(ch);
00925 }
00926
00927
00928
00929
00930
00931
00932
00933 static void
00934 bothcases(p, ch)
00935 register struct parse *p;
00936 int ch;
00937 {
00938 register char *oldnext = p->next;
00939 register char *oldend = p->end;
00940 char bracket[3];
00941
00942 assert(othercase(ch) != ch);
00943 p->next = bracket;
00944 p->end = bracket+2;
00945 bracket[0] = ch;
00946 bracket[1] = ']';
00947 bracket[2] = '\0';
00948 p_bracket(p);
00949 assert(p->next == bracket+2);
00950 p->next = oldnext;
00951 p->end = oldend;
00952 }
00953
00954
00955
00956
00957
00958 static void
00959 ordinary(p, ch)
00960 register struct parse *p;
00961 register int ch;
00962 {
00963 register cat_t *cap = p->g->categories;
00964
00965 if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
00966 bothcases(p, ch);
00967 else {
00968 EMIT(OCHAR, (unsigned char)ch);
00969 if (cap[ch] == 0)
00970 cap[ch] = p->g->ncategories++;
00971 }
00972 }
00973
00974
00975
00976
00977
00978
00979
00980 static void
00981 nonnewline(p)
00982 register struct parse *p;
00983 {
00984 register char *oldnext = p->next;
00985 register char *oldend = p->end;
00986 char bracket[4];
00987
00988 p->next = bracket;
00989 p->end = bracket+3;
00990 bracket[0] = '^';
00991 bracket[1] = '\n';
00992 bracket[2] = ']';
00993 bracket[3] = '\0';
00994 p_bracket(p);
00995 assert(p->next == bracket+3);
00996 p->next = oldnext;
00997 p->end = oldend;
00998 }
00999
01000
01001
01002
01003
01004 static void
01005 repeat(p, start, from, to)
01006 register struct parse *p;
01007 sopno start;
01008 int from;
01009 int to;
01010 {
01011 register sopno finish = HERE();
01012 # define N 2
01013 # define INF 3
01014 # define REP(f, t) ((f)*8 + (t))
01015 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
01016 register sopno copy;
01017
01018 if (p->error != 0)
01019 return;
01020
01021 assert(from <= to);
01022
01023 switch (REP(MAP(from), MAP(to))) {
01024 case REP(0, 0):
01025 DROP(finish-start);
01026 break;
01027 case REP(0, 1):
01028 case REP(0, N):
01029 case REP(0, INF):
01030
01031 INSERT(OCH_, start);
01032 repeat(p, start+1, 1, to);
01033 ASTERN(OOR1, start);
01034 AHEAD(start);
01035 EMIT(OOR2, 0);
01036 AHEAD(THERE());
01037 ASTERN(O_CH, THERETHERE());
01038 break;
01039 case REP(1, 1):
01040
01041 break;
01042 case REP(1, N):
01043
01044 INSERT(OCH_, start);
01045 ASTERN(OOR1, start);
01046 AHEAD(start);
01047 EMIT(OOR2, 0);
01048 AHEAD(THERE());
01049 ASTERN(O_CH, THERETHERE());
01050 copy = dupl(p, start+1, finish+1);
01051 assert(copy == finish+4);
01052 repeat(p, copy, 1, to-1);
01053 break;
01054 case REP(1, INF):
01055 INSERT(OPLUS_, start);
01056 ASTERN(O_PLUS, start);
01057 break;
01058 case REP(N, N):
01059 copy = dupl(p, start, finish);
01060 repeat(p, copy, from-1, to-1);
01061 break;
01062 case REP(N, INF):
01063 copy = dupl(p, start, finish);
01064 repeat(p, copy, from-1, to);
01065 break;
01066 default:
01067 SETERROR(REG_ASSERT);
01068 break;
01069 }
01070 }
01071
01072
01073
01074
01075
01076 static int
01077 seterr(p, e)
01078 register struct parse *p;
01079 int e;
01080 {
01081 if (p->error == 0)
01082 p->error = e;
01083 p->next = nuls;
01084 p->end = nuls;
01085 return(0);
01086 }
01087
01088
01089
01090
01091
01092 static cset *
01093 allocset(p)
01094 register struct parse *p;
01095 {
01096 register int no = p->g->ncsets++;
01097 register size_t nc;
01098 register size_t nbytes;
01099 register cset *cs;
01100 register size_t css = (size_t)p->g->csetsize;
01101 register int i;
01102
01103 if (no >= p->ncsalloc) {
01104 p->ncsalloc += CHAR_BIT;
01105 nc = p->ncsalloc;
01106 assert(nc % CHAR_BIT == 0);
01107 nbytes = nc / CHAR_BIT * css;
01108 if (p->g->sets == NULL)
01109 p->g->sets = (cset *)malloc(nc * sizeof(cset));
01110 else
01111 p->g->sets = (cset *)realloc((char *)p->g->sets,
01112 nc * sizeof(cset));
01113 if (p->g->setbits == NULL)
01114 p->g->setbits = (uch *)malloc(nbytes);
01115 else {
01116 p->g->setbits = (uch *)realloc((char *)p->g->setbits,
01117 nbytes);
01118
01119 for (i = 0; i < no; i++)
01120 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
01121 }
01122 if (p->g->sets != NULL && p->g->setbits != NULL)
01123 (void) memset((char *)p->g->setbits + (nbytes - css),
01124 0, css);
01125 else {
01126 no = 0;
01127 SETERROR(REG_ESPACE);
01128
01129 }
01130 }
01131
01132 assert(p->g->sets != NULL);
01133 cs = &p->g->sets[no];
01134 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
01135 cs->mask = 1 << ((no) % CHAR_BIT);
01136 cs->hash = 0;
01137 cs->smultis = 0;
01138 cs->multis = NULL;
01139
01140 return(cs);
01141 }
01142
01143
01144
01145
01146
01147 static void
01148 freeset(p, cs)
01149 register struct parse *p;
01150 register cset *cs;
01151 {
01152 register int i;
01153 register cset *top = &p->g->sets[p->g->ncsets];
01154 register size_t css = (size_t)p->g->csetsize;
01155
01156 for (i = 0; i < css; i++)
01157 CHsub(cs, i);
01158 if (cs == top-1)
01159 p->g->ncsets--;
01160 }
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172 static int
01173 freezeset(p, cs)
01174 register struct parse *p;
01175 register cset *cs;
01176 {
01177 register uch h = cs->hash;
01178 register int i;
01179 register cset *top = &p->g->sets[p->g->ncsets];
01180 register cset *cs2;
01181 register size_t css = (size_t)p->g->csetsize;
01182
01183
01184 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
01185 if (cs2->hash == h && cs2 != cs) {
01186
01187 for (i = 0; i < css; i++)
01188 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
01189 break;
01190 if (i == css)
01191 break;
01192 }
01193
01194 if (cs2 < top) {
01195 freeset(p, cs);
01196 cs = cs2;
01197 }
01198
01199 return((int)(cs - p->g->sets));
01200 }
01201
01202
01203
01204
01205
01206 static int
01207 firstch(p, cs)
01208 register struct parse *p;
01209 register cset *cs;
01210 {
01211 register int i;
01212 register size_t css = (size_t)p->g->csetsize;
01213
01214 for (i = 0; i < css; i++)
01215 if (CHIN(cs, i))
01216 return((char)i);
01217 assert(never);
01218 return(0);
01219 }
01220
01221
01222
01223
01224
01225 static int
01226 nch(p, cs)
01227 register struct parse *p;
01228 register cset *cs;
01229 {
01230 register int i;
01231 register size_t css = (size_t)p->g->csetsize;
01232 register int n = 0;
01233
01234 for (i = 0; i < css; i++)
01235 if (CHIN(cs, i))
01236 n++;
01237 return(n);
01238 }
01239
01240
01241
01242
01243
01244
01245 static void
01246 mcadd(p, cs, cp)
01247 register struct parse *p;
01248 register cset *cs;
01249 register char *cp;
01250 {
01251 register size_t oldend = cs->smultis;
01252
01253 cs->smultis += strlen(cp) + 1;
01254 if (cs->multis == NULL)
01255 cs->multis = malloc(cs->smultis);
01256 else
01257 cs->multis = realloc(cs->multis, cs->smultis);
01258 if (cs->multis == NULL) {
01259 SETERROR(REG_ESPACE);
01260 return;
01261 }
01262
01263 (void) strcpy(cs->multis + oldend - 1, cp);
01264 cs->multis[cs->smultis - 1] = '\0';
01265 }
01266
01267
01268
01269
01270
01271 static void
01272 mcsub(cs, cp)
01273 register cset *cs;
01274 register char *cp;
01275 {
01276 register char *fp = mcfind(cs, cp);
01277 register size_t len = strlen(fp);
01278
01279 assert(fp != NULL);
01280 (void) memmove(fp, fp + len + 1,
01281 cs->smultis - (fp + len + 1 - cs->multis));
01282 cs->smultis -= len;
01283
01284 if (cs->smultis == 0) {
01285 free(cs->multis);
01286 cs->multis = NULL;
01287 return;
01288 }
01289
01290 cs->multis = realloc(cs->multis, cs->smultis);
01291 assert(cs->multis != NULL);
01292 }
01293
01294
01295
01296
01297
01298 static int
01299 mcin(cs, cp)
01300 register cset *cs;
01301 register char *cp;
01302 {
01303 return(mcfind(cs, cp) != NULL);
01304 }
01305
01306
01307
01308
01309
01310 static char *
01311 mcfind(cs, cp)
01312 register cset *cs;
01313 register char *cp;
01314 {
01315 register char *p;
01316
01317 if (cs->multis == NULL)
01318 return(NULL);
01319 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
01320 if (strcmp(cp, p) == 0)
01321 return(p);
01322 return(NULL);
01323 }
01324
01325
01326
01327
01328
01329
01330
01331
01332 static void
01333 mcinvert(p, cs)
01334 register struct parse *p;
01335 register cset *cs;
01336 {
01337 assert(cs->multis == NULL);
01338 }
01339
01340
01341
01342
01343
01344
01345
01346
01347 static void
01348 mccase(p, cs)
01349 register struct parse *p;
01350 register cset *cs;
01351 {
01352 assert(cs->multis == NULL);
01353 }
01354
01355
01356
01357
01358
01359 static int
01360 isinsets(g, c)
01361 register struct re_guts *g;
01362 int c;
01363 {
01364 register uch *col;
01365 register int i;
01366 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
01367 register unsigned uc = (unsigned char)c;
01368
01369 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
01370 if (col[uc] != 0)
01371 return(1);
01372 return(0);
01373 }
01374
01375
01376
01377
01378
01379 static int
01380 samesets(g, c1, c2)
01381 register struct re_guts *g;
01382 int c1;
01383 int c2;
01384 {
01385 register uch *col;
01386 register int i;
01387 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
01388 register unsigned uc1 = (unsigned char)c1;
01389 register unsigned uc2 = (unsigned char)c2;
01390
01391 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
01392 if (col[uc1] != col[uc2])
01393 return(0);
01394 return(1);
01395 }
01396
01397
01398
01399
01400
01401 static void
01402 categorize(p, g)
01403 struct parse *p;
01404 register struct re_guts *g;
01405 {
01406 register cat_t *cats = g->categories;
01407 register int c;
01408 register int c2;
01409 register cat_t cat;
01410
01411
01412 if (p->error != 0)
01413 return;
01414
01415 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
01416 if (cats[c] == 0 && isinsets(g, c)) {
01417 cat = g->ncategories++;
01418 cats[c] = cat;
01419 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
01420 if (cats[c2] == 0 && samesets(g, c, c2))
01421 cats[c2] = cat;
01422 }
01423 }
01424
01425
01426
01427
01428
01429 static sopno
01430 dupl(p, start, finish)
01431 register struct parse *p;
01432 sopno start;
01433 sopno finish;
01434 {
01435 register sopno ret = HERE();
01436 register sopno len = finish - start;
01437
01438 assert(finish >= start);
01439 if (len == 0)
01440 return(ret);
01441 enlarge(p, p->ssize + len);
01442 assert(p->ssize >= p->slen + len);
01443 (void) memcpy((char *)(p->strip + p->slen),
01444 (char *)(p->strip + start), (size_t)len*sizeof(sop));
01445 p->slen += len;
01446 return(ret);
01447 }
01448
01449
01450
01451
01452
01453
01454
01455
01456
01457 static void
01458 doemit(p, op, opnd)
01459 register struct parse *p;
01460 sop op;
01461 size_t opnd;
01462 {
01463
01464 if (p->error != 0)
01465 return;
01466
01467
01468 assert(opnd < 1<<OPSHIFT);
01469
01470
01471 if (p->slen >= p->ssize)
01472 enlarge(p, (p->ssize+1) / 2 * 3);
01473 assert(p->slen < p->ssize);
01474
01475
01476 p->strip[p->slen++] = SOP(op, opnd);
01477 }
01478
01479
01480
01481
01482
01483 static void
01484 doinsert(p, op, opnd, pos)
01485 register struct parse *p;
01486 sop op;
01487 size_t opnd;
01488 sopno pos;
01489 {
01490 register sopno sn;
01491 register sop s;
01492 register int i;
01493
01494
01495 if (p->error != 0)
01496 return;
01497
01498 sn = HERE();
01499 EMIT(op, opnd);
01500 assert(HERE() == sn+1);
01501 s = p->strip[sn];
01502
01503
01504 assert(pos > 0);
01505 for (i = 1; i < NPAREN; i++) {
01506 if (p->pbegin[i] >= pos) {
01507 p->pbegin[i]++;
01508 }
01509 if (p->pend[i] >= pos) {
01510 p->pend[i]++;
01511 }
01512 }
01513
01514 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
01515 (HERE()-pos-1)*sizeof(sop));
01516 p->strip[pos] = s;
01517 }
01518
01519
01520
01521
01522
01523 static void
01524 dofwd(p, pos, value)
01525 register struct parse *p;
01526 register sopno pos;
01527 sop value;
01528 {
01529
01530 if (p->error != 0)
01531 return;
01532
01533 assert(value < 1<<OPSHIFT);
01534 p->strip[pos] = OP(p->strip[pos]) | value;
01535 }
01536
01537
01538
01539
01540
01541 static void
01542 enlarge(p, size)
01543 register struct parse *p;
01544 register sopno size;
01545 {
01546 register sop *sp;
01547
01548 if (p->ssize >= size)
01549 return;
01550
01551 sp = (sop *)realloc(p->strip, size*sizeof(sop));
01552 if (sp == NULL) {
01553 SETERROR(REG_ESPACE);
01554 return;
01555 }
01556 p->strip = sp;
01557 p->ssize = size;
01558 }
01559
01560
01561
01562
01563
01564 static void
01565 stripsnug(p, g)
01566 register struct parse *p;
01567 register struct re_guts *g;
01568 {
01569 g->nstates = p->slen;
01570 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
01571 if (g->strip == NULL) {
01572 SETERROR(REG_ESPACE);
01573 g->strip = p->strip;
01574 }
01575 }
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587 static void
01588 findmust(p, g)
01589 struct parse *p;
01590 register struct re_guts *g;
01591 {
01592 register sop *scan;
01593 sop *start;
01594 register sop *newstart;
01595 register sopno newlen;
01596 register sop s;
01597 register char *cp;
01598 register sopno i;
01599
01600
01601 if (p->error != 0)
01602 return;
01603
01604
01605 newlen = 0;
01606 scan = g->strip + 1;
01607 do {
01608 s = *scan++;
01609 switch (OP(s)) {
01610 case OCHAR:
01611 if (newlen == 0)
01612 newstart = scan - 1;
01613 newlen++;
01614 break;
01615 case OPLUS_:
01616 case OLPAREN:
01617 case ORPAREN:
01618 break;
01619 case OQUEST_:
01620 case OCH_:
01621 scan--;
01622 do {
01623 scan += OPND(s);
01624 s = *scan;
01625
01626 if (OP(s) != O_QUEST && OP(s) != O_CH &&
01627 OP(s) != OOR2) {
01628 g->iflags |= BAD;
01629 return;
01630 }
01631 } while (OP(s) != O_QUEST && OP(s) != O_CH);
01632
01633 default:
01634 if (newlen > g->mlen) {
01635 start = newstart;
01636 g->mlen = newlen;
01637 }
01638 newlen = 0;
01639 break;
01640 }
01641 } while (OP(s) != OEND);
01642
01643 if (g->mlen == 0)
01644 return;
01645
01646
01647 g->must = malloc((size_t)g->mlen + 1);
01648 if (g->must == NULL) {
01649 g->mlen = 0;
01650 return;
01651 }
01652 cp = g->must;
01653 scan = start;
01654 for (i = g->mlen; i > 0; i--) {
01655 while (OP(s = *scan++) != OCHAR)
01656 continue;
01657 assert(cp < g->must + g->mlen);
01658 *cp++ = (char)OPND(s);
01659 }
01660 assert(cp == g->must + g->mlen);
01661 *cp++ = '\0';
01662 }
01663
01664
01665
01666
01667
01668 static sopno
01669 pluscount(p, g)
01670 struct parse *p;
01671 register struct re_guts *g;
01672 {
01673 register sop *scan;
01674 register sop s;
01675 register sopno plusnest = 0;
01676 register sopno maxnest = 0;
01677
01678 if (p->error != 0)
01679 return(0);
01680
01681 scan = g->strip + 1;
01682 do {
01683 s = *scan++;
01684 switch (OP(s)) {
01685 case OPLUS_:
01686 plusnest++;
01687 break;
01688 case O_PLUS:
01689 if (plusnest > maxnest)
01690 maxnest = plusnest;
01691 plusnest--;
01692 break;
01693 }
01694 } while (OP(s) != OEND);
01695 if (plusnest != 0)
01696 g->iflags |= BAD;
01697 return(maxnest);
01698 }