Difference between revisions of "Tip 15: GCC Regex Library"
From Vlsiwiki
(10 intermediate revisions by the same user not shown) | |||
Line 15: | Line 15: | ||
void regfree(regex_t *); | void regfree(regex_t *); | ||
− | Beyond that, read the man page | + | Beyond that, read the man page [http://pubs.opengroup.org/onlinepubs/007908799/xsh/regex.h.html[here]] or the GCC man pages [http://www.gnu.org/s/libc/manual/html_node/Regular-Expressions.html[here]] |
+ | |||
+ | An example is below: | ||
+ | |||
+ | #include <stdio.h> | ||
+ | #include <regex.h> | ||
+ | int main(int argc, char **argv) | ||
+ | { | ||
+ | struct { | ||
+ | const char *input; | ||
+ | int expect; | ||
+ | } tests[] = { | ||
+ | /* should match */ | ||
+ | { "state : q0", 1 }, | ||
+ | { "state: q0", 1 }, | ||
+ | { "state:q0s", 1 }, | ||
+ | /* should not match */ | ||
+ | { "#state :q0", 0 }, | ||
+ | { "state q0", 0 }, | ||
+ | { "# state :q0", 0 }, | ||
+ | }; | ||
+ | int i; | ||
+ | regex_t start_state; | ||
+ | const char *pattern = "^[ \\t]*(state)[ \\t]*:.*$"; | ||
+ | if (regcomp(&start_state, pattern, REG_EXTENDED)) { | ||
+ | fprintf(stderr, "%s: bad pattern: '%s'\n", argv[0], pattern); | ||
+ | return 1; | ||
+ | } | ||
+ | for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { | ||
+ | int status = regexec(&start_state, tests[i].input, 0, NULL, 0); | ||
+ | printf("%s: %s (%s)\n", tests[i].input, | ||
+ | status == 0 ? "match" : "no match", | ||
+ | !status == !!tests[i].expect | ||
+ | ? "PASS" : "FAIL"); | ||
+ | } | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | Returns: | ||
+ | |||
+ | state : q0: match (PASS) | ||
+ | state: q0: match (PASS) | ||
+ | state:q0s: match (PASS) | ||
+ | #state :q0: no match (PASS) | ||
+ | state q0: no match (PASS) | ||
+ | # state :q0: no match (PASS) | ||
+ | |||
+ | Example 2: | ||
+ | |||
+ | #include <sys/types.h> | ||
+ | #include <regex.h> | ||
+ | #include <stdio.h> | ||
+ | int main(int argc, char **argv) { | ||
+ | int r; | ||
+ | regex_t reg; | ||
+ | ++argv; /* Danger! */ | ||
+ | if (r = regcomp(&reg, *argv, REG_NOSUB|REG_EXTENDED)) { | ||
+ | char errbuf[1024]; | ||
+ | regerror(r, &reg, errbuf, sizeof(errbuf)); | ||
+ | printf("error: %s\n", errbuf); | ||
+ | return 1; | ||
+ | } | ||
+ | for (++argv; *argv; ++argv) { | ||
+ | if (regexec(&reg, *argv, 0, NULL, 0) == REG_NOMATCH) | ||
+ | continue; | ||
+ | printf("matched: %s\n", *argv); | ||
+ | } | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | This allows you to run an expression like: | ||
+ | ./regex '[ [:digit:] ]' 56789 alpha " " foo12bar | ||
+ | (with no spaces between the [ [ and the ] ] ) |
Latest revision as of 01:16, 9 March 2011
The C regex library (<regex.h>) is part of the POSIX C library that ships alongside GCC, and it uses POSIX-style regular expressions. This is in contrast to the Perl-style regular expressions that are more common among scripting languages.
To include regex in C (or C++) you must include:
#include <sys/types.h> #include <regex.h>
There are four available functions:
int regcomp(regex_t *, const char *, int); int regexec(const regex_t *, const char *, size_t, regmatch_t[], int); size_t regerror(int, const regex_t *, char *, size_t); void regfree(regex_t *);
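Beyond that, read the POSIX man page for regex.h (http://pubs.opengroup.org/onlinepubs/007908799/xsh/regex.h.html) or the GNU C Library manual section on regular expressions (http://www.gnu.org/s/libc/manual/html_node/Regular-Expressions.html).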
An example is below:
#include <stdio.h>
#include <regex.h>

/*
 * Example 1: self-checking demonstration of regcomp()/regexec().
 *
 * Compiles one extended regular expression that recognises a line of the
 * form "state : <anything>" (optional blanks before "state" and before the
 * colon), runs it over a fixed table of inputs, and prints for each input
 * whether it matched and whether that agrees with the expected result.
 *
 * Returns 1 if the pattern fails to compile, 0 otherwise.
 */
int main(int argc, char **argv)
{
    const struct {
        const char *input;
        int expect;     /* non-zero when the pattern is expected to match */
    } tests[] = {
        /* should match */
        { "state : q0", 1 },
        { "state: q0", 1 },
        { "state:q0s", 1 },
        /* should not match */
        { "#state :q0", 0 },
        { "state q0", 0 },
        { "# state :q0", 0 },
    };
    const size_t ntests = sizeof(tests) / sizeof(tests[0]);
    size_t i;
    regex_t start_state;
    /*
     * [[:blank:]] matches a space or a tab.  Inside a POSIX bracket
     * expression a backslash is an ordinary character, so writing
     * "[ \\t]" would match space, '\' or the letter 't' -- not a tab.
     */
    const char *pattern = "^[[:blank:]]*(state)[[:blank:]]*:.*$";

    (void)argc;

    if (regcomp(&start_state, pattern, REG_EXTENDED) != 0) {
        fprintf(stderr, "%s: bad pattern: '%s'\n", argv[0], pattern);
        return 1;
    }

    for (i = 0; i < ntests; i++) {
        /* regexec() returns 0 on a match and non-zero otherwise. */
        int status = regexec(&start_state, tests[i].input, 0, NULL, 0);
        int matched = (status == 0);
        int expected = (tests[i].expect != 0);

        printf("%s: %s (%s)\n", tests[i].input,
               matched ? "match" : "no match",
               matched == expected ? "PASS" : "FAIL");
    }

    /* Release the storage regcomp() allocated for the compiled pattern. */
    regfree(&start_state);
    return 0;
}
Returns:
state : q0: match (PASS) state: q0: match (PASS) state:q0s: match (PASS) #state :q0: no match (PASS) state q0: no match (PASS) # state :q0: no match (PASS)
Example 2:
#include <sys/types.h>
#include <regex.h>
#include <stdio.h>

/*
 * Example 2: a tiny grep over the command line.
 *
 * argv[1] is compiled as an extended regular expression; every following
 * argument that the expression matches is printed as "matched: <arg>".
 *
 * Returns 1 when no pattern is given or the pattern does not compile,
 * 0 otherwise.
 */
int main(int argc, char **argv)
{
    int r;
    regex_t reg;
    char **arg;

    /* Without this check a missing pattern would hand NULL to regcomp(). */
    if (argc < 2) {
        fprintf(stderr, "usage: %s pattern [string ...]\n",
                argc > 0 ? argv[0] : "regex");
        return 1;
    }

    /* REG_NOSUB: only match/no-match is needed, not sub-match offsets. */
    r = regcomp(&reg, argv[1], REG_NOSUB | REG_EXTENDED);
    if (r != 0) {
        char errbuf[1024];

        /* Translate the regcomp() error code into a readable message. */
        regerror(r, &reg, errbuf, sizeof(errbuf));
        fprintf(stderr, "error: %s\n", errbuf);
        return 1;
    }

    /* argv is NULL-terminated, so walk it until the terminator. */
    for (arg = argv + 2; *arg != NULL; ++arg) {
        /*
         * Only a return of 0 is a match; any other value (REG_NOMATCH or
         * an execution error) must not be reported as one.
         */
        if (regexec(&reg, *arg, 0, NULL, 0) != 0)
            continue;
        printf("matched: %s\n", *arg);
    }

    /* Release the storage regcomp() allocated for the compiled pattern. */
    regfree(&reg);
    return 0;
}
This allows you to run an expression like:
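./regex '[[:digit:]]' 56789 alpha " " foo12bar (the bracket expression must be typed as [[:digit:]], with no spaces between the brackets; with this input the program reports 56789 and foo12bar as matches)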