Tip 15: GCC Regex Library

From Vlsiwiki
Jump to: navigation, search

The C regex library (<regex.h>) is part of the standard C library on POSIX systems, so it is available out of the box with GCC, and it uses POSIX-style regular expressions. This is in contrast to the Perl-style regular expressions that are more common among scripting languages.

To include regex in C (or C++) you must include:

#include <sys/types.h>
#include <regex.h>

There are four available functions:

/* Compile a pattern string into a regex_t. The int argument carries flags
 * such as REG_EXTENDED or REG_NOSUB. Returns 0 on success, or a nonzero
 * error code that can be handed to regerror(). */
int    regcomp(regex_t *, const char *, int);
/* Test a string against a compiled regex_t. Returns 0 on a match and
 * REG_NOMATCH when there is none. The size_t / regmatch_t[] pair is an
 * optional array for match positions; pass 0 and NULL when only a yes/no
 * answer is needed. */
int    regexec(const regex_t *, const char *, size_t, regmatch_t[], int);
/* Write a human-readable message for a regcomp()/regexec() error code into
 * the caller's buffer of the given size. Per POSIX, the return value is the
 * buffer size needed to hold the whole message. */
size_t regerror(int, const regex_t *, char *, size_t);
/* Per POSIX, release the memory that regcomp() allocated inside a regex_t. */
void   regfree(regex_t *);

Beyond that, read the man page [here] or the GCC man pages [here]

An example is below:

#include <stdio.h>
#include <regex.h>
/*
 * Compile a POSIX extended regex that recognises "state : ..." lines, run it
 * against a fixed table of inputs, and print for each input whether it
 * matched and whether that agrees with the expected result (PASS/FAIL).
 *
 * argc is unused; argv[0] is only used to prefix the bad-pattern diagnostic.
 * Returns 0 after running every test, or 1 if the pattern does not compile.
 */
int main(int argc, char **argv)
{
  static const struct {
    const char *input;
    int expect;  /* nonzero if the pattern is expected to match */
  } tests[] = {
    /* should match */
    { "state : q0", 1 },
    { "state: q0",  1 },
    { "state:q0s",  1 },
    /* should not match */
    { "#state :q0",  0 },
    { "state q0",    0 },
    { "# state :q0", 0 },
  };
  const size_t ntests = sizeof(tests) / sizeof(tests[0]);
  size_t i;
  regex_t start_state;
  /*
   * The tab is written with the C escape "\t" so that a real tab character
   * ends up inside the bracket expression. POSIX bracket expressions treat a
   * backslash literally, so passing the two characters '\' 't' to regcomp()
   * would match a backslash or the letter 't' instead of a tab.
   */
  const char *pattern = "^[ \t]*(state)[ \t]*:.*$";

  (void)argc;

  if (regcomp(&start_state, pattern, REG_EXTENDED) != 0) {
    fprintf(stderr, "%s: bad pattern: '%s'\n", argv[0], pattern);
    return 1;
  }

  for (i = 0; i < ntests; i++) {
    /* Only a yes/no answer is needed, so no match array is requested. */
    int status = regexec(&start_state, tests[i].input, 0, NULL, 0);
    int matched = (status == 0);
    int expected = (tests[i].expect != 0);

    printf("%s: %s (%s)\n", tests[i].input,
                            matched ? "match" : "no match",
                            matched == expected ? "PASS" : "FAIL");
  }

  /* Release the storage regcomp() allocated for the compiled pattern. */
  regfree(&start_state);
  return 0;
}

This prints:

state : q0: match (PASS)
state: q0: match (PASS)
state:q0s: match (PASS)
#state :q0: no match (PASS)
state q0: no match (PASS)
# state :q0: no match (PASS)

Example 2:

#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
/*
 * Minimal grep-like filter: argv[1] is a POSIX extended regular expression,
 * and every following argument that matches it is printed as
 * "matched: <argument>".
 *
 * Returns 0 on success, or 1 if the pattern is missing or fails to compile
 * (a diagnostic is written to stderr in both cases).
 */
int main(int argc, char **argv) {
  int r;
  int i;
  regex_t reg;

  /* Without a pattern argv[1] is NULL, and handing NULL to regcomp() is
   * undefined behaviour, so bail out with a usage message instead. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s pattern [string ...]\n",
            argc > 0 ? argv[0] : "regex");
    return 1;
  }

  /* REG_NOSUB: only a yes/no answer is needed, not match positions. */
  r = regcomp(&reg, argv[1], REG_NOSUB | REG_EXTENDED);
  if (r != 0) {
    char errbuf[1024];
    regerror(r, &reg, errbuf, sizeof(errbuf));
    fprintf(stderr, "error: %s\n", errbuf);
    return 1;
  }

  for (i = 2; i < argc; i++) {
    /* Report only a definite match (0); REG_NOMATCH and any other error
     * code from regexec() are both treated as "not matched". */
    if (regexec(&reg, argv[i], 0, NULL, 0) == 0) {
      printf("matched: %s\n", argv[i]);
    }
  }

  /* Release the storage regcomp() allocated for the compiled pattern. */
  regfree(&reg);
  return 0;
}

This allows you to run an expression like:

./regex '[ [:digit:] ]'   56789  alpha  "   "  foo12bar
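(with no spaces between the [ [ and the ] ], i.e. the pattern is '[[:digit:]]'). Only the arguments that contain a digit are printed: 56789 and foo12bar.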