diff options
author | Jérémy Zurcher <jeremy@asynk.ch> | 2013-11-23 03:05:05 +0100 |
---|---|---|
committer | Jérémy Zurcher <jeremy@asynk.ch> | 2013-11-23 03:05:05 +0100 |
commit | 013625545c75dd63e62dc4470b5b90ac428e735d (patch) | |
tree | 60aad636287671f5a1a2ed122de26c1dd19a6b6c /eo_tokenizer.rl | |
download | eo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.zip eo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.tar.gz |
initial commit: support C/C++ comments
Diffstat (limited to 'eo_tokenizer.rl')
-rw-r--r-- | eo_tokenizer.rl | 165 |
1 files changed, 165 insertions, 0 deletions
/*
 * eo_tokenizer.rl - Ragel scanner definition for the eo tokenizer.
 *
 * At this stage the scanner only recognises runs of whitespace/control
 * characters and C / C++ comments; any other input byte triggers the
 * error action.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "eo_tokenizer.h"

%%{
   machine common;

   # Bump the line counter each time a line feed is consumed.
   action inc_line {
      toknz->current_line += 1;
   }

   # Remember the line on which the current token starts.
   action save_start_line {
      toknz->token_start_line = toknz->current_line;
   }

   cr          = '\n';
   # Character classes are bare or-expressions; a double-quoted form would be
   # taken as a literal string. Neither cr_neg nor ws is referenced yet.
   cr_neg      = [^\n];
   ws          = [ \t\r];
   newline     = cr @inc_line;
   # NOTE(review): this pattern also accepts the empty string; confirm that
   # this is intended for a scanner pattern.
   ignore      = (0x00..0x20 - cr)* newline?;

   c_comment   = "/*" ( any | '\n' @inc_line )* :>> "*/";
   cpp_comment = "//" (any - cr)* newline;
   comment     = ( c_comment | cpp_comment ) > save_start_line;

}%%

%%{
   machine eo_tokenizer;
   include common;

   # All scanner state lives in the Eo_Tokenizer instance.
   access toknz->;
   variable p toknz->p;
   variable pe toknz->pe;
   variable eof toknz->eof;

   action show_comment {
      DBG("comment %03d:%03d", toknz->token_start_line, toknz->current_line);
   }

   action show_ignore {
      /* te - ts is a pointer difference; cast it to match the %d conversion. */
      DBG("ignore %d (%d)", toknz->current_line, (int)(toknz->te - toknz->ts));
   }

   action error {
      /* TODO find a more elegant way,
       * -> can't use $err() or $!error on scanner
       * -> fgoto another machine to eat the line, ts is set to NULL
       */
      char buf[BUFSIZE];
      char *s, *d;
      /* Copy the rest of the offending line for the report. Stop before pe
       * (the first byte past the valid data) and leave room in buf for the
       * terminating NUL. */
      for (s = fpc, d = buf;
           (s < toknz->pe) && (d < buf + sizeof(buf) - 1);
           s++)
        {
           if (*s == '\r' || *s == '\n')
             break;
           *d++ = *s;
        }
      *d = '\0';
      ERR("error line %d : %s...", toknz->current_line, buf);
      /* Force the machine into its error state and leave the exec loop. */
      toknz->cs = eo_tokenizer_error;
      fbreak;
   }

   main := |*
      ignore  => show_ignore;
      comment => show_comment;
      any     => error;
   *|;

}%%

%% write data;

/**
 * Run the scanner over the file named by @p source.
 *
 * The file is read in chunks into toknz->buf (treated here as holding
 * BUFSIZE bytes). When a token straddles a chunk boundary, the part already
 * read is moved to the front of the buffer before the next read.
 *
 * @param toknz  tokenizer instance holding the scanner state.
 * @param source path of the file to tokenize; the pointer is stored in
 *               toknz->source and is not copied.
 * @return EINA_TRUE when the whole file was scanned; EINA_FALSE when the
 *         file cannot be opened or read, when a single token does not fit
 *         in the buffer, or when the scanner ends in its error state.
 */
Eina_Bool
eo_tokenizer_walk(Eo_Tokenizer *toknz, const char *source)
{
   INF("tokenize %s...", source);
   toknz->source = source;

   FILE *stream;
   Eina_Bool ret = EINA_TRUE;

   int done = 0;
   int have = 0;

   stream = fopen(toknz->source, "r");
   if (!stream)
     {
        ERR("unable to read in %s", toknz->source);
        return EINA_FALSE;
     }

   %% write init;

   while (!done)
     {
        size_t len;
        int space;

        toknz->p = toknz->buf + have;
        space = BUFSIZE - have;

        if (space == 0)
          {
             /* The pending token fills the whole buffer. */
             ERR("out of buffer space");
             ret = EINA_FALSE;
             break;
          }

        len = fread(toknz->p, 1, (size_t)space, stream);
        if (ferror(stream))
          {
             ERR("read error in %s", toknz->source);
             ret = EINA_FALSE;
             break;
          }
        toknz->pe = toknz->p + len;

        /* A short read (including a zero-length one) means end of input.
         * The scanner must still run once with eof set so that a pending
         * token is flushed. */
        if (len < (size_t)space)
          {
             toknz->eof = toknz->pe;
             done = 1;
          }

        %% write exec;

        if ( toknz->cs == %%{ write error; }%% )
          {
             ret = EINA_FALSE;
             break;
          }

        if ( toknz->ts == 0 )
          have = 0;
        else
          {
             /* There is a prefix to preserve, shift it over. */
             have = (int)(toknz->pe - toknz->ts);
             memmove( toknz->buf, toknz->ts, (size_t)have);
             toknz->te = toknz->buf + (toknz->te - toknz->ts);
             toknz->ts = toknz->buf;
          }
     }

   fclose(stream);

   return ret;
}

/**
 * Allocate a tokenizer and set its initial state.
 *
 * @return a zero-initialised tokenizer with current_line set to 1 and
 *         max_nesting set to 10, or NULL when allocation fails. Release it
 *         with eo_tokenizer_free().
 */
Eo_Tokenizer*
eo_tokenizer_get(void)
{
   Eo_Tokenizer *toknz = calloc(1, sizeof *toknz);
   if (!toknz) return NULL;

   toknz->ts = NULL;
   toknz->te = NULL;
   /* toknz->top = 0; */
   toknz->source = NULL;
   toknz->max_nesting = 10;
   toknz->current_line = 1;
   toknz->current_nesting = 0;
   toknz->token_start_line = 0;

   return toknz;
}

/**
 * Release a tokenizer obtained from eo_tokenizer_get().
 *
 * @param toknz tokenizer to free; NULL is accepted.
 */
void
eo_tokenizer_free(Eo_Tokenizer *toknz)
{
   free(toknz);
}