summary refs log tree commit diff stats
path: root/eo_tokenizer.rl
diff options
context:
space:
mode:
author Jérémy Zurcher <jeremy@asynk.ch> 2013-11-23 03:05:05 +0100
committer Jérémy Zurcher <jeremy@asynk.ch> 2013-11-23 03:05:05 +0100
commit 013625545c75dd63e62dc4470b5b90ac428e735d (patch)
tree 60aad636287671f5a1a2ed122de26c1dd19a6b6c /eo_tokenizer.rl
download eo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.zip
eo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.tar.gz
initial commit: support C/C++ comments
Diffstat (limited to 'eo_tokenizer.rl')
-rw-r--r-- eo_tokenizer.rl 165
1 files changed, 165 insertions, 0 deletions
diff --git a/eo_tokenizer.rl b/eo_tokenizer.rl
new file mode 100644
index 0000000..b10218d
--- /dev/null
+++ b/eo_tokenizer.rl
@@ -0,0 +1,165 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "eo_tokenizer.h"
+
+%%{
+   # Definitions shared by the scanners: line counting, blank runs and
+   # C / C++ style comments.
+   machine common;
+
+   # Bump the current line number kept in the tokenizer.
+   action inc_line {
+      toknz->current_line += 1;
+   }
+
+   # Record the line on which the token being entered starts.
+   action save_start_line {
+      toknz->token_start_line = toknz->current_line;
+   }
+
+   cr      = '\n';
+   # Character classes must be written as or-expressions [...];
+   # the double-quoted form is a literal string that includes the brackets.
+   cr_neg  = [^\n];
+   ws      = [ \t\r];
+   # A newline that also advances the line counter.
+   newline = cr @inc_line;
+   # A run of bytes in 0x00..0x20 other than newline, optionally closed by
+   # one newline. Both parts are optional, so this also matches nothing.
+   ignore  = (0x00..0x20 - cr)* newline?;
+
+   # Block comment: everything up to the first closing delimiter, counting
+   # the newlines met on the way.
+   c_comment   = "/*" ( any | '\n' @inc_line )* :>> "*/";
+   # Line comment: runs to, and includes, the terminating newline.
+   cpp_comment = "//" (any - cr)* newline;
+   # Either comment form; the start line is saved on entry.
+   comment     = ( c_comment | cpp_comment ) > save_start_line;
+
+}%%
+
+%%{
+   # Scanner that walks a source buffer and reports blank runs and comments;
+   # any other character is handed to the error action.
+   machine eo_tokenizer;
+   include common;
+
+   # All scanner state lives in the Eo_Tokenizer pointed to by toknz.
+   access toknz->;
+   variable p toknz->p;
+   variable pe toknz->pe;
+   variable eof toknz->eof;
+
+   # Log the first and last line of the comment just matched.
+   action show_comment {
+      DBG("comment %03d:%03d", toknz->token_start_line, toknz->current_line);
+   }
+
+   # Log the current line and the byte length of the blank run just matched.
+   action show_ignore {
+      /* te - ts is a pointer difference; convert it to match the %d specifier */
+      DBG("ignore %d (%d)", toknz->current_line, (int)(toknz->te - toknz->ts));
+   }
+
+   # Log the rest of the offending line and put the machine in its error state.
+   action error {
+      /* TODO find a more elegant way,
+       * -> can't use $err() or $!error on scanner
+       * -> fgoto another machine to eat the line, ts is set to NULL
+       */
+      char buf[BUFSIZE];
+      char *s, *d;
+      /* last slot of buf is reserved for the terminating NUL */
+      char *last = buf + sizeof(buf) - 1;
+
+      /* Copy up to the end of line, never reading at or past pe (one past
+       * the valid data) and never writing past the end of buf. */
+      for (s = fpc, d = buf; (s < toknz->pe) && (d < last); s++)
+        {
+           if (*s == '\r' || *s == '\n')
+             break;
+           *d++ = *s;
+        }
+      *d = '\0';
+      ERR("error line %d : %s...", toknz->current_line, buf);
+      toknz->cs = eo_tokenizer_error;
+      fbreak;
+   }
+
+   main := |*
+      ignore  => show_ignore;
+      comment => show_comment;
+      any     => error;
+   *|;
+
+}%%
+
+/* State tables and constants of the eo_tokenizer machine
+ * (eo_tokenizer_error, used by the error action, comes from here). */
+%% write data;
+
+/**
+ * Run the eo_tokenizer scanner over the file named by @p source.
+ *
+ * The file is read in chunks into toknz->buf; a token that straddles two
+ * chunks is moved to the front of the buffer before the next read.
+ *
+ * @param toknz  tokenizer holding the scanner state and the read buffer
+ * @param source path of the file to scan; the pointer is stored in
+ *               toknz->source, it is not copied
+ * @return EINA_TRUE when the whole file was scanned, EINA_FALSE when the
+ *         file cannot be opened or read, when a single token fills the
+ *         whole buffer, or when the scanner ends in its error state
+ */
+Eina_Bool
+eo_tokenizer_walk(Eo_Tokenizer *toknz, const char *source)
+{
+   FILE *stream;
+   Eina_Bool ret = EINA_TRUE;
+   int done = 0;
+   int have = 0;
+
+   INF("tokenize %s...", source);
+   toknz->source = source;
+
+   stream = fopen(toknz->source, "r");
+   if (!stream)
+     {
+        ERR("unable to read in %s", toknz->source);
+        return EINA_FALSE;
+     }
+
+   /* Drop any end-of-input marker left by a previous walk, otherwise the
+    * scanner could compare against a stale eof on the first chunk. */
+   toknz->eof = NULL;
+
+   %% write init;
+
+   while (!done)
+     {
+        int len;
+        int space;
+
+        /* New data goes right after the preserved token prefix, if any. */
+        toknz->p = toknz->buf + have;
+        space = BUFSIZE - have;
+
+        if (space == 0)
+          {
+             /* A single token fills the whole buffer: report the failure
+              * to the caller instead of terminating the process. */
+             ERR("out of buffer space");
+             ret = EINA_FALSE;
+             break;
+          }
+
+        len = (int)fread(toknz->p, 1, space, stream);
+        if (ferror(stream))
+          {
+             ERR("unable to read in %s", toknz->source);
+             ret = EINA_FALSE;
+             break;
+          }
+        toknz->pe = toknz->p + len;
+
+        /* A short read (including an empty one) marks the end of input.
+         * The scanner must still run once with eof set so that a token
+         * kept from the previous chunk is flushed. */
+        if (len < space)
+          {
+             toknz->eof = toknz->pe;
+             done = 1;
+          }
+
+        %% write exec;
+
+        if ( toknz->cs == %%{ write error; }%% )
+          {
+             /* The error action already logged the offending line. */
+             ret = EINA_FALSE;
+             break;
+          }
+
+        if ( toknz->ts == NULL )
+          have = 0;
+        else
+          {
+             /* There is a prefix to preserve, shift it over. */
+             have = toknz->pe - toknz->ts;
+             memmove( toknz->buf, toknz->ts, have);
+             toknz->te = toknz->buf + (toknz->te - toknz->ts);
+             toknz->ts = toknz->buf;
+          }
+     }
+
+   fclose(stream);
+
+   return ret;
+}
+
+/**
+ * Allocate a zero-filled tokenizer and give its fields their start values.
+ *
+ * @return the new tokenizer, or NULL when the allocation fails; release it
+ *         with eo_tokenizer_free()
+ */
+Eo_Tokenizer*
+eo_tokenizer_get()
+{
+   Eo_Tokenizer *tkz;
+
+   tkz = calloc(1, sizeof(Eo_Tokenizer));
+   if (tkz == NULL)
+     return NULL;
+
+   /* no file attached and no token in progress yet */
+   tkz->source = NULL;
+   tkz->ts = NULL;
+   tkz->te = NULL;
+   /* tkz->top = 0; */
+
+   /* line numbers are 1-based; no token start has been recorded */
+   tkz->current_line = 1;
+   tkz->token_start_line = 0;
+
+   /* nesting bookkeeping */
+   tkz->current_nesting = 0;
+   tkz->max_nesting = 10;
+
+   return tkz;
+}
+
+/**
+ * Release a tokenizer structure.
+ *
+ * Only the structure itself is freed; nothing it points to is released here.
+ *
+ * @param toknz tokenizer to free; NULL is accepted, since free() ignores it
+ */
+void
+eo_tokenizer_free(Eo_Tokenizer *toknz)
+{
+ free(toknz);
+}
+