summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJérémy Zurcher <jeremy@asynk.ch>2013-11-23 03:05:05 +0100
committerJérémy Zurcher <jeremy@asynk.ch>2013-11-23 03:05:05 +0100
commit013625545c75dd63e62dc4470b5b90ac428e735d (patch)
tree60aad636287671f5a1a2ed122de26c1dd19a6b6c
downloadeo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.zip
eo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.tar.gz
initial commit: support C/C++ comments
-rw-r--r--Makefile39
-rw-r--r--data/comments.c20
-rw-r--r--eo_tokenizer.h63
-rw-r--r--eo_tokenizer.rl165
-rw-r--r--main.c50
l---------tok.c1
6 files changed, 338 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..949524c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,39 @@
+
+# Build settings. The compile and link flags for eina are queried from
+# pkg-config each time CFLAGS is expanded.
+# NOTE(review): CFLAGS also carries the linker flags (--libs); confirm this is
+# intended rather than splitting them into LDLIBS.
+CC = gcc
+CFLAGS = -ggdb3 `pkg-config --cflags --libs eina`
+BIN = eo_tokenizer
+# NOTE(review): SRC is not referenced anywhere else in this Makefile.
+SRC = main.c eo_tokenizer.c
+
+# NOTE(review): only `clean` is declared phony; `all` and `test-comments` are
+# not.
+.PHONY: clean
+.SUFFIXES: .rl .xml .dot .png
+
+# Ragel source -> generated C scanner.
+.rl.c:
+ ragel -s -o $@ $<
+
+# Ragel source -> XML output.
+.rl.xml:
+ ragel -x -o $@ $<
+
+# Ragel source -> Graphviz description of the eo_tokenizer machine.
+.rl.dot:
+ ragel -V -p -S eo_tokenizer -o $@ $<
+
+# Graphviz description -> PNG image.
+.dot.png:
+ dot -Tpng >$@ $<
+
+# NOTE(review): STD is never defined in this Makefile, so -D$(STD) expands to
+# a bare -D unless STD is supplied on the command line or in the environment.
+.c.o:
+ $(CC) -c -Wall -I. $(CFLAGS) -D$(STD) -o $@ $<
+
+all: $(BIN)
+
+# Generated files and the sources they are produced from.
+eo_tokenizer.c: eo_tokenizer.rl
+eo_tokenizer.xml: eo_tokenizer.rl
+eo_tokenizer.dot: eo_tokenizer.rl
+eo_tokenizer.png: eo_tokenizer.dot
+
+eo_tokenizer: main.c eo_tokenizer.c eo_tokenizer.h
+
+# Run the tokenizer on the comment fixture with log level 4 for the
+# eo_toknz log domain.
+test-comments: eo_tokenizer
+ EINA_LOG_LEVELS=eo_toknz:4 ./eo_tokenizer ./data/comments.c
+
+# Remove editor backups, objects, generated diagrams/XML, the generated C
+# scanner ($(BIN).c) and the binary.
+clean:
+ rm -f *~ *.o *.dot *.png *.xml $(BIN).c $(BIN)
+
diff --git a/data/comments.c b/data/comments.c
new file mode 100644
index 0000000..5fd0216
--- /dev/null
+++ b/data/comments.c
@@ -0,0 +1,20 @@
+/* comment:1-1 */
+
+/*
+ comment:3-4 */
+/*
+ * comment:5-7
+ */
+
+// comment:9-9
+
+ /*
+comment:11-15
+ *
+ *
+ * *//*comment 15-16
+*/
+
+ Evas_object {
+ inherits { Eo_Base, Evas_Common_Interface }
+}
diff --git a/eo_tokenizer.h b/eo_tokenizer.h
new file mode 100644
index 0000000..536cdda
--- /dev/null
+++ b/eo_tokenizer.h
@@ -0,0 +1,63 @@
+#ifndef __EO_TOKENIZER_H__
+#define __EO_TOKENIZER_H__
+
+#include <Eina.h>
+
+/* Log domain handle shared by every logging helper below. */
+extern int _eo_tokenizer_log_dom;
+#undef EINA_LOG_DOMAIN_DEFAULT
+#define EINA_LOG_DOMAIN_DEFAULT _eo_tokenizer_log_dom
+
+/*
+ * Short logging helpers bound to the tokenizer log domain.
+ *
+ * Each name is unconditionally #undef'd before being defined: #undef on an
+ * identifier that is not currently a macro is a no-op, so no #ifdef guard is
+ * needed to drop a definition that may have been inherited from elsewhere.
+ */
+#undef CRITICAL
+#define CRITICAL(...) EINA_LOG_DOM_CRIT(_eo_tokenizer_log_dom, __VA_ARGS__)
+
+#undef ERR
+#define ERR(...) EINA_LOG_DOM_ERR(_eo_tokenizer_log_dom, __VA_ARGS__)
+
+#undef WRN
+#define WRN(...) EINA_LOG_DOM_WARN(_eo_tokenizer_log_dom, __VA_ARGS__)
+
+#undef INF
+#define INF(...) EINA_LOG_DOM_INFO(_eo_tokenizer_log_dom, __VA_ARGS__)
+
+#undef DBG
+#define DBG(...) EINA_LOG_DOM_DBG(_eo_tokenizer_log_dom, __VA_ARGS__)
+
+/* Size in bytes of the Eo_Tokenizer read buffer. */
+#define BUFSIZE 256
+
+/*
+ * Tokenizer state: the variables the Ragel-generated scanner works on,
+ * followed by the bookkeeping used by the C driver and the scanner actions.
+ */
+typedef struct _eo_tokenizer {
+ /* ragel vars */
+ int cs; /* current machine state */
+ int act; /* last pattern matched */
+ char *ts; /* current token match start */
+ char *te; /* current token match end */
+ char *p; /* scan position; set to the start of the fresh data before each run */
+ char *pe; /* end of the data currently held in buf */
+ char *eof; /* eof = (EOF ? pe : NULL) */
+ /* int stack[10]; /1* state stack used by fret fcall ... *1/ */
+ /* int top; /1* stack pointer *1/ */
+
+ const char *source; /* path handed to eo_tokenizer_walk() */
+ int current_line; /* starts at 1, incremented for every newline scanned */
+ int current_nesting; /* initialised to 0 by eo_tokenizer_get() */
+ int max_nesting; /* initialised to 10 by eo_tokenizer_get() */
+ int token_start_line; /* value of current_line when the current comment began */
+ char buf[BUFSIZE]; /* read buffer the scanner runs over */
+} Eo_Tokenizer;
+
+/*
+ * Allocate a zero-initialised tokenizer with its line counter set to 1.
+ * Returns NULL when the allocation fails. Release the result with
+ * eo_tokenizer_free().
+ */
+Eo_Tokenizer* eo_tokenizer_get();
+
+/*
+ * Open the file named by source and run the scanner over its contents,
+ * reporting what it recognises through the tokenizer log domain.
+ * Returns EINA_FALSE at least when source cannot be opened for reading.
+ */
+Eina_Bool eo_tokenizer_walk(Eo_Tokenizer *toknz, const char *source);
+
+/* Release a tokenizer obtained from eo_tokenizer_get(). */
+void eo_tokenizer_free(Eo_Tokenizer *toknz);
+
+#endif /* __EO_TOKENIZER_H__ */
diff --git a/eo_tokenizer.rl b/eo_tokenizer.rl
new file mode 100644
index 0000000..b10218d
--- /dev/null
+++ b/eo_tokenizer.rl
@@ -0,0 +1,165 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "eo_tokenizer.h"
+
+%%{
+  machine common;
+
+  # Definitions shared by the scanners: line accounting plus the whitespace
+  # and comment patterns.
+
+  # Count one more line; attached to every newline character consumed.
+  action inc_line {
+    toknz->current_line += 1;
+  }
+
+  # Record the line on which the token being entered starts.
+  action save_start_line {
+    toknz->token_start_line = toknz->current_line;
+  }
+
+  cr = '\n';
+
+  # Character classes are written as bare or-expressions. A double-quoted
+  # string containing brackets is matched literally, bracket by bracket,
+  # and is not a class.
+  cr_neg = [^\n];
+  ws = [ \t\r];
+
+  newline = cr @inc_line;
+
+  # Run of control characters and spaces (newline excluded), optionally
+  # closed by one newline.
+  # NOTE(review): both parts are optional, so this also accepts the empty
+  # string -- confirm that is intended for a scanner pattern.
+  ignore = (0x00..0x20 - cr)* newline?;
+
+  # C comment: everything up to the first closing marker; embedded newlines
+  # are counted.
+  c_comment = "/*" ( any | '\n' @inc_line )* :>> "*/";
+
+  # C++ comment: runs to the end of the line.
+  # NOTE(review): the closing newline is mandatory, so a comment on the last
+  # line of a file without a trailing newline does not match.
+  cpp_comment = "//" (any - cr)* newline;
+
+  comment = ( c_comment | cpp_comment ) > save_start_line;
+
+}%%
+
+%%{
+  machine eo_tokenizer;
+  include common;
+
+  # Every Ragel variable lives inside the Eo_Tokenizer instance named toknz.
+  access toknz->;
+  variable p toknz->p;
+  variable pe toknz->pe;
+  variable eof toknz->eof;
+
+  # Log the first and last line of the comment that was just matched.
+  action show_comment {
+    DBG("comment %03d:%03d", toknz->token_start_line, toknz->current_line);
+  }
+
+  # Log the current line and the length of the ignored run.
+  action show_ignore {
+    /* te - ts is a ptrdiff_t; cast it so it matches the %d conversion. */
+    DBG("ignore %d (%d)", toknz->current_line, (int)(toknz->te - toknz->ts));
+  }
+
+  # Report the rest of the offending line, then stop in the error state.
+  action error {
+    /* TODO find a more elegant way,
+     * -> can't use $err() or $!error on scanner
+     * -> fgoto another machine to eat the line, ts is set to NULL
+     */
+    char snippet[BUFSIZE];
+    char *s, *d;
+    /* Copy up to the end of the line, never reading at or beyond pe (which
+     * points one past the last valid byte) and always leaving room for the
+     * terminating NUL. */
+    for (s = fpc, d = snippet;
+         (s < toknz->pe) && (d < snippet + sizeof(snippet) - 1);
+         s++)
+      {
+        if (*s == '\r' || *s == '\n')
+          break;
+        *d++ = *s;
+      }
+    *d = '\0';
+    ERR("error line %d : %s...", toknz->current_line, snippet);
+    toknz->cs = eo_tokenizer_error;
+    fbreak;
+  }
+
+  # Scanner: blanks and comments are recognised, anything else is an error.
+  main := |*
+    ignore => show_ignore;
+    comment => show_comment;
+    any => error;
+  *|;
+
+}%%
+
+%% write data;
+
+/**
+ * Run the scanner over the file named by @p source.
+ *
+ * The file is read in chunks into toknz->buf. When a chunk ends in the middle
+ * of a token, the partial token is moved to the front of the buffer before
+ * the next read so the scanner can resume on it.
+ *
+ * @param toknz  tokenizer state, as returned by eo_tokenizer_get()
+ * @param source path of the file to tokenize; the pointer is stored in
+ *               toknz->source
+ * @return EINA_TRUE when the whole input was scanned; EINA_FALSE when the
+ *         file cannot be opened, a read error occurs, or the scanner stops
+ *         in its error state
+ *
+ * @note A single token that does not fit in toknz->buf is fatal: the error is
+ *       logged and the process exits with EXIT_FAILURE.
+ */
+Eina_Bool
+eo_tokenizer_walk(Eo_Tokenizer *toknz, const char *source)
+{
+   FILE *stream;
+   Eina_Bool ok = EINA_TRUE;
+   int done = 0;
+   size_t have = 0;
+
+   INF("tokenize %s...", source);
+   toknz->source = source;
+
+   stream = fopen(toknz->source, "r");
+   if (!stream)
+     {
+        ERR("unable to read in %s", toknz->source);
+        return EINA_FALSE;
+     }
+
+   /* The generated initialisation does not touch eof; clear it so a
+    * tokenizer that already walked a file starts from a clean state. */
+   toknz->eof = NULL;
+
+   %% write init;
+
+   while (!done)
+     {
+        size_t len;
+        size_t space;
+
+        /* Fresh data goes right after any partial token kept from the
+         * previous chunk. */
+        toknz->p = toknz->buf + have;
+        space = BUFSIZE - have;
+
+        if (space == 0)
+          {
+             ERR("out of buffer space");
+             fclose(stream);
+             exit(EXIT_FAILURE);
+          }
+
+        len = fread(toknz->p, 1, space, stream);
+        if ((len < space) && ferror(stream))
+          {
+             ERR("error reading %s", toknz->source);
+             ok = EINA_FALSE;
+             break;
+          }
+        toknz->pe = toknz->p + len;
+
+        /* A short read (including an empty one) marks the end of the input.
+         * The scanner is still run so that it sees eof and can complete a
+         * token left pending by the previous chunk. */
+        if (len < space)
+          {
+             toknz->eof = toknz->pe;
+             done = 1;
+          }
+
+        %% write exec;
+
+        if ( toknz->cs == %%{ write error; }%% )
+          {
+             ok = EINA_FALSE;
+             break;
+          }
+
+        if ( toknz->ts == NULL )
+          have = 0;
+        else
+          {
+             /* There is a prefix to preserve, shift it over. */
+             have = (size_t)(toknz->pe - toknz->ts);
+             memmove( toknz->buf, toknz->ts, have);
+             toknz->te = toknz->buf + (toknz->te - toknz->ts);
+             toknz->ts = toknz->buf;
+          }
+     }
+
+   fclose(stream);
+
+   return ok;
+}
+
+/**
+ * Allocate and initialise a tokenizer.
+ *
+ * The structure is zero-filled, then the fields with a meaningful starting
+ * value are set explicitly: the line counter starts at 1 and the nesting
+ * limit at 10.
+ *
+ * @return the new tokenizer, to be released with eo_tokenizer_free(), or
+ *         NULL when the allocation fails
+ */
+Eo_Tokenizer*
+eo_tokenizer_get(void)
+{
+   Eo_Tokenizer *toknz = calloc(1, sizeof *toknz);
+   if (!toknz) return NULL;
+
+   /* Pointers are set explicitly instead of relying on the zero fill. */
+   toknz->ts = NULL;
+   toknz->te = NULL;
+   /* toknz->top = 0; */
+   toknz->source = NULL;
+   toknz->max_nesting = 10;
+   toknz->current_line = 1;
+   toknz->current_nesting = 0;
+   toknz->token_start_line = 0;
+
+   return toknz;
+}
+
+/*
+ * Release a tokenizer allocated by eo_tokenizer_get().
+ * The source path stored in the tokenizer is not freed here.
+ */
+void
+eo_tokenizer_free(Eo_Tokenizer *toknz)
+{
+ free(toknz);
+}
+
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..2b420f2
--- /dev/null
+++ b/main.c
@@ -0,0 +1,50 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "eo_tokenizer.h"
+
+/* Log domain used by the logging macros of eo_tokenizer.h; registered in
+ * main(). */
+int _eo_tokenizer_log_dom = -1;
+
+/**
+ * Tokenize the file named on the command line.
+ *
+ * @return EXIT_SUCCESS when the file was tokenized, EXIT_FAILURE on a usage
+ *         error, when eina or the log domain cannot be set up, when the file
+ *         is not accessible, or when tokenizing fails
+ */
+int main(int argc, char **argv)
+{
+   const char *fpath;
+   Eo_Tokenizer *toknz;
+   int status = EXIT_FAILURE;
+
+   if (argc < 2)
+     {
+        fprintf(stderr, "usage %s input_file\n", argv[0]);
+        return EXIT_FAILURE;
+     }
+
+   fpath = argv[1];
+
+   if (!eina_init())
+     {
+        fprintf(stderr, "unable to initialize eina\n");
+        return EXIT_FAILURE;
+     }
+
+   eina_log_color_disable_set(EINA_FALSE);
+   _eo_tokenizer_log_dom = eina_log_domain_register("eo_toknz", EINA_COLOR_CYAN);
+   if (_eo_tokenizer_log_dom < 0)
+     {
+        /* The logging macros need a valid domain, so report on stderr. */
+        fprintf(stderr, "unable to register log domain eo_toknz\n");
+        goto shutdown;
+     }
+
+   if (access(fpath, F_OK) != 0)
+     {
+        ERR("error accessing file %s : %s", fpath, strerror(errno));
+        goto shutdown;
+     }
+
+   toknz = eo_tokenizer_get();
+   if (!toknz)
+     {
+        ERR("can't create eo_tokenizer");
+        goto shutdown;
+     }
+
+   /* The exit status reflects the outcome of the walk. */
+   if (eo_tokenizer_walk(toknz, fpath))
+     status = EXIT_SUCCESS;
+
+   eo_tokenizer_free(toknz);
+
+shutdown:
+   eina_shutdown();
+
+   return status;
+}
+
diff --git a/tok.c b/tok.c
new file mode 120000
index 0000000..a66afa9
--- /dev/null
+++ b/tok.c
@@ -0,0 +1 @@
+eo_tokenizer.rl \ No newline at end of file