diff options
author | Jérémy Zurcher <jeremy@asynk.ch> | 2013-11-23 03:05:05 +0100 |
---|---|---|
committer | Jérémy Zurcher <jeremy@asynk.ch> | 2013-11-23 03:05:05 +0100 |
commit | 013625545c75dd63e62dc4470b5b90ac428e735d (patch) | |
tree | 60aad636287671f5a1a2ed122de26c1dd19a6b6c | |
download | eo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.zip eo_tokenizer-013625545c75dd63e62dc4470b5b90ac428e735d.tar.gz |
initial commit: support C/C++ comments
-rw-r--r-- | Makefile | 39 | ||||
-rw-r--r-- | data/comments.c | 20 | ||||
-rw-r--r-- | eo_tokenizer.h | 63 | ||||
-rw-r--r-- | eo_tokenizer.rl | 165 | ||||
-rw-r--r-- | main.c | 50 | ||||
l--------- | tok.c | 1 |
6 files changed, 338 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..949524c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,49 @@
+
+# Builds eo_tokenizer; eo_tokenizer.c is generated from eo_tokenizer.rl by ragel.
+CC = gcc
+# Compile-only flags: libraries go in LDLIBS so they follow the objects when linking.
+CFLAGS = -ggdb3 -Wall -I. `pkg-config --cflags eina`
+LDLIBS = `pkg-config --libs eina`
+# Optional macro from the command line (make STD=NAME); expands to nothing when unset.
+DEFS = $(STD:%=-D%)
+BIN = eo_tokenizer
+SRC = main.c eo_tokenizer.c
+OBJ = $(SRC:.c=.o)
+
+.PHONY: all clean test-comments
+.SUFFIXES: .rl .xml .dot .png
+
+.rl.c:
+	ragel -s -o $@ $<
+
+.rl.xml:
+	ragel -x -o $@ $<
+
+.rl.dot:
+	ragel -V -p -S eo_tokenizer -o $@ $<
+
+.dot.png:
+	dot -Tpng >$@ $<
+
+.c.o:
+	$(CC) -c $(CFLAGS) $(DEFS) -o $@ $<
+
+all: $(BIN)
+
+eo_tokenizer.c: eo_tokenizer.rl
+eo_tokenizer.xml: eo_tokenizer.rl
+eo_tokenizer.dot: eo_tokenizer.rl
+eo_tokenizer.png: eo_tokenizer.dot
+
+# Every object includes the header; only objects are handed to the linker.
+$(OBJ): eo_tokenizer.h
+
+$(BIN): $(OBJ)
+	$(CC) $(LDFLAGS) -o $@ $(OBJ) $(LDLIBS)
+
+test-comments: $(BIN)
+	EINA_LOG_LEVELS=eo_toknz:4 ./$(BIN) ./data/comments.c
+
+clean:
+	rm -f *~ *.o *.dot *.png *.xml $(BIN).c $(BIN)
+
diff --git a/data/comments.c b/data/comments.c
new file mode 100644
index 0000000..5fd0216
--- /dev/null
+++ b/data/comments.c
@@ -0,0 +1,20 @@
+/* comment:1-1 */
+
+/*
+ comment:3-4 */
+/*
+ * comment:5-7
+ */
+
+// comment:9-9
+
+ /*
+comment:11-15
+ *
+ *
+ * *//*comment 15-16
+*/
+
+ Evas_object {
+ inherits { Eo_Base, Evas_Common_Interface }
+}
diff --git a/eo_tokenizer.h b/eo_tokenizer.h new file mode 100644 index 0000000..536cdda --- /dev/null +++ b/eo_tokenizer.h @@ -0,0 +1,63 @@ +#ifndef __EO_TOKENIZER_H__ +#define __EO_TOKENIZER_H__ + +#include <Eina.h> + +extern int _eo_tokenizer_log_dom; +#undef EINA_LOG_DOMAIN_DEFAULT +#define EINA_LOG_DOMAIN_DEFAULT _eo_tokenizer_log_dom + +#ifdef CRITICAL +#undef CRITICAL +#endif +#define CRITICAL(...) EINA_LOG_DOM_CRIT(_eo_tokenizer_log_dom, __VA_ARGS__) + +#ifdef ERR +#undef ERR +#endif +#define ERR(...) EINA_LOG_DOM_ERR(_eo_tokenizer_log_dom, __VA_ARGS__) + +#ifdef WRN +#undef WRN +#endif +#define WRN(...) EINA_LOG_DOM_WARN(_eo_tokenizer_log_dom, __VA_ARGS__) + +#ifdef INF +#undef INF +#endif +#define INF(...) EINA_LOG_DOM_INFO(_eo_tokenizer_log_dom, __VA_ARGS__) + +#ifdef DBG +#undef DBG +#endif +#define DBG(...) EINA_LOG_DOM_DBG(_eo_tokenizer_log_dom, __VA_ARGS__) + +#define BUFSIZE 256 + +typedef struct _eo_tokenizer { + /* ragel vars */ + int cs; /* current machine state */ + int act; /* last pattern matched */ + char *ts; /* current token match start */ + char *te; /* current token match end */ + char *p; /* data start */ + char *pe; /* data end */ + char *eof; /* eof = (EOF ? pe : NULL) */ + /* int stack[10]; /1* state stack used by fret fcall ... 
*1/ */ + /* int top; /1* stack pointer *1/ */ + + const char *source; + int current_line; + int current_nesting; + int max_nesting; + int token_start_line; + char buf[BUFSIZE]; +} Eo_Tokenizer; + +Eo_Tokenizer* eo_tokenizer_get(); + +Eina_Bool eo_tokenizer_walk(Eo_Tokenizer *toknz, const char *source); + +void eo_tokenizer_free(Eo_Tokenizer *toknz); + +#endif /* __EO_TOKENIZER_H__ */ diff --git a/eo_tokenizer.rl b/eo_tokenizer.rl new file mode 100644 index 0000000..b10218d --- /dev/null +++ b/eo_tokenizer.rl @@ -0,0 +1,165 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "eo_tokenizer.h" + +%%{ + machine common; + + action inc_line { + toknz->current_line += 1; + } + + action save_start_line { + toknz->token_start_line = toknz->current_line; + } + + cr = '\n'; + cr_neg = "[^\n]"; + ws = "[ \t\r]"; + newline = cr @inc_line; + ignore = (0x00..0x20 - cr)* newline?; + + c_comment = "/*" ( any | '\n' @inc_line )* :>> "*/"; + cpp_comment = "//" (any - cr)* newline; + comment = ( c_comment | cpp_comment ) > save_start_line; + +}%% + +%%{ + machine eo_tokenizer; + include common; + + access toknz->; + variable p toknz->p; + variable pe toknz->pe; + variable eof toknz->eof; + + action show_comment { + DBG("comment %03d:%03d", toknz->token_start_line, toknz->current_line); + } + + action show_ignore { + DBG("ignore %d (%d)", toknz->current_line, (toknz->te - toknz->ts)); + } + + action error { + /* TODO find a more elegant way, + * -> cant't use $err() or $!error on scanner + * -> fgoto another machine to eat the line, ts is set to NULL + */ + char buf[BUFSIZE]; + char *s, *d; + for (s = fpc, d = buf; (s <= toknz->pe); s++) + { + if ( (int)*s == 13 || (int)*s == 10) + break; + *d++ = *s; + } + *d = '\0'; + ERR("error line %d : %s...", toknz->current_line, buf); + toknz->cs = eo_tokenizer_error; + fbreak; + } + + main := |* + ignore => show_ignore; + comment => show_comment; + any => error; + *|; + +}%% + +%% write data; + +Eina_Bool +eo_tokenizer_walk(Eo_Tokenizer 
*toknz, const char *source) +{ + INF("tokenize %s...", source); + toknz->source = source; + + FILE *stream; + + int done = 0; + int have = 0; + + stream = fopen(toknz->source, "r"); + if (!stream) + { + ERR("unable to read in %s", toknz->source); + return EINA_FALSE; + } + + %% write init; + + while (!done) + { + int len; + int space; + + toknz->p = toknz->buf + have; + space = BUFSIZE - have; + + if (space == 0) + { + ERR("out of buffer space"); + fclose(stream); + exit(EXIT_FAILURE); + } + + len = fread(toknz->p, 1, space, stream); + if (len == 0) break; + toknz->pe = toknz->p + len; + + if (len < space) + { + toknz->eof = toknz->pe; + done = 1; + } + + %% write exec; + + if ( toknz->cs == %%{ write error; }%% ) + break; + + if ( toknz->ts == 0 ) + have = 0; + else + { + /* There is a prefix to preserve, shift it over. */ + have = toknz->pe - toknz->ts; + memmove( toknz->buf, toknz->ts, have); + toknz->te = toknz->buf + (toknz->te - toknz->ts); + toknz->ts = toknz->buf; + } + } + + fclose(stream); + + return EINA_TRUE; +} + +Eo_Tokenizer* +eo_tokenizer_get() +{ + Eo_Tokenizer *toknz = calloc(1, sizeof(Eo_Tokenizer)); + if (!toknz) return NULL; + + toknz->ts = NULL; + toknz->te = NULL; + /* toknz->top = 0; */ + toknz->source = NULL; + toknz->max_nesting = 10; + toknz->current_line = 1; + toknz->current_nesting = 0; + toknz->token_start_line = 0; + + return toknz; +} + +void +eo_tokenizer_free(Eo_Tokenizer *toknz) +{ + free(toknz); +} + @@ -0,0 +1,50 @@ +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> + +#include "eo_tokenizer.h" + +int _eo_tokenizer_log_dom = -1; + +int main(int argc, char **argv) +{ + char *fpath; + Eo_Tokenizer *toknz; + + if (argc < 2) + { + fprintf(stderr, "usage %s input_file\n", argv[0]); + exit(EXIT_FAILURE); + } + + fpath = argv[1]; + + eina_init(); + + eina_log_color_disable_set(EINA_FALSE); + _eo_tokenizer_log_dom = eina_log_domain_register("eo_toknz", EINA_COLOR_CYAN); + + if (access(fpath, F_OK) != 0) + 
{ + ERR("error accessing file %s : %s", fpath, strerror(errno)); + exit(EXIT_FAILURE); + } + + toknz = eo_tokenizer_get(fpath); + if (!toknz) + { + ERR("can't create eo_tokenizer"); + eina_shutdown(); + exit(EXIT_FAILURE); + } + + eo_tokenizer_walk(toknz, fpath); + + eo_tokenizer_free(toknz); + + eina_shutdown(); + + return EXIT_SUCCESS; +} + @@ -0,0 +1 @@ +eo_tokenizer.rl
\ No newline at end of file |