aboutsummaryrefslogtreecommitdiffstats
path: root/scanner.l
diff options
context:
space:
mode:
Diffstat (limited to 'scanner.l')
-rw-r--r--scanner.l222
1 files changed, 222 insertions, 0 deletions
diff --git a/scanner.l b/scanner.l
new file mode 100644
index 0000000..1f9c73d
--- /dev/null
+++ b/scanner.l
@@ -0,0 +1,222 @@
+/* -*- mode: C -*- */
+/* --------------------------------------------------------------------------
+ libconfig - A library for processing structured configuration files
+ Copyright (C) 2005-2009 Mark A Lindner
+
+ This file is part of libconfig.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public License
+ as published by the Free Software Foundation; either version 2.1 of
+ the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, see
+ <http://www.gnu.org/licenses/>.
+ ----------------------------------------------------------------------------
+*/
+
+%{
+#define YY_EXTRA_TYPE void*
+%}
+
+%option nounistd
+%option reentrant
+%option noyywrap
+%option yylineno
+%option nounput
+%option bison-bridge
+%option header-file="scanner.h"
+%option outfile="lex.yy.c"
+
+%{
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4996)
+#endif
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "grammar.h"
+#include "wincompat.h"
+
+/* this is somewhat kludgy, but I wanted to avoid building strings
+ dynamically during scanning */
+
+static char *make_string(char *s)
+{
+ char *r = ++s;
+ char *p, *q = r;
+ size_t len = strlen(r);
+ int esc = 0;
+
+ *(r + --len) = 0;
+
+ for(p = r; *p; p++)
+ {
+ if(*p == '\\')
+ {
+ if(! esc)
+ {
+ esc = 1;
+ continue;
+ }
+ }
+
+ if(esc)
+ {
+ if(*p == 'n')
+ *(q++) = '\n';
+ else if(*p == 'r')
+ *(q++) = '\r';
+ else if(*p == 'f')
+ *(q++) = '\f';
+ else if(*p == 't')
+ *(q++) = '\t';
+ else
+ *(q++) = *p;
+
+ esc = 0;
+ }
+
+ else if(*p == '\"') /* if we reached the end of a string segment, ... */
+ {
+ /* This construction allows for C-style string concatenation.
+ We don't bother to check for end-of-string here, as we depend
+ on the {string} definition to ensure a new opening quote exists.
+ We do, however, check for and discard all forms of comments
+ [that is, (#...$|//...$|[/][*]...[*][/])] between string segments. */
+
+ while (*++p != '\"') /* ... look for the start of the next segment */
+ {
+ if(*p == '#') /* check for #...$ comment */
+ {
+ while(*++p != '\n')
+ {
+ /* skip the rest of the line */
+ }
+ }
+ else if (*p == '/')
+ {
+ if(*++p == '/') /* check for //...$ comment */
+ {
+ while (*++p != '\n')
+ {
+ /* skip the rest of the line */
+ }
+ }
+ else /* must be '*', lead-in to an old C-style comment */
+ {
+ while (*++p != '*' || *(p+1) != '/')
+ {
+ /* skip all comment content */
+ }
+ ++p; /* step to the trailing slash, to skip it as well */
+ }
+ }
+ }
+ }
+ else
+ *(q++) = *p;
+ }
+
+ *q = 0;
+
+ return(r);
+}
+
+static unsigned long long fromhex(const char *s)
+{
+#ifdef __MINGW32__
+
+ // MinGW's strtoull() seems to be broken; it only returns the lower
+ // 32 bits...
+
+ const char *p = s;
+ unsigned long long val = 0;
+
+ if(*p != '0')
+ return(0);
+
+ ++p;
+
+ if(*p != 'x' && *p != 'X')
+ return(0);
+
+ for(++p; isxdigit(*p); ++p)
+ {
+ val <<= 4;
+ val |= ((*p < 'A') ? (*p & 0xF) : (9 + (*p & 0x7)));
+ }
+
+ return(val);
+
+#else // ! __MINGW32__
+
+ return(strtoull(s, NULL, 16));
+
+#endif // __MINGW32__
+}
+
+%}
+
+ws [ \t\f\r\n]+
+equals \=|\:
+comma ,
+group_start \{
+group_end \}
+true [Tt][Rr][Uu][Ee]
+false [Ff][Aa][Ll][Ss][Ee]
+name [A-Za-z\*][-A-Za-z0-9_\*]*
+quote \"
+integer [-+]?[0-9]+
+integer64 [-+]?[0-9]+L(L)?
+hex 0[Xx][0-9A-Fa-f]+
+hex64 0[Xx][0-9A-Fa-f]+L(L)?
+float ([-+]?([0-9]*)?\.[0-9]*([eE][-+]?[0-9]+)?)|([-+]?([0-9]+)(\.[0-9]*)?[eE][-+]?[0-9]+)
+segment {quote}([^\"\\]|\\.)*{quote}
+string {segment}(([ \t\f\r\n]*((#|\/\/).*\n|\/\*(.|\n)*\*\/)*)*{segment})*
+end ;
+array_start \[
+array_end \]
+list_start \(
+list_end \)
+comment (#|\/\/).*$
+
+%x COMMENT
+
+%%
+
+\/\* { BEGIN COMMENT; }
+<COMMENT>\*\/ { BEGIN INITIAL; }
+<COMMENT>. { /* ignore */ }
+<COMMENT>\n { }
+
+{ws} { /* skip */ }
+
+{equals} { return(TOK_EQUALS); }
+{comma} { return(TOK_COMMA); }
+{group_start} { return(TOK_GROUP_START); }
+{group_end} { return(TOK_GROUP_END); }
+{true} { yylval->ival = 1; return(TOK_BOOLEAN); }
+{false} { yylval->ival = 0; return(TOK_BOOLEAN); }
+{name} { yylval->sval = strdup(yytext); return(TOK_NAME); }
+{float} { yylval->fval = atof(yytext); return(TOK_FLOAT); }
+{integer} { yylval->ival = atoi(yytext); return(TOK_INTEGER); }
+{integer64} { yylval->llval = atoll(yytext); return(TOK_INTEGER64); }
+{hex} { yylval->ival = strtoul(yytext, NULL, 16); return(TOK_HEX); }
+{hex64} { yylval->llval = fromhex(yytext); return(TOK_HEX64); }
+{string} { yylval->sval = strdup(make_string(yytext)); return(TOK_STRING); }
+{array_start} { return(TOK_ARRAY_START); }
+{array_end} { return(TOK_ARRAY_END); }
+{list_start} { return(TOK_LIST_START); }
+{list_end} { return(TOK_LIST_END); }
+{end} { return(TOK_END); }
+{comment} { /* ignore */ }
+. { return(TOK_GARBAGE); }