diff options
Diffstat (limited to 'scanner.l')
-rw-r--r-- | scanner.l | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/scanner.l b/scanner.l new file mode 100644 index 0000000..1f9c73d --- /dev/null +++ b/scanner.l @@ -0,0 +1,222 @@ +/* -*- mode: C -*- */ +/* -------------------------------------------------------------------------- + libconfig - A library for processing structured configuration files + Copyright (C) 2005-2009 Mark A Lindner + + This file is part of libconfig. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License + as published by the Free Software Foundation; either version 2.1 of + the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, see + <http://www.gnu.org/licenses/>. + ---------------------------------------------------------------------------- +*/ + +%{ +#define YY_EXTRA_TYPE void* +%} + +%option nounistd +%option reentrant +%option noyywrap +%option yylineno +%option nounput +%option bison-bridge +%option header-file="scanner.h" +%option outfile="lex.yy.c" + +%{ + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include "grammar.h" +#include "wincompat.h" + +/* this is somewhat kludgy, but I wanted to avoid building strings + dynamically during scanning */ + +static char *make_string(char *s) +{ + char *r = ++s; + char *p, *q = r; + size_t len = strlen(r); + int esc = 0; + + *(r + --len) = 0; + + for(p = r; *p; p++) + { + if(*p == '\\') + { + if(! esc) + { + esc = 1; + continue; + } + } + + if(esc) + { + if(*p == 'n') + *(q++) = '\n'; + else if(*p == 'r') + *(q++) = '\r'; + else if(*p == 'f') + *(q++) = '\f'; + else if(*p == 't') + *(q++) = '\t'; + else + *(q++) = *p; + + esc = 0; + } + + else if(*p == '\"') /* if we reached the end of a string segment, ... */ + { + /* This construction allows for C-style string concatenation. + We don't bother to check for end-of-string here, as we depend + on the {string} definition to ensure a new opening quote exists. + We do, however, check for and discard all forms of comments + [that is, (#...$|//...$|[/][*]...[*][/])] between string segments. */ + + while (*++p != '\"') /* ... look for the start of the next segment */ + { + if(*p == '#') /* check for #...$ comment */ + { + while(*++p != '\n') + { + /* skip the rest of the line */ + } + } + else if (*p == '/') + { + if(*++p == '/') /* check for //...$ comment */ + { + while (*++p != '\n') + { + /* skip the rest of the line */ + } + } + else /* must be '*', lead-in to an old C-style comment */ + { + while (*++p != '*' || *(p+1) != '/') + { + /* skip all comment content */ + } + ++p; /* step to the trailing slash, to skip it as well */ + } + } + } + } + else + *(q++) = *p; + } + + *q = 0; + + return(r); +} + +static unsigned long long fromhex(const char *s) +{ +#ifdef __MINGW32__ + + // MinGW's strtoull() seems to be broken; it only returns the lower + // 32 bits... + + const char *p = s; + unsigned long long val = 0; + + if(*p != '0') + return(0); + + ++p; + + if(*p != 'x' && *p != 'X') + return(0); + + for(++p; isxdigit(*p); ++p) + { + val <<= 4; + val |= ((*p < 'A') ? (*p & 0xF) : (9 + (*p & 0x7))); + } + + return(val); + +#else // ! __MINGW32__ + + return(strtoull(s, NULL, 16)); + +#endif // __MINGW32__ +} + +%} + +ws [ \t\f\r\n]+ +equals \=|\: +comma , +group_start \{ +group_end \} +true [Tt][Rr][Uu][Ee] +false [Ff][Aa][Ll][Ss][Ee] +name [A-Za-z\*][-A-Za-z0-9_\*]* +quote \" +integer [-+]?[0-9]+ +integer64 [-+]?[0-9]+L(L)? +hex 0[Xx][0-9A-Fa-f]+ +hex64 0[Xx][0-9A-Fa-f]+L(L)? +float ([-+]?([0-9]*)?\.[0-9]*([eE][-+]?[0-9]+)?)|([-+]?([0-9]+)(\.[0-9]*)?[eE][-+]?[0-9]+) +segment {quote}([^\"\\]|\\.)*{quote} +string {segment}(([ \t\f\r\n]*((#|\/\/).*\n|\/\*(.|\n)*\*\/)*)*{segment})* +end ; +array_start \[ +array_end \] +list_start \( +list_end \) +comment (#|\/\/).*$ + +%x COMMENT + +%% + +\/\* { BEGIN COMMENT; } +<COMMENT>\*\/ { BEGIN INITIAL; } +<COMMENT>. { /* ignore */ } +<COMMENT>\n { } + +{ws} { /* skip */ } + +{equals} { return(TOK_EQUALS); } +{comma} { return(TOK_COMMA); } +{group_start} { return(TOK_GROUP_START); } +{group_end} { return(TOK_GROUP_END); } +{true} { yylval->ival = 1; return(TOK_BOOLEAN); } +{false} { yylval->ival = 0; return(TOK_BOOLEAN); } +{name} { yylval->sval = strdup(yytext); return(TOK_NAME); } +{float} { yylval->fval = atof(yytext); return(TOK_FLOAT); } +{integer} { yylval->ival = atoi(yytext); return(TOK_INTEGER); } +{integer64} { yylval->llval = atoll(yytext); return(TOK_INTEGER64); } +{hex} { yylval->ival = strtoul(yytext, NULL, 16); return(TOK_HEX); } +{hex64} { yylval->llval = fromhex(yytext); return(TOK_HEX64); } +{string} { yylval->sval = strdup(make_string(yytext)); return(TOK_STRING); } +{array_start} { return(TOK_ARRAY_START); } +{array_end} { return(TOK_ARRAY_END); } +{list_start} { return(TOK_LIST_START); } +{list_end} { return(TOK_LIST_END); } +{end} { return(TOK_END); } +{comment} { /* ignore */ } +. { return(TOK_GARBAGE); } |