X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fgedcom_lex_common.c;h=902225f2f64b678827152cf8dc210ddf83d12059;hb=a9e4b4df0cb08b63e54593533002dd9eb18487b3;hp=045cea22d3ba46e2bb3c64500efafe5c784263d1;hpb=e6cd9cd58e0e23f70e75b140d16354af665d7b43;p=gedcom-parse.git diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c index 045cea2..902225f 100644 --- a/gedcom/gedcom_lex_common.c +++ b/gedcom/gedcom_lex_common.c @@ -21,7 +21,7 @@ /* $Id$ */ /* $Name$ */ -#ifndef IN_LEX +#if LEX_SECTION == 1 #include "gedcom_internal.h" #include "multilex.h" @@ -29,15 +29,20 @@ #include "gedcom.h" #include "gedcom.tabgen.h" #include "compat.h" +#include "utf8.h" static size_t encoding_width; static int current_level = -1; static int level_diff=MAXGEDCLEVEL; static size_t line_len = 0; -static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1]; -static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1]; -static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1]; +static struct conv_buffer* ptr_buffer = NULL; +static struct conv_buffer* tag_buffer = NULL; +static struct conv_buffer* str_buffer = NULL; + +#define INITIAL_PTR_BUFFER_LEN MAXGEDCPTRLEN * UTF_FACTOR + 1 +#define INITIAL_TAG_BUFFER_LEN MAXGEDCTAGLEN * UTF_FACTOR + 1 +#define INITIAL_STR_BUFFER_LEN MAXGEDCLINELEN * UTF_FACTOR + 1 #ifdef LEXER_TEST YYSTYPE gedcom_lval; @@ -136,17 +141,14 @@ static void error_unexpected_character(const char* str, char ch) gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch); } -static void yylex_cleanup() -{ - /* fix memory leak in lex */ - yy_delete_buffer(yy_current_buffer); - yy_current_buffer = NULL; -} +/* This is to bypass the iconv conversion (if the input is UTF-8 coming + from the program) */ +static int dummy_conv = 0; -#else /* of #ifndef IN_LEX */ +#elif LEX_SECTION == 2 #define TO_INTERNAL(STR,OUTBUF) \ - to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF)) + (dummy_conv ? STR : to_internal(STR, yyleng, OUTBUF)) #define INIT_LINE_LEN \ line_len = 0; @@ -164,9 +166,10 @@ static void yylex_cleanup() #define MKTAGACTION(THETAG) \ { CHECK_LINE_LEN; \ - gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \ + gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \ gedcom_lval.tag.value = TAG_##THETAG; \ BEGIN(NORMAL); \ + line_no++; \ return TAG_##THETAG; \ } @@ -234,10 +237,11 @@ static void yylex_cleanup() #define ACTION_DIGITS \ - { int level = atoi(TO_INTERNAL(yytext, str_buf)); \ + { int level = atoi(TO_INTERNAL(yytext, str_buffer)); \ CHECK_LINE_LEN; \ if ((level < 0) || (level > MAXGEDCLEVEL)) { \ error_level_out_of_range(); \ + line_no++; \ return BADTOKEN; \ } \ level_diff = level - current_level; \ @@ -255,6 +259,7 @@ static void yylex_cleanup() else { \ /* should never happen (error to GEDCOM spec) */ \ error_level_too_high(level_diff); \ + line_no++; \ return BADTOKEN; \ } \ } @@ -263,19 +268,21 @@ static void yylex_cleanup() #define ACTION_ALPHANUM \ { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \ error_tag_too_long(yytext); \ + line_no++; \ return BADTOKEN; \ } \ CHECK_LINE_LEN; \ - gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \ + gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \ gedcom_lval.tag.value = USERTAG; \ BEGIN(NORMAL); \ + line_no++; \ return USERTAG; \ } #define ACTION_DELIM \ { CHECK_LINE_LEN; \ - gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \ + gedcom_lval.string = TO_INTERNAL(yytext, str_buffer); \ return DELIM; \ } @@ -283,7 +290,7 @@ static void yylex_cleanup() #define ACTION_ANY \ { char* tmp; \ CHECK_LINE_LEN; \ - tmp = TO_INTERNAL(yytext, str_buf); \ + tmp = TO_INTERNAL(yytext, str_buffer); \ if (!tmp) { \ /* Something went wrong during conversion... */ \ error_invalid_character(yytext, yytext[0]); \ @@ -305,7 +312,7 @@ static void yylex_cleanup() #define ACTION_ESCAPE \ { CHECK_LINE_LEN; \ - gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \ + gedcom_lval.string = TO_INTERNAL(yytext, str_buffer); \ return ESCAPE; \ } @@ -316,7 +323,7 @@ static void yylex_cleanup() error_pointer_too_long(yytext); \ return BADTOKEN; \ } \ - gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \ + gedcom_lval.string = TO_INTERNAL(yytext, ptr_buffer); \ return POINTER; \ } @@ -330,7 +337,6 @@ static void yylex_cleanup() #define ACTION_TERMINATOR \ { CHECK_LINE_LEN; \ INIT_LINE_LEN; \ - line_no++; \ BEGIN(INITIAL); \ } @@ -347,10 +353,7 @@ static void yylex_cleanup() } \ else { \ char* ptr; int size; \ - /* Reset our state */ \ - current_level = -1; \ - level_diff = MAXGEDCLEVEL; \ - /* ... then terminate lex */ \ + /* ... terminate lex */ \ yyterminate(); \ /* Get rid of f*cking compiler warning from lex generated code */ \ /* yyterminate does return(), so program will never come here */ \ @@ -383,4 +386,50 @@ static void yylex_cleanup() return BADTOKEN; \ } -#endif /* IN_LEX */ +#elif LEX_SECTION == 3 + +int yywrap() +{ + return 1; +} + +static void free_conv_buffers() +{ + free_conv_buffer(ptr_buffer); + free_conv_buffer(tag_buffer); + free_conv_buffer(str_buffer); +} + +static void yylex_cleanup() +{ + /* fix memory leak in lex */ + yy_delete_buffer(yy_current_buffer); + yy_current_buffer = NULL; + free_conv_buffers(); +} + +static void init_conv_buffers() +{ + if (!ptr_buffer) { + ptr_buffer = create_conv_buffer(INITIAL_PTR_BUFFER_LEN); + tag_buffer = create_conv_buffer(INITIAL_TAG_BUFFER_LEN); + str_buffer = create_conv_buffer(INITIAL_STR_BUFFER_LEN); + } +} + +static int exitfuncregistered = 0; + +void yymyinit(FILE *f) +{ + if (! exitfuncregistered && atexit(yylex_cleanup) == 0) + exitfuncregistered = 1; + init_conv_buffers(); + yyin = f; + yyrestart(f); + /* Reset our state */ + current_level = -1; + level_diff = MAXGEDCLEVEL; + BEGIN(INITIAL); +} + +#endif