X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fgedcom_lex_common.c;h=d8f35d768481e219e273c54e8278ec4d13ee6a0a;hb=6cb2fba281c993d06b725b0ecb7cbe404f81aff7;hp=f9fa955d5216a3240377ba3b39625d7038963bce;hpb=40ff55f97de314e09359361d83f7eacd47fa3bce;p=gedcom-parse.git diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c index f9fa955..d8f35d7 100644 --- a/gedcom/gedcom_lex_common.c +++ b/gedcom/gedcom_lex_common.c @@ -1,5 +1,5 @@ /* Common lexer code. - Copyright (C) 2001 The Genes Development Team + Copyright (C) 2001, 2002 The Genes Development Team This file is part of the Gedcom parser library. Contributed by Peter Verthez , 2001. @@ -21,15 +21,14 @@ /* $Id$ */ /* $Name$ */ -#ifndef IN_LEX +#if LEX_SECTION == 1 #include "gedcom_internal.h" #include "multilex.h" #include "encoding.h" #include "gedcom.h" -#include "gedcom.tab.h" - -#define YY_NO_UNPUT +#include "gedcom.tabgen.h" +#include "compat.h" static size_t encoding_width; static int current_level = -1; @@ -43,14 +42,21 @@ static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1]; #ifdef LEXER_TEST YYSTYPE gedcom_lval; int line_no = 1; +int compat_at = 0; int gedcom_lex(); -int test_loop(ENCODING enc, char* code) +void message_handler(Gedcom_msg_type type, char *msg) +{ + fprintf(stderr, "(%d) %s\n", type, msg); +} + +int test_loop(ENCODING enc, const char* code) { int tok, res; init_encodings(); set_encoding_width(enc); + gedcom_set_message_handler(message_handler); res = open_conv_to_internal(code); if (!res) { gedcom_error("Unable to open conversion context: %s", @@ -67,8 +73,8 @@ int test_loop(ENCODING enc, char* code) case DELIM: printf("DELIM "); break; case ANYCHAR: printf("%s ", gedcom_lval.string); break; case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break; - case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break; - default: printf("TAG(%s) ", gedcom_lval.string); break; + case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag.string); break; + default: printf("TAG(%s) ", gedcom_lval.tag.string); break; } tok = gedcom_lex(); } @@ -79,10 +85,65 @@ int test_loop(ENCODING enc, char* code) #endif /* of #ifdef LEXER_TEST */ -#else /* of #ifndef IN_LEX */ +/* These are defined as functions here, because xgettext has trouble + extracting the strings out of long pre-processor defined */ + +static void error_line_too_long() +{ + gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN); +} + +static void error_level_leading_zero() +{ + gedcom_error (_("Level number with leading zero not allowed")); +} + +static void error_level_out_of_range() +{ + gedcom_error (_("Level number out of range [0..%d]"), MAXGEDCLEVEL); +} + +static void error_level_too_high(int level_diff) +{ + gedcom_error (_("GEDCOM level number is %d higher than previous"), + level_diff); +} + +static void error_tag_too_long(const char *tag) +{ + gedcom_error(_("Tag '%s' too long, max %d characters allowed"), + tag, MAXGEDCTAGLEN); +} + +static void error_invalid_character(const char *str, char ch) +{ + gedcom_error(_("Invalid character for encoding: '%s' (0x%02x)"), str, ch); +} + +static void error_pointer_too_long(const char *ptr) +{ + gedcom_error(_("Pointer '%s' too long, max %d characters allowed"), + ptr, MAXGEDCPTRLEN); +} + +static void error_at_character() +{ + gedcom_error(_("'@' character should be written as '@@' in values")); +} + +static void error_unexpected_character(const char* str, char ch) +{ + gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch); +} + +/* This is to bypass the iconv conversion (if the input is UTF-8 coming + from the program) */ +static int dummy_conv = 0; + +#elif LEX_SECTION == 2 #define TO_INTERNAL(STR,OUTBUF) \ - to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF)) + (dummy_conv ? STR : to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))) #define INIT_LINE_LEN \ line_len = 0; @@ -91,8 +152,7 @@ int test_loop(ENCODING enc, char* code) { if (line_len != (size_t)-1) { \ line_len += strlen(yytext); \ if (line_len > MAXGEDCLINELEN * encoding_width) { \ - gedcom_error("Line too long, max %d characters", \ - MAXGEDCLINELEN); \ + error_line_too_long(); \ line_len = (size_t)-1; \ return BADTOKEN; \ } \ @@ -101,8 +161,10 @@ int test_loop(ENCODING enc, char* code) #define MKTAGACTION(THETAG) \ { CHECK_LINE_LEN; \ - gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \ + gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \ + gedcom_lval.tag.value = TAG_##THETAG; \ BEGIN(NORMAL); \ + line_no++; \ return TAG_##THETAG; \ } @@ -164,7 +226,7 @@ int test_loop(ENCODING enc, char* code) #define ACTION_0_DIGITS \ - { gedcom_error ("Level number with leading zero"); \ + { error_level_leading_zero(); \ return BADTOKEN; \ } @@ -173,8 +235,8 @@ int test_loop(ENCODING enc, char* code) { int level = atoi(TO_INTERNAL(yytext, str_buf)); \ CHECK_LINE_LEN; \ if ((level < 0) || (level > MAXGEDCLEVEL)) { \ - gedcom_error ("Level number out of range [0..%d]", \ - MAXGEDCLEVEL); \ + error_level_out_of_range(); \ + line_no++; \ return BADTOKEN; \ } \ level_diff = level - current_level; \ @@ -191,9 +253,8 @@ int test_loop(ENCODING enc, char* code) } \ else { \ /* should never happen (error to GEDCOM spec) */ \ - gedcom_error ("GEDCOM level number is %d higher than " \ - "previous", \ - level_diff); \ + error_level_too_high(level_diff); \ + line_no++; \ return BADTOKEN; \ } \ } @@ -201,13 +262,15 @@ int test_loop(ENCODING enc, char* code) #define ACTION_ALPHANUM \ { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \ - gedcom_error("Tag '%s' too long, max %d characters", \ - yytext, MAXGEDCTAGLEN); \ + error_tag_too_long(yytext); \ + line_no++; \ return BADTOKEN; \ } \ CHECK_LINE_LEN; \ - gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \ + gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \ + gedcom_lval.tag.value = USERTAG; \ BEGIN(NORMAL); \ + line_no++; \ return USERTAG; \ } @@ -220,16 +283,25 @@ int test_loop(ENCODING enc, char* code) #define ACTION_ANY \ - { CHECK_LINE_LEN; \ - gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \ - /* Due to character conversions, it is possible that the current \ - character will be combined with the next, and so now we don't have a \ - character yet... \ - In principle, this is only applicable to the 1byte case (e.g. ANSEL), \ - but it doesn't harm the unicode case. \ - */ \ - if (strlen(gedcom_lval.string) > 0) \ - return ANYCHAR; \ + { char* tmp; \ + CHECK_LINE_LEN; \ + tmp = TO_INTERNAL(yytext, str_buf); \ + if (!tmp) { \ + /* Something went wrong during conversion... */ \ + error_invalid_character(yytext, yytext[0]); \ + return BADTOKEN; \ + } \ + else { \ + gedcom_lval.string = tmp; \ + /* Due to character conversions, it is possible that the current \ + character will be combined with the next, and so now we don't have a \ + character yet... \ + In principle, this is only applicable to the 1byte case (e.g. ANSEL),\ + but it doesn't harm the unicode case. \ + */ \ + if (strlen(gedcom_lval.string) > 0) \ + return ANYCHAR; \ + } \ } @@ -243,8 +315,7 @@ int test_loop(ENCODING enc, char* code) #define ACTION_POINTER \ { CHECK_LINE_LEN; \ if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) { \ - gedcom_error("Pointer '%s' too long, max %d characters", \ - yytext, MAXGEDCPTRLEN); \ + error_pointer_too_long(yytext); \ return BADTOKEN; \ } \ gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \ @@ -261,7 +332,6 @@ int test_loop(ENCODING enc, char* code) #define ACTION_TERMINATOR \ { CHECK_LINE_LEN; \ INIT_LINE_LEN; \ - line_no++; \ BEGIN(INITIAL); \ } @@ -277,19 +347,66 @@ int test_loop(ENCODING enc, char* code) return CLOSE; \ } \ else { \ - /* Reset our state */ \ - current_level = -1; \ - level_diff = MAXGEDCLEVEL; \ - /* ... then terminate lex */ \ + char* ptr; int size; \ + /* ... terminate lex */ \ yyterminate(); \ + /* Get rid of f*cking compiler warning from lex generated code */ \ + /* yyterminate does return(), so program will never come here */ \ + yy_flex_realloc(ptr, size); \ } \ } +#define ACTION_NORMAL_AT \ + { if (compat_at) { \ + int i, j; \ + char *yycopy = strdup(yytext); \ + if (yycopy) { \ + for (i = 0; i < 2; i++) \ + for (j = yyleng - 1; j >= 0; --j) \ + unput(yycopy[j]); \ + free(yycopy); \ + } \ + else { \ + MEMORY_ERROR; \ + } \ + } \ + else { \ + error_at_character(); \ + return BADTOKEN; \ + } \ + } #define ACTION_UNEXPECTED \ - { gedcom_error("Unexpected character: '%s' (0x%02x)", \ - yytext, yytext[0]); \ + { error_unexpected_character(yytext, yytext[0]); \ return BADTOKEN; \ } -#endif /* IN_LEX */ +#elif LEX_SECTION == 3 + +int yywrap() +{ + return 1; +} + +static void yylex_cleanup() +{ + /* fix memory leak in lex */ + yy_delete_buffer(yy_current_buffer); + yy_current_buffer = NULL; +} + +static int exitfuncregistered = 0; + +void yymyinit(FILE *f) +{ + if (! exitfuncregistered && atexit(yylex_cleanup) == 0) + exitfuncregistered = 1; + yyin = f; + yyrestart(f); + /* Reset our state */ + current_level = -1; + level_diff = MAXGEDCLEVEL; + BEGIN(INITIAL); +} + +#endif