#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "gedcom.h"
#include "gedcom.tabgen.h"
#include "compat.h"
static size_t encoding_width;
static int current_level = -1;
-static int level_diff=MAXGEDCLEVEL;
+static int level_diff = MAXGEDCLEVEL;
static size_t line_len = 0;
+static int tab_space = 0;
+static int current_tag = -1;
-static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
-static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
-static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+static struct conv_buffer* ptr_buffer = NULL;
+static struct conv_buffer* tag_buffer = NULL;
+static struct conv_buffer* str_buffer = NULL;
+
+/* Initial sizes handed to create_conv_buffer() for the pointer, tag and
+   string conversion buffers.  Each expansion is parenthesized so that it
+   remains a single operand wherever the macro is used. */
+#define INITIAL_PTR_BUFFER_LEN (MAXGEDCPTRLEN * UTF_FACTOR + 1)
+#define INITIAL_TAG_BUFFER_LEN (MAXGEDCTAGLEN * UTF_FACTOR + 1)
+#define INITIAL_STR_BUFFER_LEN (MAXGEDCLINELEN * UTF_FACTOR + 1)
#ifdef LEXER_TEST
YYSTYPE gedcom_lval;
int line_no = 1;
-int compat_at = 0;
int gedcom_lex();
static void error_line_too_long()
{
- gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN);
+ gedcom_error(_("Line too long, max %d characters allowed"),
+ MAXGEDCLINELEN);
}
static void error_level_leading_zero()
gedcom_error(_("'@' character should be written as '@@' in values"));
}
+static void error_tab_character() /* reports a tab found in a value; called from ACTION_TAB when compat_mode(C_TAB_CHARACTER) is false */
+{
+ gedcom_error(_("Tab character is not allowed in values"));
+}
+
static void error_unexpected_character(const char* str, char ch)
{
gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
#elif LEX_SECTION == 2
#define TO_INTERNAL(STR,OUTBUF) \
- (dummy_conv ? STR : to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF)))
+ (dummy_conv ? STR : to_internal(STR, yyleng, OUTBUF))
#define INIT_LINE_LEN \
line_len = 0;
#define CHECK_LINE_LEN \
{ if (line_len != (size_t)-1) { \
line_len += strlen(yytext); \
- if (line_len > MAXGEDCLINELEN * encoding_width) { \
+ if (line_len > MAXGEDCLINELEN * encoding_width \
+ && ! compat_long_line(current_level, current_tag)) { \
error_line_too_long(); \
line_len = (size_t)-1; \
return BADTOKEN; \
} \
}
+#define GENERATE_TAB_SPACE \
+ { gedcom_lval.string = " "; /* token value is a single space */ \
+ tab_space--; /* one pending space consumed. NOTE(review): ACTION_BEFORE_REGEXPS also post-decrements tab_space in its test before expanding this macro, so each pending space costs two counts there - confirm that is intended */ \
+ return DELIM; /* returns from the enclosing lexer function */ \
+ }
+
#define MKTAGACTION(THETAG) \
{ CHECK_LINE_LEN; \
- gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
- gedcom_lval.tag.value = TAG_##THETAG; \
+ gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \
+ current_tag = TAG_##THETAG; \
+ gedcom_lval.tag.value = current_tag; \
BEGIN(NORMAL); \
- return TAG_##THETAG; \
+ line_no++; \
+ return current_tag; \
}
/* The GEDCOM level number is converted into a sequence of opening
But because this means that one token is converted into a series
of tokens, there is some initial code following immediately here
- that returns "pending" tokens. */
+ that returns "pending" tokens.
+
+ Also, for compatibility, tabs are converted into spaces; that
+ conversion is handled here as well. */
#define ACTION_BEFORE_REGEXPS \
- { if (level_diff < 1) { \
+ { if (compat_mode(C_TAB_CHARACTER) && tab_space-- > 0) { \
+ GENERATE_TAB_SPACE; \
+ } \
+ else if (level_diff < 1) { \
level_diff++; \
return CLOSE; \
} \
#define ACTION_DIGITS \
- { int level = atoi(TO_INTERNAL(yytext, str_buf)); \
+ { int level = atoi(TO_INTERNAL(yytext, str_buffer)); \
CHECK_LINE_LEN; \
if ((level < 0) || (level > MAXGEDCLEVEL)) { \
error_level_out_of_range(); \
+ line_no++; \
return BADTOKEN; \
} \
level_diff = level - current_level; \
else { \
/* should never happen (error to GEDCOM spec) */ \
error_level_too_high(level_diff); \
+ line_no++; \
return BADTOKEN; \
} \
}
#define ACTION_ALPHANUM \
{ if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \
error_tag_too_long(yytext); \
+ line_no++; \
return BADTOKEN; \
} \
CHECK_LINE_LEN; \
- gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
+ gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \
gedcom_lval.tag.value = USERTAG; \
BEGIN(NORMAL); \
+ line_no++; \
return USERTAG; \
}
#define ACTION_DELIM \
{ CHECK_LINE_LEN; \
- gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
+ gedcom_lval.string = TO_INTERNAL(yytext, str_buffer); \
return DELIM; \
}
#define ACTION_ANY \
{ char* tmp; \
CHECK_LINE_LEN; \
- tmp = TO_INTERNAL(yytext, str_buf); \
+ tmp = TO_INTERNAL(yytext, str_buffer); \
if (!tmp) { \
/* Something went wrong during conversion... */ \
error_invalid_character(yytext, yytext[0]); \
#define ACTION_ESCAPE \
{ CHECK_LINE_LEN; \
- gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
+ gedcom_lval.string = TO_INTERNAL(yytext, str_buffer); \
return ESCAPE; \
}
error_pointer_too_long(yytext); \
return BADTOKEN; \
} \
- gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \
+ gedcom_lval.string = TO_INTERNAL(yytext, ptr_buffer); \
return POINTER; \
}
#define ACTION_TERMINATOR \
{ CHECK_LINE_LEN; \
INIT_LINE_LEN; \
- line_no++; \
+ if (line_no == 1) \
+ set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
BEGIN(INITIAL); \
}
}
#define ACTION_NORMAL_AT \
- { if (compat_at) { \
+ { if (compat_mode(C_NO_DOUBLE_AT)) { \
int i, j; \
char *yycopy = strdup(yytext); \
if (yycopy) { \
} \
}
+/* Action for a tab character in a value.  Without the C_TAB_CHARACTER
+   compatibility mode the tab is reported as an error and BADTOKEN is
+   returned.  With it, tab_space is set to 8 and the first space token is
+   returned through GENERATE_TAB_SPACE. */
+#define ACTION_TAB \
+  { if (! compat_mode(C_TAB_CHARACTER)) { \
+      error_tab_character(); \
+      return BADTOKEN; \
+    } \
+    tab_space = 8; \
+    GENERATE_TAB_SPACE; \
+  }
+
#define ACTION_UNEXPECTED \
{ error_unexpected_character(yytext, yytext[0]); \
return BADTOKEN; \
return 1;
}
+/* Release the pointer, tag and string conversion buffers.
+   The static pointers are reset to NULL afterwards so that no dangling
+   pointer is left behind: init_conv_buffers() decides whether to create
+   the buffers by testing ptr_buffer, and a stale non-NULL value would make
+   it skip creation and leave the lexer working on freed memory. */
+static void free_conv_buffers()
+{
+  free_conv_buffer(ptr_buffer);
+  ptr_buffer = NULL;
+  free_conv_buffer(tag_buffer);
+  tag_buffer = NULL;
+  free_conv_buffer(str_buffer);
+  str_buffer = NULL;
+}
+
static void yylex_cleanup()
{
/* fix memory leak in lex */
yy_delete_buffer(yy_current_buffer);
yy_current_buffer = NULL;
+ free_conv_buffers(); /* also release ptr_buffer, tag_buffer and str_buffer */
+}
+
+/* Create every conversion buffer that does not exist yet.
+   Each buffer is tested on its own, so a buffer that is still NULL (for
+   instance because an earlier creation attempt did not yield one) is
+   created on the next call instead of being skipped just because
+   ptr_buffer already exists.  Buffers that already exist are kept. */
+static void init_conv_buffers()
+{
+  if (!ptr_buffer)
+    ptr_buffer = create_conv_buffer(INITIAL_PTR_BUFFER_LEN);
+  if (!tag_buffer)
+    tag_buffer = create_conv_buffer(INITIAL_TAG_BUFFER_LEN);
+  if (!str_buffer)
+    str_buffer = create_conv_buffer(INITIAL_STR_BUFFER_LEN);
}
static int exitfuncregistered = 0;
{
if (! exitfuncregistered && atexit(yylex_cleanup) == 0)
exitfuncregistered = 1;
+ init_conv_buffers();
yyin = f;
yyrestart(f);
/* Reset our state */