General cleanup.
authorPeter Verthez <Peter.Verthez@advalvas.be>
Sun, 25 Nov 2001 12:53:20 +0000 (12:53 +0000)
committerPeter Verthez <Peter.Verthez@advalvas.be>
Sun, 25 Nov 2001 12:53:20 +0000 (12:53 +0000)
13 files changed:
Makefile
ansel/ANSI_Z39.47.c
ansel/Makefile
ansel/gconv-modules
encoding.c
encoding.h
gedcom.enc
gedcom.h
gedcom_1byte.lex [new file with mode: 0644]
gedcom_hilo.lex [new file with mode: 0644]
message.c [new file with mode: 0644]
multilex.c [new file with mode: 0644]
multilex.h [new file with mode: 0644]

index b7585f39730337d94d2b6e8db025a5c31700333d..63be764fef6506171d55b17b880983c762bf226b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -8,12 +8,15 @@ CFLAGS=-g -W -Wall -pedantic
 YFLAGS=--debug --defines
 LFLAGS=-8
 
+all:   ansel_module gedcom_parse
+
+# These targets name actions, not files.  Declaring them phony keeps a stray
+# file called e.g. "clean" or "test" from masking the rule.
+.PHONY: all ansel_module clean test
+
 gedcom_parse:  standalone.o lex.gedcom_1byte_.o lex.gedcom_hilo_.o \
                 lex.gedcom_lohi_.o gedcom.tab.o message.o multilex.o \
                encoding.o
        $(CC) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@
 
-libgedcom.so:
+ansel_module:
+       cd ansel && $(MAKE)
 
 lex.gedcom_1byte_.c:   gedcom_1byte.lex gedcom.tab.h gedcom.h multilex.h
        $(LEX) $(LFLAGS) -Pgedcom_1byte_ gedcom_1byte.lex
@@ -30,6 +33,7 @@ gedcom.tab.c gedcom.tab.h:    gedcom.y gedcom.h
 clean:
        rm -f core gedcom_parse test_* *.o lex.gedcom_* \
         gedcom.tab.* gedcom.output
+       cd ansel && $(MAKE) clean
 
 # Lexer test programs
 
@@ -53,7 +57,7 @@ lex.gedcom_lohi_.test.o:      lex.gedcom_lohi_.c
 
 # Test of parser
 
-test:  gedcom_parse
+test:  all
        @export GCONV_PATH=./ansel; \
         for file in t/*.ged; do \
          echo "=== testing $$file"; \
index 5826d39519c723aeafd3735c5e68e5903c874dcb..a8f3cee407528c9e4b28f63016952246e9fe2c0f 100644 (file)
@@ -1,3 +1,6 @@
+/* $Id$ */
+/* $Name$ */
+
 /* Generic conversion to and from ANSI Z39.47 (also known as ANSEL)
    Based on the ansi_x3.110.c file from the glibc sources
    Data coming from:
@@ -11,7 +14,6 @@
 #include <gconv.h>
 #include <stdint.h>
 #include <string.h>
-#include <stdio.h>
 
 static const uint32_t to_ucs4[256] =
 {
index 1110388507a7d836aa3f6ecdfc55b4ae710d2701..da733bd42de71ce4221ac8f1c67acdc6b189e842 100644 (file)
@@ -1,15 +1,19 @@
+# $Id$
+# $Name$
+
 LIBTOOL=libtool
 MODPATH=/usr/local/lib
 MODULES=ANSI_Z39.47.so
+CFLAGS=-g
 
 all:   $(MODULES)
 
 %.so:  %.lo
-       $(LIBTOOL) $(CC) -module -avoid-version -o $*.la $^ -rpath $(MODPATH)
+       $(LIBTOOL) $(CC) -module -avoid-version $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $*.la -rpath $(MODPATH)
        mv .libs/$@ $@
 
 %.lo:  %.c
-       $(LIBTOOL) $(CC) -c $^
+       $(LIBTOOL) $(CC) -c $(CPPFLAGS) $(CFLAGS) $^
 
 clean:
        rm -rf .libs
index 5329bb2823e22deb1dc5e6a070b65d7623fb45ca..16a7aa7041e1b9160e063368dff596c8fa0504dd 100644 (file)
@@ -1,3 +1,5 @@
+# $Id$
+# $Name$
 # All lines contain the following information:
 
 # If the lines start with `module'
index 6c342e0ccba4fe2817eab3f553287efe89a47c72..9d842cced378336f6b5dea1e959062127cdd855c 100644 (file)
@@ -1,3 +1,6 @@
+/* $Id$ */
+/* $Name$ */
+
 #include <string.h>
 #include <iconv.h>
 #include <search.h>
@@ -76,7 +79,7 @@ void init_encodings()
                       ENCODING_CONF_FILE);
          return;
        }
-       else if (buffer[0] != '#') {
+       else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
          if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
            add_encoding(gedcom_n, charwidth, iconv_n);
          }
@@ -116,6 +119,10 @@ int open_conv_to_internal(char* fromcode)
     memset(conv_buf, 0, sizeof(conv_buf));
     conv_buf_size = 0;
     cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
+    if (cd_to_internal == (iconv_t) -1) {
+      gedcom_error("Error opening conversion context for encoding %s: %s",
+                  encoding, strerror(errno));
+    }
   }
   return (cd_to_internal != (iconv_t) -1);  
 }
index 12473d241e902b38146bcfc15d61387ece001f23..3e9af4361f1eca4601df432d9c90d4b8c0a48e8c 100644 (file)
@@ -1,3 +1,6 @@
+/* $Id$ */
+/* $Name$ */
+
 /* Basic file encoding */
 #ifndef __ENCODING_H
 #define __ENCODING_H
index b47dc565fde1b977f0f6281ff4c8995ab83f5a43..7f370a7b99b7b70833c366c3008337cd740ac2f2 100644 (file)
@@ -1,11 +1,18 @@
+# $Id$
+# $Name$
+
 # Mapping of charsets for gedcom parsing
 # Each line contains (separated by whitespace):
 #   - the gedcom name
 #   - a token identifying the width of characters and the ordering;
 #     currently supported values: 1, 2_LOHI, 2_HILO
 #   - the iconv name of the charset
+
+# First the encodings supported by the GEDCOM standard
 UNICODE                2_LOHI  UTF16LE
 UNICODE                2_HILO  UTF16BE
 ASCII          1       ASCII
-ANSI           1       CP1252
 ANSEL          1       ANSEL
+
+# Then some very frequently used non-standard encodings:
+ANSI           1       CP1252
index 82836e84ff9f28568dd9017662bffccc9fed79f0..987a382d0676a0039245e0836bd2b40ab39f616f 100644 (file)
--- a/gedcom.h
+++ b/gedcom.h
@@ -1,5 +1,6 @@
 /* $Id$ */
 /* $Name$ */
+
 #ifndef __GEDCOM_H
 #define __GEDCOM_H
 #include <stdarg.h>
diff --git a/gedcom_1byte.lex b/gedcom_1byte.lex
new file mode 100644 (file)
index 0000000..7121973
--- /dev/null
@@ -0,0 +1,364 @@
+/* $Id$ */
+/* $Name$ */
+
+%{
+#include "gedcom.tab.h"
+#include "gedcom.h"
+#include "multilex.h"
+#include "encoding.h"
+
+#define YY_NO_UNPUT
+%}
+
+%s NORMAL
+%s EXPECT_TAG
+
+alpha        [A-Za-z_]
+digit        [0-9]
+delim        " "
+tab          [\t]
+hash         #
+literal_at   @@
+otherchar    [\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFE]
+terminator   \x0D|\x0A|\x0D\x0A|\x0A\x0D
+
+any_char     {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
+any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
+non_at       {alpha}|{digit}|{otherchar}|{delim}|{hash}
+alphanum     {alpha}|{digit}
+gen_delim    {delim}|{tab}
+
+escape       @#{any_char}+@
+pointer      @{alphanum}{non_at}+@
+
+%{
+static int current_level=-1;
+static int level_diff=MAXGEDCLEVEL;
+#ifdef LEXER_TEST 
+YYSTYPE gedcom_lval;
+int line_no = 1; 
+#endif
+%} 
+
+%%
+
+    /* The GEDCOM level number is converted into a sequence of opening
+       and closing brackets.  Simply put, the following GEDCOM fragment:
+
+         0 HEAD
+        1 SOUR genes
+        2 VERS 1.6
+        2 NAME Genes
+        1 DATE 07 OCT 2001
+        ...
+        0 TRLR
+
+       is converted into:
+
+         { HEAD                     (initial)  
+        { SOUR genes               (1 higher: no closing brackets)
+        { VERS 1.6                 (1 higher: no closing brackets)
+        } { NAME Genes             (same level: 1 closing bracket)
+        } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
+        ...
+        } { TRLR }
+
+       or more clearly:
+
+         { HEAD
+          { SOUR genes
+            { VERS 1.6 }
+            { NAME Genes } }
+          { DATE 07 OCT 2001
+        ... }
+        { TRLR }
+
+       But because this means that one token is converted into a series
+       of tokens, there is some initial code following immediately here
+       that returns "pending" tokens. */
+
+%{
+char string_buf[MAXGEDCLINELEN+1];
+if (level_diff < 1) {
+  level_diff++;
+  return CLOSE;
+}
+else if (level_diff == 1) {
+  level_diff++;
+  return OPEN;
+}
+else {
+  /* out of brackets... */
+}
+
+#define TO_INTERNAL(str) to_internal(str, yyleng) 
+
+#define MKTAGACTION(tag) \
+  { gedcom_lval.string = TO_INTERNAL(yytext); \
+    BEGIN(NORMAL); \
+    return TAG_##tag; }
+
+%}
+
+<INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
+
+<INITIAL>0{digit}+ { gedcom_error ("Level number with leading zero");
+                     return BADTOKEN;
+                   }
+
+<INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
+                    if ((level < 0) || (level > MAXGEDCLEVEL)) {
+                     gedcom_error ("Level number out of range [0..%d]",
+                                   MAXGEDCLEVEL);
+                     return BADTOKEN;
+                   }
+                    level_diff = level - current_level;
+                   BEGIN(EXPECT_TAG);
+                   current_level = level;
+                   if (level_diff < 1) {
+                     level_diff++;
+                     return CLOSE;
+                   }
+                   else if (level_diff == 1) {
+                     level_diff++;
+                     return OPEN;
+                   }
+                   else {
+                     /* should never happen (error to GEDCOM spec) */
+                     gedcom_error ("GEDCOM level number is %d higher than "
+                                   "previous",
+                                   level_diff);
+                     return BADTOKEN;
+                   }
+                  }
+
+<EXPECT_TAG>ABBR  MKTAGACTION(ABBR)
+<EXPECT_TAG>ADDR  MKTAGACTION(ADDR)
+<EXPECT_TAG>ADR1  MKTAGACTION(ADR1)
+<EXPECT_TAG>ADR2  MKTAGACTION(ADR2)
+<EXPECT_TAG>ADOP  MKTAGACTION(ADOP)
+<EXPECT_TAG>AFN   MKTAGACTION(AFN)
+<EXPECT_TAG>AGE   MKTAGACTION(AGE)
+<EXPECT_TAG>AGNC  MKTAGACTION(AGNC)
+<EXPECT_TAG>ALIA  MKTAGACTION(ALIA)
+<EXPECT_TAG>ANCE  MKTAGACTION(ANCE)
+<EXPECT_TAG>ANCI  MKTAGACTION(ANCI)
+<EXPECT_TAG>ANUL  MKTAGACTION(ANUL)
+<EXPECT_TAG>ASSO  MKTAGACTION(ASSO)
+<EXPECT_TAG>AUTH  MKTAGACTION(AUTH)
+<EXPECT_TAG>BAPL  MKTAGACTION(BAPL)
+<EXPECT_TAG>BAPM  MKTAGACTION(BAPM)
+<EXPECT_TAG>BARM  MKTAGACTION(BARM)
+<EXPECT_TAG>BASM  MKTAGACTION(BASM)
+<EXPECT_TAG>BIRT  MKTAGACTION(BIRT)
+<EXPECT_TAG>BLES  MKTAGACTION(BLES)
+<EXPECT_TAG>BLOB  MKTAGACTION(BLOB)
+<EXPECT_TAG>BURI  MKTAGACTION(BURI)
+<EXPECT_TAG>CALN  MKTAGACTION(CALN)
+<EXPECT_TAG>CAST  MKTAGACTION(CAST)
+<EXPECT_TAG>CAUS  MKTAGACTION(CAUS)
+<EXPECT_TAG>CENS  MKTAGACTION(CENS)
+<EXPECT_TAG>CHAN  MKTAGACTION(CHAN)
+<EXPECT_TAG>CHAR  MKTAGACTION(CHAR)
+<EXPECT_TAG>CHIL  MKTAGACTION(CHIL)
+<EXPECT_TAG>CHR   MKTAGACTION(CHR)
+<EXPECT_TAG>CHRA  MKTAGACTION(CHRA)
+<EXPECT_TAG>CITY  MKTAGACTION(CITY)
+<EXPECT_TAG>CONC  MKTAGACTION(CONC)
+<EXPECT_TAG>CONF  MKTAGACTION(CONF)
+<EXPECT_TAG>CONL  MKTAGACTION(CONL)
+<EXPECT_TAG>CONT  MKTAGACTION(CONT)
+<EXPECT_TAG>COPR  MKTAGACTION(COPR)
+<EXPECT_TAG>CORP  MKTAGACTION(CORP)
+<EXPECT_TAG>CREM  MKTAGACTION(CREM)
+<EXPECT_TAG>CTRY  MKTAGACTION(CTRY)
+<EXPECT_TAG>DATA  MKTAGACTION(DATA)
+<EXPECT_TAG>DATE  MKTAGACTION(DATE)
+<EXPECT_TAG>DEAT  MKTAGACTION(DEAT)
+<EXPECT_TAG>DESC  MKTAGACTION(DESC)
+<EXPECT_TAG>DESI  MKTAGACTION(DESI)
+<EXPECT_TAG>DEST  MKTAGACTION(DEST)
+<EXPECT_TAG>DIV   MKTAGACTION(DIV)
+<EXPECT_TAG>DIVF  MKTAGACTION(DIVF)
+<EXPECT_TAG>DSCR  MKTAGACTION(DSCR)
+<EXPECT_TAG>EDUC  MKTAGACTION(EDUC)
+<EXPECT_TAG>EMIG  MKTAGACTION(EMIG)
+<EXPECT_TAG>ENDL  MKTAGACTION(ENDL)
+<EXPECT_TAG>ENGA  MKTAGACTION(ENGA)
+<EXPECT_TAG>EVEN  MKTAGACTION(EVEN)
+<EXPECT_TAG>FAM   MKTAGACTION(FAM)
+<EXPECT_TAG>FAMC  MKTAGACTION(FAMC)
+<EXPECT_TAG>FAMF  MKTAGACTION(FAMF)
+<EXPECT_TAG>FAMS  MKTAGACTION(FAMS)
+<EXPECT_TAG>FCOM  MKTAGACTION(FCOM)
+<EXPECT_TAG>FILE  MKTAGACTION(FILE)
+<EXPECT_TAG>FORM  MKTAGACTION(FORM)
+<EXPECT_TAG>GEDC  MKTAGACTION(GEDC)
+<EXPECT_TAG>GIVN  MKTAGACTION(GIVN)
+<EXPECT_TAG>GRAD  MKTAGACTION(GRAD)
+<EXPECT_TAG>HEAD  MKTAGACTION(HEAD)
+<EXPECT_TAG>HUSB  MKTAGACTION(HUSB)
+<EXPECT_TAG>IDNO  MKTAGACTION(IDNO)
+<EXPECT_TAG>IMMI  MKTAGACTION(IMMI)
+<EXPECT_TAG>INDI  MKTAGACTION(INDI)
+<EXPECT_TAG>LANG  MKTAGACTION(LANG)
+<EXPECT_TAG>LEGA  MKTAGACTION(LEGA)
+<EXPECT_TAG>MARB  MKTAGACTION(MARB)
+<EXPECT_TAG>MARC  MKTAGACTION(MARC)
+<EXPECT_TAG>MARL  MKTAGACTION(MARL)
+<EXPECT_TAG>MARR  MKTAGACTION(MARR)
+<EXPECT_TAG>MARS  MKTAGACTION(MARS)
+<EXPECT_TAG>MEDI  MKTAGACTION(MEDI)
+<EXPECT_TAG>NAME  MKTAGACTION(NAME)
+<EXPECT_TAG>NATI  MKTAGACTION(NATI)
+<EXPECT_TAG>NATU  MKTAGACTION(NATU)
+<EXPECT_TAG>NCHI  MKTAGACTION(NCHI)
+<EXPECT_TAG>NICK  MKTAGACTION(NICK)
+<EXPECT_TAG>NMR   MKTAGACTION(NMR)
+<EXPECT_TAG>NOTE  MKTAGACTION(NOTE)
+<EXPECT_TAG>NPFX  MKTAGACTION(NPFX)
+<EXPECT_TAG>NSFX  MKTAGACTION(NSFX)
+<EXPECT_TAG>OBJE  MKTAGACTION(OBJE)
+<EXPECT_TAG>OCCU  MKTAGACTION(OCCU)
+<EXPECT_TAG>ORDI  MKTAGACTION(ORDI)
+<EXPECT_TAG>ORDN  MKTAGACTION(ORDN)
+<EXPECT_TAG>PAGE  MKTAGACTION(PAGE)
+<EXPECT_TAG>PEDI  MKTAGACTION(PEDI)
+<EXPECT_TAG>PHON  MKTAGACTION(PHON)
+<EXPECT_TAG>PLAC  MKTAGACTION(PLAC)
+<EXPECT_TAG>POST  MKTAGACTION(POST)
+<EXPECT_TAG>PROB  MKTAGACTION(PROB)
+<EXPECT_TAG>PROP  MKTAGACTION(PROP)
+<EXPECT_TAG>PUBL  MKTAGACTION(PUBL)
+<EXPECT_TAG>QUAY  MKTAGACTION(QUAY)
+<EXPECT_TAG>REFN  MKTAGACTION(REFN)
+<EXPECT_TAG>RELA  MKTAGACTION(RELA)
+<EXPECT_TAG>RELI  MKTAGACTION(RELI)
+<EXPECT_TAG>REPO  MKTAGACTION(REPO)
+<EXPECT_TAG>RESI  MKTAGACTION(RESI)
+<EXPECT_TAG>RESN  MKTAGACTION(RESN)
+<EXPECT_TAG>RETI  MKTAGACTION(RETI)
+<EXPECT_TAG>RFN   MKTAGACTION(RFN)
+<EXPECT_TAG>RIN   MKTAGACTION(RIN)
+<EXPECT_TAG>ROLE  MKTAGACTION(ROLE)
+<EXPECT_TAG>SEX   MKTAGACTION(SEX)
+<EXPECT_TAG>SLGC  MKTAGACTION(SLGC)
+<EXPECT_TAG>SLGS  MKTAGACTION(SLGS)
+<EXPECT_TAG>SOUR  MKTAGACTION(SOUR)
+<EXPECT_TAG>SPFX  MKTAGACTION(SPFX)
+<EXPECT_TAG>SSN   MKTAGACTION(SSN)
+<EXPECT_TAG>STAE  MKTAGACTION(STAE)
+<EXPECT_TAG>STAT  MKTAGACTION(STAT)
+<EXPECT_TAG>SUBM  MKTAGACTION(SUBM)
+<EXPECT_TAG>SUBN  MKTAGACTION(SUBN)
+<EXPECT_TAG>SURN  MKTAGACTION(SURN)
+<EXPECT_TAG>TEMP  MKTAGACTION(TEMP)
+<EXPECT_TAG>TEXT  MKTAGACTION(TEXT)
+<EXPECT_TAG>TIME  MKTAGACTION(TIME)
+<EXPECT_TAG>TITL  MKTAGACTION(TITL)
+<EXPECT_TAG>TRLR  MKTAGACTION(TRLR)
+<EXPECT_TAG>TYPE  MKTAGACTION(TYPE)
+<EXPECT_TAG>VERS  MKTAGACTION(VERS)
+<EXPECT_TAG>WIFE  MKTAGACTION(WIFE)
+<EXPECT_TAG>WILL  MKTAGACTION(WILL)
+     
+<EXPECT_TAG>{alphanum}+ { /* A tag that is not one of the fixed GEDCOM tags
+                             is reported as a user tag, unless it is longer
+                             than MAXGEDCTAGLEN characters. */
+                          if (strlen(yytext) > MAXGEDCTAGLEN) {
+                            /* Supply the arguments the format string asks
+                               for (they were missing before). */
+                            gedcom_error("Tag '%s' too long, max %d chars",
+                                         yytext, MAXGEDCTAGLEN);
+                            return BADTOKEN;
+                          }
+                          /* The length check above guarantees that strncpy
+                             NUL-terminates within MAXGEDCTAGLEN+1 bytes. */
+                          strncpy(string_buf, yytext, MAXGEDCTAGLEN+1);
+                          gedcom_lval.string = TO_INTERNAL(string_buf);
+                          BEGIN(NORMAL);
+                          return USERTAG;
+                        }
+
+{delim}      { gedcom_lval.string = TO_INTERNAL(yytext);
+               return DELIM;
+             }
+
+{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
+                  /* Due to character conversions, it is possible
+                    that the current character will be combined with
+                    the next, and so now we don't have a character yet...
+                    This is only applicable to the 1byte case (e.g. ANSEL).
+                 */
+                  if (strlen(gedcom_lval.string) > 0) 
+                    return ANYCHAR;
+                }
+
+{escape}/{non_at}  { gedcom_lval.string = TO_INTERNAL(yytext);
+                     return ESCAPE;
+                   }
+
+{pointer}    { gedcom_lval.string = TO_INTERNAL(yytext);
+               return POINTER;
+             }
+
+   /* Due to the conversion of level numbers into brackets, the
+      terminator is not important, so no token is returned here.
+      Although not strictly according to the GEDCOM spec, we'll ignore
+      whitespace just before the terminator.
+   */
+
+{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
+
+   /* Eventually we have to return 1 closing bracket (for the trailer).
+      We can detect whether we have sent the closing bracket using the
+      level_diff (at eof, first it is 2, then we increment it ourselves) */
+
+<<EOF>> { if (level_diff == 2) {
+           level_diff++;
+            return CLOSE;
+          }
+          else {
+           yyterminate();
+         }
+        } 
+
+.  { /* Catch-all: any single byte (other than newline) that no other rule
+        accepted.  The cast keeps bytes >= 0x80 from being sign-extended
+        when they are printed with %02x. */
+     gedcom_error("Unexpected character: '%s' (0x%02x)",
+                  yytext, (unsigned char) yytext[0]);
+     return BADTOKEN;
+   }
+
+%%
+
+int yywrap()
+{
+  return 1;
+}
+
+#ifdef LEXER_TEST
+/* Stand-alone driver, only compiled when LEXER_TEST is defined: runs the
+   one-byte lexer over its input and prints one word per token returned. */
+int main()
+{
+  int tok;
+
+  init_encodings();
+  set_encoding_width(ONE_BYTE);
+  if (!open_conv_to_internal("ASCII")) {
+    gedcom_error("Unable to open conversion context: %s",
+                 strerror(errno));
+    return 1;
+  }
+
+  /* The lexer returns 0 once the input is exhausted. */
+  for (tok = gedcom_1byte_lex(); tok; tok = gedcom_1byte_lex()) {
+    switch(tok) {
+      case BADTOKEN:
+        printf("BADTOKEN ");
+        break;
+      case OPEN:
+        printf("OPEN ");
+        break;
+      case CLOSE:
+        printf("CLOSE ");
+        break;
+      case ESCAPE:
+        printf("ESCAPE(%s) ", gedcom_lval.string);
+        break;
+      case DELIM:
+        printf("DELIM ");
+        break;
+      case ANYCHAR:
+        printf("%s ", gedcom_lval.string);
+        break;
+      case POINTER:
+        printf("POINTER(%s) ", gedcom_lval.string);
+        break;
+      case USERTAG:
+        printf("USERTAG(%s) ", gedcom_lval.string);
+        break;
+      default:
+        /* every remaining token is one of the fixed TAG_xxx tokens */
+        printf("TAG(%s) ", gedcom_lval.string);
+        break;
+    }
+  }
+
+  printf("\n");
+  close_conv_to_internal();
+  return 0;
+}
+#endif
diff --git a/gedcom_hilo.lex b/gedcom_hilo.lex
new file mode 100644 (file)
index 0000000..d8a1da0
--- /dev/null
@@ -0,0 +1,361 @@
+/* $Id$ */
+/* $Name$ */
+
+/* In high-low order, a space is encoded as 0x00 0x20 */
+/* i.e. this is utf-16-be */
+
+%{
+#include "gedcom.tab.h"
+#include "gedcom.h"
+#include "multilex.h"
+#include "encoding.h"
+
+#define YY_NO_UNPUT
+%}
+
+%s NORMAL
+%s EXPECT_TAG
+
+alpha        \x00[A-Za-z_]
+digit        \x00[0-9]
+delim        \x00\x20
+tab          \x00[\t]
+hash         \x00#
+literal_at   \x00@\x00@
+otherchar    \x00[\x21-\x22\x24-\x2F\x3A-\x3F\x5B-\x5E\x60\x7B-\x7E\x80-\xFF]|[\x01-\xFF][\x00-\xFF]
+terminator   \x00\x0D|\x00\x0A|\x00\x0D\x00\x0A|\x00\x0A\x00\x0D
+
+any_char     {alpha}|{digit}|{otherchar}|{delim}|{hash}|{literal_at}
+any_but_delim {alpha}|{digit}|{otherchar}|{hash}|{literal_at}
+non_at       {alpha}|{digit}|{otherchar}|{delim}|{hash}
+alphanum     {alpha}|{digit}
+gen_delim    {delim}|{tab}
+
+escape       \x00@\x00#{any_char}+\x00@
+pointer      \x00@{alphanum}{non_at}+\x00@
+
+%{
+static int current_level=-1;
+static int level_diff=MAXGEDCLEVEL;
+#ifdef LEXER_TEST 
+YYSTYPE gedcom_lval;
+int line_no = 1; 
+#endif
+%} 
+
+%%
+
+    /* The GEDCOM level number is converted into a sequence of opening
+       and closing brackets.  Simply put, the following GEDCOM fragment:
+
+         0 HEAD
+        1 SOUR genes
+        2 VERS 1.6
+        2 NAME Genes
+        1 DATE 07 OCT 2001
+        ...
+        0 TRLR
+
+       is converted into:
+
+         { HEAD                     (initial)  
+        { SOUR genes               (1 higher: no closing brackets)
+        { VERS 1.6                 (1 higher: no closing brackets)
+        } { NAME Genes             (same level: 1 closing bracket)
+        } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
+        ...
+        } { TRLR }
+
+       or more clearly:
+
+         { HEAD
+          { SOUR genes
+            { VERS 1.6 }
+            { NAME Genes } }
+          { DATE 07 OCT 2001
+        ... }
+        { TRLR }
+
+       But because this means that one token is converted into a series
+       of tokens, there is some initial code following immediately here
+       that returns "pending" tokens. */
+
+%{
+char string_buf[MAXGEDCLINELEN+1];
+if (level_diff < 1) {
+  level_diff++;
+  return CLOSE;
+}
+else if (level_diff == 1) {
+  level_diff++;
+  return OPEN;
+}
+else {
+  /* out of brackets... */
+}
+
+#define TO_INTERNAL(str) to_internal(str, yyleng) 
+
+#define MKTAGACTION(tag) \
+  { gedcom_lval.string = TO_INTERNAL(yytext); \
+    BEGIN(NORMAL); \
+    return TAG_##tag; }
+
+%}
+
+<INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
+
+<INITIAL>\x00[0]{digit}+ { gedcom_error ("Level number with leading zero");
+                           return BADTOKEN;
+                         }
+
+<INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
+                    if ((level < 0) || (level > MAXGEDCLEVEL)) {
+                     gedcom_error ("Level number out of range [0..%d]",
+                                   MAXGEDCLEVEL);
+                     return BADTOKEN;
+                   }
+                    level_diff = level - current_level;
+                   BEGIN(EXPECT_TAG);
+                   current_level = level;
+                   if (level_diff < 1) {
+                     level_diff++;
+                     return CLOSE;
+                   }
+                   else if (level_diff == 1) {
+                     level_diff++;
+                     return OPEN;
+                   }
+                   else {
+                     /* should never happen (error to GEDCOM spec) */
+                     gedcom_error ("GEDCOM level number is %d higher than "
+                                   "previous",
+                                   level_diff);
+                     return BADTOKEN;
+                   }
+                  }
+
+<EXPECT_TAG>\x00A\x00B\x00B\x00R  MKTAGACTION(ABBR)
+<EXPECT_TAG>\x00A\x00D\x00D\x00R  MKTAGACTION(ADDR)
+<EXPECT_TAG>\x00A\x00D\x00R\x001  MKTAGACTION(ADR1)
+<EXPECT_TAG>\x00A\x00D\x00R\x002  MKTAGACTION(ADR2)
+<EXPECT_TAG>\x00A\x00D\x00O\x00P  MKTAGACTION(ADOP)
+<EXPECT_TAG>\x00A\x00F\x00N   MKTAGACTION(AFN)
+<EXPECT_TAG>\x00A\x00G\x00E   MKTAGACTION(AGE)
+<EXPECT_TAG>\x00A\x00G\x00N\x00C  MKTAGACTION(AGNC)
+<EXPECT_TAG>\x00A\x00L\x00I\x00A  MKTAGACTION(ALIA)
+<EXPECT_TAG>\x00A\x00N\x00C\x00E  MKTAGACTION(ANCE)
+<EXPECT_TAG>\x00A\x00N\x00C\x00I  MKTAGACTION(ANCI)
+<EXPECT_TAG>\x00A\x00N\x00U\x00L  MKTAGACTION(ANUL)
+<EXPECT_TAG>\x00A\x00S\x00S\x00O  MKTAGACTION(ASSO)
+<EXPECT_TAG>\x00A\x00U\x00T\x00H  MKTAGACTION(AUTH)
+<EXPECT_TAG>\x00B\x00A\x00P\x00L  MKTAGACTION(BAPL)
+<EXPECT_TAG>\x00B\x00A\x00P\x00M  MKTAGACTION(BAPM)
+<EXPECT_TAG>\x00B\x00A\x00R\x00M  MKTAGACTION(BARM)
+<EXPECT_TAG>\x00B\x00A\x00S\x00M  MKTAGACTION(BASM)
+<EXPECT_TAG>\x00B\x00I\x00R\x00T  MKTAGACTION(BIRT)
+<EXPECT_TAG>\x00B\x00L\x00E\x00S  MKTAGACTION(BLES)
+<EXPECT_TAG>\x00B\x00L\x00O\x00B  MKTAGACTION(BLOB)
+<EXPECT_TAG>\x00B\x00U\x00R\x00I  MKTAGACTION(BURI)
+<EXPECT_TAG>\x00C\x00A\x00L\x00N  MKTAGACTION(CALN)
+<EXPECT_TAG>\x00C\x00A\x00S\x00T  MKTAGACTION(CAST)
+<EXPECT_TAG>\x00C\x00A\x00U\x00S  MKTAGACTION(CAUS)
+<EXPECT_TAG>\x00C\x00E\x00N\x00S  MKTAGACTION(CENS)
+<EXPECT_TAG>\x00C\x00H\x00A\x00N  MKTAGACTION(CHAN)
+<EXPECT_TAG>\x00C\x00H\x00A\x00R  MKTAGACTION(CHAR)
+<EXPECT_TAG>\x00C\x00H\x00I\x00L  MKTAGACTION(CHIL)
+<EXPECT_TAG>\x00C\x00H\x00R   MKTAGACTION(CHR)
+<EXPECT_TAG>\x00C\x00H\x00R\x00A  MKTAGACTION(CHRA)
+<EXPECT_TAG>\x00C\x00I\x00T\x00Y  MKTAGACTION(CITY)
+<EXPECT_TAG>\x00C\x00O\x00N\x00C  MKTAGACTION(CONC)
+<EXPECT_TAG>\x00C\x00O\x00N\x00F  MKTAGACTION(CONF)
+<EXPECT_TAG>\x00C\x00O\x00N\x00L  MKTAGACTION(CONL)
+<EXPECT_TAG>\x00C\x00O\x00N\x00T  MKTAGACTION(CONT)
+<EXPECT_TAG>\x00C\x00O\x00P\x00R  MKTAGACTION(COPR)
+<EXPECT_TAG>\x00C\x00O\x00R\x00P  MKTAGACTION(CORP)
+<EXPECT_TAG>\x00C\x00R\x00E\x00M  MKTAGACTION(CREM)
+<EXPECT_TAG>\x00C\x00T\x00R\x00Y  MKTAGACTION(CTRY)
+<EXPECT_TAG>\x00D\x00A\x00T\x00A  MKTAGACTION(DATA)
+<EXPECT_TAG>\x00D\x00A\x00T\x00E  MKTAGACTION(DATE)
+<EXPECT_TAG>\x00D\x00E\x00A\x00T  MKTAGACTION(DEAT)
+<EXPECT_TAG>\x00D\x00E\x00S\x00C  MKTAGACTION(DESC)
+<EXPECT_TAG>\x00D\x00E\x00S\x00I  MKTAGACTION(DESI)
+<EXPECT_TAG>\x00D\x00E\x00S\x00T  MKTAGACTION(DEST)
+<EXPECT_TAG>\x00D\x00I\x00V   MKTAGACTION(DIV)
+<EXPECT_TAG>\x00D\x00I\x00V\x00F  MKTAGACTION(DIVF)
+<EXPECT_TAG>\x00D\x00S\x00C\x00R  MKTAGACTION(DSCR)
+<EXPECT_TAG>\x00E\x00D\x00U\x00C  MKTAGACTION(EDUC)
+<EXPECT_TAG>\x00E\x00M\x00I\x00G  MKTAGACTION(EMIG)
+<EXPECT_TAG>\x00E\x00N\x00D\x00L  MKTAGACTION(ENDL)
+<EXPECT_TAG>\x00E\x00N\x00G\x00A  MKTAGACTION(ENGA)
+<EXPECT_TAG>\x00E\x00V\x00E\x00N  MKTAGACTION(EVEN)
+<EXPECT_TAG>\x00F\x00A\x00M   MKTAGACTION(FAM)
+<EXPECT_TAG>\x00F\x00A\x00M\x00C  MKTAGACTION(FAMC)
+<EXPECT_TAG>\x00F\x00A\x00M\x00F  MKTAGACTION(FAMF)
+<EXPECT_TAG>\x00F\x00A\x00M\x00S  MKTAGACTION(FAMS)
+<EXPECT_TAG>\x00F\x00C\x00O\x00M  MKTAGACTION(FCOM)
+<EXPECT_TAG>\x00F\x00I\x00L\x00E  MKTAGACTION(FILE)
+<EXPECT_TAG>\x00F\x00O\x00R\x00M  MKTAGACTION(FORM)
+<EXPECT_TAG>\x00G\x00E\x00D\x00C  MKTAGACTION(GEDC)
+<EXPECT_TAG>\x00G\x00I\x00V\x00N  MKTAGACTION(GIVN)
+<EXPECT_TAG>\x00G\x00R\x00A\x00D  MKTAGACTION(GRAD)
+<EXPECT_TAG>\x00H\x00E\x00A\x00D  MKTAGACTION(HEAD)
+<EXPECT_TAG>\x00H\x00U\x00S\x00B  MKTAGACTION(HUSB)
+<EXPECT_TAG>\x00I\x00D\x00N\x00O  MKTAGACTION(IDNO)
+<EXPECT_TAG>\x00I\x00M\x00M\x00I  MKTAGACTION(IMMI)
+<EXPECT_TAG>\x00I\x00N\x00D\x00I  MKTAGACTION(INDI)
+<EXPECT_TAG>\x00L\x00A\x00N\x00G  MKTAGACTION(LANG)
+<EXPECT_TAG>\x00L\x00E\x00G\x00A  MKTAGACTION(LEGA)
+<EXPECT_TAG>\x00M\x00A\x00R\x00B  MKTAGACTION(MARB)
+<EXPECT_TAG>\x00M\x00A\x00R\x00C  MKTAGACTION(MARC)
+<EXPECT_TAG>\x00M\x00A\x00R\x00L  MKTAGACTION(MARL)
+<EXPECT_TAG>\x00M\x00A\x00R\x00R  MKTAGACTION(MARR)
+<EXPECT_TAG>\x00M\x00A\x00R\x00S  MKTAGACTION(MARS)
+<EXPECT_TAG>\x00M\x00E\x00D\x00I  MKTAGACTION(MEDI)
+<EXPECT_TAG>\x00N\x00A\x00M\x00E  MKTAGACTION(NAME)
+<EXPECT_TAG>\x00N\x00A\x00T\x00I  MKTAGACTION(NATI)
+<EXPECT_TAG>\x00N\x00A\x00T\x00U  MKTAGACTION(NATU)
+<EXPECT_TAG>\x00N\x00C\x00H\x00I  MKTAGACTION(NCHI)
+<EXPECT_TAG>\x00N\x00I\x00C\x00K  MKTAGACTION(NICK)
+<EXPECT_TAG>\x00N\x00M\x00R   MKTAGACTION(NMR)
+<EXPECT_TAG>\x00N\x00O\x00T\x00E  MKTAGACTION(NOTE)
+<EXPECT_TAG>\x00N\x00P\x00F\x00X  MKTAGACTION(NPFX)
+<EXPECT_TAG>\x00N\x00S\x00F\x00X  MKTAGACTION(NSFX)
+<EXPECT_TAG>\x00O\x00B\x00J\x00E  MKTAGACTION(OBJE)
+<EXPECT_TAG>\x00O\x00C\x00C\x00U  MKTAGACTION(OCCU)
+<EXPECT_TAG>\x00O\x00R\x00D\x00I  MKTAGACTION(ORDI)
+<EXPECT_TAG>\x00O\x00R\x00D\x00N  MKTAGACTION(ORDN)
+<EXPECT_TAG>\x00P\x00A\x00G\x00E  MKTAGACTION(PAGE)
+<EXPECT_TAG>\x00P\x00E\x00D\x00I  MKTAGACTION(PEDI)
+<EXPECT_TAG>\x00P\x00H\x00O\x00N  MKTAGACTION(PHON)
+<EXPECT_TAG>\x00P\x00L\x00A\x00C  MKTAGACTION(PLAC)
+<EXPECT_TAG>\x00P\x00O\x00S\x00T  MKTAGACTION(POST)
+<EXPECT_TAG>\x00P\x00R\x00O\x00B  MKTAGACTION(PROB)
+<EXPECT_TAG>\x00P\x00R\x00O\x00P  MKTAGACTION(PROP)
+<EXPECT_TAG>\x00P\x00U\x00B\x00L  MKTAGACTION(PUBL)
+<EXPECT_TAG>\x00Q\x00U\x00A\x00Y  MKTAGACTION(QUAY)
+<EXPECT_TAG>\x00R\x00E\x00F\x00N  MKTAGACTION(REFN)
+<EXPECT_TAG>\x00R\x00E\x00L\x00A  MKTAGACTION(RELA)
+<EXPECT_TAG>\x00R\x00E\x00L\x00I  MKTAGACTION(RELI)
+<EXPECT_TAG>\x00R\x00E\x00P\x00O  MKTAGACTION(REPO)
+<EXPECT_TAG>\x00R\x00E\x00S\x00I  MKTAGACTION(RESI)
+<EXPECT_TAG>\x00R\x00E\x00S\x00N  MKTAGACTION(RESN)
+<EXPECT_TAG>\x00R\x00E\x00T\x00I  MKTAGACTION(RETI)
+<EXPECT_TAG>\x00R\x00F\x00N   MKTAGACTION(RFN)
+<EXPECT_TAG>\x00R\x00I\x00N   MKTAGACTION(RIN)
+<EXPECT_TAG>\x00R\x00O\x00L\x00E  MKTAGACTION(ROLE)
+<EXPECT_TAG>\x00S\x00E\x00X   MKTAGACTION(SEX)
+<EXPECT_TAG>\x00S\x00L\x00G\x00C  MKTAGACTION(SLGC)
+<EXPECT_TAG>\x00S\x00L\x00G\x00S  MKTAGACTION(SLGS)
+<EXPECT_TAG>\x00S\x00O\x00U\x00R  MKTAGACTION(SOUR)
+<EXPECT_TAG>\x00S\x00P\x00F\x00X  MKTAGACTION(SPFX)
+<EXPECT_TAG>\x00S\x00S\x00N   MKTAGACTION(SSN)
+<EXPECT_TAG>\x00S\x00T\x00A\x00E  MKTAGACTION(STAE)
+<EXPECT_TAG>\x00S\x00T\x00A\x00T  MKTAGACTION(STAT)
+<EXPECT_TAG>\x00S\x00U\x00B\x00M  MKTAGACTION(SUBM)
+<EXPECT_TAG>\x00S\x00U\x00B\x00N  MKTAGACTION(SUBN)
+<EXPECT_TAG>\x00S\x00U\x00R\x00N  MKTAGACTION(SURN)
+<EXPECT_TAG>\x00T\x00E\x00M\x00P  MKTAGACTION(TEMP)
+<EXPECT_TAG>\x00T\x00E\x00X\x00T  MKTAGACTION(TEXT)
+<EXPECT_TAG>\x00T\x00I\x00M\x00E  MKTAGACTION(TIME)
+<EXPECT_TAG>\x00T\x00I\x00T\x00L  MKTAGACTION(TITL)
+<EXPECT_TAG>\x00T\x00R\x00L\x00R  MKTAGACTION(TRLR)
+<EXPECT_TAG>\x00T\x00Y\x00P\x00E  MKTAGACTION(TYPE)
+<EXPECT_TAG>\x00V\x00E\x00R\x00S  MKTAGACTION(VERS)
+<EXPECT_TAG>\x00W\x00I\x00F\x00E  MKTAGACTION(WIFE)
+<EXPECT_TAG>\x00W\x00I\x00L\x00L  MKTAGACTION(WILL)
+     
+<EXPECT_TAG>{alphanum}+ { /* A tag that is not one of the fixed GEDCOM tags
+                             is reported as a user tag, unless it is longer
+                             than MAXGEDCTAGLEN characters.
+                             In this lexer every {alphanum} is two bytes, the
+                             first of which is 0x00, so the C string functions
+                             cannot be used on yytext: the length is derived
+                             from yyleng and the text is converted directly,
+                             as all other rules do. */
+                          if ((yyleng / 2) > MAXGEDCTAGLEN) {
+                            /* Convert before printing; the raw text would
+                               look like an empty string to %s. */
+                            gedcom_error("Tag '%s' too long, max %d chars",
+                                         TO_INTERNAL(yytext), MAXGEDCTAGLEN);
+                            return BADTOKEN;
+                          }
+                          gedcom_lval.string = TO_INTERNAL(yytext);
+                          BEGIN(NORMAL);
+                          return USERTAG;
+                        }
+
+{delim}      { gedcom_lval.string = TO_INTERNAL(yytext);
+               return DELIM;
+             }
+
+{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
+                  return ANYCHAR;
+                }
+
+{escape}/{non_at}  { gedcom_lval.string = TO_INTERNAL(yytext);
+                     return ESCAPE;
+                   }
+
+{pointer}    { gedcom_lval.string = TO_INTERNAL(yytext);
+               return POINTER;
+             }
+
+   /* Due to the conversion of level numbers into brackets, the
+      terminator is not important, so no token is returned here.
+      Although not strictly according to the GEDCOM spec, we'll ignore
+      whitespace just before the terminator.
+   */
+
+{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
+
+   /* Eventually we have to return 1 closing bracket (for the trailer).
+      We can detect whether we have sent the closing bracket using the
+      level_diff (at eof, first it is 2, then we increment it ourselves) */
+
+<<EOF>> { if (level_diff == 2) {
+           level_diff++;
+            return CLOSE;
+          }
+          else {
+           yyterminate();
+         }
+        } 
+
+.  { /* Catch-all: any single byte (other than newline) that no other rule
+        accepted.  The cast keeps bytes >= 0x80 from being sign-extended
+        when they are printed with %02x. */
+     gedcom_error("Unexpected character: '%s' (0x%02x)",
+                  yytext, (unsigned char) yytext[0]);
+     return BADTOKEN;
+   }
+
+%%
+
+int yywrap()
+{
+  return 1;
+}
+
+#ifdef LEXER_TEST
+
+/* Stand-alone driver, only compiled when LEXER_TEST is defined: runs the
+   high-low two-byte lexer over its input and prints one word per token
+   returned. */
+int main()
+{
+  int tok;
+
+  init_encodings();
+  set_encoding_width(TWO_BYTE_HILO);
+  if (!open_conv_to_internal("UNICODE")) {
+    gedcom_error("Unable to open conversion context: %s",
+                 strerror(errno));
+    return 1;
+  }
+
+  /* The lexer returns 0 once the input is exhausted. */
+  for (tok = gedcom_hilo_lex(); tok; tok = gedcom_hilo_lex()) {
+    switch(tok) {
+      case BADTOKEN:
+        printf("BADTOKEN ");
+        break;
+      case OPEN:
+        printf("OPEN ");
+        break;
+      case CLOSE:
+        printf("CLOSE ");
+        break;
+      case ESCAPE:
+        printf("ESCAPE(%s) ", gedcom_lval.string);
+        break;
+      case DELIM:
+        printf("DELIM ");
+        break;
+      case ANYCHAR:
+        printf("%s ", gedcom_lval.string);
+        break;
+      case POINTER:
+        printf("POINTER(%s) ", gedcom_lval.string);
+        break;
+      case USERTAG:
+        printf("USERTAG(%s) ", gedcom_lval.string);
+        break;
+      default:
+        /* every remaining token is one of the fixed TAG_xxx tokens */
+        printf("TAG(%s) ", gedcom_lval.string);
+        break;
+    }
+  }
+
+  printf("\n");
+  close_conv_to_internal();
+  return 0;
+}
+#endif
diff --git a/message.c b/message.c
new file mode 100644 (file)
index 0000000..abde4b9
--- /dev/null
+++ b/message.c
@@ -0,0 +1,45 @@
+/* $Id$ */
+/* $Name$ */
+
+#include "gedcom.h"
+
+/* Write a printf-style message followed by a newline to stderr.
+   Returns the value returned by vfprintf for the formatted part. */
+int gedcom_message(char* s, ...)
+{
+  va_list args;
+  int written;
+
+  va_start(args, s);
+  written = vfprintf(stderr, s, args);
+  va_end(args);
+  fputs("\n", stderr);
+
+  return written;
+}
+
+/* Write a printf-style warning to stderr, prefixed with the current value
+   of line_no and followed by a newline.
+   Returns the value returned by vfprintf for the formatted part. */
+int gedcom_warning(char* s, ...)
+{
+  va_list args;
+  int written;
+
+  fprintf(stderr, "Warning on line %d: ", line_no);
+  va_start(args, s);
+  written = vfprintf(stderr, s, args);
+  va_end(args);
+  fputs("\n", stderr);
+
+  return written;
+}
+
+int gedcom_error(char* s, ...)
+{
+  int res;
+  va_list ap;
+
+  va_start(ap, s);
+  fprintf(stderr, "Error on line %d: ", line_no);
+  res = vfprintf(stderr, s, ap);
+  fprintf(stderr, "\n");
+  va_end(ap);
+  
+  return res;
+}
diff --git a/multilex.c b/multilex.c
new file mode 100644 (file)
index 0000000..b0097cc
--- /dev/null
@@ -0,0 +1,107 @@
+/* $Id$ */
+/* $Name$ */
+
+#include "gedcom.h"
+#include "multilex.h"
+#include "encoding.h"
+
+int line_no = 1;  /* line counter; incremented by the lexers at each line terminator and printed by gedcom_warning/gedcom_error */
+
+typedef int (*lex_func)(void);  /* common signature of the generated lexer entry points */
+lex_func lf;  /* lexer chosen by lexer_init(); gedcom_lex() calls through it */
+
+/* Select the lexer that matches the character width `enc`, attach the input
+   stream `f` to it and open the conversion context for the initial charset.
+   Returns the result of open_conv_to_internal(), or 0 when `enc` is not one
+   of the three known widths. */
+int lexer_init(ENCODING enc, FILE* f)
+{
+  char* initial_charset;
+
+  if (enc == ONE_BYTE) {
+    gedcom_1byte_in = f;
+    lf = &gedcom_1byte_lex;
+    initial_charset = "ASCII";
+  }
+  else if (enc == TWO_BYTE_HILO) {
+    gedcom_hilo_in = f;
+    lf = &gedcom_hilo_lex;
+    initial_charset = "UNICODE";
+  }
+  else if (enc == TWO_BYTE_LOHI) {
+    gedcom_lohi_in = f;
+    lf = &gedcom_lohi_lex;
+    initial_charset = "UNICODE";
+  }
+  else {
+    /* unknown width: nothing is selected or opened */
+    return 0;
+  }
+
+  set_encoding_width(enc);
+  return open_conv_to_internal(initial_charset);
+}
+
+void lexer_close()
+{
+  close_conv_to_internal();  /* counterpart of the open_conv_to_internal() call made in lexer_init() */
+}
+
+/* Return the next token from whichever lexer lexer_init() installed in lf. */
+int gedcom_lex()
+{
+  return lf();
+}
+
+/* Guess the character width and byte order of the GEDCOM file `f` from its
+   first two bytes and report the guess via gedcom_message().
+
+   Returns ONE_BYTE, TWO_BYTE_HILO or TWO_BYTE_LOHI.  On return the stream is
+   positioned at the start of the file, except when a byte order mark was
+   found: then it is left just after the mark so that the lexer does not see
+   it.  Files that are too short to inspect, or that start with anything
+   unrecognised, are treated as one-byte. */
+int determine_encoding(FILE* f)
+{
+  /* unsigned so that the comparisons with 0xFE/0xFF do not depend on the
+     signedness of plain char */
+  unsigned char first[2] = { 0, 0 };
+
+  if (fread(first, 1, 2, f) != 2) {
+    /* fewer than two bytes available: nothing reliable to look at */
+    gedcom_message("Unknown encoding, falling back to one-byte");
+    fseek(f, 0, SEEK_SET);
+    return ONE_BYTE;
+  }
+
+  if ((first[0] == '0') && (first[1] == ' ')) {
+    gedcom_message("One-byte encoding");
+    fseek(f, 0, SEEK_SET);
+    return ONE_BYTE;
+  }
+  else if ((first[0] == 0x00) && (first[1] == '0'))
+  {
+    gedcom_message("Two-byte encoding, high-low");
+    fseek(f, 0, SEEK_SET);
+    return TWO_BYTE_HILO;
+  }
+  else if ((first[0] == 0xFE) && (first[1] == 0xFF))
+  {
+    /* byte order mark: deliberately not rewound */
+    gedcom_message("Two-byte encoding, high-low, with BOM");
+    return TWO_BYTE_HILO;
+  }
+  else if ((first[0] == '0') && (first[1] == 0x00))
+  {
+    gedcom_message("Two-byte encoding, low-high");
+    fseek(f, 0, SEEK_SET);
+    return TWO_BYTE_LOHI;
+  }
+  else if ((first[0] == 0xFF) && (first[1] == 0xFE))
+  {
+    /* byte order mark: deliberately not rewound */
+    gedcom_message("Two-byte encoding, low-high, with BOM");
+    return TWO_BYTE_LOHI;
+  }
+  else {
+    gedcom_message("Unknown encoding, falling back to one-byte");
+    fseek(f, 0, SEEK_SET);
+    return ONE_BYTE;
+  }
+}
+
+/* Parse the GEDCOM file named `file_name`.
+
+   Returns 1 when the file cannot be opened or the lexer cannot be
+   initialised; otherwise returns the result of gedcom_parse(). */
+int gedcom_parse_file(char* file_name)
+{
+  ENCODING enc;
+  int result = 1;
+  /* Binary mode: the file may hold two-byte characters, which must reach
+     the lexer without any text-mode translation. */
+  FILE* file = fopen (file_name, "rb");
+  if (!file) {
+    /* no trailing newline here: gedcom_error() appends one itself */
+    gedcom_error("Could not open file '%s'", file_name);
+    return 1;
+  }
+
+  init_encodings();
+  enc = determine_encoding(file);
+  
+  if (lexer_init(enc, file)) {
+    result = gedcom_parse();
+  }
+  lexer_close();
+  /* the stream was opened here, so it is released here as well */
+  fclose(file);
+  
+  return result;
+}
+
diff --git a/multilex.h b/multilex.h
new file mode 100644 (file)
index 0000000..c8f81ef
--- /dev/null
@@ -0,0 +1,18 @@
+/* $Id$ */
+/* $Name$ */
+
+#ifndef __MULTILEX_H
+#define __MULTILEX_H
+#include <stdio.h>
+
+int        gedcom_parse_file(char* file_name);  /* parse one GEDCOM file; returns 1 if it cannot be opened */
+
+int        gedcom_1byte_lex();  /* lexer used for ONE_BYTE input */
+extern FILE *gedcom_1byte_in;  /* input stream of the one-byte lexer */
+
+int        gedcom_hilo_lex();  /* lexer used for TWO_BYTE_HILO input */
+extern FILE *gedcom_hilo_in;  /* input stream of the high-low lexer */
+
+int        gedcom_lohi_lex();  /* lexer used for TWO_BYTE_LOHI input */
+extern FILE *gedcom_lohi_in;  /* input stream of the low-high lexer */
+#endif /* __MULTILEX_H */