Put documentation of parser in CVS.
authorPeter Verthez <Peter.Verthez@advalvas.be>
Sat, 1 Dec 2001 15:27:21 +0000 (15:27 +0000)
committerPeter Verthez <Peter.Verthez@advalvas.be>
Sat, 1 Dec 2001 15:27:21 +0000 (15:27 +0000)
doc/images/schema.obj [new file with mode: 0644]
doc/images/schema.png [new file with mode: 0644]
doc/parser.html [new file with mode: 0644]

diff --git a/doc/images/schema.obj b/doc/images/schema.obj
new file mode 100644 (file)
index 0000000..779c19c
--- /dev/null
@@ -0,0 +1,251 @@
+%TGIF 4.1.39
+state(0,37,100.000,0,0,0,8,1,9,1,1,0,0,1,0,1,0,'Courier',0,57600,0,3,0,10,0,0,1,1,0,16,0,0,1,1,1,1,1088,1408,1,0,2880,0).
+%
+% @(#)$Header$
+% %W%
+%
+unit("1 pixel/pixel").
+color_info(11,65535,0,[
+       "magenta", 65535, 0, 65535, 65535, 0, 65535, 1,
+       "red", 65535, 0, 0, 65535, 0, 0, 1,
+       "green", 0, 65535, 0, 0, 65535, 0, 1,
+       "blue", 0, 0, 65535, 0, 0, 65535, 1,
+       "yellow", 65535, 65535, 0, 65535, 65535, 0, 1,
+       "pink", 65535, 49344, 52171, 65535, 49344, 52171, 1,
+       "cyan", 0, 65535, 65535, 0, 65535, 65535, 1,
+       "CadetBlue", 24415, 40606, 41120, 24415, 40606, 41120, 1,
+       "white", 65535, 65535, 65535, 65535, 65535, 65535, 1,
+       "black", 0, 0, 0, 0, 0, 0, 1,
+       "DarkSlateGray", 12079, 20303, 20303, 12079, 20303, 20303, 1
+]).
+script_frac("0.6").
+fg_bg_colors('black','white').
+page(1,"",1,'').
+text('black',16,159,1,0,1,54,11,0,9,2,0,0,0,0,2,54,11,0,0,"",0,0,0,0,168,'',[
+minilines(54,11,0,0,0,0,0,[
+mini_line(54,9,2,0,0,0,[
+str_block(0,54,9,2,0,-1,0,0,0,[
+str_seg('black','Courier',0,57600,54,9,2,0,-1,0,0,0,0,0,
+       "inputfile")])
+])
+])]).
+box('black','',112,152,176,176,0,1,1,4,0,0,0,0,0,'1',0,[
+]).
+text('black',120,159,1,0,1,48,11,7,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,168,'',[
+minilines(48,11,0,0,0,0,0,[
+mini_line(48,9,2,0,0,0,[
+str_block(0,48,9,2,0,-1,0,0,0,[
+str_seg('black','Courier',0,57600,48,9,2,0,-1,0,0,0,0,0,
+       "multilex")])
+])
+])]).
+poly('black','',2,[
+       72,168,112,168],1,1,1,11,0,0,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+box('black','',224,192,296,216,0,1,1,28,0,0,0,0,0,'1',0,[
+]).
+text('black',232,199,1,0,1,48,11,29,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,208,'',[
+minilines(48,11,0,0,0,0,0,[
+mini_line(48,9,2,0,0,0,[
+str_block(0,48,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0,
+       "lex_lohi")])
+])
+])]).
+poly('black','',2,[
+       176,152,224,128],1,1,1,34,0,0,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       176,160,224,160],1,1,1,37,0,0,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       176,176,224,208],1,1,1,40,0,0,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+box('black','',344,152,408,176,0,1,1,43,0,0,0,0,0,'1',0,[
+]).
+text('black',360,159,1,0,1,36,11,44,9,2,0,0,0,0,2,36,11,0,0,"",0,0,0,0,168,'',[
+minilines(36,11,0,0,0,0,0,[
+mini_line(36,9,2,0,0,0,[
+str_block(0,36,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,36,9,2,0,0,0,0,0,0,0,
+       "parser")])
+])
+])]).
+poly('black','',2,[
+       296,120,344,152],1,1,1,48,0,0,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       296,160,344,160],1,1,1,49,0,0,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       296,200,344,176],1,1,1,50,0,0,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+box('black','',224,56,296,80,0,1,1,51,0,0,0,0,0,'1',0,[
+]).
+text('black',240,63,1,0,1,48,11,52,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,72,'',[
+minilines(48,11,0,0,0,0,0,[
+mini_line(48,9,2,0,0,0,[
+str_block(0,48,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0,
+       "encoding")])
+])
+])]).
+poly('black','',3,[
+       144,152,144,64,224,64],1,1,1,58,0,0,5,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',3,[
+       376,152,376,64,296,64],1,1,1,59,0,0,5,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       248,80,248,112],3,1,1,60,0,0,8,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       256,80,256,152],3,1,1,61,0,0,8,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       264,80,264,192],3,1,1,62,0,0,8,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+box('black','',224,112,296,136,2,1,1,20,0,0,0,0,0,'1',0,[
+]).
+text('black',232,119,1,0,1,54,11,21,9,2,0,0,0,0,2,54,11,0,0,"",0,0,0,0,128,'',[
+minilines(54,11,0,0,0,0,0,[
+mini_line(54,9,2,0,0,0,[
+str_block(0,54,9,2,0,-1,0,0,0,[
+str_seg('black','Courier',0,57600,54,9,2,0,-1,0,0,0,0,0,
+       "lex_1byte")])
+])
+])]).
+box('black','',224,152,296,176,2,1,1,26,0,0,0,0,0,'1',0,[
+]).
+text('black',232,159,1,0,1,48,11,27,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,168,'',[
+minilines(48,11,0,0,0,0,0,[
+mini_line(48,9,2,0,0,0,[
+str_block(0,48,9,2,0,-1,0,0,0,[
+str_seg('black','Courier',0,57600,48,9,2,0,-1,0,0,0,0,0,
+       "lex_hilo")])
+])
+])]).
+text('black',144,55,1,0,1,42,11,75,9,2,2,0,0,0,2,42,11,0,0,"",0,0,0,0,64,'',[
+minilines(42,11,0,0,0,0,0,[
+mini_line(42,9,2,0,0,0,[
+str_block(0,42,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,42,9,2,0,0,0,0,0,0,0,
+       "initial")])
+])
+])]).
+text('black',336,55,1,0,1,36,11,79,9,2,2,0,0,0,2,36,11,0,0,"",0,0,0,0,64,'',[
+minilines(36,11,0,0,0,0,0,[
+mini_line(36,9,2,0,0,0,[
+str_block(0,36,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,36,9,2,0,0,0,0,0,0,0,
+       "adjust")])
+])
+])]).
+box('black','',472,152,536,176,0,1,1,83,0,0,0,0,0,'1',0,[
+]).
+text('black',480,159,1,0,1,48,11,84,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,168,'',[
+minilines(48,11,0,0,0,0,0,[
+mini_line(48,9,2,0,0,0,[
+str_block(0,48,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0,
+       "app_data")])
+])
+])]).
+poly('black','',2,[
+       408,160,472,160],1,1,1,86,0,2,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+text('black',424,167,1,0,1,30,11,87,9,2,2,0,0,0,2,30,11,0,0,"",0,0,0,0,176,'',[
+minilines(30,11,0,0,0,0,0,[
+mini_line(30,9,2,0,0,0,[
+str_block(0,30,9,2,0,-1,0,0,0,[
+str_seg('black','Courier',0,57600,30,9,2,0,-1,0,0,0,0,0,
+       "utf-8")])
+])
+])]).
+box('black','',584,152,648,176,0,1,1,89,0,0,0,0,0,'1',0,[
+]).
+text('black',592,159,1,0,1,42,11,90,9,2,0,0,0,0,2,42,11,0,0,"",0,0,0,0,168,'',[
+minilines(42,11,0,0,0,0,0,[
+mini_line(42,9,2,0,0,0,[
+str_block(0,42,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,42,9,2,0,0,0,0,0,0,0,
+       "app_gui")])
+])
+])]).
+box('black','',472,80,536,104,0,1,1,94,0,0,0,0,0,'1',0,[
+]).
+text('black',480,87,1,0,1,48,11,95,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,96,'',[
+minilines(48,11,0,0,0,0,0,[
+mini_line(48,9,2,0,0,0,[
+str_block(0,48,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0,
+       "encoding")])
+])
+])]).
+text('black',472,23,1,0,1,78,11,117,9,2,2,0,0,0,2,78,11,0,0,"",0,0,0,0,32,'',[
+minilines(78,11,0,0,0,0,0,[
+mini_line(78,9,2,0,0,0,[
+str_block(0,78,9,2,0,-3,0,0,0,[
+str_seg('black','Courier',0,57600,78,9,2,0,-3,0,0,0,0,0,
+       "LANG env.var.")])
+])
+])]).
+poly('black','',2,[
+       504,40,504,80],1,1,1,119,0,2,5,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       504,104,504,152],3,1,1,127,0,0,8,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+poly('black','',2,[
+       536,160,584,160],1,1,1,129,0,2,0,0,0,0,0,'1',0,0,
+    "0","",[
+    0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[
+]).
+text('black',544,167,1,0,1,36,11,130,9,2,2,0,0,0,2,36,11,0,0,"",0,0,0,0,176,'',[
+minilines(36,11,0,0,0,0,0,[
+mini_line(36,9,2,0,0,0,[
+str_block(0,36,9,2,0,-1,0,0,0,[
+str_seg('black','Courier',0,57600,36,9,2,0,-1,0,0,0,0,0,
+       "locale")])
+])
+])]).
+box('black','',88,40,416,224,0,1,1,133,3,0,0,0,0,'1',0,[
+]).
+text('black',328,23,1,0,1,78,11,141,9,2,0,0,0,0,2,78,11,0,0,"",0,0,0,0,32,'',[
+minilines(78,11,0,0,0,0,0,[
+mini_line(78,9,2,0,0,0,[
+str_block(0,78,9,2,0,0,0,0,0,[
+str_seg('black','Courier',0,57600,78,9,2,0,0,0,0,0,0,0,
+       "gedcom parser")])
+])
+])]).
diff --git a/doc/images/schema.png b/doc/images/schema.png
new file mode 100644 (file)
index 0000000..cff1475
Binary files /dev/null and b/doc/images/schema.png differ
diff --git a/doc/parser.html b/doc/parser.html
new file mode 100644 (file)
index 0000000..cc1d718
--- /dev/null
@@ -0,0 +1,129 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+           
+  <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+  <title>Gedcom parser in Genes</title>
+</head>
+  <body>
+   
+<div align="Center">  
+<h1>Gedcom parser in Genes</h1>
+   
+<div align="Left">The intention of this page is to provide some explanation 
+ of the gedcom parser, to aid development on and with it. &nbsp;Currently, 
+ the parser is in a working state, but some parts are still missing, 
+ notably the interface towards applications. &nbsp;First, some practical issues
+ of testing with the parser will be explained.<br>
+  <br>
+   
+<h2>Basic testing<br>
+  </h2>
+   The parser is located in the "gedcom" subdirectory of the Genes source 
+code.  &nbsp;You should be able to perform a basic test using the commands:<br>
+   
+<blockquote><code>make clean<br>
+  make<br>
+   make test</code><br>
+    </blockquote>
+   If everything goes OK, you'll see that some gedcom files are parsed, and 
+ that each parse is successful. &nbsp;Note that the gedcom files used are 
+made by <a href="http://heiner-eichmann.de/gedcom/gedcom.htm">Heiner Eichmann</a>
+    and are an excellent way to test gedcom parsers thoroughly.<br>
+    <br>
+       
+  <h2>Preparing for further testing</h2>
+   The basic testing described above doesn't show anything other than "Parse 
+ succeeded", which is nice, but not very interesting. &nbsp;Some more detailed 
+ tests are possible, via the <code>gedcom_parse</code> program that is generated 
+ by <code>make test</code>. &nbsp;<br>
+    <br>
+   However, since the output that <code>gedcom_parse</code> generates is
+in  UTF-8 format (more on this later), some preparation is necessary to have 
+a full view on it. &nbsp;Basically, you need a terminal that understands and
+can display UTF-8 encoded characters, and you need the proper fonts installed 
+ to display them. &nbsp;I'll give some advice on this here, based on the Red
+ Hat 7.1 distribution that I use, with glibc 2.2 and XFree86 4.0.x. &nbsp;Any 
+ other distribution that has the same or newer versions for these components 
+ should give the same results.<br>
+    <br>
+   For the first issue, the UTF-8 capable terminal, the safest bet is to
+use    <code>xterm</code> in its unicode mode (which is supported by the
+  <code>   xterm</code> coming with XFree86 4.0.x). &nbsp;UTF-8 capabilities
+have only  recently been added to <code>gnome-terminal</code>, so probably
+that is not  in your distribution yet (it certainly isn't in Red Hat 7.1).<br>
+    <br>
+   For the second issue, you'll need the ISO 10646-1 fonts. &nbsp;These also 
+ come with XFree86 4.0.x.<br>
+    <br>
+   One way to start <code>xterm</code> in unicode mode is, for example (put
+everything on one line!):<br>
+       
+  <blockquote><code>LANG=en_GB.UTF-8 xterm -bg 'black' -fg 'DarkGrey' -cm 
+ -fn '-Misc-Fixed-Medium-R-SemiCondensed--13-120-75-75-C-60-ISO10646-1'</code><br>
+      </blockquote>
+        This first sets the <code>LANG</code> variable to a locale that uses
+ UTF-8, and then starts <code>xterm</code> with a proper Unicode font. &nbsp;Some
+ sample UTF-8 plain text files can be found <a href="http://www.cl.cam.ac.uk/%7Emgk25/ucs/examples">
+  here</a>
+  . &nbsp;Just <code>cat</code> them on the command line and see the result.<br>
+      <br>
+           
+    <h2>Testing the parser with debugging</h2>
+  Given the UTF-8 capable terminal, you can now let the <code>gedcom_parse</code>
+   program print the values that it parses. &nbsp;An example of a command 
+line is (in the <code>gedcom</code> directory):<br>
+           
+    <blockquote><code>./gedcom_parse -dg t/ulhc.ged</code><br>
+        </blockquote>
+  The <code>-dg</code> option instructs the parser to show its own debug
+messages  &nbsp;(see <code>./gedcom_parse -h</code> for the full set of options).
+&nbsp;If  everything is OK, you'll see the values from the gedcom file, containing
+a lot of special characters.<br>
+        <br>
+  For the ANSEL test file (<code>t/ansel.ged</code>), you have to set the 
+environment variable <code>GCONV_PATH</code> to the <code>ansel</code> subdirectory 
+of the gedcom directory:<br>
+               
+      <blockquote><code>export GCONV_PATH=./ansel<br>
+  ./gedcom_parse -dg t/ansel.ged<br>
+          </code></blockquote>
+  This is because for the ANSEL character set an extra module is needed for
+ the iconv library (more on this later). &nbsp;But again, this should show
+ a lot of special characters.<br>
+          <br>
+                   
+        <h2>Testing the lexers separately</h2>
+  The lexers themselves can be tested separately. &nbsp;For the 1-byte lexer
+ (i.e. supporting the encodings with 1 byte per character, such as ASCII,
+ ANSI and ANSEL), the sequence of commands would be:<br>
+                   
+        <blockquote><code>make clean<br>
+  make test_1byte<br>
+  cat t/allged.ged | ./test_1byte</code><br>
+            </blockquote>
+  This will show all tokens in the <code>t/allged.ged</code> test file. &nbsp;With
+ the lexers you have to make sure that you use the proper lexer for each
+test  file. &nbsp;The <code>test_1byte</code> test program is OK for <code>
+allged.ged</code>   and <code>ansel.ged</code> (the last one again with the
+environment variable  set); for the <code>uhl*.ged</code> files you need
+the <code>test_hilo</code>   test program; for the <code>ulh*.ged</code>
+files you need the <code>test_lohi</code>   program.<br>
+            <br>
+  This concludes the testing setup. &nbsp;Now for some explanations...<br>
+            <br>
+                       
+          <h2>Structure of the parser</h2>
+  I see the structure of a program using the gedcom parser as follows:<br>
+            <br>
+            <img src="images/schema.png" alt="Gedcom parsing scheme">
+            <br>
+            <br>
+            <br>
+  TO BE COMPLETED...<br>
+          <br>
+            </div>
+            </div>
+                       
+          </body>
+          </html>