2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
25 #include "ANSI_Z39.47-tables.h"
/* Single-byte to Unicode table with 128 entries. The decoder indexes it with
   (byte - 0x80), so it covers the bytes from 0x80 upwards; the contents come
   from TABLE_TO_UCS4_BASIC in the tables header. */
28 /* Omit first half of table: assume identity mapping (ASCII) */
29 static const unsigned short ansi_z39_47_2uni[128] = TABLE_TO_UCS4_BASIC;
31 /* The outer array range runs from 0xe0 to 0xfe, the inner range from 0x20 to 0x7f. */
/* Combination table: one row per diacritic byte, in the byte order listed
   below. The decoder reads entry [mark - 0xe0][base - 0x20] to get the code
   unit for a diacritic byte followed by a base byte.
   NOTE(review): presumably a zero entry means "no precomposed form" - confirm
   against the lookup code, which is not fully visible here. */
33 static const unsigned short ansi_z39_47_2uni_comb[31][96] =
35 /* 0xe0 (hook above) */ TABLE_TO_UCS4_COMBINING_E0,
36 /* 0xe1 (grave) */ TABLE_TO_UCS4_COMBINING_E1,
37 /* 0xe2 (acute) */ TABLE_TO_UCS4_COMBINING_E2,
38 /* 0xe3 (circumflex) */ TABLE_TO_UCS4_COMBINING_E3,
39 /* 0xe4 (tilde) */ TABLE_TO_UCS4_COMBINING_E4,
40 /* 0xe5 (macron) */ TABLE_TO_UCS4_COMBINING_E5,
41 /* 0xe6 (breve) */ TABLE_TO_UCS4_COMBINING_E6,
42 /* 0xe7 (dot above) */ TABLE_TO_UCS4_COMBINING_E7,
43 /* 0xe8 (umlaut, diaeresis) */ TABLE_TO_UCS4_COMBINING_E8,
44 /* 0xe9 (caron, hacek) */ TABLE_TO_UCS4_COMBINING_E9,
45 /* 0xea (ring above) */ TABLE_TO_UCS4_COMBINING_EA,
46 /* 0xeb (ligature, left half) */ TABLE_TO_UCS4_COMBINING_EB,
47 /* 0xec (ligature, right half) */ TABLE_TO_UCS4_COMBINING_EC,
48 /* 0xed (comma above right) */ TABLE_TO_UCS4_COMBINING_ED,
49 /* 0xee (double acute) */ TABLE_TO_UCS4_COMBINING_EE,
50 /* 0xef (candrabindu) */ TABLE_TO_UCS4_COMBINING_EF,
51 /* 0xf0 (cedilla) */ TABLE_TO_UCS4_COMBINING_F0,
52 /* 0xf1 (ogonek, right hook) */ TABLE_TO_UCS4_COMBINING_F1,
53 /* 0xf2 (dot below) */ TABLE_TO_UCS4_COMBINING_F2,
54 /* 0xf3 (double dot below) */ TABLE_TO_UCS4_COMBINING_F3,
55 /* 0xf4 (ring below) */ TABLE_TO_UCS4_COMBINING_F4,
56 /* 0xf5 (double low line) */ TABLE_TO_UCS4_COMBINING_F5,
57 /* 0xf6 (line below) */ TABLE_TO_UCS4_COMBINING_F6,
58 /* 0xf7 (comma below, left hook) */ TABLE_TO_UCS4_COMBINING_F7,
59 /* 0xf8 (left half ring below, right cedilla) */ TABLE_TO_UCS4_COMBINING_F8,
60 /* 0xf9 (breve below, half circle below) */ TABLE_TO_UCS4_COMBINING_F9,
61 /* 0xfa (double tilde, left half) */ TABLE_TO_UCS4_COMBINING_FA,
62 /* 0xfb (double tilde, right half) */ TABLE_TO_UCS4_COMBINING_FB,
63 /* 0xfc */ TABLE_TO_UCS4_COMBINING_FC,
64 /* 0xfd */ TABLE_TO_UCS4_COMBINING_FD,
65 /* 0xfe (comma above, high centered comma) */ TABLE_TO_UCS4_COMBINING_FE,
/* Flag bit kept in conv->istate next to the buffered diacritic byte. The
   decoder tests it with (state & BASE_PASSED) and recovers the byte with
   (state & ~BASE_PASSED). */
68 #define BASE_PASSED 0x10000
/* Decoder step for ANSEL input: reads from s and stores one UCS-4 value
   through pwc, using conv->istate to carry a pending diacritic byte between
   calls (ANSEL puts the mark before its base character, Unicode after).

   Visible behaviour:
   - If the saved state has BASE_PASSED set, the buffered diacritic byte is
     mapped through ansi_z39_47_2uni[dc - 0x80] into *pwc.
   - If a state is pending and the input byte c is >= 0x20, the combination
     table is read at [dc - 0xe0][c - 0x20].
   - Elsewhere c is mapped through ansi_z39_47_2uni[c - 0x80].
   - A diacritic byte is stored in conv->istate and RET_TOOFEW(1) is returned.

   NOTE(review): several statements of this function are not visible in this
   view, so the control flow between the steps above is not described here.
   NOTE(review): the only visible guard before the combination lookup is
   c >= 0x20; each row has 96 entries, so confirm that c is also known to be
   below 0x80 at that point. */
71 ansi_z39_47_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
74 ucs4_t last_wc = conv->istate;
76 if (last_wc & BASE_PASSED) {
77 /* base character was already output, reset the state and output the
79 unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
80 *pwc = ansi_z39_47_2uni[dc-0x80];
85 conv->istate |= BASE_PASSED;
89 if (last_wc && c >= 0x20) {
90 /* Check if we can combine the character with the diacritical mark */
91 unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
92 unsigned short wc = ansi_z39_47_2uni_comb[dc-0xe0][c-0x20];
103 unsigned short wc = ansi_z39_47_2uni[c-0x80];
110 /* The range from 0xe0 to 0xfe are diacritical marks.
111 Note that in ANSEL they come *before* the base characters, in Unicode,
112 they come *after*, so we have to buffer them ... */
113 conv->istate = (state_t)c;
114 return RET_TOOFEW(1);
/* Unicode-to-ANSEL lookup pages used by the encoder. Each page covers one
   contiguous code point range and is indexed with (wc - first code point of
   that range); every entry is a pair of bytes. The encoder only acts on an
   entry whose first byte is non-zero, and treats an entry with a zero second
   byte and a first byte in 0xe0..0xfe as a diacritical mark. The contents come
   from the TABLE_FROM_UCS4_* macros in the tables header. */
119 static const unsigned char ansi_z39_47_page080[][2] = TABLE_FROM_UCS4_BASIC;
120 static const unsigned char ansi_z39_47_page01a[][2] = TABLE_FROM_UCS4_PAGE_01A;
121 static const unsigned char ansi_z39_47_page022[][2] = TABLE_FROM_UCS4_PAGE_022;
122 static const unsigned char ansi_z39_47_page02b[][2] = TABLE_FROM_UCS4_PAGE_02B;
123 static const unsigned char ansi_z39_47_page030[][2] = TABLE_FROM_UCS4_PAGE_030;
124 static const unsigned char ansi_z39_47_page1ea[][2] = TABLE_FROM_UCS4_PAGE_1EA;
125 static const unsigned char ansi_z39_47_page200[][2] = TABLE_FROM_UCS4_PAGE_200;
126 static const unsigned char ansi_z39_47_page211[][2] = TABLE_FROM_UCS4_PAGE_211;
127 static const unsigned char ansi_z39_47_page266[][2] = TABLE_FROM_UCS4_PAGE_266;
128 static const unsigned char ansi_z39_47_pagefe2[][2] = TABLE_FROM_UCS4_PAGE_FE2;
/* Encoder step for ANSEL output: converts the UCS-4 value wc into bytes
   written through r, with n as the limit on how many bytes may be written.
   conv->ostate buffers a possible base character so that a diacritical mark
   arriving after it can be written before it, as ANSEL requires.

   NOTE(review): several statements of this function are not visible in this
   view, so only the visible steps are annotated below.
   NOTE(review): the exclusive upper bounds in the range checks must match the
   lengths of the page tables, which are set by macros in the tables header -
   confirm there. */
131 ansi_z39_47_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
133 const unsigned char* ch = NULL;
/* OUTPUT counts one more byte, returns RET_TOOSMALL from the enclosing
   function if the count exceeds n, and otherwise stores the byte and advances
   r.
   NOTE(review): it expands to three bare statements (no do { } while (0)
   wrapper), so it is only safe where all three land inside one braced block. */
136 #define OUTPUT(c) ++output; if (n < output) return RET_TOOSMALL; *(r++) = (c);
138 /* Since in UTF-8 diacritical marks come after the base character and in
139 ANSEL before, we need to buffer possible base characters (0x20 to 0x7f)
140 to put the diacritical mark before it if there is one following */
143 OUTPUT(conv->ostate);
147 conv->ostate = (state_t) wc;
/* Select the lookup page whose code point range contains wc; ch stays NULL
   when no visible range matches. */
154 else if (wc >= 0x0080 && wc < 0x017f)
155 ch = ansi_z39_47_page080[wc-0x0080];
156 else if (wc >= 0x01a0 && wc < 0x01b4)
157 ch = ansi_z39_47_page01a[wc-0x01a0];
158 else if (wc >= 0x0220 && wc < 0x0234)
159 ch = ansi_z39_47_page022[wc-0x0220];
160 else if (wc >= 0x02b0 && wc < 0x02e2)
161 ch = ansi_z39_47_page02b[wc-0x02b0];
162 else if (wc >= 0x0300 && wc < 0x0337)
163 ch = ansi_z39_47_page030[wc-0x0300];
164 else if (wc >= 0x1ea0 && wc < 0x1efa)
165 ch = ansi_z39_47_page1ea[wc-0x1ea0];
166 else if (wc >= 0x2000 && wc < 0x200f)
167 ch = ansi_z39_47_page200[wc-0x2000];
168 else if (wc >= 0x2110 && wc < 0x211a)
169 ch = ansi_z39_47_page211[wc-0x2110];
170 else if (wc >= 0x2660 && wc < 0x2674)
171 ch = ansi_z39_47_page266[wc-0x2660];
172 else if (wc >= 0xfe20 && wc < 0xfe25)
173 ch = ansi_z39_47_pagefe2[wc-0xfe20];
/* Only an entry with a non-zero first byte is treated as a mapping. */
174 if (ch && ch[0] != 0) {
175 if (ch[1] == 0 && ch[0] >= 0xe0 && ch[0] <= 0xfe) {
176 /* Diacritical mark following a base character, buffered in ostate */
177 /* Output diacritical mark, then base character */
180 OUTPUT(conv->ostate);
188 OUTPUT(conv->ostate);