1 /* Utility functions for UTF-8
2 Copyright (C) 2001, 2002 Peter Verthez
4 The UTF8 tools library is free software; you can redistribute it
5 and/or modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The Gedcom parser library is distributed in the hope that it will be
10 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the Gedcom parser library; if not, write to the
16 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 #include "utf8tools.h"
/* Check whether STR is structurally well-formed UTF-8.

   The string is scanned byte by byte up to its NUL terminator:
     - a byte 0xxxxxxx is an ASCII character;
     - a lead byte 110xxxxx / 1110xxxx / 11110xxx announces 1, 2 or 3
       continuation bytes respectively;
     - every continuation byte must have the form 10xxxxxx.

   Only the byte structure is checked; overlong encodings and surrogate
   code points are not detected.

   Returns 1 if the whole string is well-formed (the empty string
   included), 0 otherwise.  A NULL pointer yields 0. */
int is_utf8_string(const char* str)
{
  /* Work on unsigned bytes so that the shifts below never operate on
     negative values when plain char is signed. */
  const unsigned char* p = (const unsigned char*)str;
  int expect_bytes = 0;   /* continuation bytes still owed */

  if (!str) return 0;

  for (; *p; p++) {
    if ((*p & 0x80) == 0) {
      /* Looks like an ASCII character: only legal between characters,
         not while continuation bytes are still expected */
      if (expect_bytes) return 0;
    }
    else if (expect_bytes) {
      /* First byte of a UTF-8 character already seen: this byte must be
         a continuation byte */
      if ((*p & 0xC0) != 0x80) return 0;
      expect_bytes--;
    }
    else {
      /* First byte of a UTF-8 character: count its leading one bits */
      unsigned char ch = *p;
      int ones = 0;

      while (ch & 0x80) {
        ones++;
        ch = (unsigned char)(ch << 1);
      }

      /* One leading bit is a stray continuation byte; more than four
         (0xF8..0xFF) never starts a valid sequence */
      if (ones < 2 || ones > 4) return 0;

      expect_bytes = ones - 1;
    }
  }

  /* A string that stops in the middle of a character is not valid */
  return (expect_bytes == 0);
}
/* Return the number of characters (not bytes) in the UTF-8 string STR.

   Every byte that is not a continuation byte (10xxxxxx) starts a new
   character, so ASCII bytes and lead bytes are counted and continuation
   bytes are skipped.  The string is not validated.

   Returns 0 for the empty string and for a NULL pointer. */
int utf8_strlen(const char* str)
{
  int num_char = 0;

  if (!str) return 0;

  for (; *str; str++) {
    /* Count one per character: skip 10xxxxxx continuation bytes only */
    if ((*str & 0xC0) != 0x80) num_char++;
  }

  return num_char;
}
/* Return a pointer to the character following the one STR points at.

   The pointer is moved one byte forward and then past any continuation
   bytes (10xxxxxx) that belong to the current character.  When STR
   already points at the NUL terminator it is returned unchanged, so the
   result never points beyond the end of the string.

   Returns NULL if STR is NULL. */
char* next_utf8_char(char* str)
{
  if (!str) return NULL;

  if (*str) {
    /* Step over the first byte of the current character ... */
    str++;
    /* ... and over the rest of its bytes, stopping at the terminator */
    while (*str && (*str & 0xC0) == 0x80)
      str++;
  }

  return str;
}
101 char* nth_utf8_char(char* str, int n)
104 if (!str) return NULL;
109 if ((*str & 0xC0) != 0x80) num_char++;
110 if (num_char == n) break;