libtranscript
/home/gertjan/projects/transcript/src/utf_endian.h
00001 /* Copyright (C) 2011-2012 G.P. Halkes
00002    This program is free software: you can redistribute it and/or modify
00003    it under the terms of the GNU General Public License version 3, as
00004    published by the Free Software Foundation.
00005 
00006    This program is distributed in the hope that it will be useful,
00007    but WITHOUT ANY WARRANTY; without even the implied warranty of
00008    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00009    GNU General Public License for more details.
00010 
00011    You should have received a copy of the GNU General Public License
00012    along with this program.  If not, see <http://www.gnu.org/licenses/>.
00013 */
00014 #ifdef UTF_ENDIAN_H_VERSION
/* Token-pasting helpers: ALT(name) expands to name with the expansion of
   UTF_ENDIAN_H_VERSION appended. The extra level of indirection (_ALT ->
   __ALT) makes sure UTF_ENDIAN_H_VERSION is macro-expanded before the ##
   operator pastes it onto the name. */
00015 #define __ALT(x, y) x ## y
00016 #define _ALT(x, y) __ALT(x, y)
00017 #define ALT(x) _ALT(x, UTF_ENDIAN_H_VERSION)
00018 
00020 static transcript_error_t ALT(put_utf16)(uint_fast32_t codepoint, char **outbuf, const char const *outbuflimit) {
00021         CHECK_CODEPOINT_RANGE();
00022         if (codepoint < UINT32_C(0xffff)) {
00023                 CHECK_OUTBYTESLEFT(2);
00024                 ALT(put16)(codepoint, *(unsigned char **) outbuf);
00025                 *outbuf += 2;
00026         } else {
00027                 CHECK_OUTBYTESLEFT(4);
00028                 codepoint -= UINT32_C(0x10000);
00029                 ALT(put16)(UINT32_C(0xd800) + (codepoint >> 10), *(unsigned char **) outbuf);
00030                 ALT(put16)(UINT32_C(0xdc00) + (codepoint & 0x3ff), (*(unsigned char **) outbuf) + 2);
00031                 *outbuf += 4;
00032         }
00033         return TRANSCRIPT_SUCCESS;
00034 }
00035 
00037 static transcript_error_t ALT(put_utf32)(uint_fast32_t codepoint, char **outbuf, const char const *outbuflimit) {
00038         CHECK_CODEPOINT_RANGE();
00039 
00040         CHECK_OUTBYTESLEFT(4);
00041         ALT(put32)(codepoint, *(unsigned char **) outbuf);
00042         *outbuf += 4;
00043         return TRANSCRIPT_SUCCESS;
00044 }
00045 
00047 static uint_fast32_t ALT(get_utf16)(const char **inbuf, const char const *inbuflimit, bool_t skip) {
00048         uint_fast32_t codepoint, masked_codepoint;
00049 
00050         if ((*inbuf) + 2 > inbuflimit)
00051                 return TRANSCRIPT_UTF_INCOMPLETE;
00052 
00053         codepoint = ALT(get16)(*(const unsigned char **) inbuf);
00054         masked_codepoint = codepoint & UINT32_C(0xfc00);
00055 
00056         if (masked_codepoint == UINT32_C(0xd800)) {
00057                 uint_fast32_t next_codepoint;
00058                 /* Codepoint is high surrogate. */
00059                 if ((*inbuf) + 4 > inbuflimit)
00060                         return TRANSCRIPT_UTF_INCOMPLETE;
00061 
00062                 next_codepoint = ALT(get16)((*(const unsigned char **) inbuf) + 2);
00063                 if ((next_codepoint & UINT32_C(0xfc00)) != UINT32_C(0xdc00)) {
00064                         /* Next codepoint is not a low surrogate. */
00065                         if (!skip)
00066                                 return TRANSCRIPT_UTF_ILLEGAL;
00067 
00068                         /* Only skip the high surrogate. */
00069                         *inbuf += 2;
00070                         return codepoint;
00071                 }
00072                 codepoint -= UINT32_C(0xd800);
00073                 codepoint <<= 10;
00074                 codepoint += next_codepoint - UINT32_C(0xdc00);
00075                 codepoint += UINT32_C(0x10000);
00076 
00077                 if (!skip)
00078                         CHECK_CODEPOINT_ILLEGAL();
00079                 *inbuf += 4;
00080                 return codepoint;
00081         }
00082 
00083         if (!skip) {
00084                 if (masked_codepoint == UINT32_C(0xdc00)) {
00085                         /* Codepoint is a low surrogate. */
00086                         return TRANSCRIPT_UTF_ILLEGAL;
00087                 }
00088                 CHECK_CODEPOINT_ILLEGAL();
00089         }
00090 
00091         *inbuf += 2;
00092         return codepoint;
00093 }
00094 
00096 static uint_fast32_t ALT(get_utf32)(const char **inbuf, const char const *inbuflimit, bool_t skip) {
00097         uint32_t codepoint;
00098 
00099         if ((*inbuf) + 4 > inbuflimit)
00100                 return TRANSCRIPT_UTF_INCOMPLETE;
00101 
00102         memcpy(&codepoint, *inbuf, 4);
00103         codepoint = ALT(get32)(*(const unsigned char **) inbuf);
00104         if (!skip) {
00105                 CHECK_CODEPOINT_ILLEGAL();
00106                 CHECK_CODEPOINT_SURROGATES();
00107         }
00108 
00109         *inbuf += 4;
00110         return codepoint;
00111 }
00112 
/* Remove the name-pasting helper macros defined at the top of this header.
   NOTE(review): presumably so the header can be included again with a
   different UTF_ENDIAN_H_VERSION — confirm against the including file. */
00113 #undef ALT
00114 #undef _ALT
00115 #undef __ALT
00116 #endif
 All Data Structures Variables