/*
* Copyright (c) 2003-2008 Hypertriton, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Dynamically-allocated text buffer designed to perform automatic character
* set conversion when appending text of a different encoding.
*/
#include "cgi.h"
#include
#include
#include
#include
/* Slack (in bytes) added to the required size whenever a buffer is grown. */
#define TEXT_BUFFER_GROW 1024

static size_t utf8_to_latin1(TEXT *, const char *);

/*
 * Built-in converters for UTF-8 input. Each entry pairs the name of a text
 * buffer encoding with the routine that appends UTF-8 text to a buffer of
 * that encoding. TEXT_CatS_UTF8() looks entries up by name, ignoring case.
 */
static const struct {
	const char *name;	/* Encoding name (points at a string literal) */
	size_t (*conv_fn)(TEXT *, const char *);	/* Converting append */
} convs[] = {
	{ "ISO-8859-1", utf8_to_latin1 }
};

/* Number of entries in convs[]. */
static const int nconvs = sizeof(convs) / sizeof(convs[0]);
/*
 * Prepare the text buffer te for use.
 *
 * The encoding name is copied into te->encoding (bounded by the size of that
 * field) and len bytes of storage are allocated. The buffer starts out
 * holding no text.
 */
void
TEXT_Init(TEXT *te, size_t len, const char *encoding)
{
	/* Nothing has been appended yet; capacity is the requested size. */
	te->len = 0;
	te->buf_len = len;

	Strlcpy(te->encoding, encoding, sizeof(te->encoding));
	te->buf = Malloc(len);
}
/*
 * Release the storage held by a text buffer.
 *
 * Only te->buf is freed; the TEXT structure itself is left to the caller.
 * The fields are reset afterwards so the structure does not keep a dangling
 * pointer, and calling this function again is harmless (free(NULL) is a
 * no-op).
 */
void
TEXT_Destroy(TEXT *te)
{
	free(te->buf);
	te->buf = NULL;
	te->buf_len = 0;
	te->len = 0;
}
/*
 * Append the NUL-terminated string s to the text buffer, byte for byte.
 *
 * The buffer is enlarged when the new length exceeds its capacity. Only the
 * characters of s are copied; the terminating NUL is not stored.
 *
 * Returns the number of bytes appended.
 */
size_t
TEXT_CatS(TEXT *te, const char *s)
{
	size_t nbytes = strlen(s);

	te->len += nbytes;
	if (te->buf_len < te->len) {
		/* Grow past the new length so small appends do not realloc. */
		te->buf_len = te->len + TEXT_BUFFER_GROW;
		te->buf = Realloc(te->buf, te->buf_len);
	}

	/* te->len already includes nbytes, so step back to the old end. */
	memcpy(te->buf + (te->len - nbytes), s, nbytes);
	return (nbytes);
}
/*
 * Append a printf-style formatted string to a text buffer.
 *
 * The text is formatted into a temporary heap string with vasprintf(),
 * appended unconverted with TEXT_CatS(), and the temporary is then freed.
 *
 * Returns the number of bytes appended, or 0 if formatting failed (in which
 * case the buffer is left untouched).
 */
size_t
TEXT_Cat(TEXT *te, const char *fmt, ...)
{
	va_list ap;
	char *s = NULL;
	size_t rv;
	int n;

	va_start(ap, fmt);
	n = vasprintf(&s, fmt, ap);
	va_end(ap);

	/*
	 * When vasprintf() fails, the contents of s are not defined on all
	 * platforms, so it must be neither read nor freed.
	 */
	if (n < 0) {
		return (0);
	}

	rv = TEXT_CatS(te, s);
	free(s);
	return (rv);
}
/*
 * Built-in UTF-8 to LATIN1 conversion.
 *
 * Decode the NUL-terminated UTF-8 string s and append exactly one byte to te
 * for every decoded sequence:
 *  - code points up to 0xff are stored as-is;
 *  - code points above 0xff are stored as '?';
 *  - a stray continuation byte (0x80-0xbf) is stored as '!';
 *  - a lead byte of 0xf8 or above is stored as '?';
 *  - a sequence whose continuation byte is missing or invalid is stored
 *    as 'X'.
 *
 * Returns the length of s in bytes, which may be larger than the number of
 * bytes actually appended.
 */
static size_t
utf8_to_latin1(TEXT *te, const char *s)
{
	const u_char *sp;
	size_t len = strlen(s);
	u_int32_t c;

	/*
	 * Each iteration below consumes at least one input byte and emits
	 * exactly one output byte, so room for len more bytes always suffices.
	 * The new capacity must include the text already in the buffer.
	 */
	if (te->len + len > te->buf_len) {
		te->buf_len = te->len + len + TEXT_BUFFER_GROW;
		te->buf = Realloc(te->buf, te->buf_len);
	}

	for (sp = (const u_char *)&s[0]; *sp != '\0'; ) {
		int ntrail = 0;		/* Continuation bytes still expected */

		if (*sp < 0x80) {			/* Single byte */
			c = *(sp++);
		} else if (*sp < 0xc0) {		/* Stray continuation */
			c = '!';
			sp++;
		} else if (*sp < 0xe0) {		/* 2-byte lead */
			c = *(sp++) & 0x1f;
			ntrail = 1;
		} else if (*sp < 0xf0) {		/* 3-byte lead */
			c = *(sp++) & 0x0f;
			ntrail = 2;
		} else if (*sp < 0xf8) {		/* 4-byte lead */
			c = *(sp++) & 0x07;
			ntrail = 3;
		} else {				/* Not a valid lead */
			c = '?';
			sp++;
		}

		for (; ntrail > 0; ntrail--) {
			if ((*sp & 0xc0) != 0x80) {
				c = 'X';
				/*
				 * Skip the offending byte, unless it is the
				 * terminating NUL of a truncated sequence:
				 * stepping over it would run the outer loop
				 * past the end of the string.
				 */
				if (*sp != '\0') {
					sp++;
				}
				break;
			}
			c <<= 6;
			c |= *(sp++) & 0x3f;
		}

		if (c <= 0xff) {
			te->buf[te->len++] = c;
		} else {
			/* Not representable in a single LATIN1 byte. */
			te->buf[te->len++] = '?';
		}
	}
	return (len);
}
/*
 * Append the UTF-8 string s to a text buffer.
 *
 * If convs[] has an entry whose name matches the buffer's encoding (compared
 * without regard to case), that entry's conversion routine performs the
 * append. Otherwise the bytes of s are appended unchanged.
 *
 * Returns whatever the routine that did the append returned.
 */
size_t
TEXT_CatS_UTF8(TEXT *te, const char *s)
{
	int idx = 0;

	while (idx < nconvs) {
		if (strcasecmp(convs[idx].name, te->encoding) != 0) {
			idx++;
			continue;
		}
		return (convs[idx].conv_fn(te, s));
	}

	/* No converter for this encoding: append without converting. */
	return (TEXT_CatS(te, s));
}
/*
 * Append the single character c to a text buffer, unconverted.
 *
 * The buffer is enlarged when the new length exceeds its capacity.
 * Always returns 1, the number of bytes appended.
 */
size_t
TEXT_CatC(TEXT *te, char c)
{
	te->len++;
	if (te->buf_len < te->len) {
		te->buf_len = te->len + TEXT_BUFFER_GROW;
		te->buf = Realloc(te->buf, te->buf_len);
	}

	/* The length was bumped above, so the new slot is the last one. */
	te->buf[te->len - 1] = c;
	return (1);
}