/*
  Copyright (c) 2007 MySQL AB, 2010 Sun Microsystems, Inc.
  Use is subject to license terms.

  The MySQL Connector/ODBC is licensed under the terms of the GPLv2
  <http://www.gnu.org/licenses/old-licenses/gpl-2.0.html>, like most
  MySQL Connectors. There are special exceptions to the terms and
  conditions of the GPLv2 as it is applied to this software, see the
  FLOSS License Exception
  <http://www.mysql.com/about/legal/licensing/foss-exception.html>.
  
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published
  by the Free Software Foundation; version 2 of the License.
  
  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  for more details.
  
  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
*/

/**
  @file  unicode_transcode.c
  @brief Unicode transcoding functions. Raw conversions.
*/

#ifndef ODBCTAP
# include "stringutil.h"
#else
typedef unsigned int UTF32;
typedef unsigned short UTF16;
typedef unsigned char UTF8;
#endif

/**
  Convert UTF-16 code unit(s) to a UTF-32 character. For characters in the
  Basic Multilingual Plane, one UTF-16 code unit maps to one UTF-32 character,
  but characters in other planes may require two UTF-16 code units.

  @param[in] i  Pointer to UTF-16 code units
  @param[in] u  Pointer to UTF-32 character

  @return Number of UTF-16 code units consumed.
*/
#if 0
int utf16toutf32(UTF16 *i, UTF32 *u)
{
  if (*i >= 0xd800 && *i <= 0xdbff)
  {
    *u= 0x10000 | ((*i++ & 0x3ff) << 10);
    if (*i < 0xdc00 || *i > 0xdfff) /* invalid */
      return 0;
    *u|= *i & 0x3ff;
    return 2;
  }
  else
  {
    *u= *i;
    return 1;
  }
}
#else
int utf16toutf32(UTF16 *i, UTF32 *u)
{
  if (*i >= 0xd800 && *i <= 0xdbff)
  {
    *u= 0x10000 + ((*i++ - 0xd800) << 10);
    if (*i < 0xdc00 || *i > 0xdfff) /* invalid */
      return 0;
    *u+= *i - 0xdc00;
    return 2;
  }
  else
  {
    *u= *i;
    return 1;
  }
}
#endif

/**
  Convert UTF-32 character to UTF-16 code unit(s).

  @param[in] i  UTF-32 character
  @param[in] u  Pointer to UTF-16 code units

  @return Number of UTF-16 code units produced.
*/
#if 0
int utf32toutf16(UTF32 i, UTF16 *u)
{
  if (i < 0xffff)
  {
    *u= (UTF16)(i & 0xffff);
    return 1;
  }
  else if(i < 0x10ffff)
  {
    i-= 0x10000;
    *u++= 0xd800 | (i >> 10);
    *u= 0xdc00 | (i & 0x3ff);
    return 2;
  }
  return 0;
}
#else
int utf32toutf16(UTF32 i, UTF16 *u)
{
  if (i <= 0xffff)
  {
    *u= (UTF16)(i & 0xffff);
    return 1;
  }
  else if(i <= 0x10ffff)
  {
    i-= 0x10000;
    *u++= 0xd800 + (i >> 10);
    *u= 0xdc00 + (i & 0x3ff);
    return 2;
  }
  return 0;
}
#endif
/**
  Convert UTF-8 octets to a UTF-32 character. It may take up to four
  UTF-8 octets to encode one UTF-32 character.

  @param[in] i  Pointer to UTF-8 octets
  @param[in] u  Pointer to UTF-32 character

  @return Number of UTF-8 octets consumed, or 0 if an invalid character was
  encountered.
*/
int utf8toutf32(UTF8 *i, UTF32 *u)
{
  int len, x;

  if (*i < 0x80)
  {
    *u= *i;
    return 1;
  }
  else if (*i < 0xe0)
  {
    len= 2;
    *u= *i & 0x1f;
  }
  else if (*i < 0xf0)
  {
    len= 3;
    *u= *i & 0x0f;
  }
  else
  {
    len= 4;
    *u= *i & 0x07;
  }

  x= len;
  while (--x)
  {
    *u<<= 6;
    *u|= *++i & 0x3f;
    if (*i >> 6 != 2) /* invalid */
      return 0;
  }

  return len;
}


/**
  Convert a UTF-32 character into UTF-8 octets. It may take four UTF-8
  octets to encode one UTF-32 character.

  @param[in] i  UTF-32 characer
  @param[in] u  Pointer to UTF-8 octets

  @return Number of UTF-8 octets produced.
*/
int utf32toutf8(UTF32 i, UTF8 *c)
{
  int len= 0, x;

  if (i < 0x80)
  {
    *c= (UTF8)(i & 0x7f);
    return 1;
  }
  else if (i < 0x800)
  {
    *c++= (3 << 6) | (i >> 6);
    len= 2;
  }
  else if (i < 0x10000)
  {
    *c++= (7 << 5) | (i >> 12);
    len= 3;
  }
  else if (i <= 0x10ffff)
  {
    *c++= (0xf << 4) | (i >> 18);
    len= 4;
  }

  x= len;
  if (x)
    while (--x)
    {
      *c++= (1 << 7) | ((i >> (6 * (x - 1))) & 0x3f);
    }

  return len;
}


#ifdef UCTEST

#include <assert.h>
#include <string.h>
#include <stdio.h>

typedef struct {
  UTF8 u8[4];
  UTF32 u32;
  int cnt;
} t_8_32;

typedef struct {
  UTF16 u16[2];
  UTF32 u32;
  int cnt;
} t_16_32;

void t1()
{
  int i, j;
  t_8_32 t1[]= {
      {{0, 0, 0, 0}, 0, 1},
      {{0x3c, 0, 0, 0}, 0x3c, 1},
      {{0xc3, 0xbe, 0, 0}, 0xfe, 2},
      {{0xe0, 0xa4, 0x96, 0}, 0x916, 3},
      {{0xf0, 0x90, 0x85, 0xad}, 0x1016d, 4},
      {{0xf0, 0xa1, 0xa1, 0xa3}, 0x21863, 4},
      {{0xf0, 0xaa, 0x9b, 0x96}, 0x2a6d6, 4},
      {{0xf4, 0x8f, 0xbf, 0xbf}, 0x10ffff, 4}
  };
  printf("***** T1 -> utf32<->utf8 *****\n");
  for (i= 0; i < sizeof(t1) / sizeof(t_8_32); ++i)
  {
    int cnt;
    t_8_32 t= t1[i];
    UTF8 res[4];
    UTF32 resu;
    memset(res, 0, 4);
    printf("Convert %x\n", t.u32);
    cnt= utf32toutf8(t.u32, res);
    assert(cnt == t.cnt);
    for (j= 0; j < 4; ++j)
    {
      printf("Res[%d] = 0x%x (expect 0x%x)\n", j, res[j], t.u8[j]);
      assert(res[j] == t.u8[j]);
    }
    printf("Ok. Now back\n");
    cnt= utf8toutf32(t.u8, &resu);
    printf("ResU = %x\n", resu);
    assert(cnt == t.cnt);
    assert(resu == t.u32);
  }
}

void t2()
{
  int i, j;
  t_16_32 t1[]= {
      {{0, 0}, 0, 1},
      {{0x7a, 0}, 0x7a, 1},
      {{0x6c34, 0}, 0x6c34, 1},
      {{0xd800, 0xdc00}, 0x10000, 2},
      {{0xd834, 0xdd1e}, 0x1d11e, 2},
      {{0xd840, 0xdc06}, 0x20006, 2},
      {{0xd846, 0xde34}, 0x21a34, 2},
      {{0xd869, 0xded6}, 0x2a6d6, 2},
      {{0xdbff, 0xdfff}, 0x10ffff, 2}
  };
  printf("***** T2 -> utf32<->utf16 *****\n");
  for (i= 0; i < sizeof(t1) / sizeof(t_16_32); ++i)
  {
    int cnt;
    t_16_32 t= t1[i];
    UTF16 res[2];
    UTF32 resu;
    memset(res, 0, 2 * 2);
    printf("Convert %x\n", t.u32);
    cnt= utf32toutf16(t.u32, res);
    assert(cnt == t.cnt);
    for (j = 0; j < 2; ++j)
    {
      printf("Res[%d] = 0x%x (expect 0x%x)\n", j, res[j], t.u16[j]);
      assert(res[j] == t.u16[j]);
    }
    printf("Ok. Now back\n");
    cnt= utf16toutf32(t.u16, &resu);
    printf("ResU = %x\n", resu);
    assert(cnt == t.cnt);
    assert(resu == t.u32);
  }
}

int main(int argc, char **argv)
{
  t1();
  t2();
  return 0;
}
#endif /* UCTEST */