=== modified file 'mysys/base64.c'
--- mysys/base64.c	2009-03-20 14:27:53 +0000
+++ mysys/base64.c	2010-10-05 10:00:52 +0000
@@ -20,9 +20,43 @@
 
 #ifndef MAIN
 
-static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-                             "abcdefghijklmnopqrstuvwxyz"
-                             "0123456789+/";
+static char base64_encode_table[]=
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+  "abcdefghijklmnopqrstuvwxyz"
+  "0123456789+/";
+/*
+  Generated by the following script:
+#!/usr/bin/perl -w
+my $enc = '';
+$enc .= $_ for 'A'..'Z', 'a'..'z', '0'..'9', '+', '/';
+print "static signed char base64_decode_table[256]=\n{\n";
+for my $i (0..255) {
+  print " " if $i % 16 == 0;
+  my $str = chr($i);
+  printf ' %2d,', ($enc =~ /\Q$str\E/) ? length($`) : $str eq '=' ? -2 : -1;
+  print "\n" if $i % 16 == 15;
+}
+print "};\n";
+*/
+static signed char base64_decode_table[256]= /* signed: -1/-2 are sentinels */
+{
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
+  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
+  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
 
 
 int
@@ -41,7 +75,14 @@ base64_needed_encoded_length(int length_
 int
 base64_needed_decoded_length(int length_of_encoded_data)
 {
-  return (int) ceil(length_of_encoded_data * 3 / 4);
+  /*
+    Each 4 bytes encoded gives 3 bytes decoded. A correctly encoded
+    string has length that is a multiple of 4, plus extra whitespace,
+    so we can use truncated division.  An incorrectly encoded string
+    may have a length that is not a multiple of 4, but in that case,
+    the decoder will discard the extra bytes.
+  */
+  return (length_of_encoded_data * 3) / 4;
 }
 
 
@@ -81,18 +122,18 @@ base64_encode(const void *src, size_t sr
       c += s[i];
     i++;
 
-    *dst++= base64_table[(c >> 18) & 0x3f];
-    *dst++= base64_table[(c >> 12) & 0x3f];
+    *dst++= base64_encode_table[(c >> 18) & 0x3f];
+    *dst++= base64_encode_table[(c >> 12) & 0x3f];
 
     if (i > (src_len + 1))
       *dst++= '=';
     else
-      *dst++= base64_table[(c >> 6) & 0x3f];
+      *dst++= base64_encode_table[(c >> 6) & 0x3f];
 
     if (i > src_len)
       *dst++= '=';
     else
-      *dst++= base64_table[(c >> 0) & 0x3f];
+      *dst++= base64_encode_table[(c >> 0) & 0x3f];
   }
   *dst= '\0';
 
@@ -100,130 +141,151 @@ base64_encode(const void *src, size_t sr
 }
 
 
-static inline uint
-pos(unsigned char c)
-{
-  return (uint) (strchr(base64_table, c) - base64_table);
-}
-
-
-#define SKIP_SPACE(src, i, size)                                \
-{                                                               \
-  while (i < size && my_isspace(&my_charset_latin1, * src))     \
-  {                                                             \
-    i++;                                                        \
-    src++;                                                      \
-  }                                                             \
-  if (i == size)                                                \
-  {                                                             \
-    break;                                                      \
-  }                                                             \
-}
-
-
-/*
+/**
   Decode a base64 string
 
-  SYNOPSIS
-    base64_decode()
-    src      Pointer to base64-encoded string
-    len      Length of string at 'src'
-    dst      Pointer to location where decoded data will be stored
-    end_ptr  Pointer to variable that will refer to the character
-             after the end of the encoded data that were decoded. Can
-             be NULL.
-
-  DESCRIPTION
-
-    The base64-encoded data in the range ['src','*end_ptr') will be
-    decoded and stored starting at 'dst'.  The decoding will stop
-    after 'len' characters have been read from 'src', or when padding
-    occurs in the base64-encoded data. In either case: if 'end_ptr' is
-    non-null, '*end_ptr' will be set to point to the character after
-    the last read character, even in the presence of error.
+  base64-encoded data starting at 'src' will be decoded and stored
+  starting at 'dst'.  The decoding will stop after 'len' characters
+  have been read from 'src', or when padding occurs in the
+  base64-encoded data. In either case: if 'end_ptr' is non-null,
+  '*end_ptr' will be set to point to the character after the last read
+  character, even in the presence of error.
+
+  @param src Pointer to base64-encoded string
+  @param len Length of string at 'src'
+  @param dst Pointer to location where decoded data will be stored
+  @param end_ptr Pointer to variable that will refer to the character
+  after the end of the encoded data that were decoded. Can
+  be NULL.
 
-  NOTE
-    We require that 'dst' is pre-allocated to correct size.
+  @note We require that 'dst' is pre-allocated to correct size.
 
-  SEE ALSO
-    base64_needed_decoded_length().
+  @see base64_needed_decoded_length().
 
-  RETURN VALUE
-    Number of bytes written at 'dst' or -1 in case of failure
+  @return Number of bytes written at 'dst' on success, or
+  -1 - (number of bytes written) on failure.
 */
 int
-base64_decode(const char *src_base, size_t len,
+base64_decode(const char *src, size_t len,
               void *dst, const char **end_ptr)
 {
-  char b[3];
-  size_t i= 0;
-  char *dst_base= (char *)dst;
-  char const *src= src_base;
-  char *d= dst_base;
-  size_t j;
+  char *d= (char *)dst;
+  char const *s= src;
+  char const *src_end= src + len;
+  int have_error= 0;
+  int three_chars;
 
-  while (i < len)
-  {
-    unsigned c= 0;
-    size_t mark= 0;
-
-    SKIP_SPACE(src, i, len);
-
-    c += pos(*src++);
-    c <<= 6;
-    i++;
+  /*
+    Algorithm: Read four bytes at a time from the input and write
+    three bytes to output. If there are any errors, all three bytes
+    are skipped and not written to the output. Possible errors are:
+    - invalid character, i.e. [^A-Za-z0-9+/]
+    - input ends in the middle of the group of four characters
+    - '=' characters, except in the patterns 'XX==' or 'XXX='.
+  */
 
-    SKIP_SPACE(src, i, len);
+  /*
+    Read one character from 's' and decode it into 'c'.
 
-    c += pos(*src++);
-    c <<= 6;
-    i++;
+    Whitespace before the character is skipped, and s is increased so
+    that it points after the last character read.
 
-    SKIP_SPACE(src, i, len);
+    If a valid base64 character is found, the low 6 bits of 'c' are
+    set to the decoded value of the character. If an equality sign is
+    found, 'c' is set to -2. If an invalid character is found, 'c' is
+    set to -1. If end-of-string is found before any non-whitespace
+    character is found, does 'goto END_LABEL'.
+  */
+#define DECODE_CHAR(END_LABEL)                                          \
+  {                                                                     \
+    while (s < src_end && my_isspace(&my_charset_latin1, *s))           \
+      s++;                                                              \
+    if (s == src_end)                                                   \
+      goto END_LABEL;                                                   \
+    c= base64_decode_table[(unsigned char) *s]; /* index is 0..255 */   \
+    s++;                                                                \
+  }
 
-    if (*src != '=')
-      c += pos(*src++);
-    else
-    {
-      src += 2; /* There should be two bytes padding */
-      i= len;
-      mark= 2;
-      c <<= 6;
-      goto end;
-    }
-    c <<= 6;
-    i++;
+  /*
+    Accumulate the character decoded by DECODE_CHAR in three_chars.
+    Whitespace before the character is skipped.
 
-    SKIP_SPACE(src, i, len);
+    If the character is invalid, does goto error. If the character is
+    '=', does goto EQ_LABEL.
+  */
+#define HANDLE_CHAR(EQ_LABEL)                                           \
+  {                                                                     \
+    if (unlikely(c < 0))                                                \
+    {                                                                   \
+      /* invalid char: return failure */                                \
+      if (c == -1)                                                      \
+        goto error;                                                     \
+      /* '=' char: what we do depends on which character we decode */   \
+      if (c == -2)                                                      \
+        goto EQ_LABEL;                                                  \
+    }                                                                   \
+    three_chars= (three_chars << 6) | c;                                \
+  }
 
-    if (*src != '=')
-      c += pos(*src++);
-    else
-    {
-      src += 1; /* There should be one byte padding */
-      i= len;
-      mark= 1;
-      goto end;
-    }
-    i++;
+  while (1)
+  {
+    int c;
+    three_chars= 0;
 
-  end:
-    b[0]= (c >> 16) & 0xff;
-    b[1]= (c >> 8) & 0xff;
-    b[2]= (c >> 0) & 0xff;
+    // No char: all is fine, decoded string length divisible by 3. goto end.
+    DECODE_CHAR(end);
+    // Equality char: malformed input, goto error.
+    HANDLE_CHAR(error);
+
+    // No char: truncated input, goto error.
+    DECODE_CHAR(error);
+    // Equality char: malformed input, goto error.
+    HANDLE_CHAR(error);
+
+    // No char: truncated input, goto error.
+    DECODE_CHAR(error);
+    // Equality char: decoded string length % 3 == 1. goto write_one_byte
+    HANDLE_CHAR(write_one_byte);
+
+    // No char: truncated input, goto error.
+    DECODE_CHAR(error);
+    // Equality char: decoded string length % 3 == 2. goto write_two_bytes
+    HANDLE_CHAR(write_two_bytes);
+
+    *d++= (three_chars >> 16) & 0xff;
+    *d++= (three_chars >> 8) & 0xff;
+    *d++= three_chars & 0xff;
+  }
 
-    for (j=0; j<3-mark; j++)
-      *d++= b[j];
+write_one_byte:
+  {
+    // We have seen: 'XX='  We expect one more '='.  If we find that,
+    // save one byte in *d; otherwise goto error.
+    int c;
+    DECODE_CHAR(error);
+    if (c != -2)
+      goto error;
+    *d++= (three_chars >> 4) & 0xff;  /* 12 bits read: 8 data + 4 pad */
+    goto end;
   }
 
-  if (end_ptr != NULL)
-    *end_ptr= src;
+write_two_bytes:
+  // We have seen: 'XXX='.  Save the two bytes in *d.
+  *d++= (three_chars >> 10) & 0xff;  /* 18 bits read: 16 data + 2 pad */
+  *d++= (three_chars >> 2) & 0xff;
+  goto end;
+
+error:
+  have_error= 1;
+  /* fall through */
 
-  /*
-    The variable 'i' is set to 'len' when padding has been read, so it
-    does not actually reflect the number of bytes read from 'src'.
-  */
-  return i != len ? -1 : (int) (d - dst_base);
+end:
+  if (end_ptr != NULL)
+    *end_ptr= s;
+  if (have_error)
+    return -(int) (d - (char *)dst) - 1;
+  else
+    return (int) (d - (char *)dst);
 }
 
 