【深入 Base64】Javascript 实现 Base64 编解码(四)

前面 3 篇博客已经把 Base64 编码原理、字符编码等基础概念搞清楚了,这篇作为完结篇,我们来用 JS 实现 Base64 的编解码。

编码思路

因为 Base64 编码过程的基本单位是字节,但 JavaScript 中的字符并不全是单字节字符,也就是说我们直接读取字符串时,字符的编码值可能超出单字节(0x00–0xFF)的范围,无法按 Base64 的规则分组查表。我们的思路就是先把字符串进行转换,让 JavaScript 不会把多字节字符当成一个字符,这里就用到了前面提到的 unicode 转 utf-8。因为 JavaScript 字符串内部使用的是 UCS-2/UTF-16 编码,并不识别 utf-8 字节序列,只会一个字节一个字节地识别,正好达到了我们的目的。

然后就是字符的转换。这里主要是要注意一下字节数量问题,因为 Base64 的编码过程是 3 个字节为一组编码成 4 个字符,所以,如果最后剩余的字节少于 3 个,需要按照前面科普的编码规则用“=”补充完整。

代码实现

因为前面已经把基础弄清楚了,所以实现起来也不是很困难,直接上代码吧。

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>JavaScript 实现 Base64 编解码</title>
</head>
<body>
<div>
    <span>输入要编码的字符串</span>
    <input type="text" id="input">
</div>
<div>
    <span>utf-8 编码:</span>
    <span id="utf-8"></span>
</div>
<div>
    <span>utf-8 解码:</span>
    <span id="unicode"></span>
</div>
<div>
    <span>Base64 编码:</span>
    <span id="enBase64"></span>
</div>
<div>
    <span>Base64 解码:</span>
    <span id="deBase64"></span>
</div>
<script>
    // Standard Base64 alphabet: upper-case letters, lower-case letters, digits, then "+" and "/".
    var commonbase64EncodeChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
            "abcdefghijklmnopqrstuvwxyz" +
            "0123456789" +
            "+/";
    // Alphabet intended for URL encoding: identical except the last two symbols are "-" and "_".
    var urlBase64EncodeChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
            "abcdefghijklmnopqrstuvwxyz" +
            "0123456789" +
            "-_";

    /*
     Base64 works on bytes, so the two functions below operate on "byte strings":
     strings in which every char code is expected to lie in 0x00-0xFF (for example
     the output of unicode2utf8). Char codes are not validated here.
     */

    /**
     * Encode a byte string as Base64 using the commonbase64EncodeChars alphabet.
     *
     * @param {string} hexStr - Byte string; each char code is treated as one byte.
     * @returns {string} Base64 text, padded with "=" so its length is a multiple of 4.
     */
    function encodeBase64(hexStr){
        var len = hexStr.length,
                h1, h2, h3,
                outStr = "";

        // Consume the input three bytes per iteration; the last group may be short.
        for (var i = 0; i < len; i += 3) {
            h1 = hexStr.charCodeAt(i);
            // The first symbol is always the top 6 bits of the first byte.
            outStr += commonbase64EncodeChars.charAt(h1 >> 2);

            if (i + 1 >= len) { // one byte left: 2 symbols + "=="
                outStr += commonbase64EncodeChars.charAt((h1 << 4) & 0x3F);
                outStr += "==";
            } else if (i + 2 >= len) { // two bytes left: 3 symbols + "="
                h2 = hexStr.charCodeAt(i + 1);
                outStr += commonbase64EncodeChars.charAt(((h1 << 4) & 0x3F) | (h2 >> 4));
                outStr += commonbase64EncodeChars.charAt((h2 << 2) & 0x3F);
                outStr += "=";
            } else { // full group of three bytes: 4 symbols, no padding
                h2 = hexStr.charCodeAt(i + 1);
                h3 = hexStr.charCodeAt(i + 2);
                outStr += commonbase64EncodeChars.charAt(((h1 << 4) & 0x3F) | (h2 >> 4));
                outStr += commonbase64EncodeChars.charAt(((h2 << 2) & 0x3F) | (h3 >> 6));
                outStr += commonbase64EncodeChars.charAt(h3 & 0x3F);
            }
        }

        return outStr;
    }

    /**
     * Decode Base64 text into a byte string.
     *
     * Characters outside commonbase64EncodeChars (including "=" padding and
     * whitespace) are removed by filterBase64 first, so the number of symbols
     * left in the final group decides how many bytes it yields: 4 symbols -> 3
     * bytes, 3 -> 2, 2 -> 1. A lone trailing symbol carries only 6 bits and is
     * ignored.
     *
     * @param {string} str - Base64 text.
     * @returns {string} Byte string; each char code is one decoded byte.
     */
    function decodeBase64(str) {
        var filtered = filterBase64(str),
                len = filtered.length,
                h1, h2, h3, h4,
                outStr = "";

        for (var i = 0; i + 1 < len; i += 4) {
            h1 = commonbase64EncodeChars.indexOf(filtered.charAt(i));
            h2 = commonbase64EncodeChars.indexOf(filtered.charAt(i + 1));
            outStr += String.fromCharCode((h1 << 2) | (h2 >> 4));

            // Decide by position, not by value: a sextet of 0 ("A") is real data.
            if (i + 2 >= len) break;
            h3 = commonbase64EncodeChars.indexOf(filtered.charAt(i + 2));
            outStr += String.fromCharCode(((h2 << 4) & 0xF0) | (h3 >> 2));

            if (i + 3 >= len) break;
            h4 = commonbase64EncodeChars.indexOf(filtered.charAt(i + 3));
            outStr += String.fromCharCode(((h3 << 6) & 0xC0) | h4);
        }

        return outStr;
    }

    /**
     * Strip every character that is not part of commonbase64EncodeChars.
     *
     * @param {string} str - Text to clean.
     * @returns {string} The characters of str that appear in the alphabet, in their original order.
     */
    function filterBase64(str) {
        var kept = "";
        var total = str.length;
        var pos = 0;
        var symbol;

        while (pos < total) {
            symbol = str.charAt(pos);
            pos += 1;

            // Skip anything the alphabet does not contain (padding, whitespace, ...).
            if (commonbase64EncodeChars.indexOf(symbol) === -1) {
                continue;
            }
            kept += symbol;
        }

        return kept;
    }

    /**
     * Convert a JavaScript string into a UTF-8 "byte string": a string in which
     * each char code is one UTF-8 byte.
     *
     * The input is read by code point (codePointAt), so a surrogate pair becomes
     * a single four-byte sequence rather than two three-byte sequences.
     *
     * @param {string} str - Text to encode.
     * @returns {string} Byte string holding the UTF-8 encoding of str.
     */
    function unicode2utf8(str) {
        var codePoint,
            outStr = "",
            len = str.length;
        for (var i = 0; i < len; i++) {
            codePoint = str.codePointAt(i);

            if (codePoint <= 0x7F) { // one byte
                outStr += str.charAt(i);
            } else if (codePoint <= 0x7FF) { // two bytes
                outStr += String.fromCharCode(0xC0 | ((codePoint >> 6) & 0x1F));
                outStr += String.fromCharCode(0x80 | ((codePoint >> 0) & 0x3F));
            } else if (codePoint <= 0xFFFF) { // three bytes
                outStr += String.fromCharCode(0xE0 | ((codePoint >> 12) & 0xF));
                outStr += String.fromCharCode(0x80 | ((codePoint >> 6) & 0x3F));
                outStr += String.fromCharCode(0x80 | ((codePoint >> 0) & 0x3F));
            } else { // four bytes
                outStr += String.fromCharCode(0xF0 | ((codePoint >> 18) & 0x7));
                outStr += String.fromCharCode(0x80 | ((codePoint >> 12) & 0x3F));
                outStr += String.fromCharCode(0x80 | ((codePoint >> 6) & 0x3F));
                outStr += String.fromCharCode(0x80 | ((codePoint >> 0) & 0x3F));
                // The code point used two UTF-16 units; skip the low surrogate.
                i++;
            }
        }
        return outStr;
    }

    /**
     * Convert a UTF-8 byte string (one byte per char code) back into a regular
     * JavaScript string.
     *
     * The lead byte selects the sequence length: below 0xC0 the char is copied
     * through unchanged, below 0xE0 is two bytes, below 0xF0 is three bytes,
     * anything else is treated as four bytes. Continuation bytes are not
     * validated; a missing continuation byte contributes zero bits.
     *
     * @param {string} str - UTF-8 byte string.
     * @returns {string} Decoded text. A four-byte sequence whose value exceeds
     *     0x10FFFF is replaced with U+FFFD.
     */
    function utf82unicode(str) {
        var len = str.length,
            outStr = "",
            charCode,
            codePoint,
            h1, h2, h3, h4;

        for (var i = 0; i < len; i++) {
            charCode = str.charCodeAt(i);

            if (charCode < 0xC0) { // one byte
                outStr += str.charAt(i);
            } else if (charCode < 0xE0) { // two bytes
                h1 = (charCode & 0x1F) << 6;
                h2 = (str.charCodeAt(++i) & 0x3F) << 0;
                outStr += String.fromCharCode(h1 | h2);
            } else if (charCode < 0xF0) { // three bytes
                h1 = (charCode & 0xF) << 12;
                h2 = (str.charCodeAt(++i) & 0x3F) << 6;
                h3 = (str.charCodeAt(++i) & 0x3F) << 0;
                outStr += String.fromCharCode(h1 | h2 | h3);
            } else { // four bytes
                h1 = (charCode & 0x7) << 18;
                h2 = (str.charCodeAt(++i) & 0x3F) << 12;
                h3 = (str.charCodeAt(++i) & 0x3F) << 6;
                h4 = (str.charCodeAt(++i) & 0x3F) << 0;
                codePoint = h1 | h2 | h3 | h4;
                // fromCodePoint throws above 0x10FFFF, so substitute U+FFFD there.
                outStr += codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : "\uFFFD";
            }
        }
        return outStr;
    }

    // Refresh every output field whenever the text in #input changes.
    document.querySelector("#input").addEventListener("input", function () {
        // Put text into the element matched by the given selector.
        var show = function (selector, text) {
            document.querySelector(selector).innerText = text;
        };
        var utf8Bytes = unicode2utf8(this.value);

        // UTF-8 bytes as space-separated hexadecimal values.
        var hexDump = utf8Bytes
            .split('')
            .map(function (unit) {
                return unit.codePointAt().toString(16);
            })
            .join(' ');
        show("#utf-8", hexDump);

        // UTF-8 bytes converted back to text.
        show("#unicode", utf82unicode(utf8Bytes));

        // Base64 of the UTF-8 bytes, then the full round trip back to text.
        var base64Text = encodeBase64(utf8Bytes);
        show("#enBase64", base64Text);
        show("#deBase64", utf82unicode(decodeBase64(base64Text)));
    });
</script>
</body>
</html>