I am building an app with authenticates the user against a sharepoint site which uses NTLM authentication. I found the ntlm.js which has been patched for nativescript here https://github.com/hdeshev/nativescript-ntlm-demo.
I have managed to get it working for android platform, but it fails on ios showing an 401 error. As far as I can tell, the difference happens in this segment:
Ntlm.setCredentials = function(domain, username, password) {
var magic = 'KGS!##$%'; // Create LM password hash.
var lmPassword = password.toUpperCase().substr(0, 14);
while (lmPassword.length < 14) lmPassword += '\0';
var key1 = Ntlm.createKey(lmPassword);
var key2 = Ntlm.createKey(lmPassword.substr(7));
var lmHashedPassword = des(key1, magic, 1, 0) + des(key2, magic, 1, 0);
var ntPassword = ''; // Create NT password hash.
for (var i = 0; i < password.length; i++)
ntPassword += password.charAt(i) + '\0';
var ntHashedPassword = str_md4(ntPassword);
Ntlm.domain = domain;
Ntlm.username = username;
Ntlm.lmHashedPassword = lmHashedPassword;
Ntlm.ntHashedPassword = ntHashedPassword;
};
When I log the result of 'lmhashedPassword' after going through the des() function, it simply returns 'A'. Whereas on android, it returns a longer string. Something in the des function must be cutting it off, but I cannot see what.
Here is the des function:
function des (key, message, encrypt, mode, iv, padding) {
//declaring this locally speeds things up a bit
var spfunction1 = new Array (0x1010400,0,0x10000,0x1010404,0x1010004,0x10404,0x4,0x10000,0x400,0x1010400,0x1010404,0x400,0x1000404,0x1010004,0x1000000,0x4,0x404,0x1000400,0x1000400,0x10400,0x10400,0x1010000,0x1010000,0x1000404,0x10004,0x1000004,0x1000004,0x10004,0,0x404,0x10404,0x1000000,0x10000,0x1010404,0x4,0x1010000,0x1010400,0x1000000,0x1000000,0x400,0x1010004,0x10000,0x10400,0x1000004,0x400,0x4,0x1000404,0x10404,0x1010404,0x10004,0x1010000,0x1000404,0x1000004,0x404,0x10404,0x1010400,0x404,0x1000400,0x1000400,0,0x10004,0x10400,0,0x1010004);
var spfunction2 = new Array (-0x7fef7fe0,-0x7fff8000,0x8000,0x108020,0x100000,0x20,-0x7fefffe0,-0x7fff7fe0,-0x7fffffe0,-0x7fef7fe0,-0x7fef8000,-0x80000000,-0x7fff8000,0x100000,0x20,-0x7fefffe0,0x108000,0x100020,-0x7fff7fe0,0,-0x80000000,0x8000,0x108020,-0x7ff00000,0x100020,-0x7fffffe0,0,0x108000,0x8020,-0x7fef8000,-0x7ff00000,0x8020,0,0x108020,-0x7fefffe0,0x100000,-0x7fff7fe0,-0x7ff00000,-0x7fef8000,0x8000,-0x7ff00000,-0x7fff8000,0x20,-0x7fef7fe0,0x108020,0x20,0x8000,-0x80000000,0x8020,-0x7fef8000,0x100000,-0x7fffffe0,0x100020,-0x7fff7fe0,-0x7fffffe0,0x100020,0x108000,0,-0x7fff8000,0x8020,-0x80000000,-0x7fefffe0,-0x7fef7fe0,0x108000);
var spfunction3 = new Array (0x208,0x8020200,0,0x8020008,0x8000200,0,0x20208,0x8000200,0x20008,0x8000008,0x8000008,0x20000,0x8020208,0x20008,0x8020000,0x208,0x8000000,0x8,0x8020200,0x200,0x20200,0x8020000,0x8020008,0x20208,0x8000208,0x20200,0x20000,0x8000208,0x8,0x8020208,0x200,0x8000000,0x8020200,0x8000000,0x20008,0x208,0x20000,0x8020200,0x8000200,0,0x200,0x20008,0x8020208,0x8000200,0x8000008,0x200,0,0x8020008,0x8000208,0x20000,0x8000000,0x8020208,0x8,0x20208,0x20200,0x8000008,0x8020000,0x8000208,0x208,0x8020000,0x20208,0x8,0x8020008,0x20200);
var spfunction4 = new Array (0x802001,0x2081,0x2081,0x80,0x802080,0x800081,0x800001,0x2001,0,0x802000,0x802000,0x802081,0x81,0,0x800080,0x800001,0x1,0x2000,0x800000,0x802001,0x80,0x800000,0x2001,0x2080,0x800081,0x1,0x2080,0x800080,0x2000,0x802080,0x802081,0x81,0x800080,0x800001,0x802000,0x802081,0x81,0,0,0x802000,0x2080,0x800080,0x800081,0x1,0x802001,0x2081,0x2081,0x80,0x802081,0x81,0x1,0x2000,0x800001,0x2001,0x802080,0x800081,0x2001,0x2080,0x800000,0x802001,0x80,0x800000,0x2000,0x802080);
var spfunction5 = new Array (0x100,0x2080100,0x2080000,0x42000100,0x80000,0x100,0x40000000,0x2080000,0x40080100,0x80000,0x2000100,0x40080100,0x42000100,0x42080000,0x80100,0x40000000,0x2000000,0x40080000,0x40080000,0,0x40000100,0x42080100,0x42080100,0x2000100,0x42080000,0x40000100,0,0x42000000,0x2080100,0x2000000,0x42000000,0x80100,0x80000,0x42000100,0x100,0x2000000,0x40000000,0x2080000,0x42000100,0x40080100,0x2000100,0x40000000,0x42080000,0x2080100,0x40080100,0x100,0x2000000,0x42080000,0x42080100,0x80100,0x42000000,0x42080100,0x2080000,0,0x40080000,0x42000000,0x80100,0x2000100,0x40000100,0x80000,0,0x40080000,0x2080100,0x40000100);
var spfunction6 = new Array (0x20000010,0x20400000,0x4000,0x20404010,0x20400000,0x10,0x20404010,0x400000,0x20004000,0x404010,0x400000,0x20000010,0x400010,0x20004000,0x20000000,0x4010,0,0x400010,0x20004010,0x4000,0x404000,0x20004010,0x10,0x20400010,0x20400010,0,0x404010,0x20404000,0x4010,0x404000,0x20404000,0x20000000,0x20004000,0x10,0x20400010,0x404000,0x20404010,0x400000,0x4010,0x20000010,0x400000,0x20004000,0x20000000,0x4010,0x20000010,0x20404010,0x404000,0x20400000,0x404010,0x20404000,0,0x20400010,0x10,0x4000,0x20400000,0x404010,0x4000,0x400010,0x20004010,0,0x20404000,0x20000000,0x400010,0x20004010);
var spfunction7 = new Array (0x200000,0x4200002,0x4000802,0,0x800,0x4000802,0x200802,0x4200800,0x4200802,0x200000,0,0x4000002,0x2,0x4000000,0x4200002,0x802,0x4000800,0x200802,0x200002,0x4000800,0x4000002,0x4200000,0x4200800,0x200002,0x4200000,0x800,0x802,0x4200802,0x200800,0x2,0x4000000,0x200800,0x4000000,0x200800,0x200000,0x4000802,0x4000802,0x4200002,0x4200002,0x2,0x200002,0x4000000,0x4000800,0x200000,0x4200800,0x802,0x200802,0x4200800,0x802,0x4000002,0x4200802,0x4200000,0x200800,0,0x2,0x4200802,0,0x200802,0x4200000,0x800,0x4000002,0x4000800,0x800,0x200002);
var spfunction8 = new Array (0x10001040,0x1000,0x40000,0x10041040,0x10000000,0x10001040,0x40,0x10000000,0x40040,0x10040000,0x10041040,0x41000,0x10041000,0x41040,0x1000,0x40,0x10040000,0x10000040,0x10001000,0x1040,0x41000,0x40040,0x10040040,0x10041000,0x1040,0,0,0x10040040,0x10000040,0x10001000,0x41040,0x40000,0x41040,0x40000,0x10041000,0x1000,0x40,0x10040040,0x1000,0x41040,0x10001000,0x40,0x10000040,0x10040000,0x10040040,0x10000000,0x40000,0x10001040,0,0x10041040,0x40040,0x10000040,0x10040000,0x10001000,0x10001040,0,0x10041040,0x41000,0x41000,0x1040,0x1040,0x40040,0x10000000,0x10041000);
//create the 16 or 48 subkeys we will need
var keys = des_createKeys (key);
var m=0, i, j, temp, temp2, right1, right2, left, right, looping;
var cbcleft, cbcleft2, cbcright, cbcright2
var endloop, loopinc;
var len = message.length;
var chunk = 0;
//set up the loops for single and triple des
var iterations = keys.length == 32 ? 3 : 9; //single or triple des
if (iterations == 3) {looping = encrypt ? new Array (0, 32, 2) : new Array (30, -2, -2);}
else {looping = encrypt ? new Array (0, 32, 2, 62, 30, -2, 64, 96, 2) : new Array (94, 62, -2, 32, 64, 2, 30, -2, -2);}
//pad the message depending on the padding parameter
if (padding == 2) message += " "; //pad the message with spaces
else if (padding == 1) {temp = 8-(len%8); message += String.fromCharCode (temp,temp,temp,temp,temp,temp,temp,temp); if (temp==8) len+=8;} //PKCS7 padding
else if (!padding) message += "\0\0\0\0\0\0\0\0"; //pad the message out with null bytes
//store the result here
result = "";
tempresult = "";
if (mode == 1) { //CBC mode
cbcleft = (iv.charCodeAt(m++) << 24) | (iv.charCodeAt(m++) << 16) | (iv.charCodeAt(m++) << 8) | iv.charCodeAt(m++);
cbcright = (iv.charCodeAt(m++) << 24) | (iv.charCodeAt(m++) << 16) | (iv.charCodeAt(m++) << 8) | iv.charCodeAt(m++);
m=0;
}
//loop through each 64 bit chunk of the message
while (m < len) {
left = (message.charCodeAt(m++) << 24) | (message.charCodeAt(m++) << 16) | (message.charCodeAt(m++) << 8) | message.charCodeAt(m++);
right = (message.charCodeAt(m++) << 24) | (message.charCodeAt(m++) << 16) | (message.charCodeAt(m++) << 8) | message.charCodeAt(m++);
//for Cipher Block Chaining mode, xor the message with the previous result
if (mode == 1) {if (encrypt) {left ^= cbcleft; right ^= cbcright;} else {cbcleft2 = cbcleft; cbcright2 = cbcright; cbcleft = left; cbcright = right;}}
//first each 64 but chunk of the message must be permuted according to IP
temp = ((left >>> 4) ^ right) & 0x0f0f0f0f; right ^= temp; left ^= (temp << 4);
temp = ((left >>> 16) ^ right) & 0x0000ffff; right ^= temp; left ^= (temp << 16);
temp = ((right >>> 2) ^ left) & 0x33333333; left ^= temp; right ^= (temp << 2);
temp = ((right >>> 8) ^ left) & 0x00ff00ff; left ^= temp; right ^= (temp << 8);
temp = ((left >>> 1) ^ right) & 0x55555555; right ^= temp; left ^= (temp << 1);
left = ((left << 1) | (left >>> 31));
right = ((right << 1) | (right >>> 31));
//do this either 1 or 3 times for each chunk of the message
for (j=0; j<iterations; j+=3) {
endloop = looping[j+1];
loopinc = looping[j+2];
//now go through and perform the encryption or decryption
for (i=looping[j]; i!=endloop; i+=loopinc) { //for efficiency
right1 = right ^ keys[i];
right2 = ((right >>> 4) | (right << 28)) ^ keys[i+1];
//the result is attained by passing these bytes through the S selection functions
temp = left;
left = right;
right = temp ^ (spfunction2[(right1 >>> 24) & 0x3f] | spfunction4[(right1 >>> 16) & 0x3f]
| spfunction6[(right1 >>> 8) & 0x3f] | spfunction8[right1 & 0x3f]
| spfunction1[(right2 >>> 24) & 0x3f] | spfunction3[(right2 >>> 16) & 0x3f]
| spfunction5[(right2 >>> 8) & 0x3f] | spfunction7[right2 & 0x3f]);
}
temp = left; left = right; right = temp; //unreverse left and right
} //for either 1 or 3 iterations
//move then each one bit to the right
left = ((left >>> 1) | (left << 31));
right = ((right >>> 1) | (right << 31));
//now perform IP-1, which is IP in the opposite direction
temp = ((left >>> 1) ^ right) & 0x55555555; right ^= temp; left ^= (temp << 1);
temp = ((right >>> 8) ^ left) & 0x00ff00ff; left ^= temp; right ^= (temp << 8);
temp = ((right >>> 2) ^ left) & 0x33333333; left ^= temp; right ^= (temp << 2);
temp = ((left >>> 16) ^ right) & 0x0000ffff; right ^= temp; left ^= (temp << 16);
temp = ((left >>> 4) ^ right) & 0x0f0f0f0f; right ^= temp; left ^= (temp << 4);
//for Cipher Block Chaining mode, xor the message with the previous result
if (mode == 1) {if (encrypt) {cbcleft = left; cbcright = right;} else {left ^= cbcleft2; right ^= cbcright2;}}
tempresult += String.fromCharCode ((left>>>24), ((left>>>16) & 0xff), ((left>>>8) & 0xff), (left & 0xff), (right>>>24), ((right>>>16) & 0xff), ((right>>>8) & 0xff), (right & 0xff));
chunk += 8;
if (chunk == 512) {result += tempresult; tempresult = ""; chunk = 0;}
} //for every 8 characters, or 64 bits in the message
//return the result as an array
return result + tempresult;
} //end of des
In case it may be relevant, I have changed the way the request is made too. When the user clicks login, the following promise is called:
Ntlm.login('url')
.then(() => {
console.log('Success');
appSettings.setString('token', 'abc123');
this.router.navigate(['/ilt']);
})
.catch(error => {
console.log('Failed');
appSettings.remove('token');
alert('Failed! ' + error );
})
I created a new login function in the ntlm.js file:
Ntlm.login = function(url) {
return new Promise((resolve, reject) => {
if (!Ntlm.domain || !Ntlm.username || !Ntlm.lmHashedPassword || !Ntlm.ntHashedPassword) {
Ntlm.error('No NTLM credentials specified. Use Ntlm.setCredentials(...) before making calls.');
}
var hostname = Ntlm.getLocation(url).hostname;
var msg1 = Ntlm.createMessage1(hostname);
var request = new XMLHttpRequest();
request.onload = function() {
var response = request.getResponseHeader('WWW-Authenticate');
var challenge = Ntlm.getChallenge(response);
var msg3 = Ntlm.createMessage3(challenge, hostname);
request.open('GET', url, false);
var authorization = 'NTLM ' + msg3.toBase64();
request.setRequestHeader('Authorization', authorization);
request.onload = function() {
if (request.readyState == 4 && request.status == 200) {
resolve(request.status);
}
else if (request.readyState == 4 && request.status != 200) {
reject(request.status);
}
};
request.send(null);
};
request.open('GET', url, false);
request.setRequestHeader('Authorization', 'NTLM ' + msg1.toBase64());
request.send(null);
})
};
This is all working fine on the Android version, just cant understand why it isnt on ios. Very frustrating! If anyone can make sense of this, I would be eternally grateful. I realise it is a lot of code and quite niche area!
Many thanks,
UPDATE
I think there may be a difference in the way console.log behaves in Android and iOS, which could explain some of the missing characters. I created a new test account (testuser / testing), and logged various points to try and establish what was happening in the NTLM process step by step. Here are the logs for android:
NTLM WALKTHROUGH ON ANDROID
Step 1: Creates a cryptographic hash of the users password:
lmHashedPassword = -UE}{}*ªÓ´5µî
ntHashedPassword = |SÏ¥ê}�;�� ûQ£õ
Step 2: Sends first request to the server, with the following Authorisation header:
NTLM TlRMTVNTUAABAAAAA7IAAAUABQBEAAAAJAAkACAAAABHQVRFV0FZLlNUUEFVTFNDQVRIT0xJQ0NPTExFR0UuQ08uVUtBRE1JTg==
Step 3: Server sends a challenge back to client:
¡2#�³Q%Ï
Step 4: Client encrypts this challenge with the hash of the users password and sends back to server (response).
The Authorization header is: NTLM TlRMTVNTUAADAAAAGAAYAKQAAAAYABgAvAAAAAoACgBAAAAAEgASAEoAAABIAEgAXAAAAAAAAADUAAAAAYIAAEEARABNAEkATgB0AGUAcwB0AHMAdABhAGYAZgBHAEEAVABFAFcAQQBZAC4AUwBUAFAAQQBVAEwAUwBDAEEAVABIAE8ATABJAEMAQwBPAEwATABFAEcARQAuAEMATwAuAFUASwBsEslcvTQhhY3+RgKtqufBzFrmufFKNkAHXJRcA6ThOAU105+NJBGnsn2ri6Ziuv8=
Step 5: Now the server has sent the username, challenge and response to the Domain Controller.
The DC compares and returns status of: 200
Here are the logs for iOS:
NTLM WALKTHROUGH ON IOS
Step 1: Creates a cryptographic hash of the users password:
lmHashedPassword = -UE}{}*ªÓ´5µî
ntHashedPassword = |SÏ¥ê}�;�� ûQ£õ
Step 2: Sends the first request to the server, with the following Authorisation header:
NTLM TlRMTVNTUAABAAAAA7IAAAUABQBEAAAAJAAkACAAAABHQVRFV0FZLlNUUEFVTFNDQVRIT0xJQ0NPTExFR0UuQ08uVUtBRE1JTg==
Step 3: Server sends a challenge back to client:
q�v¹,
Step 4: Client encrypts this challenge with the hash of the users password and sends back to server (response).
The Authorization header is: NTLM TlRMTVNTUAADAAAAGAAYAKQAAAAYABgAvAAAAAoACgBAAAAAEgASAEoAAABIAEgAXAAAAAAAAADUAAAAAYIAAEEARABNAEkATgB0AGUAcwB0AHMAdABhAGYAZgBHAEEAVABFAFcAQQBZAC4AUwBUAFAAQQBVAEwAUwBDAEEAVABIAE8ATABJAEMAQwBPAEwATABFAEcARQAuAEMATwAuAFUASwAP9HN5WjPCs9hMRrmttnYHieFrThwyUAWanKWtVdzOqDOJ2isUdQeV0ISmv9TT0ek=
Step 5: Now the server has sent the username, challenge and response to the Domain Controller.
The DC compares returns status of: 401
Seems the credentials are worked out the same, and then the challenge returned from the server is random. But on iOS, the challenge seems to be missing characters - possibly due to the type of characters. The client then encrypts the challenge with the hashed passwords and sends back to the server. I imagine it might be this part which is not correct on iOS.
The error in the title is thrown only in Google Chrome, according to my tests. I'm base64 encoding a big XML file so that it can be downloaded:
this.loader.src = "data:application/x-forcedownload;base64,"+
btoa("<?xml version=\"1.0\" encoding=\"utf-8\"?>"
+"<"+this.gamesave.tagName+">"
+this.xml.firstChild.innerHTML
+"</"+this.gamesave.tagName+">");
this.loader is hidden iframe.
This error is actually quite a change because normally, Google Chrome would crash upon btoa call. Mozilla Firefox has no problems here, so the issue is browser related.
I'm not aware of any strange characters in file. Actually I do believe there are no non-ascii characters.
Q:
How do I find the problematic characters and replace them so that Chrome stops complaining?
I have tried to use Downloadify to initiate the download, but it does not work. It's unreliable and throws no errors to allow debug.
If you have UTF8, use this (actually works with SVG source), like:
btoa(unescape(encodeURIComponent(str)))
example:
var imgsrc = 'data:image/svg+xml;base64,' + btoa(unescape(encodeURIComponent(markup)));
var img = new Image(1, 1); // width, height values are optional params
img.src = imgsrc;
If you need to decode that base64, use this:
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
Example:
var str = "äöüÄÖÜçéèñ";
var b64 = window.btoa(unescape(encodeURIComponent(str)))
console.log(b64);
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
Note: if you need to get this to work in mobile-safari, you might need to strip all the white-space from the base64 data...
function b64_to_utf8( str ) {
str = str.replace(/\s/g, '');
return decodeURIComponent(escape(window.atob( str )));
}
2017 Update
This problem has been bugging me again.
The simple truth is, atob doesn't really handle UTF8-strings - it's ASCII only.
Also, I wouldn't use bloatware like js-base64.
But webtoolkit does have a small, nice and very maintainable implementation:
/**
*
* Base64 encode / decode
* http://www.webtoolkit.info
*
**/
var Base64 = {
// private property
_keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
// public method for encoding
, encode: function (input)
{
var output = "";
var chr1, chr2, chr3, enc1, enc2, enc3, enc4;
var i = 0;
input = Base64._utf8_encode(input);
while (i < input.length)
{
chr1 = input.charCodeAt(i++);
chr2 = input.charCodeAt(i++);
chr3 = input.charCodeAt(i++);
enc1 = chr1 >> 2;
enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
enc4 = chr3 & 63;
if (isNaN(chr2))
{
enc3 = enc4 = 64;
}
else if (isNaN(chr3))
{
enc4 = 64;
}
output = output +
this._keyStr.charAt(enc1) + this._keyStr.charAt(enc2) +
this._keyStr.charAt(enc3) + this._keyStr.charAt(enc4);
} // Whend
return output;
} // End Function encode
// public method for decoding
,decode: function (input)
{
var output = "";
var chr1, chr2, chr3;
var enc1, enc2, enc3, enc4;
var i = 0;
input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
while (i < input.length)
{
enc1 = this._keyStr.indexOf(input.charAt(i++));
enc2 = this._keyStr.indexOf(input.charAt(i++));
enc3 = this._keyStr.indexOf(input.charAt(i++));
enc4 = this._keyStr.indexOf(input.charAt(i++));
chr1 = (enc1 << 2) | (enc2 >> 4);
chr2 = ((enc2 & 15) << 4) | (enc3 >> 2);
chr3 = ((enc3 & 3) << 6) | enc4;
output = output + String.fromCharCode(chr1);
if (enc3 != 64)
{
output = output + String.fromCharCode(chr2);
}
if (enc4 != 64)
{
output = output + String.fromCharCode(chr3);
}
} // Whend
output = Base64._utf8_decode(output);
return output;
} // End Function decode
// private method for UTF-8 encoding
,_utf8_encode: function (string)
{
var utftext = "";
string = string.replace(/\r\n/g, "\n");
for (var n = 0; n < string.length; n++)
{
var c = string.charCodeAt(n);
if (c < 128)
{
utftext += String.fromCharCode(c);
}
else if ((c > 127) && (c < 2048))
{
utftext += String.fromCharCode((c >> 6) | 192);
utftext += String.fromCharCode((c & 63) | 128);
}
else
{
utftext += String.fromCharCode((c >> 12) | 224);
utftext += String.fromCharCode(((c >> 6) & 63) | 128);
utftext += String.fromCharCode((c & 63) | 128);
}
} // Next n
return utftext;
} // End Function _utf8_encode
// private method for UTF-8 decoding
,_utf8_decode: function (utftext)
{
var string = "";
var i = 0;
var c, c1, c2, c3;
c = c1 = c2 = 0;
while (i < utftext.length)
{
c = utftext.charCodeAt(i);
if (c < 128)
{
string += String.fromCharCode(c);
i++;
}
else if ((c > 191) && (c < 224))
{
c2 = utftext.charCodeAt(i + 1);
string += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
i += 2;
}
else
{
c2 = utftext.charCodeAt(i + 1);
c3 = utftext.charCodeAt(i + 2);
string += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
i += 3;
}
} // Whend
return string;
} // End Function _utf8_decode
}
https://www.fileformat.info/info/unicode/utf8.htm
For any character equal to or below 127 (hex 0x7F), the UTF-8
representation is one byte. It is just the lowest 7 bits of the full
unicode value. This is also the same as the ASCII value.
For characters equal to or below 2047 (hex 0x07FF), the UTF-8
representation is spread across two bytes. The first byte will have
the two high bits set and the third bit clear (i.e. 0xC2 to 0xDF). The
second byte will have the top bit set and the second bit clear (i.e.
0x80 to 0xBF).
For all characters equal to or greater than 2048 but less than 65535
(0xFFFF), the UTF-8 representation is spread across three bytes.
Use a library instead
We don't have to reinvent the wheel. Just use a library to save the time and headache.
js-base64
https://github.com/dankogai/js-base64 is good and I confirm it supports unicode very well.
Base64.encode('dankogai'); // ZGFua29nYWk=
Base64.encode('小飼弾'); // 5bCP6aO85by+
Base64.encodeURI('小飼弾'); // 5bCP6aO85by-
Base64.decode('ZGFua29nYWk='); // dankogai
Base64.decode('5bCP6aO85by+'); // 小飼弾
// note .decodeURI() is unnecessary since it accepts both flavors
Base64.decode('5bCP6aO85by-'); // 小飼弾
Using btoa with unescape and encodeURIComponent didn't work for me. Replacing all the special characters with XML/HTML entities and then converting to the base64 representation was the only way to solve this issue for me. Some code:
base64 = btoa(str.replace(/[\u00A0-\u2666]/g, function(c) {
return '&#' + c.charCodeAt(0) + ';';
}));
I just thought I should share how I actually solved the problem and why I think this is the right solution (provided you don't optimize for old browser).
Converting data to dataURL (data: ...)
var blob = new Blob(
// I'm using page innerHTML as data
// note that you can use the array
// to concatenate many long strings EFFICIENTLY
[document.body.innerHTML],
// Mime type is important for data url
{type : 'text/html'}
);
// This FileReader works asynchronously, so it doesn't lag
// the web application
var a = new FileReader();
a.onload = function(e) {
// Capture result here
console.log(e.target.result);
};
a.readAsDataURL(blob);
Allowing user to save data
Apart from obvious solution - opening new window with your dataURL as URL you can do two other things.
1. Use fileSaver.js
File saver can create actual fileSave dialog with predefined filename. It can also fallback to normal dataURL approach.
2. Use (experimental) URL.createObjectURL
This is great for reusing base64 encoded data. It creates a short URL for your dataURL:
console.log(URL.createObjectURL(blob));
//Prints: blob:http://stackoverflow.com/7c18953f-f5f8-41d2-abf5-e9cbced9bc42
Don't forget to use the URL including the leading blob prefix. I used document.body again:
You can use this short URL as AJAX target, <script> source or <a> href location. You're responsible for destroying the URL though:
URL.revokeObjectURL('blob:http://stackoverflow.com/7c18953f-f5f8-41d2-abf5-e9cbced9bc42')
As an complement to Stefan Steiger answer: (as it doesn't look nice as a comment)
Extending String prototype:
String.prototype.b64encode = function() {
return btoa(unescape(encodeURIComponent(this)));
};
String.prototype.b64decode = function() {
return decodeURIComponent(escape(atob(this)));
};
Usage:
var str = "äöüÄÖÜçéèñ";
var encoded = str.b64encode();
console.log( encoded.b64decode() );
NOTE:
As stated in the comments, using unescape is not recommended as it may be removed in the future:
Warning: Although unescape() is not strictly deprecated (as in "removed from the Web standards"), it is defined in Annex B of the ECMA-262 standard, whose introduction states:
… All of the language features and behaviours specified in this annex have one or more undesirable characteristics and in the absence of legacy usage would be removed from this specification.
Note: Do not use unescape to decode URIs, use decodeURI or decodeURIComponent instead.
btoa() only support characters from String.fromCodePoint(0) up to String.fromCodePoint(255). For Base64 characters with a code point 256 or higher you need to encode/decode these before and after.
And in this point it becomes tricky...
Every possible sign are arranged in a Unicode-Table. The Unicode-Table is divided in different planes (languages, math symbols, and so on...). Every sign in a plane has a unique code point number. Theoretically, the number can become arbitrarily large.
A computer stores the data in bytes (8 bit, hexadecimal 0x00 - 0xff, binary 00000000 - 11111111, decimal 0 - 255). This range normally use to save basic characters (Latin1 range).
For characters with higher codepoint then 255 exist different encodings. JavaScript use 16 bits per sign (UTF-16), the string called DOMString. Unicode can handle code points up to 0x10fffff. That means, that a method must be exist to store several bits over several cells away.
String.fromCodePoint(0x10000).length == 2
UTF-16 use surrogate pairs to store 20bits in two 16bit cells. The first higher surrogate begins with 110110xxxxxxxxxx, the lower second one with 110111xxxxxxxxxx. Unicode reserved own planes for this: https://unicode-table.com/de/#high-surrogates
To store characters in bytes (Latin1 range) standardized procedures use UTF-8.
Sorry to say that, but I think there is no other way to implement this function self.
function stringToUTF8(str)
{
let bytes = [];
for(let character of str)
{
let code = character.codePointAt(0);
if(code <= 127)
{
let byte1 = code;
bytes.push(byte1);
}
else if(code <= 2047)
{
let byte1 = 0xC0 | (code >> 6);
let byte2 = 0x80 | (code & 0x3F);
bytes.push(byte1, byte2);
}
else if(code <= 65535)
{
let byte1 = 0xE0 | (code >> 12);
let byte2 = 0x80 | ((code >> 6) & 0x3F);
let byte3 = 0x80 | (code & 0x3F);
bytes.push(byte1, byte2, byte3);
}
else if(code <= 2097151)
{
let byte1 = 0xF0 | (code >> 18);
let byte2 = 0x80 | ((code >> 12) & 0x3F);
let byte3 = 0x80 | ((code >> 6) & 0x3F);
let byte4 = 0x80 | (code & 0x3F);
bytes.push(byte1, byte2, byte3, byte4);
}
}
return bytes;
}
function utf8ToString(bytes, fallback)
{
let valid = undefined;
let codePoint = undefined;
let codeBlocks = [0, 0, 0, 0];
let result = "";
for(let offset = 0; offset < bytes.length; offset++)
{
let byte = bytes[offset];
if((byte & 0x80) == 0x00)
{
codeBlocks[0] = byte & 0x7F;
codePoint = codeBlocks[0];
}
else if((byte & 0xE0) == 0xC0)
{
codeBlocks[0] = byte & 0x1F;
byte = bytes[++offset];
if(offset >= bytes.length || (byte & 0xC0) != 0x80) { valid = false; break; }
codeBlocks[1] = byte & 0x3F;
codePoint = (codeBlocks[0] << 6) + codeBlocks[1];
}
else if((byte & 0xF0) == 0xE0)
{
codeBlocks[0] = byte & 0xF;
for(let blockIndex = 1; blockIndex <= 2; blockIndex++)
{
byte = bytes[++offset];
if(offset >= bytes.length || (byte & 0xC0) != 0x80) { valid = false; break; }
codeBlocks[blockIndex] = byte & 0x3F;
}
if(valid === false) { break; }
codePoint = (codeBlocks[0] << 12) + (codeBlocks[1] << 6) + codeBlocks[2];
}
else if((byte & 0xF8) == 0xF0)
{
codeBlocks[0] = byte & 0x7;
for(let blockIndex = 1; blockIndex <= 3; blockIndex++)
{
byte = bytes[++offset];
if(offset >= bytes.length || (byte & 0xC0) != 0x80) { valid = false; break; }
codeBlocks[blockIndex] = byte & 0x3F;
}
if(valid === false) { break; }
codePoint = (codeBlocks[0] << 18) + (codeBlocks[1] << 12) + (codeBlocks[2] << 6) + (codeBlocks[3]);
}
else
{
valid = false; break;
}
result += String.fromCodePoint(codePoint);
}
if(valid === false)
{
if(!fallback)
{
throw new TypeError("Malformed utf-8 encoding.");
}
result = "";
for(let offset = 0; offset != bytes.length; offset++)
{
result += String.fromCharCode(bytes[offset] & 0xFF);
}
}
return result;
}
function decodeBase64(text, binary)
{
if(/[^0-9a-zA-Z\+\/\=]/.test(text)) { throw new TypeError("The string to be decoded contains characters outside of the valid base64 range."); }
let codePointA = 'A'.codePointAt(0);
let codePointZ = 'Z'.codePointAt(0);
let codePointa = 'a'.codePointAt(0);
let codePointz = 'z'.codePointAt(0);
let codePointZero = '0'.codePointAt(0);
let codePointNine = '9'.codePointAt(0);
let codePointPlus = '+'.codePointAt(0);
let codePointSlash = '/'.codePointAt(0);
function getCodeFromKey(key)
{
let keyCode = key.codePointAt(0);
if(keyCode >= codePointA && keyCode <= codePointZ)
{
return keyCode - codePointA;
}
else if(keyCode >= codePointa && keyCode <= codePointz)
{
return keyCode + 26 - codePointa;
}
else if(keyCode >= codePointZero && keyCode <= codePointNine)
{
return keyCode + 52 - codePointZero;
}
else if(keyCode == codePointPlus)
{
return 62;
}
else if(keyCode == codePointSlash)
{
return 63;
}
return undefined;
}
let codes = Array.from(text).map(character => getCodeFromKey(character));
let bytesLength = Math.ceil(codes.length / 4) * 3;
if(codes[codes.length - 2] == undefined) { bytesLength = bytesLength - 2; } else if(codes[codes.length - 1] == undefined) { bytesLength--; }
let bytes = new Uint8Array(bytesLength);
for(let offset = 0, index = 0; offset < bytes.length;)
{
let code1 = codes[index++];
let code2 = codes[index++];
let code3 = codes[index++];
let code4 = codes[index++];
let byte1 = (code1 << 2) | (code2 >> 4);
let byte2 = ((code2 & 0xf) << 4) | (code3 >> 2);
let byte3 = ((code3 & 0x3) << 6) | code4;
bytes[offset++] = byte1;
bytes[offset++] = byte2;
bytes[offset++] = byte3;
}
if(binary) { return bytes; }
return utf8ToString(bytes, true);
}
function encodeBase64(bytes) {
if (bytes === undefined || bytes === null) {
return '';
}
if (bytes instanceof Array) {
bytes = bytes.filter(item => {
return Number.isFinite(item) && item >= 0 && item <= 255;
});
}
if (
!(
bytes instanceof Uint8Array ||
bytes instanceof Uint8ClampedArray ||
bytes instanceof Array
)
) {
if (typeof bytes === 'string') {
const str = bytes;
bytes = Array.from(unescape(encodeURIComponent(str))).map(ch =>
ch.codePointAt(0)
);
} else {
throw new TypeError('bytes must be of type Uint8Array or String.');
}
}
const keys = [
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z',
'a',
'b',
'c',
'd',
'e',
'f',
'g',
'h',
'i',
'j',
'k',
'l',
'm',
'n',
'o',
'p',
'q',
'r',
's',
't',
'u',
'v',
'w',
'x',
'y',
'z',
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9',
'+',
'/'
];
const fillKey = '=';
let byte1;
let byte2;
let byte3;
let sign1 = ' ';
let sign2 = ' ';
let sign3 = ' ';
let sign4 = ' ';
let result = '';
for (let index = 0; index < bytes.length; ) {
let fillUpAt = 0;
// tslint:disable:no-increment-decrement
byte1 = bytes[index++];
byte2 = bytes[index++];
byte3 = bytes[index++];
if (byte2 === undefined) {
byte2 = 0;
fillUpAt = 2;
}
if (byte3 === undefined) {
byte3 = 0;
if (!fillUpAt) {
fillUpAt = 3;
}
}
// tslint:disable:no-bitwise
sign1 = keys[byte1 >> 2];
sign2 = keys[((byte1 & 0x3) << 4) + (byte2 >> 4)];
sign3 = keys[((byte2 & 0xf) << 2) + (byte3 >> 6)];
sign4 = keys[byte3 & 0x3f];
if (fillUpAt > 0) {
if (fillUpAt <= 2) {
sign3 = fillKey;
}
if (fillUpAt <= 3) {
sign4 = fillKey;
}
}
result += sign1 + sign2 + sign3 + sign4;
if (fillUpAt) {
break;
}
}
return result;
}
let base64 = encodeBase64("\u{1F604}"); // unicode code point escapes for smiley
let str = decodeBase64(base64);
console.log("base64", base64);
console.log("str", str);
document.body.innerText = str;
how to use it: decodeBase64(encodeBase64("\u{1F604}"))
demo: https://jsfiddle.net/qrLadeb8/
Another solution for browser without using unescape:
function ToBinary(str)
{
let result="";
str=encodeURIComponent(str);
for(let i=0;i<str.length;i++)
if(str[i]=="%")
{
result+=String.fromCharCode(parseInt(str.substring(i+1,i+3),16));
i+=2;
}
else
result+=str[i];
return result;
}
btoa(ToBinary("тест"));//0YLQtdGB0YI=
I just ran into this problem myself.
First, modify your code slightly:
var download = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
+"<"+this.gamesave.tagName+">"
+this.xml.firstChild.innerHTML
+"</"+this.gamesave.tagName+">";
this.loader.src = "data:application/x-forcedownload;base64,"+
btoa(download);
Then use your favorite web inspector, put a breakpoint on the line of code that assigns this.loader.src, then execute this code:
for (var i = 0; i < download.length; i++) {
if (download[i].charCodeAt(0) > 255) {
console.warn('found character ' + download[i].charCodeAt(0) + ' "' + download[i] + '" at position ' + i);
}
}
Depending on your application, replacing the characters that are out of range may or may not work, since you'll be modifying the data. See the note on MDN about unicode characters with the btoa method:
https://developer.mozilla.org/en-US/docs/Web/API/window.btoa
A solution that converts the string to utf-8, which is slightly shorter than the utf-16 or URLEncoded versions many of the other answers suggest. It's also more compatible with how other languages like python and PHP would decode the strings:
Encode
function btoa_utf8(value) {
return btoa(
String.fromCharCode(
...new TextEncoder('utf-8')
.encode(hash_value)
)
);
}
Decode
function atob_utf8(value) {
const value_latin1 = atob(value);
return new TextDecoder('utf-8').decode(
Uint8Array.from(
{ length: value_latin1.length },
(element, index) => value_latin1.charCodeAt(index)
)
)
}
You can replace the 'utf-8' string in either of these with a different character encoding if you prefer.
Note This depends on the TextEncoder class. This is supported in most browsers nowadays but if you need to target older browsers check if it's available.