Encode String to HEX - javascript

i have my function to convert string to hex:
function encode(str){
str = encodeURIComponent(str).split('%').join('');
return str.toLowerCase();
}
example:
守护村子
alert(encode('守护村子'));
the output would be:
e5ae88e68aa4e69d91e5ad90
It works on Chinese characters. But when i do it with English letters
alert(encode('Hello World'));
it outputs:
hello20world
I have tried this for converting string to hex:
function String2Hex(tmp) {
var str = '';
for(var i = 0; i < tmp.length; i++) {
str += tmp[i].charCodeAt(0).toString(16);
}
return str;
}
then tried it on the Chinese characters above, but it outputs the UTF-8 HEX:
5b8862a467515b50
not the ANSI Hex:
e5ae88e68aa4e69d91e5ad90
I also have searched converting UFT8 to ANSI but no luck.
Anyone could help me? Thanks!

As a self-contained solution in functional style, you can encode with:
plain.split("")
.map(c => c.charCodeAt(0).toString(16).padStart(2, "0"))
.join("");
The split on an empty string produces an array with one character (or rather, one UTF-16 codepoint) in each element. Then we can map each to a HEX string of the character code.
Then to decode:
hex.split(/(\w\w)/g)
.filter(p => !!p)
.map(c => String.fromCharCode(parseInt(c, 16)))
.join("")
This time the regex passed to split captures groups of two characters, but this form of split will intersperse them with empty strings (the stuff "between" the captured groups, which is nothing!). So filter is used to remove the empty strings. Then map decodes each character.

On Node.js, you can do:
const myString = "This is my string to be encoded/decoded";
const encoded = Buffer.from(myString).toString('hex'); // encoded == 54686973206973206d7920737472696e6720746f20626520656e636f6465642f6465636f646564
const decoded = Buffer.from(encoded, 'hex').toString(); // decoded == "This is my string to be encoded/decoded"

I solved it by downloading utf8.js
https://github.com/mathiasbynens/utf8.js
then using the String2Hex function from above:
alert(String2Hex(utf8.encode('守护村子')));
It gives me the output I want:
e5ae88e68aa4e69d91e5ad90

This should work.
var str="some random string";
var result = "";
for (i=0; i<str.length; i++) {
hex = str.charCodeAt(i).toString(16);
result += ("000"+hex).slice(-4);
}

If you want to properly handle UTF8 strings you can try these:
function utf8ToHex(str) {
return Array.from(str).map(c =>
c.charCodeAt(0) < 128 ? c.charCodeAt(0).toString(16) :
encodeURIComponent(c).replace(/\%/g,'').toLowerCase()
).join('');
},
function hexToUtf8: function(hex) {
return decodeURIComponent('%' + hex.match(/.{1,2}/g).join('%'));
}
Demo: https://jsfiddle.net/lyquix/k2tjbrvq/

Another way to do it
function toHex(txt){
const encoder = new TextEncoder();
return Array
.from(encoder.encode(txt))
.map(b => b.toString(16).padStart(2, '0'))
.join('')
}

Related

How to encode html code or url to this format \u00253Cbr\....?

How to encode html code or url to this format?:
\u00253Cb\u002522\....
THANK YOU!
To convert URL to this you need to convert your URL (string) to unicode format
you can try following function for it.
// here toUnicode function is attached to string so that it can used further with
//other strings also
String.prototype.toUnicode = function(){
var result = "";
for(var i = 0; i < this.length; i++){
// Assumption: all characters are < 0xffff
result += "\\u" + ("000" + this[i].charCodeAt(0).toString(16)).substr(-4);
}
return result;
};
let name = "john";
name.toUnicode(); // this will return unicodes of strings
And on another side you can get the Unicode URL using window.location.href (which will be in unicode) you can convert back into string using following function.
// this function coverts the unicode to string char by char
// so you need to loop through the string and call this function a
//nd create resultant string
function unicodeToChar(text) {
return text.replace(/\\u[\dA-F]{4}/gi,
function (match) {
return String.fromCharCode(parseInt(match.replace(/\\u/g, ''), 16));
});
}

Javascript hexadecimal to ASCII with latin extended symbols

I am getting a hexadecimal value of my string that looks like this:
String has letters with diacritics: č,š,ř, ...
Hexadecimal value of this string is:
0053007400720069006E006700200068006100730020006C0065007400740065007200730020007700690074006800200064006900610063007200690074006900630073003A0020010D002C00200161002C00200159002C0020002E002E002E
The problem is that when i try to convert this value back to ascii it poorly converts the č,š,ř,.. and returns symbol of little box with question mark in it instead of these symbols.
My code for converting hex to ascii:
function convertHexadecimal(hexx){
let index = hexx.indexOf("~");
let strInfo = hexx.substring(0, index+1);
let strMessage = hexx.substring(index+1);
var hex = strMessage.toString();
var str = '';
for (var i = 0; i < hex.length; i += 2){
str += String.fromCharCode(parseInt(hex.substr(i, 2), 16));
}
console.log("Zpráva: " + str);
var strFinal = strInfo + str;
return strFinal;
}
Can somebody help me with this?
First an example solution:
let demoHex = `0053007400720069006E006700200068006100730020006C0065007400740065007200730020007700690074006800200064006900610063007200690074006900630073003A0020010D002C00200161002C00200159002C0020002E002E002E`;
function hexToString(hex) {
let str="";
for( var i = 0; i < hex.length; i +=4) {
str += String.fromCharCode( Number("0x" + hex.substr(i,4)));
}
return str;
}
console.log("Decoded string: %s", hexToString(demoHex) );
What it's doing:
It's treating the hex characters as a sequence of 4 hexadecimal digits that provide the UTF-16 character code of a character.
It gets each set of 4 digits in a loop using String.prototype.substr. Note MDN says .substr is deprecated but this is not mentioned in the ECMASript standard - rewrite it to use substring or something else as you wish.
Hex characters are prefixed with "0x" to make them a valid number representation in JavaScript and converted to a number object using Number. The number is then converted to a character string using the String.fromCharCode static method.
I guessed the format of the hex string by looking at it, which means a general purpose encoding routine to encode UTF16 characters (not code points) into hex could look like:
const hexEncodeUTF16 =
str=>str.split('')
.map( char => char.charCodeAt(0).toString(16).padStart(4,'0'))
.join('');
console.log( hexEncodeUTF16( "String has letters with diacritics: č, š, ř, ..."));
I hope these examples show what needs doing - there are any number of ways to implement it in code.

Writing a Hexadecimal Escape Character Sequence using Variables

Testing Hex Character Codes
Problem
What does a Vertical Tab or a Backspace character actually do? I want to find out.
My experiment is to find out exactly what happens when every hex character is put into a string. I thought the best way to do this would be to created a nested loop to go through each of the 16 hexadecimal characters to create each possible 2 digit hex character code.
I soon discovered that you cannot use the \x escape character with interpolated variables, and so I expect what I have set out to do might be impossible.
const hexCharacters = "0123456789ABCDEF";
let code = "";
let char1 = "";
let char2 = "";
for (charPos1 = 0; charPos1 < hexCharacters.length; charPos1++) {
for (charPos2 = 0; charPos2 < hexCharacters.length; charPos2++) {
char1 = hexCharacters[charPos1];
char2 = hexCharacters[charPos2];
code = `${char1}${char2}`;
printHexChar(code);
}
}
function printHexChar(string) {
let output = `<p>Hex Code ${string} = \x${string}</p>`; // THE PROBLEM IS CLEAR
document.write(output)
}
I know it will also probably fail once it gets past 7F or whichever is the last character in the set, but that's not the main issue here! :D
Potential solution
string.prototype.fromCharCode
This sort of string method approach would seem to be the answer, but it is meant for U-16 character codes, and that is not what I wanted to test. There doesn't seem to be an existing string method for hex codes. Probably because nobody would ever want one, but nevertheless it would be cool.
Conclusion
Is there any way to create an escape character sequence from assembled parts that will render not as plain text, but as a proper escape character sequence?
Apologies if this has been asked before in some form, but with my feeble understanding of things I just couldn't find an answer.
You can use String.fromCharCode with parseInt.
`<p>Hex Code ${string} = ${String.fromCharCode(parseInt(string, 16))}</p>`;
const hexCharacters = "0123456789ABCDEF";
let code = "";
let char1 = "";
let char2 = "";
for (charPos1 = 0; charPos1 < hexCharacters.length; charPos1++) {
for (charPos2 = 0; charPos2 < hexCharacters.length; charPos2++) {
char1 = hexCharacters[charPos1];
char2 = hexCharacters[charPos2];
code = `${char1}${char2}`;
printHexChar(code);
}
}
function printHexChar(string) {
let output = `<p>Hex Code ${string} = ${String.fromCharCode(parseInt(string, 16))}</p>`;
document.write(output)
}
eval works as well, though it should generally be avoided.
`<p>Hex Code ${string} = ${eval('"\\x'+string+'"')}</p>`
If you want to output \x literally, then in a string literal you need to escape the escape character, so `\\x`.
string.prototype.fromCharCode [...] is meant for U-16 character codes
JavaScript uses one character encoding. The following strings are all equal:
let a = String.fromCharCode(27);
let b = "\x1B";
let c = "\u001B";
console.log(a === b, b === c);
If I understand correctly, you want to produce a string literal that shows \x escape sequences -- not the actual character:
// Prepare string
let chars = Array.from({length: 128}, (_, i) => String.fromCharCode(i))
.join("");
// Escape them
let escaped = Array.from(chars, ch => `\\x${ch.charCodeAt().toString(16).padStart(2, "0")}`).join("");
console.log(escaped);
But you might also use JSON.stringify. Although it uses different escape sequences (\u instead of \x), and only for non-display characters, it will be the exact same string when evaluated. Here is a demo:
// Prepare string
let chars = Array.from({length: 128}, (_, i) => String.fromCharCode(i))
.join("");
// Escape them
let escaped = '"' + Array.from(chars, ch => `\\x${ch.charCodeAt().toString(16).padStart(2, "0")}`).join("") + '"';
console.log(escaped);
// Or JSONify them
let json = JSON.stringify(chars);
console.log(json);
// Compare them, when evaluated:
console.log(eval(escaped) === eval(json));
Finally, note that there is nothing special about hexadecimal: it is just a representation of an integer. In the end, it is the numerical value that is important, not the representation of it. It is that numerical value that corresponds to a character.
Addendum
If you prefer code that sticks to old-style JavaScript, here is something equivalent of the last code snippet:
// Prepare string
let chars = "";
for (let i = 0; i < 128; i++) {
chars += String.fromCharCode(i);
}
// Escape the characters in this string
let escaped = '"';
for (let i = 0; i < chars.length; i++) {
let ch = chars.charCodeAt(i);
let hex = ch.toString(16);
if (hex.length === 1) hex = "0" + hex;
escaped += "\\x" + hex;
}
escaped += '"';
console.log(escaped);
// Or JSONify them
let json = JSON.stringify(chars);
console.log(json);
// Compare them, when evaluated:
console.log(eval(escaped) === eval(json));

How to remove the (.) in a string and then display every alphabetic character with a dot

I would like to have a a function who takes a string a parameter, removes the dots, loops trough every alphabetic character and display like this (A.B.C.) when input is (A..B..C) for example.
How can I build this function?
Here for I have the next function in mind, unfortunately is not working I get a output result like this (hfff) when input string is "h..f.ff", would like to have this output (H.F.F.F)
function filter (initials) {
let result = initials.replace(/\./g, '')
let i;
for (i = 0; i < result.length; i++) {
result[i] + ".";
}
return result
console.log(result)
}
const initials = "h..f.ff"
console.log(filter(initials))
You could use split, map and join
function filter(initials) {
return initials.replace(/[^a-z]/gi, '') // Remove non-letters
.toUpperCase()
.split('') // Convert to an Array
.map(l => l + '.') // Add dots
.join(''); // Join
}
const initials = "h..f.ff";
console.log(filter(initials));
You need to assign this
result[i] + ".";
to something. Do:
let i,newresult;
for (i = 0; i < result.length; i++) {
newresult += result[i] + ".";
}
well you can:
make the string uppercase
split the string char-by-char
filter only letters
join using "."
function format(string){
return string.toUpperCase()
.split("")
.filter(c => /[a-zA-Z]/.test(c))
.join(".")
}
format("h..f.ff") // "H.F.F.F"
Use replaceAll to remove all . char with nothing.
Then from that string, make all letters uppercase.
From that string, split the whole word up into and array of letters using split (Each piece of the string on each side of the parameter passed to split gets turned into an element in a new array. if you leave the parameter blank, it's just each character in the string)
Finally join each those elements together with a . between them using join
function filter (initials) {
initials=initials.replaceAll('.','');
initials=initials.toUpperCase();
initials=initials.split('');
initials=initials.join('.');
return initials;
}
var test = "h..f..t....e.Z";
console.log(filter(test));
Thanks #blex for the snippet advice

Get first letter of each word in a string, in JavaScript

How would you go around to collect the first letter of each word in a string, as in to receive an abbreviation?
Input: "Java Script Object Notation"
Output: "JSON"
I think what you're looking for is the acronym of a supplied string.
var str = "Java Script Object Notation";
var matches = str.match(/\b(\w)/g); // ['J','S','O','N']
var acronym = matches.join(''); // JSON
console.log(acronym)
Note: this will fail for hyphenated/apostrophe'd words Help-me I'm Dieing will be HmImD. If that's not what you want, the split on space, grab first letter approach might be what you want.
Here's a quick example of that:
let str = "Java Script Object Notation";
let acronym = str.split(/\s/).reduce((response,word)=> response+=word.slice(0,1),'')
console.log(acronym);
I think you can do this with
'Aa Bb'.match(/\b\w/g).join('')
Explanation: Obtain all /g the alphanumeric characters \w that occur after a non-alphanumeric character (i.e: after a word boundary \b), put them on an array with .match() and join everything in a single string .join('')
Depending on what you want to do you can also consider simply selecting all the uppercase characters:
'JavaScript Object Notation'.match(/[A-Z]/g).join('')
Easiest way without regex
var abbr = "Java Script Object Notation".split(' ').map(function(item){return item[0]}).join('');
This is made very simple with ES6
string.split(' ').map(i => i.charAt(0)) //Inherit case of each letter
string.split(' ').map(i => i.charAt(0)).toUpperCase() //Uppercase each letter
string.split(' ').map(i => i.charAt(0)).toLowerCase() //lowercase each letter
This ONLY works with spaces or whatever is defined in the .split(' ') method
ie, .split(', ') .split('; '), etc.
string.split(' ') .map(i => i.charAt(0)) .toString() .toUpperCase().split(',')
To add to the great examples, you could do it like this in ES6
const x = "Java Script Object Notation".split(' ').map(x => x[0]).join('');
console.log(x); // JSON
and this works too but please ignore it, I went a bit nuts here :-)
const [j,s,o,n] = "Java Script Object Notation".split(' ').map(x => x[0]);
console.log(`${j}${s}${o}${n}`);
#BotNet flaw:
i think i solved it after excruciating 3 days of regular expressions tutorials:
==> I'm a an animal
(used to catch m of I'm) because of the word boundary, it seems to work for me that way.
/(\s|^)([a-z])/gi
Try -
var text = '';
var arr = "Java Script Object Notation".split(' ');
for(i=0;i<arr.length;i++) {
text += arr[i].substr(0,1)
}
alert(text);
Demo - http://jsfiddle.net/r2maQ/
Using map (from functional programming)
'use strict';
function acronym(words)
{
if (!words) { return ''; }
var first_letter = function(x){ if (x) { return x[0]; } else { return ''; }};
return words.split(' ').map(first_letter).join('');
}
Alternative 1:
you can also use this regex to return an array of the first letter of every word
/(?<=(\s|^))[a-z]/gi
(?<=(\s|^)) is called positive lookbehind which make sure the element in our search pattern is preceded by (\s|^).
so, for your case:
// in case the input is lowercase & there's a word with apostrophe
const toAbbr = (str) => {
return str.match(/(?<=(\s|^))[a-z]/gi)
.join('')
.toUpperCase();
};
toAbbr("java script object notation"); //result JSON
(by the way, there are also negative lookbehind, positive lookahead, negative lookahead, if you want to learn more)
Alternative 2:
match all the words and use replace() method to replace them with the first letter of each word and ignore the space (the method will not mutate your original string)
// in case the input is lowercase & there's a word with apostrophe
const toAbbr = (str) => {
return str.replace(/(\S+)(\s*)/gi, (match, p1, p2) => p1[0].toUpperCase());
};
toAbbr("java script object notation"); //result JSON
// word = not space = \S+ = p1 (p1 is the first pattern)
// space = \s* = p2 (p2 is the second pattern)
It's important to trim the word before splitting it, otherwise, we'd lose some letters.
const getWordInitials = (word: string): string => {
const bits = word.trim().split(' ');
return bits
.map((bit) => bit.charAt(0))
.join('')
.toUpperCase();
};
$ getWordInitials("Java Script Object Notation")
$ "JSON"
How about this:
var str = "", abbr = "";
str = "Java Script Object Notation";
str = str.split(' ');
for (i = 0; i < str.length; i++) {
abbr += str[i].substr(0,1);
}
alert(abbr);
Working Example.
If you came here looking for how to do this that supports non-BMP characters that use surrogate pairs:
initials = str.split(' ')
.map(s => String.fromCodePoint(s.codePointAt(0) || '').toUpperCase())
.join('');
Works in all modern browsers with no polyfills (not IE though)
Getting first letter of any Unicode word in JavaScript is now easy with the ECMAScript 2018 standard:
/(?<!\p{L}\p{M}*)\p{L}/gu
This regex finds any Unicode letter (see the last \p{L}) that is not preceded with any other letter that can optionally have diacritic symbols (see the (?<!\p{L}\p{M}*) negative lookbehind where \p{M}* matches 0 or more diacritic chars). Note that u flag is compulsory here for the Unicode property classes (like \p{L}) to work correctly.
To emulate a fully Unicode-aware \b, you'd need to add a digit matching pattern and connector punctuation:
/(?<!\p{L}\p{M}*|[\p{N}\p{Pc}])\p{L}/gu
It works in Chrome, Firefox (since June 30, 2020), Node.js, and the majority of other environments (see the compatibility matrix here), for any natural language including Arabic.
Quick test:
const regex = /(?<!\p{L}\p{M}*)\p{L}/gu;
const string = "Żerard Łyżwiński";
// Extracting
console.log(string.match(regex)); // => [ "Ż", "Ł" ]
// Extracting and concatenating into string
console.log(string.match(regex).join("")) // => ŻŁ
// Removing
console.log(string.replace(regex, "")) // => erard yżwiński
// Enclosing (wrapping) with a tag
console.log(string.replace(regex, "<span>$&</span>")) // => <span>Ż</span>erard <span>Ł</span>yżwiński
console.log("_Łukasz 1Żukowski".match(/(?<!\p{L}\p{M}*|[\p{N}\p{Pc}])\p{L}/gu)); // => null
In ES6:
function getFirstCharacters(str) {
let result = [];
str.split(' ').map(word => word.charAt(0) != '' ? result.push(word.charAt(0)) : '');
return result;
}
const str1 = "Hello4 World65 123 !!";
const str2 = "123and 456 and 78-1";
const str3 = " Hello World !!";
console.log(getFirstCharacters(str1));
console.log(getFirstCharacters(str2));
console.log(getFirstCharacters(str3));
Output:
[ 'H', 'W', '1', '!' ]
[ '1', '4', 'a', '7' ]
[ 'H', 'W', '!' ]
This should do it.
var s = "Java Script Object Notation",
a = s.split(' '),
l = a.length,
i = 0,
n = "";
for (; i < l; ++i)
{
n += a[i].charAt(0);
}
console.log(n);
The regular expression versions for JavaScript is not compatible with Unicode on older than ECMAScript 6, so for those who want to support characters such as "å" will need to rely on non-regex versions of scripts.
Event when on version 6, you need to indicate Unicode with \u.
More details: https://mathiasbynens.be/notes/es6-unicode-regex
Yet another option using reduce function:
var value = "Java Script Object Notation";
var result = value.split(' ').reduce(function(previous, current){
return {v : previous.v + current[0]};
},{v:""});
$("#output").text(result.v);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<pre id="output"/>
This is similar to others, but (IMHO) a tad easier to read:
const getAcronym = title =>
title.split(' ')
.map(word => word[0])
.join('');
ES6 reduce way:
const initials = inputStr.split(' ').reduce((result, currentWord) =>
result + currentWord.charAt(0).toUpperCase(), '');
alert(initials);
Try This Function
const createUserName = function (name) {
const username = name
.toLowerCase()
.split(' ')
.map((elem) => elem[0])
.join('');
return username;
};
console.log(createUserName('Anisul Haque Bhuiyan'));

Categories

Resources