How to compress string by JS? [duplicate] - javascript

This question already has answers here:
Find the characters in a string which are not duplicated
(29 answers)
Closed 2 years ago.
I'm trying to write a function that compresses a string (two equal adjacent letters become one). I don't understand how to save the progress of the compression (I tried different combinations with concat, without success). Every time, my for loop takes the full string again.
CodePen
// Sample input: runs of repeated digits that should each collapse to one character.
let pass = '1122333456';
// Attempt at collapsing adjacent duplicate characters.
// NOTE: the function has no return statement, and the working array is rebuilt from the
// untouched input on every iteration, so a removal never carries over to the next pass.
function CompressPass(string) {
for (let i = 0; i < string.length; i++) {
// Fresh copy of the full input each time round the loop.
let concisePassword = string.split('');
const item = string[i];
// undefined on the last iteration, because i + 1 is past the end of the string.
const nextItem = string[i + 1];
// Unary + converts the characters to numbers for logging only.
console.log(+item, +nextItem);
if (item === nextItem) {
// Removes one element from this iteration's copy; the copy is discarded afterwards.
concisePassword.splice(i, 1);
console.log(concisePassword);
} else {
console.log('not equal: ', +item, +nextItem);
}
};
}
CompressPass(pass);

maybe this help you
let pass = '11223322345666600222222';
/**
 * Collapses every run of identical adjacent characters into a single character.
 *
 * @param {string} string - Text to compress.
 * @returns {string} The text with each run reduced to one character ('' for '').
 */
function CompressPass (string) {
  let str = '';
  for (let i = 0; i < string.length; i++) {
    // A character is emitted only where its run ends. Comparing against the real
    // neighbour (undefined past the end) avoids appending a '-' sentinel, which
    // swallowed a trailing run of '-' characters in the input.
    if (string[i] !== string[i + 1]) {
      str += string[i];
    }
  }
  return str;
}
console.log(CompressPass(pass))

You can try 'lz-string' for compressing and decompressing strings in JavaScript.
** I used it for compressing local storage data (as local storage has only a 5MB limit).
** The savings may not be visible for shorter strings, but you can try it.
link: https://pieroxy.net/blog/pages/lz-string/index.html
example : (from this above-mentioned link )
<!-- Loads lz-string.js, which is expected to provide the LZString object used below. -->
<script language="javascript" src="lz-string.js"></script>
<script>
// Round-trip demo: report the sample's length, compress it, report the compressed
// length, then decompress and show the restored text.
var string = "This is my compression test.";
alert("Size of sample is: " + string.length);
var compressed = LZString.compress(string);
alert("Size of compressed sample is: " + compressed.length);
// Overwrites the sample with the decompressed result.
string = LZString.decompress(compressed);
alert("Sample is: " + string);
</script>

Related

How to optimize the code? (reverse string) [duplicate]

This question already has answers here:
Reversing string without affecting any special characters
(7 answers)
Reverse string without reversing special characters using Javascript
(3 answers)
Closed 1 year ago.
The task says : Change the order of words in a sentence but let non-alphanumerical characters stay in the same place e.g: 'Hello, it is world here.' -> "here, world is it Hello."
So far i have created something like this, but i need to optimize it (there are too many loops).
var text = 'Hello, it is world here.';
// Reverses the order of the alphanumeric words in text while keeping the runs of
// non-alphanumeric characters in their original order, then logs and returns the result.
function reverseFunc(text){
// match() returns null when text has no alphanumeric run; .reverse() is called on
// that result immediately, so such input throws here, before the null check below.
let alpha = text.match(/[a-z0-9]+/gi).reverse();
// Runs of separators (non-word characters and underscores); null when there are none.
let nonAlpha = text.match(/[\W_]+/gi);
let result = [];
console.log("Alpha : " + alpha)
console.log("Non alpha : " +nonAlpha)
// Only logs the text in this case; nothing is returned (the call yields undefined).
if(alpha == null || nonAlpha == null) console.log(text)
else if((/[a-z0-9]/i).test(text.split("")[0])){
// Text starts with a word: interleave word, separator, word, separator, ...
if (alpha.length == nonAlpha.length) {
for (let i = 0; i < nonAlpha.length; i++) {
result.push(alpha[i], nonAlpha[i]);
}
console.log(result.join(""))
return(result.join(""))
}else{
// Counts differ: iterate over the words; a missing separator is pushed as
// undefined, which join("") renders as an empty string.
for (let i = 0; i < alpha.length; i++) {
result.push(alpha[i], nonAlpha[i]);
}
console.log(result.join(""))
return(result.join(""))
}
}else{
// Text starts with a separator: interleave separator, word, separator, word, ...
if (nonAlpha.length == alpha.length) {
for (let i = 0; i < alpha.length; i++) {
result.push(nonAlpha[i], alpha[i]);
}
console.log(result.join(""))
return(result.join(""))
}else{
// Counts differ: iterate over the separators; a missing word is pushed as undefined.
for (let i = 0; i < nonAlpha.length; i++) {
result.push(nonAlpha[i], alpha[i]);
}
console.log(result.join(""))
return(result.join(""))
}
}
}
reverseFunc(text)
I also have a problem when the text is only 1 char long e.g ".", or " ". I tried this:
if(alpha == null || nonAlpha == null) console.log(text)
but it seems to work only for alphanumeric characters. How could this algorithm be corrected?

Perform a merge on two strings

I'm trying to build a collaborative doc editor and implement operational transformation. Imagine we have a string that is manipulated simultaneously by 2 users. They can only add characters, not remove them. We want to incorporate both of their changes.
The original string is: catspider
The first user does this: cat<span id>spider</span>
The second user does this: c<span id>atspi</span>der
I'm trying to write a function that will produce: c<span id>at<span id>spi</span>der</span>
The function I've written is close, but it produces c<span id>at<span i</span>d>spider</span> codepen here
// Adds a "splice" to strings: returns a new string with newSubStr inserted
// immediately before index start. The receiver itself is not modified.
String.prototype.splice = function(start, newSubStr) {
  const head = this.slice(0, start);
  const tail = this.slice(start);
  return head + newSubStr + tail;
};
// Merges two edited versions (saved, requested) of the common base string (working)
// by replaying the insertions each version made on top of the base.
// Returns the merged string, or false when any argument is falsy (including "").
// Relies on createDiff, insertNatX and the String.prototype.splice extension.
function merge(saved, working, requested) {
if (!saved || !working || !requested) {
return false;
}
// Each diff array holds, per base index, the characters inserted before that index.
var diffSavedWorking = createDiff(working, saved);
var diffRequestedWorking = createDiff(working, requested);
var newStr = working;
// The loop bound is re-evaluated every iteration, so it grows as insertNatX
// lengthens the diff arrays below.
for (var i = 0; i < Math.max(diffRequestedWorking.length, diffSavedWorking.length); i++) {
//splice does an insert `before` -- we will assume that the saved document characters
//should always appear before the requested document characters in this merger operation
//so we first insert requested and then saved, which means that the final string will have the
//original characters first.
if (diffRequestedWorking[i]) {
newStr = newStr.splice(i, diffRequestedWorking[i]);
//we need to update the merge arrays by the number of
//inserted characters.
var length = diffRequestedWorking[i].length;
insertNatX(diffSavedWorking, length, i + 1);
insertNatX(diffRequestedWorking, length, i + 1);
}
if (diffSavedWorking[i]) {
newStr = newStr.splice(i, diffSavedWorking[i]);
//we need to update the merge arrays by the number of
//inserted characters.
// Same function-scoped `length` variable as above, redeclared with var.
var length = diffSavedWorking[i].length;
insertNatX(diffSavedWorking, length, i + 1);
insertNatX(diffRequestedWorking, length, i + 1);
}
}
return newStr;
}
//arr1 should be the shorter array.
//returns inserted characters at their
//insertion index.
//
//arr1: base string; arr2: edited string that contains arr1's characters in order.
//result[i] is the text found in arr2 before the character matched to arr1[i];
//any text after the last match is stored at index arr1.length.
//Matching is greedy, character by character: an inserted character equal to the
//next base character is taken as that base character.
function createDiff(arr1, arr2) {
  var diff = [];
  var j = 0;
  for (var i = 0; i < arr1.length; i++) {
    diff[i] = "";
    //bounded by arr2.length: without this guard a base character that is missing
    //from arr2 made the loop run forever, appending "undefined" on every step.
    while (j < arr2.length && arr2[j] !== arr1[i]) {
      diff[i] += arr2[j];
      j++;
    }
    j++;
  }
  //slice replaces the deprecated substr; i equals arr1.length here.
  var remainder = arr2.slice(j);
  if (remainder) diff[i] = remainder;
  return diff;
}
// Inserts `length` empty strings into arr at index pos, shifting later entries right.
// Mutates arr in place and returns nothing.
function insertNatX(arr, length, pos) {
  var remaining = length;
  while (remaining > 0) {
    arr.splice(pos, 0, "");
    remaining--;
  }
}
// Demo input: `working` is the common base text; `saved` and `requested` are the two
// edited versions, each wrapping a different part of the base in a span.
var saved = 'cat<span id>spider</span>';
var working = 'catspider';
var requested = 'c<span id>atspi</span>der';
// Logs the merged result of the two edits.
console.log(merge(saved, working, requested));
Would appreciate any thoughts on a better / simpler way to achieve this.

Get the next key from array from string with symbols

I'm working on a problem that looks simple but is difficult for me right now. I'm used to working in jQuery, but I need this to be done in plain JavaScript.
So simple as it is, the user inputs a string lets say:
"hey, wanna hang today?". It should output the next character in my array, so it would be like this: "ifz, xboob iboh upebz?".
And I have tried everything I can come up with. Hopefully some of you guys see the problem right away.
I have set up a short jsFiddle that shows similar to what I got.
// Reads the text of the #str input and appends, for every character, the entry that
// follows it in alph to the #output element.
function gen() {
var str = document.getElementById('str').value,
output = document.getElementById('output');
// Lookup table; 'a' is repeated at the end so that the entry after 'å' is 'a'.
var alph = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','æ','ø','å','a'];
for (var i=0;i<str.length;i++) {
var index = str[i].charAt(0),
e = alph.indexOf(index);
// indexOf returns -1 for characters that are not in alph (spaces, punctuation,
// upper-case letters), so alph[e + 1] is alph[0], i.e. 'a', for all of them.
console.log(alph[e + 1]);
output.innerHTML += alph[e + 1];
}
}
If you only want to skip to next letter with those chars and leave the others like space and ? as they are:
// Characters that are not in alph (spaces, punctuation, ...) are copied through
// unchanged; every other character is replaced by the entry that follows it in alph.
var index = str[i].charAt(0);
var e = alph.indexOf(index);
output.innerHTML += (e === -1) ? index : alph[e + 1];
Update: using @David Thomas's method, you could do the following (it wouldn't work for 'å', though):
// The range test uses the code of the lower-cased character, so upper-case letters
// pass it as well; the +1 shift is then applied to the original character.
var index= str[i].toLowerCase().charCodeAt(0);
if((index > 96 && index < 123)){ // a to z
// There is no wrap-around: 'z' (122) becomes '{' (123) and 'Z' (90) becomes '[' (91).
output.innerHTML += String.fromCharCode(str[i].charCodeAt(0)+1);
}else{
// Everything outside a-z / A-Z is appended unchanged.
output.innerHTML += str[i];
}
}
I'd personally recommend the following approach, which should work with any alphabet for which there's a Unicode representation and, somewhat importantly, doesn't require a hard-coded array of letters/punctuation for each language:
// Shifts every UTF-16 code unit of the #str input up by one and shows the result as
// plain text in #output. No alphabet table is needed, and every character is shifted,
// including spaces and punctuation.
function gen() {
  var source = document.getElementById('str').value;
  var target = document.getElementById('output');
  target.textContent = source
    .split('')
    .map(function (unit) {
      return String.fromCharCode(unit.charCodeAt(0) + 1);
    })
    .join('');
}
// hey, wanna hang today? -> ifz-!xboob!iboh!upebz@
JS Fiddle demo.
References:
String.prototype.charCodeAt().
String.fromCharCode().
Why does gen(',') === 'a'?
// Demonstrates why a character that is missing from alph ends up as 'a'.
var alph = 'abcdefghijklmnopqrstuvwxyz';
// ',' is not part of alph, so indexOf reports "not found".
var e = alph.indexOf(',');
console.log(e);
// -1
// e + 1 is 0, so this reads the first character of alph.
console.log(alph[e + 1]);
// 'a'
You need to take this case into account; otherwise, any characters that aren't in alph will map to 'a'.
(I see that you've also duplicated 'a' at the start and end of alph. This works, though it's more common either to use the modulus operator % or to check explicitly if e === alph.length - 1.)
You just have to add an array with the non respected characters:
// Characters meant to be left untouched. NOTE(review): the loop shown below does not
// reference this array; it simply passes through anything that is not found in alph.
var ex = ['?','!',' ','%','$','&','/']
In whole
// Walks the input one character at a time: characters found in alph advance to the
// following alph entry, anything else is echoed unchanged.
for (var i = 0; i < str.length; i++) {
  var index = str[i].charAt(0);
  var e = alph.indexOf(index);
  if (e > -1) {
    output.innerHTML += alph[e + 1];
  } else {
    e = index;
    output.innerHTML += e;
  }
}
JSFIDDLE: http://jsfiddle.net/TRNCFRMCN/hs15f0kd/8/.

How to convert text to binary code in JavaScript?

I want JavaScript to translate text in a textarea into binary code.
For example, if a user types in "TEST" into the textarea, the value "01010100 01000101 01010011 01010100" should be returned.
I would like to avoid using a switch statement to assign each character a binary code value (e.g. case "T": return "01010100) or any other similar technique.
Here's a JSFiddle to show what I mean. Is this possible in native JavaScript?
What you should do is convert every char using charCodeAt function to get the Ascii Code in decimal. Then you can convert it to Binary value using toString(2):
// Reads the text of the #ti1 input and writes its binary form to #ti2: one group per
// UTF-16 code unit, left-padded with zeros to 8 digits, groups separated by a single
// space (e.g. "TEST" -> "01010100 01000101 01010011 01010100").
function convert() {
  var output = document.getElementById("ti2");
  var input = document.getElementById("ti1").value;
  var octets = [];
  for (var i = 0; i < input.length; i++) {
    // toString(2) drops leading zeros, so pad back to a full octet. Code units above
    // 255 are wider than 8 bits and are left at their natural width.
    octets.push(input.charCodeAt(i).toString(2).padStart(8, "0"));
  }
  // Joining avoids the trailing space that appending "+ ' '" per character produced.
  output.value = octets.join(" ");
}
<input id="ti1" value ="TEST"/>
<input id="ti2"/>
<button onClick="convert();">Convert!</button>
And here's a fiddle: http://jsfiddle.net/fA24Y/1/
This might be the simplest you can get:
/**
 * Converts text to space-separated binary groups, one per UTF-16 code unit.
 *
 * @param {string} string - Text to convert.
 * @returns {string} Groups left-padded with zeros to at least 8 digits.
 */
function text2Binary(string) {
  return string.split('').map(function (char) {
    // toString(2) omits leading zeros ('T' -> "1010100"); pad to a full octet.
    return char.charCodeAt(0).toString(2).padStart(8, '0');
  }).join(' ');
}
traverse the string
convert every character to their char code
convert the char code to binary
pad it on the left with 0s to 8 digits and push it into an array
return a string separated by space
Code:
/**
 * Converts text to space-separated binary groups, one per UTF-16 code unit.
 *
 * @param {string} text - Text to convert.
 * @returns {string} Groups left-padded with zeros to at least 8 digits.
 */
function textToBin(text) {
  var length = text.length,
      output = [];
  for (var i = 0; i < length; i++) {
    var bin = text[i].charCodeAt().toString(2);
    // padStart leaves wider values untouched; the previous Array(8 - len + 1) padding
    // threw a RangeError for code units that need more than 9 binary digits.
    output.push(bin.padStart(8, "0"));
  }
  return output.join(" ");
}
textToBin("!a") => "00100001 01100001"
Another way
/**
 * Converts text to space-separated binary groups, one per Unicode code point.
 *
 * @param {string} text - Text to convert.
 * @returns {string} Groups left-padded with zeros to at least 8 digits.
 */
function textToBin(text) {
  return (
    Array
      // Array.from splits by code point, so read the whole code point of each element;
      // charCodeAt() only saw the first half of a surrogate pair.
      .from(text, char => char.codePointAt(0).toString(2))
      // padStart never throws for wide values, unlike '0'.repeat(8 - length).
      .map(bin => bin.padStart(8, '0'))
      .join(' ')
  );
}
Here's a pretty generic, native implementation, that I wrote some time ago,
// ABC - a generic, native JS (A)scii(B)inary(C)onverter.
// (c) 2013 Stephan Schmitz <eyecatchup#gmail.com>
// License: MIT, http://eyecatchup.mit-license.org
// URL: https://gist.github.com/eyecatchup/6742657
var ABC = {
  // Decodes every group of eight binary digits (optionally surrounded by whitespace)
  // into the character with that code; other text is left as it is.
  toAscii: function(bin) {
    var octetPattern = /\s*[01]{8}\s*/g;
    return bin.replace(octetPattern, function(octet) {
      var code = parseInt(octet, 2);
      return String.fromCharCode(code);
    })
  },
  // Encodes each character as zero-padded binary. A space follows every group unless
  // spaceSeparatedOctets is loosely equal to false (false, 0, "").
  toBinary: function(str, spaceSeparatedOctets) {
    var separator = spaceSeparatedOctets == false ? "" : " ";
    return str.replace(/[\s\S]/g, function(unit) {
      return ABC.zeroPad(unit.charCodeAt().toString(2)) + separator;
    })
  },
  // Left-pads num with zeros to eight characters; longer values are returned unchanged.
  zeroPad: function(num) {
    var width = String(num).length;
    return "00000000".slice(width) + num;
  }
};
and to be used as follows:
// Two encodings of the same sample: binary1 has no separators, binary2 separates the
// 8-digit groups with spaces. Both are decoded with ABC.toAscii below.
var binary1 = "01100110011001010110010101101100011010010110111001100111001000000110110001110101011000110110101101111001",
binary2 = "01100110 01100101 01100101 01101100 01101001 01101110 01100111 00100000 01101100 01110101 01100011 01101011 01111001",
binary1Ascii = ABC.toAscii(binary1),
binary2Ascii = ABC.toAscii(binary2);
console.log("Binary 1: " + binary1);
console.log("Binary 1 to ASCII: " + binary1Ascii);
console.log("Binary 2: " + binary2);
console.log("Binary 2 to ASCII: " + binary2Ascii);
// Re-encodes the decoded text, first with and then without a space after each group.
console.log("Ascii to Binary: " + ABC.toBinary(binary1Ascii)); // default: space-separated octets
console.log("Ascii to Binary /wo spaces: " + ABC.toBinary(binary1Ascii, 0)); // 2nd parameter false to not space-separate octets
Source is on Github (gist): https://gist.github.com/eyecatchup/6742657
Hope it helps. Feel free to use for whatever you want (well, at least for whatever MIT permits).
// Prints the 8-digit binary form of every character of `string`, separated by spaces.
var PADDING = "00000000"
var string = "TEST"
var resultArray = []
for (var i = 0; i < string.length; i++) {
  var compact = string.charCodeAt(i).toString(2)
  // Take the missing leading zeros from PADDING. Taking them from `compact` itself
  // copied the value's own first digits instead ('1010100' became '11010100').
  // A negative end index is treated as 0, so wide values are left unpadded.
  var padded = PADDING.substring(0, PADDING.length - compact.length) + compact
  resultArray.push(padded)
}
console.log(resultArray.join(" "))
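The other answers will work for most cases. But it's worth noting that charCodeAt() and related methods work on UTF-16 code units, not UTF-8 bytes: they don't throw for characters outside the standard ASCII range, but such characters yield values that don't fit the 8-bit groups used above and that differ from the character's UTF-8 encoding. Here's a workaround that converts the string to its UTF-8 bytes first.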
// UTF-8 to binary
// Returns the UTF-8 encoding of s as one unseparated string of 8-digit binary groups.
var utf8ToBin = function( s ){
  // encodeURIComponent percent-encodes the UTF-8 bytes; unescape turns every %XX back
  // into a single character, leaving one character per byte.
  var bytes = unescape( encodeURIComponent( s ) );
  var out = '';
  for ( var i = 0; i < bytes.length; i++ ) {
    var bits = bytes.charCodeAt( i ).toString( 2 );
    // Pad up to the next multiple of eight digits.
    out += bits.padStart( 8 * Math.ceil( bits.length / 8 ), '0' );
  }
  return out;
};
// Binary to UTF-8
// Reads s eight binary digits at a time, treats each group as one UTF-8 byte and
// returns the decoded text.
var binToUtf8 = function( s ){
  var encoded = '';
  for ( var pos = 0; pos < s.length; pos += 8 ) {
    var hex = parseInt( s.slice( pos, pos + 8 ), 2 ).toString( 16 );
    // Percent-escapes need two hex digits per byte.
    if ( hex.length % 2 !== 0 ) { hex = '0' + hex; }
    encoded += '%' + hex;
  }
  return decodeURIComponent( encoded );
};
The escape/unescape() functions are deprecated. If you need polyfills for them, you can check out the more comprehensive UTF-8 encoding example found here: http://jsfiddle.net/47zwb41o
Just a hint into the right direction
var foo = "TEST",
    res = [ ];
// Collect one zero-padded 8-digit binary string per character of foo.
for ( const letter of foo.split('') ) {
  res.push( letter.charCodeAt( 0 ).toString( 2 ).padStart( 8, '0' ) );
}
console.log( res );
8-bit characters with leading 0
// One 8-digit group per character. Seven leading zeros are enough to fill any value
// of at least one digit up to eight digits before slice(-8) trims the result; the
// previous two-zero prefix left code units shorter than six bits (tab, newline, ...)
// with fewer than eight digits.
'sometext'
  .split('')
  .map((char) => '0000000'.concat(char.charCodeAt(0).toString(2)).slice(-8))
  .join(' ');
If you need 6 or 7 bit, just change .slice(-8)
Thank you Majid Laissi for your answer
I made 2 functions out from your code:
The goal was to implement conversion of a string to VARBINARY or BINARY and back.
// Converts a string to binary text: one unpadded base-2 group per UTF-16 code unit,
// each followed by a space. Input longer than maxBytes characters is cut to maxBytes
// characters first.
const stringToBinary = function(string, maxBytes) {
  //for BINARY maxBytes = 255
  //for VARBINARY maxBytes = 65535
  const clipped = string.length > maxBytes ? string.substring(0, maxBytes) : string;
  return clipped
    .split('')
    .map((unit) => unit.charCodeAt(0).toString(2) + ' ')
    .join('');
};
and the backward conversion:
/**
 * Decodes space-separated binary groups back into a string.
 *
 * @param {string} binary - Base-2 groups separated by single spaces; a trailing
 *   separator or doubled spaces are tolerated.
 * @returns {string} One character per non-empty group.
 */
const binaryToString = function(binary) {
  let stringOutput = '';
  for (const token of binary.split(' ')) {
    // split(' ') yields an empty token after a trailing space and for empty input;
    // parseInt('') is NaN, which String.fromCharCode turned into a NUL character.
    if (token === '') {
      continue;
    }
    stringOutput += String.fromCharCode(parseInt(token, 2));
  }
  return stringOutput;
};
and here is a working example: https://jsbin.com/futalidenu/edit?js,console
Provided you're working in node or a browser with BigInt support, this version cuts costs by saving the expensive string construction for the very end:
const zero = 0n
const shift = 8n
/**
 * Converts a string of 8-bit characters to one unseparated binary string.
 * All code units are packed into a single BigInt so only one string is built.
 *
 * @param {string} str - Text whose code units are all <= 255; larger values spill
 *   into the neighbouring octet.
 * @returns {string} Exactly 8 digits per character ('' for an empty string).
 */
function asciiToBinary (str) {
  const len = str.length
  // 0n.toString(2) is "0", which padStart(0) cannot shorten: handle '' explicitly.
  if (len === 0) {
    return ''
  }
  let n = zero
  for (let i = 0; i < len; i++) {
    n = (n << shift) + BigInt(str.charCodeAt(i))
  }
  return n.toString(2).padStart(len * 8, '0')
}
It's about twice as fast as the other solutions mentioned here including this simple es6+ implementation:
// Concatenates the binary form of every code point of s, each left-padded with zeros
// to at least 8 digits, without separators.
const toBinary = (s) => {
  let bits = '';
  for (const symbol of s) {
    bits += symbol.codePointAt(0).toString(2).padStart(8, '0');
  }
  return bits;
};
If you need to handle unicode characters, here's this guy:
const zero = 0n
const shift = 8n
const bigShift = 16n
const byte = 255n
/**
 * Converts a string to one unseparated binary string using a variable width per
 * code point: 8 bits up to U+00FF, 16 bits up to U+FFFF, 24 bits above that.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The concatenated fixed-width groups ('' for an empty string).
 */
function unicodeToBinary (str) {
  const astralShift = 24n
  let n = zero
  let totalBits = 0
  // for...of walks code points. Indexing by UTF-16 unit visited both halves of a
  // surrogate pair and packed a 17+ bit code point into a 16-bit slot.
  for (const symbol of str) {
    const bits = BigInt(symbol.codePointAt(0))
    let width = shift
    if (bits > 0xffffn) {
      width = astralShift
    } else if (bits > byte) {
      width = bigShift
    }
    n = (n << width) + bits
    totalBits += Number(width)
  }
  // Padding to the accumulated width keeps leading zero-valued characters, which
  // rounding the digit count up to a multiple of 8 silently dropped.
  return totalBits === 0 ? '' : n.toString(2).padStart(totalBits, '0')
}
this seems to be the simplified version
// Maps each character of 'abc' to the base-2 text of its char code (no zero padding)
// and joins the groups with single spaces.
Array.from('abc').map((each)=>each.charCodeAt(0).toString(2)).join(" ")
This is as short as you can get. It's based on the top-rated answer but transformed to a reduce function.
"TEST".split("").reduce(function (a, b) { return a + b.charCodeAt(0).toString(2)}, "")
/**
 * Converts text to space-separated binary groups, one per UTF-16 code unit.
 *
 * @param {string} string - Text to convert.
 * @returns {string} Groups left-padded with zeros to at least 8 digits.
 */
const textToBinary = (string) => {
  return string.split('').map((char) =>
    // toString(2) omits leading zeros; pad each group back to a full octet.
    char.charCodeAt(0).toString(2).padStart(8, '0')).join(' ');
}
console.log(textToBinary('hello world'))
// Returns the UTF-8 encoding of text as one unseparated string of 8-digit groups.
var UTF8ToBin = function (text) {
  // One character per UTF-8 byte after the encodeURIComponent/unescape round trip.
  var bytes = unescape(encodeURIComponent(text));
  var result = "";
  for (var index = 0; index < bytes.length; index++) {
    var bits = bytes.charCodeAt(index).toString(2);
    while (bits.length % 8 !== 0) {
      bits = "0" + bits;
    }
    result += bits;
  }
  return result;
};

// Reads eight binary digits at a time as UTF-8 bytes and returns the decoded text.
var binToUTF8 = function (binary) {
  var escaped = "";
  for (var offset = 0; offset < binary.length; offset += 8) {
    var hex = parseInt(binary.slice(offset, offset + 8), 2).toString(16);
    // Percent-escapes need two hex digits per byte.
    escaped += "%" + (hex.length % 2 === 0 ? hex : "0" + hex);
  }
  return decodeURIComponent(escaped);
};
This is a small minified JavaScript Code to convert UTF8 to Binary and Vice versa.
This is a solution for UTF-8-based textual binary representation. It leverages TextEncoder, which encodes a string to its UTF-8 bytes.
This solution separates characters by spaces. The individual "byte-bits" of multi-byte characters are separated by a minus character (-).
// inspired by https://stackoverflow.com/a/40031979/923560
// Renders the UTF-8 encoding of inputString as text: each byte becomes eight binary
// digits, the bytes of one character are joined with '-', and characters (code
// points) are separated by a space.
function stringToUtf8BinaryRepresentation(inputString) {
  const encoder = new TextEncoder();
  const groups = [];
  for (const char of inputString) {
    const octets = [];
    for (const byte of encoder.encode(char)) {
      octets.push(byte.toString(2).padStart(8, '0'));
    }
    groups.push(octets.join('-'));
  }
  return groups.join(' ');
}
// ### example usage #########################
// Logs a divider, the input itself and then its UTF-8 binary representation, each on
// its own line.
function print(inputString) {
  for (const line of ["--------------", inputString]) {
    console.log(line);
  }
  const representation = stringToUtf8BinaryRepresentation(inputString);
  console.log(representation);
}
// compare with https://en.wikipedia.org/wiki/UTF-8#Encoding
// compare with https://en.wikipedia.org/wiki/UTF-8#Codepage_layout
// compare with UTF-16, which JavaScript uses for strings: https://en.wikipedia.org/wiki/UTF-16#Examples
print("TEST");
print("hello world");
print("$");
print("£");
print("€");
print("한");
print("𐍈");
print("παράδειγμα");
print("🤡");
print("👨‍👩‍👧‍👦");
print("👩🏻‍🤝‍🧑🏿");
print("🇺🇦");
use the code: 'text'.split('').map(e=>{return e.charCodeAt(0).toString(2)}) e.g.-
// `output` holds one unpadded base-2 string per character of `text`.
const text = 'some text';
const output = text.split('').map((unit) => unit.charCodeAt(0).toString(2));
Simple using Buffer
const text = "TEST";
// Buffer.from encodes the text to bytes; each byte is rendered as eight binary digits.
Array.from(Buffer.from(text).values())                  // [ 84, 69, 83, 84 ]
  .map((octet) => octet.toString(2).padStart(8, '0'))   // [ '01010100', '01000101', '01010011', '01010100' ]
  .join(' ')                                            // '01010100 01000101 01010011 01010100'
The shortest and simplest solution:
"x".charCodeAt().toString(2) // 1111000
String.prototype.charCodeAt(): charCodeAt(0) returns the UTF-16 code unit of the first character: "x".charCodeAt() // 120
Number.prototype.toString(): charCodeAt().toString(2) converts that number to its binary representation.
For multiple string characters:
[..."Tesla"].map((i) => i.charCodeAt().toString(2)).join(" ");
// 1010100 1100101 1110011 1101100 1100001
Spread syntax (...)
[..."Tesla"] // ['T', 'e', 's', 'l', 'a']
Array.map()
[..."Tesla"].map((i) => i.charCodeAt()) // [84, 101, 115, 108, 97]
Array.join() Put a space " " after each element in the array map(i) and convert the array to string.
I'm pretty sure that you can do something like this:
Returns a STRING:
// Returns the unpadded base-2 text of every UTF-16 code unit of str, concatenated
// without separators.
const toBinary = (str)=>{
  return Array.from(
    { length: str.length },
    (_, position) => str.charCodeAt(position).toString(2)
  ).join("");
}
Or, as an int:
// Builds the same digit string as the string variant, then reads those digits as a
// base-10 number ('A' -> 1000001). An empty string gives NaN; the result is a double,
// so long inputs exceed the exactly representable integer range.
const toBinary = (str)=>{
  let digits = "";
  for (let position = 0; position < str.length; position++) {
    digits += str.charCodeAt(position).toString(2);
  }
  return Number.parseInt(digits, 10);
}

What's the best way to count keywords in JavaScript?

What's the best and most efficient way to count keywords in JavaScript? Basically, I'd like to take a string and get the top N words or phrases that occur in the string, mainly for the use of suggesting tags. I'm looking more for conceptual hints or links to real-life examples than actual code, but I certainly wouldn't mind if you'd like to share code as well. If there are particular functions that would help, I'd also appreciate that.
Right now I think I'm at using the split() function to separate the string by spaces and then cleaning punctuation out with a regular expression. I'd also want it to be case-insensitive.
Cut, paste + execute demo:
var text = "Text to be examined to determine which n words are used the most";

// Find 'em!
// A word is a run of word characters, optionally followed by an apostrophe and one
// or two more word characters ("don't", "we've").
var wordRegExp = /\w+(?:'\w{1,2})?/g;
// A Map keeps the counts apart from inherited object members, so words such as
// "constructor" or "__proto__" are counted like any other word.
var words = new Map();
var matches;
while ((matches = wordRegExp.exec(text)) != null)
{
    var word = matches[0].toLowerCase();
    words.set(word, (words.get(word) || 0) + 1);
}

// Sort 'em!
// [word, count] pairs, most frequent first; ties keep first-seen order.
var wordList = Array.from(words);
wordList.sort(function(a, b) { return b[1] - a[1]; });

// Come back any time, straaanger!
var n = 10;
var message = ["The top " + n + " words are:"];
// Stop at the end of the list: a text with fewer than n distinct words used to throw
// on wordList[i][0].
for (var i = 0; i < n && i < wordList.length; i++)
{
    message.push(wordList[i][0] + " - " + wordList[i][1] + " occurance" +
                 (wordList[i][1] == 1 ? "" : "s"));
}
alert(message.join("\n"));
Reusable function:
/**
 * Returns the n most frequent words of text, most frequent first.
 *
 * Words are matched case-insensitively (they are lower-cased) and may carry a short
 * apostrophe suffix such as "n't" or "'ve". Ties keep first-seen order.
 *
 * @param {string} text - Text to analyse.
 * @param {number} n - Maximum number of words to return.
 * @returns {string[]} At most n words; fewer when the text has fewer distinct words.
 */
function getTopNWords(text, n)
{
    var wordRegExp = /\w+(?:'\w{1,2})?/g;
    // A Map avoids collisions with inherited object members ("constructor",
    // "__proto__"), which corrupted the counts of a plain-object dictionary.
    var counts = new Map();
    var matches;
    while ((matches = wordRegExp.exec(text)) != null)
    {
        var word = matches[0].toLowerCase();
        counts.set(word, (counts.get(word) || 0) + 1);
    }

    var wordList = Array.from(counts);
    wordList.sort(function(a, b) { return b[1] - a[1]; });

    var topWords = [];
    // Bounded by the list length: asking for more words than exist used to throw.
    for (var i = 0; i < n && i < wordList.length; i++)
    {
        topWords.push(wordList[i][0]);
    }
    return topWords;
}
Once you have that array of words cleaned up, and let's say you call it wordArray:
// Dictionary of word -> number of occurrences. It is created without a prototype so
// that words such as "hasOwnProperty", "constructor" or "__proto__" are stored and
// counted like any other word instead of clashing with inherited members.
var keywordRegistry = Object.create(null);

for(var i = 0; i < wordArray.length; i++) {
    // First sighting of this word: start its counter at zero.
    if(!(wordArray[i] in keywordRegistry)) {
        keywordRegistry[wordArray[i]] = 0;
    }
    keywordRegistry[wordArray[i]] = keywordRegistry[wordArray[i]] + 1;
}

// now keywordRegistry will have, as properties, all of the
// words in your word array with their respective counts

// this will alert (choose something better than alert) all words and their counts
for(var keyword in keywordRegistry) {
    alert("The keyword '" + keyword + "' occurred " + keywordRegistry[keyword] + " times");
}
That should give you the basics of doing this part of the work.
Try to split your string into words, count the resulting words, and then sort by the counts.
This builds upon a previous answer by insin by only having one loop:
/**
 * Returns the n most frequent words of text as [word, count] pairs, most frequent
 * first. Words are lower-cased; ties keep first-seen order.
 *
 * @param {string} text - Text to analyse.
 * @param {number} n - Maximum number of pairs to return.
 * @returns {Array<[string, number]>} At most n [word, count] pairs.
 */
function top_words(text, n) {
    // Split text on non word characters
    var words = text.toLowerCase().split(/\W+/)
    // word -> index of its pair in word_counts. A Map is required here: with an Array
    // as the dictionary, words such as "length" or "constructor" hit built-in members
    // and made the function throw or miscount.
    var positions = new Map()
    var word_counts = []
    for (var i=0; i<words.length; i++) {
        var word = words[i]
        // split() yields empty strings at the edges when the text starts or ends with
        // a separator.
        if (!word) {
            continue
        }

        var position = positions.get(word)
        if (position === undefined) {
            positions.set(word, word_counts.length)
            word_counts.push([word, 1])
        } else {
            word_counts[position][1]++
        }
    }
    // Put most frequent words at the beginning.
    word_counts.sort(function (a, b) {return b[1] - a[1]})
    // Return the first n items
    return word_counts.slice(0, n)
}
// Let's see if it works.
var text = "Words in here are repeated. Are repeated, repeated!"
alert(top_words(text, 3))
The result of the example is: [['repeated',3], ['are',2], ['words', 1]]
I would do exactly what you have mentioned above to isolate each word. I would then probably add each word as the index of an array with the number of occurrences as the value.
For example:
// A prototype-less dictionary object is used instead of an Array: on an Array, words
// such as "length" or "constructor" collide with built-in members and corrupt the
// counts. The first sighting of a word stores 1, later ones add 1.
var a = Object.create(null);
a[word] = a[word]?a[word]+1:1;
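Now you know how many occurrences of each word existed (a[word]). Note that a.length will not tell you how many unique words there are, because string keys do not change the length of an array; use Object.keys(a).length for that.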

Categories

Resources