Any alternative way of using this .length & .split()?

Any alternative way of using this .length & .split()? - javascript

I want to split lower, upper & also the value of textBox without using .split() and also I want
to find the length of the string without using .length. Can anybody solve my problem I am tried but
I cannot find the exact logic for this problem.
var lowercase = "abcdefghijklmnopqrstuvwxyz";
var uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
function Print() {
var input = document.getElementById('demo').value;
document.write(document.getElementById('demo1').innerHTML = toUpper(input));
}
function toUpper(input) {
var upperCase = uppercase.split(""); //other way to split uppercase
var lowerCase = lowercase.split(""); //other way to split lowercase
var inputText = input.split(""); //other way to split input
var newText = "";
var found;
for (var i = 0; i < inputText.length; i++) { //not using .length to other way to find the size of inputText
found = false;
for (var ctr = 0; ctr < lowerCase.length; ctr++) { //not using .length other way to find the size of lowerCase
if (inputText[i] == lowerCase[ctr]) {
found = true;
break;
}
}
if (found) { //true
newText = newText + upperCase[ctr];
} else {
newText = newText + inputText[i];
}
}
return newText;
}

You can count the length of a string using the array function reduce.
Reduce loops over all elements in an array and executes a function you give it to reduce it to one value, you can read more here.
To get reduce working on strings, you need to use Array.from, like this:
Array.from(lowerCase).reduce((sum, carry) => sum + 1, 0) // 26
Reduce accepts a starting argument, which we set to zero here.
This way you do not need to use the split or length functions.
You don't need to check if the input is in a string either, you can use charCodeAt() and fromCharCode().
If you take your input and loop through it using Array.from() then forEach, you can get something which looks like this:
function print() {
const input = document.querySelector('#input').value;
document.querySelector('#target').value = stringToUpper(input);
}
function stringToUpper(input) {
let output = "";
Array.from(input).forEach(char => output += charToUpper(char));
return output;
}
function charToUpper(char) {
let code = char.charCodeAt(0);
code >= 97 && code <= 122 ? code -= 32 : code;
return String.fromCharCode(code);
}
<div>
<input id="input" placeholder="enter text here">
</div>
<button onclick="print()">To Upper</button>
<div>
<input id="target">
</div>
The key line is where we take the output and add the char (as upper) to it:
output += charToUpper(char)
If you don't know about arrow functions, you can read more here
This line:
code >= 97 && code <= 122 ? code -= 32 : code;
is just checking if the char is lower case (number between 97 and 122) and if so, subtracting 32 to get it to upper case.
The reason it is subtract not add is in utf-16, the chars are laid out like this:
ABCDEFGHIJKLMNOPQRTUWXYZabcdefghijklmnopqrtuwxyz
See here for more

I don't know what you mean by "split the value of textBox", but one way to determine the length of a string without using .length would be to use a for...of loop and have a counter increment each time it runs to keep track of the number of characters in the string.
let string = 'boo'
let lengthCounter = 0
for (let char of string) {
lengthCounter++
}
//lengthCounter = 3
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...of

You can define your own split and length functions:
function mySplit(a){
var counter = 0;
rslt = [];
var val = a[counter];
while(typeof val != "undefined"){
rslt.push(a[counter]);
counter ++;
val = a[counter];
}
return rslt;
}
function myLength(a){
var counter = 0;
var val = a[counter];
while(typeof val != "undefined"){
counter ++;
val = a[counter];
}
return counter;
}
Your function now should be like:
function toUpper(input) {
var upperCase = mySplit(uppercase);
var lowerCase = mySplit(lowercase);
var inputText = mySplit(input);
var newText = "";
var found;
for (var i = 0; i < myLength(inputText); i++) {
found = false;
for (var ctr = 0; ctr < myLength(lowerCase); ctr++) {
if (inputText[i] == lowerCase[ctr]) {
found = true;
break;
}
}
if (found) { //true
newText = newText + upperCase[ctr];
} else {
newText = newText + inputText[i];
}
}
return newText;
}

The simplest way would be to just use the build in function of javascript .toUpperCase() (see example 1). https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/toUpperCase
Else if you insist on using a for.loop you may do so aswell (see example two). You do not need the split() function since a string already is an arrayof characters. Also be aware that not all characters in the web have lowercase counterparts, so the logic itself is flawed.
//REM: This lines are not required.
/*
var lowercase = "abcdefghijklmnopqrstuvwxyz";
var uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
function Print() {
var input = document.getElementById('demo').value;
document.write(document.getElementById('demo1').innerHTML = toUpper(input));
}
*/
//REM: Version 1 (using string.toUpperCase())
(function toUpper1(input){
var tReturn = (input || '').toUpperCase();
console.log('toUpper1', tReturn);
return tReturn
}('abcDEFghiJKL'));
//REM: Version 2 (using your way)
(function toUpper2(input){
var tReturn = '';
if(input && input.length){
for(let i=0, j=input.length; i<j; i++){
tReturn += (input[i] === input[i].toLowerCase()) ? input[i].toUpperCase() : input[i]
}
};
console.log('toUpper2', tReturn);
return tReturn
}('abcDEFghiJKL'));

Related

How can I extract all contained characters in a String? [duplicate]

I have a string with repeated letters. I want letters that are repeated more than once to show only once.
Example input: aaabbbccc
Expected output: abc
I've tried to create the code myself, but so far my function has the following problems:
if the letter doesn't repeat, it's not shown (it should be)
if it's repeated once, it's show only once (i.e. aa shows a - correct)
if it's repeated twice, shows all (i.e. aaa shows aaa - should be a)
if it's repeated 3 times, it shows 6 (if aaaa it shows aaaaaa - should be a)
function unique_char(string) {
var unique = '';
var count = 0;
for (var i = 0; i < string.length; i++) {
for (var j = i+1; j < string.length; j++) {
if (string[i] == string[j]) {
count++;
unique += string[i];
}
}
}
return unique;
}
document.write(unique_char('aaabbbccc'));
The function must be with loop inside a loop; that's why the second for is inside the first.

Fill a Set with the characters and concatenate its unique entries:
function unique(str) {
return String.prototype.concat.call(...new Set(str));
}
console.log(unique('abc')); // "abc"
console.log(unique('abcabc')); // "abc"

Convert it to an array first, then use Josh Mc’s answer at How to get unique values in an array, and rejoin, like so:
var nonUnique = "ababdefegg";
var unique = Array.from(nonUnique).filter(function(item, i, ar){ return ar.indexOf(item) === i; }).join('');
All in one line. :-)

Too late may be but still my version of answer to this post:
function extractUniqCharacters(str){
var temp = {};
for(var oindex=0;oindex<str.length;oindex++){
temp[str.charAt(oindex)] = 0; //Assign any value
}
return Object.keys(temp).join("");
}

You can use a regular expression with a custom replacement function:
function unique_char(string) {
return string.replace(/(.)\1*/g, function(sequence, char) {
if (sequence.length == 1) // if the letter doesn't repeat
return ""; // its not shown
if (sequence.length == 2) // if its repeated once
return char; // its show only once (if aa shows a)
if (sequence.length == 3) // if its repeated twice
return sequence; // shows all(if aaa shows aaa)
if (sequence.length == 4) // if its repeated 3 times
return Array(7).join(char); // it shows 6( if aaaa shows aaaaaa)
// else ???
return sequence;
});
}

Using lodash:
_.uniq('aaabbbccc').join(''); // gives 'abc'

Per the actual question: "if the letter doesn't repeat its not shown"
function unique_char(str)
{
var obj = new Object();
for (var i = 0; i < str.length; i++)
{
var chr = str[i];
if (chr in obj)
{
obj[chr] += 1;
}
else
{
obj[chr] = 1;
}
}
var multiples = [];
for (key in obj)
{
// Remove this test if you just want unique chars
// But still keep the multiples.push(key)
if (obj[key] > 1)
{
multiples.push(key);
}
}
return multiples.join("");
}
var str = "aaabbbccc";
document.write(unique_char(str));

Your problem is that you are adding to unique every time you find the character in string. Really you should probably do something like this (since you specified the answer must be a nested for loop):
function unique_char(string){
var str_length=string.length;
var unique='';
for(var i=0; i<str_length; i++){
var foundIt = false;
for(var j=0; j<unique.length; j++){
if(string[i]==unique[j]){
foundIt = true;
break;
}
}
if(!foundIt){
unique+=string[i];
}
}
return unique;
}
document.write( unique_char('aaabbbccc'))
In this we only add the character found in string to unique if it isn't already there. This is really not an efficient way to do this at all ... but based on your requirements it should work.
I can't run this since I don't have anything handy to run JavaScript in ... but the theory in this method should work.

Try this if duplicate characters have to be displayed once, i.e.,
for i/p: aaabbbccc o/p: abc
var str="aaabbbccc";
Array.prototype.map.call(str,
(obj,i)=>{
if(str.indexOf(obj,i+1)==-1 ){
return obj;
}
}
).join("");
//output: "abc"
And try this if only unique characters(String Bombarding Algo) have to be displayed, add another "and" condition to remove the characters which came more than once and display only unique characters, i.e.,
for i/p: aabbbkaha o/p: kh
var str="aabbbkaha";
Array.prototype.map.call(str,
(obj,i)=>{
if(str.indexOf(obj,i+1)==-1 && str.lastIndexOf(obj,i-1)==-1){ // another and condition
return obj;
}
}
).join("");
//output: "kh"

<script>
uniqueString = "";
alert("Displays the number of a specific character in user entered string and then finds the number of unique characters:");
function countChar(testString, lookFor) {
var charCounter = 0;
document.write("Looking at this string:<br>");
for (pos = 0; pos < testString.length; pos++) {
if (testString.charAt(pos) == lookFor) {
charCounter += 1;
document.write("<B>" + lookFor + "</B>");
} else
document.write(testString.charAt(pos));
}
document.write("<br><br>");
return charCounter;
}
function findNumberOfUniqueChar(testString) {
var numChar = 0,
uniqueChar = 0;
for (pos = 0; pos < testString.length; pos++) {
var newLookFor = "";
for (pos2 = 0; pos2 <= pos; pos2++) {
if (testString.charAt(pos) == testString.charAt(pos2)) {
numChar += 1;
}
}
if (numChar == 1) {
uniqueChar += 1;
uniqueString = uniqueString + " " + testString.charAt(pos)
}
numChar = 0;
}
return uniqueChar;
}
var testString = prompt("Give me a string of characters to check", "");
var lookFor = "startvalue";
while (lookFor.length > 1) {
if (lookFor != "startvalue")
alert("Please select only one character");
lookFor = prompt(testString + "\n\nWhat should character should I look for?", "");
}
document.write("I found " + countChar(testString, lookFor) + " of the<b> " + lookFor + "</B> character");
document.write("<br><br>I counted the following " + findNumberOfUniqueChar(testString) + " unique character(s):");
document.write("<br>" + uniqueString)
</script>

Here is the simplest function to do that
function remove(text)
{
var unique= "";
for(var i = 0; i < text.length; i++)
{
if(unique.indexOf(text.charAt(i)) < 0)
{
unique += text.charAt(i);
}
}
return unique;
}

The one line solution will be to use Set. const chars = [...new Set(s.split(''))];

If you want to return values in an array, you can use this function below.
const getUniqueChar = (str) => Array.from(str)
.filter((item, index, arr) => arr.slice(index + 1).indexOf(item) === -1);
console.log(getUniqueChar("aaabbbccc"));
Alternatively, you can use the Set constructor.
const getUniqueChar = (str) => new Set(str);
console.log(getUniqueChar("aaabbbccc"));

Here is the simplest function to do that pt. 2
const showUniqChars = (text) => {
let uniqChars = "";
for (const char of text) {
if (!uniqChars.includes(char))
uniqChars += char;
}
return uniqChars;
};

const countUnique = (s1, s2) => new Set(s1 + s2).size
a shorter way based on #le_m answer

let unique=myArray.filter((item,index,array)=>array.indexOf(item)===index)

Format string and deleting special characters using ASCII code doesn't work (JavaScript)

Im trying to format any given string using ASCII code for reference. The format must delete any special character except numbers, "-" or "_" and spaces. Here's the code:
function FormatString(sentence) {
result = new String();
j = 0;
sentence = sentence.toUpperCase();
i = 0;
while(i < sentence.length) {
if (
(sentence[i]>=65 && sentence[i]<=90) ||
(sentence[i]>=48 && sentence[j]<=57) ||
sentence[i]==32 || sentence[i]==45
) {
sentence[j] = result[i];
j = j + 1;
}
i = i + 1;
}
return result;
}
and then call the function
console.log(FormatString('No running in the hallways!!!'))
the output should be "No running in the hallways"

Provided that using Regular Expressions would be the best way to solve your problem, I am assuming you want to use ASCII comparison for a reason.
The following solution works:
function FormatString (sentence) {
let result = "";
const uppercaseSentence = sentence.toUpperCase();
for (let i=0; i<uppercaseSentence.length; i++) {
let cc = uppercaseSentence.charCodeAt(i);
if ((cc>=65 && cc<=90) || (cc>=48 && cc<=57) || cc==32 || cc==45) {
result += sentence[i];
}
}
return result;
}
I am reporting below your function and commenting what was wrong with it:
function FormatString(sentence) {
// you are declaring a global variable here.
// let result = ... or var result = ... makes it local
// In javascript you can use an string literal `""` in place of new String()
result = new String();
// again a global variable
j = 0;
// you are chaning the passed parameter to an uppercase string
// you lost the original string now
sentence = sentence.toUpperCase();
// again a global variable
i = 0;
// nothing wron with the while loop, only the for loop is more concise in
// this case
while(i < sentence.length) {
if (
// sentence[i] is a streing with length 1 and it returns false if
// compared with a number. Use instead sentence.charCodeAt(i) which
// returns the ASCII code of the i-th character
(sentence[i]>=65 && sentence[i]<=90) ||
// there is a j here in place of an i
(sentence[i]>=48 && sentence[j]<=57) ||
sentence[i]==32 || sentence[i]==45
) {
// you can't assign single characters in javascript
// what you want to do is result += sentence[i],
// but remember that sentence is now all uppercase and so will be
// result!
sentence[j] = result[i];
j = j + 1;
}
i = i + 1;
}
return result;
}

Maybe string.replace with regex will help in your need:
const regex = /[-_]/g
"afasf-afsaf_fafa".replace(regex,''); //"afasfafsaffafa"

Test if input values match constant value

Ive got an assignment and am a bit stuck.
Need to match an input string to the values in a constant, but I am matching individual characters.
My constant would be ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUWXYZ'
My input would be, var input = 'ABOZ'
I need a test to check if each letter in the input variable exist in the ALPHABET constant.
Hope I made sense.
Cheers

Here's a single line answer to your question:
(ALPHABET.match(new RegExp((input.split('').join('|')), 'g'))).length == input.length
which would return true only if all the characters in input are present in ALPHABET
Here's a working demo http://jsfiddle.net/kayen/akL4A/

One way is to loop over the input and search if it exits in the constant
Possible code
var ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUWXYZ';
var input = 'ABOZ'
var count = 0;
for(x in input) {
if(ALPHABET.indexOf(input[x])>-1){
count++;
continue;
}
else{
break;
}
}
if(count==input.length) {
alert("true");
}
Demo

Tested and works in Firefox 16. Remember this implementation does not verify if input is null or other defensive checks. You should do it by yourself.
This is a case sensitive result.
Case insensitive :
function validInput(input) {
var ALPHABET = "ABCDEFGHIJKLMNOPQRSTUWXYZ";
for (var i = 0; i < input.length; i++) {
var charAtI = input.charAt(i);
var indexOfCharAtI = ALPHABET.indexOf(charAtI);
if (indexOfCharAtI < 0) {
return false;
}
}
return true;
}
Case insensitive :
function validInput(input) {
var ALPHABET = "ABCDEFGHIJKLMNOPQRSTUWXYZ";
for (var i = 0; i < input.length; i++) {
var charAtI = input.charAt(i);
charAtI = charAtI.toUpperCase();
var indexOfCharAtI = ALPHABET.indexOf(charAtI);
if (indexOfCharAtI < 0) {
return false;
}
}
return true;
}

Here's an example of a function which would return true for a match or false for a mismatch. (Please note this is a case sensitive test).
var ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
var input = 'ABOZ';
function testStr(str, constant){
var matchFlag = true;
var strSplit = str.split("");
for(var i=0; i<strSplit.length;i++){
if(constant.indexOf(strSplit[i]) == -1){
matchFlag = false;
}
}
return matchFlag;
}
alert(testStr(input, ALPHABET)); //TRUE
DEMO

Javascript Function to split and return a value from a string

I am trying to grab a certain value. I am new to javascript and I can't figure out why this is not working.
If I parse "kid_2" I should get "kostas". Instead of "Kostas" I always get "02-23-2000". So I must have a logic problem in the loop but I am really stuck.
function getold_val(fieldname,str){
var chunks=str.split("||");
var allchunks = chunks.length-1;
for(k=0;k<allchunks;k++){
var n=str.indexOf(fieldname);
alert(chunks[k]);
if(n>0){
var chunkd=chunks[k].split("::");
alert(chunkd);
return chunkd[1];
}
}
}
var test = getold_val('kid_2','date_1::02-23-2000||date_2::06-06-1990||kid_1::George||kid_2::Kostas||');
alert(test);

A regex may be a little more appealing. Here's a fiddle:
function getValue(source, key){
return (new RegExp("(^|\\|)" + key + "::([^$\\|]+)", "i").exec(source) || {})[2];
}
getValue("date_1::02-23-2000||date_2::06-06-1990||kid_1::George||kid_2::Kostas||","kid_2");
But if you want something a little more involved, you can parse that string into a dictionary like so (fiddle):
function splitToDictionary(val, fieldDelimiter, valueDelimiter){
var dict = {},
fields = val.split(fieldDelimiter),
kvp;
for (var i = 0; i < fields.length; i++) {
if (fields[i] !== "") {
kvp = fields[i].split(valueDelimiter);
dict[kvp[0]] = kvp[1];
}
}
return dict;
}
var dict = splitToDictionary("date_1::02-23-2000||date_2::06-06-1990||kid_1::George||kid_2::Kostas||","||","::");
console.log(dict["date_1"]);
console.log(dict["date_2"]);
console.log(dict["kid_1"]);
console.log(dict["kid_2"]);

This works, here's my fiddle.
function getold_val(fieldname,str) {
var chunks = str.split('||');
for(var i = 0; i < chunks.length-1; i++) {
if(chunks[i].indexOf(fieldname) >= 0) {
return(chunks[i].substring(fieldname.length+2));
}
}
}
alert(getold_val('kid_2', 'date_1::02-23-2000||date_2::06-06-1990||kid_1::George||kid_2::Kostas||'));
The issue with your code was (as #slebetman noticed as well) the fact that a string index can be 0 because it starts exactly in the first letter.
The code is almost the same as yours, I just didn't use the second .split('::') because I felt a .substring(...) would be easier.

There are two bugs. The first error is in the indexOf call:
var n = str.indexOf(fieldname);
This will always return a value greater than or equal to 0 since the field exists in the string. What you should be doing is:
var n = chunks[k].indexOf(fieldname);
The second error is in your if statement. It should be:
if(n >= 0) {
...
}
or
if(n > -1) {
...
}
The substring you are looking for could very well be the at the beginning of the string, in which case its index is 0. indexOf returns -1 if it cannot find what you're looking for.
That being said, here's a better way to do what you're trying to do:
function getold_val(fieldName, str) {
var keyValuePairs = str.split("||");
var returnValue = null;
if(/||$/.match(str)) {
keyValuePairs = keyValuePairs.slice(0, keyValuePairs.length - 1);
}
var found = false;
var i = 0;
while(i < keyValuePairs.length && !found) {
var keyValuePair = keyValuePairs[i].split("::");
var key = keyValuePair[0];
var value = keyValuePair[1];
if(fieldName === key) {
returnValue = value;
found = true;
}
i++;
}
return returnValue;
}

JavaScript strings outside of the BMP

BMP being Basic Multilingual Plane
According to JavaScript: the Good Parts:
JavaScript was built at a time when Unicode was a 16-bit character set, so all characters in JavaScript are 16 bits wide.
This leads me to believe that JavaScript uses UCS-2 (not UTF-16!) and can only handle characters up to U+FFFF.
Further investigation confirms this:
> String.fromCharCode(0x20001);
The fromCharCode method seems to only use the lowest 16 bits when returning the Unicode character. Trying to get U+20001 (CJK unified ideograph 20001) instead returns U+0001.
Question: is it at all possible to handle post-BMP characters in JavaScript?
2011-07-31: slide twelve from Unicode Support Shootout: The Good, The Bad, & the (mostly) Ugly covers issues related to this quite well:

Depends what you mean by ‘support’. You can certainly put non-UCS-2 characters in a JS string using surrogates, and browsers will display them if they can.
But, each item in a JS string is a separate UTF-16 code unit. There is no language-level support for handling full characters: all the standard String members (length, split, slice etc) all deal with code units not characters, so will quite happily split surrogate pairs or hold invalid surrogate sequences.
If you want surrogate-aware methods, I'm afraid you're going to have to start writing them yourself! For example:
String.prototype.getCodePointLength= function() {
return this.length-this.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g).length+1;
};
String.fromCodePoint= function() {
var chars= Array.prototype.slice.call(arguments);
for (var i= chars.length; i-->0;) {
var n = chars[i]-0x10000;
if (n>=0)
chars.splice(i, 1, 0xD800+(n>>10), 0xDC00+(n&0x3FF));
}
return String.fromCharCode.apply(null, chars);
};

I came to the same conclusion as bobince. If you want to work with strings containing unicode characters outside of the BMP, you have to reimplement javascript's String methods. This is because javascript counts characters as each 16-bit code value. Symbols outside of the BMP need two code values to be represented. You therefore run into a case where some symbols count as two characters and some count only as one.
I've reimplemented the following methods to treat each unicode code point as a single character: .length, .charCodeAt, .fromCharCode, .charAt, .indexOf, .lastIndexOf, .splice, and .split.
You can check it out on jsfiddle: http://jsfiddle.net/Y89Du/
Here's the code without comments. I tested it, but it may still have errors. Comments are welcome.
if (!String.prototype.ucLength) {
String.prototype.ucLength = function() {
// this solution was taken from
// http://stackoverflow.com/questions/3744721/javascript-strings-outside-of-the-bmp
return this.length - this.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g).length + 1;
};
}
if (!String.prototype.codePointAt) {
String.prototype.codePointAt = function (ucPos) {
if (isNaN(ucPos)){
ucPos = 0;
}
var str = String(this);
var codePoint = null;
var pairFound = false;
var ucIndex = -1;
var i = 0;
while (i < str.length){
ucIndex += 1;
var code = str.charCodeAt(i);
var next = str.charCodeAt(i + 1);
pairFound = (0xD800 <= code && code <= 0xDBFF && 0xDC00 <= next && next <= 0xDFFF);
if (ucIndex == ucPos){
codePoint = pairFound ? ((code - 0xD800) * 0x400) + (next - 0xDC00) + 0x10000 : code;
break;
} else{
i += pairFound ? 2 : 1;
}
}
return codePoint;
};
}
if (!String.fromCodePoint) {
String.fromCodePoint = function () {
var strChars = [], codePoint, offset, codeValues, i;
for (i = 0; i < arguments.length; ++i) {
codePoint = arguments[i];
offset = codePoint - 0x10000;
if (codePoint > 0xFFFF){
codeValues = [0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)];
} else{
codeValues = [codePoint];
}
strChars.push(String.fromCharCode.apply(null, codeValues));
}
return strChars.join("");
};
}
if (!String.prototype.ucCharAt) {
String.prototype.ucCharAt = function (ucIndex) {
var str = String(this);
var codePoint = str.codePointAt(ucIndex);
var ucChar = String.fromCodePoint(codePoint);
return ucChar;
};
}
if (!String.prototype.ucIndexOf) {
String.prototype.ucIndexOf = function (searchStr, ucStart) {
if (isNaN(ucStart)){
ucStart = 0;
}
if (ucStart < 0){
ucStart = 0;
}
var str = String(this);
var strUCLength = str.ucLength();
searchStr = String(searchStr);
var ucSearchLength = searchStr.ucLength();
var i = ucStart;
while (i < strUCLength){
var ucSlice = str.ucSlice(i,i+ucSearchLength);
if (ucSlice == searchStr){
return i;
}
i++;
}
return -1;
};
}
if (!String.prototype.ucLastIndexOf) {
String.prototype.ucLastIndexOf = function (searchStr, ucStart) {
var str = String(this);
var strUCLength = str.ucLength();
if (isNaN(ucStart)){
ucStart = strUCLength - 1;
}
if (ucStart >= strUCLength){
ucStart = strUCLength - 1;
}
searchStr = String(searchStr);
var ucSearchLength = searchStr.ucLength();
var i = ucStart;
while (i >= 0){
var ucSlice = str.ucSlice(i,i+ucSearchLength);
if (ucSlice == searchStr){
return i;
}
i--;
}
return -1;
};
}
if (!String.prototype.ucSlice) {
String.prototype.ucSlice = function (ucStart, ucStop) {
var str = String(this);
var strUCLength = str.ucLength();
if (isNaN(ucStart)){
ucStart = 0;
}
if (ucStart < 0){
ucStart = strUCLength + ucStart;
if (ucStart < 0){ ucStart = 0;}
}
if (typeof(ucStop) == 'undefined'){
ucStop = strUCLength - 1;
}
if (ucStop < 0){
ucStop = strUCLength + ucStop;
if (ucStop < 0){ ucStop = 0;}
}
var ucChars = [];
var i = ucStart;
while (i < ucStop){
ucChars.push(str.ucCharAt(i));
i++;
}
return ucChars.join("");
};
}
if (!String.prototype.ucSplit) {
String.prototype.ucSplit = function (delimeter, limit) {
var str = String(this);
var strUCLength = str.ucLength();
var ucChars = [];
if (delimeter == ''){
for (var i = 0; i < strUCLength; i++){
ucChars.push(str.ucCharAt(i));
}
ucChars = ucChars.slice(0, 0 + limit);
} else{
ucChars = str.split(delimeter, limit);
}
return ucChars;
};
}

More recent JavaScript engines have String.fromCodePoint.
const ideograph = String.fromCodePoint( 0x20001 ); // outside the BMP
Also a code-point iterator, which gets you the code-point length.
function countCodePoints( str )
{
const i = str[Symbol.iterator]();
let count = 0;
while( !i.next().done ) ++count;
return count;
}
console.log( ideograph.length ); // gives '2'
console.log( countCodePoints(ideograph) ); // '1'

Yes, you can. Although support to non-BMP characters directly in source documents is optional according to the ECMAScript standard, modern browsers let you use them. Naturally, the document encoding must be properly declared, and for most practical purposes you would need to use the UTF-8 encoding. Moreover, you need an editor that can handle UTF-8, and you need some input method(s); see e.g. my Full Unicode Input utility.
Using suitable tools and settings, you can write var foo = '𠀁'.
The non-BMP characters will be internally represented as surrogate pairs, so each non-BMP character counts as 2 in the string length.

Using for (c of this) instruction, one can make various computations on a string that contains non-BMP characters. For instance, to compute the string length, and to get the nth character of the string:
String.prototype.magicLength = function()
{
var c, k;
k = 0;
for (c of this) // iterate each char of this
{
k++;
}
return k;
}
String.prototype.magicCharAt = function(n)
{
var c, k;
k = 0;
for (c of this) // iterate each char of this
{
if (k == n) return c + "";
k++;
}
return "";
}

This old topic has now a simple solution in ES6:
Split characters into an array
simple version
[..."😴😄😃⛔🎠🚓🚇"] // ["😴", "😄", "😃", "⛔", "🎠", "🚓", "🚇"]
Then having each one separated you can handle them easily for most common cases.
Credit: DownGoat
Full solution
To overcome special emojis as the one in the comment, one can search for the connection charecter (char code 8205 in UTF-16) and make some modifications. Here is how:
let myStr = "👩‍👩‍👧‍👧😃𝌆"
let arr = [...myStr]
for (i = arr.length-1; i--; i>= 0) {
if (arr[i].charCodeAt(0) == 8205) { // special combination character
arr[i-1] += arr[i] + arr[i+1]; // combine them back to a single emoji
arr.splice(i, 2)
}
}
console.log(arr.length) //3
Haven't found a case where this doesn't work. Comment if you do.
To conclude
it seems that JS uses the 8205 char code to represent UCS-2 characters as a UTF-16 combinations.

Develop Reference

JavaScript is the programming language of the Web.

Any alternative way of using this .length & .split()? - javascript

Related

How can I extract all contained characters in a String? [duplicate]

Format string and deleting special characters using ASCII code doesn't work (JavaScript)

Test if input values match constant value

Javascript Function to split and return a value from a string

JavaScript strings outside of the BMP

Categories

Resources