Protecting Punctuation in Custom Uppercase-Conversion Function in JavaScript - javascript

/**
 * Upper-cases a single word while leaving capitalised prefixes such as "Mc"
 * in their original case (e.g. "McGee" -> "McGEE").
 *
 * The word is cut into "capital letter + lowercase letters + trailing
 * punctuation" pieces. With zero or one piece the whole word is upper-cased;
 * otherwise only the last piece is (see func2).
 *
 * @param {string} word - A single whitespace-free word.
 * @returns {string} The converted word.
 */
function func1(word) {
  // Declared locally: an undeclared assignment would create a global and
  // throws a ReferenceError in strict mode / ES modules.
  var matches = word.match(/([A-Z][a-z]+[-!$%^&*()_+|~=`{}\[\]:";'<>?,.\/]*)/g);
  if (!matches || matches.length === 1) {
    return word.toUpperCase();
  }
  return func2(matches);
}

/**
 * Upper-cases the last piece of a split word and glues the pieces together.
 *
 * @param {string[]} matched - Pieces produced by func1; the last entry is
 *   upper-cased in place.
 * @returns {string} The re-joined word.
 */
function func2(matched) {
  var last = matched.length - 1;
  matched[last] = matched[last].toUpperCase();
  return matched.join('');
}
/**
 * Converts every whitespace-separated word of a phrase with func1 and joins
 * the results with single spaces.
 *
 * @param {string} isolated - The phrase to convert.
 * @returns {string} The converted phrase.
 */
function func3(isolated) {
  // All working variables are local; the undeclared originals leaked into
  // the global scope and fail outright in strict mode.
  var words = isolated.split(/\s/);
  var converted = [];
  for (var i = 0; i < words.length; i++) {
    converted.push(func1(words[i]));
  }
  return converted.join(' ');
}
The idea is to convert things to uppercase, rendering McGee, McIntosh, etc as McGEE, McINTOSH, etc. Resulted from this thread here: JavaScript Convert Names to Uppercase, Except Mc/Mac/etc
Initially it was destroying all punctuation, because it didn't fit in with the matches so it just vanished into thin air. So I added the punctuation into the regular expression on line two.
Unfortunately, I then came across the word "Free-Throw", which renders as "Free-THROW" instead of "FREE-THROW". Under the old code it rendered as "FreeTHROW", which isn't any better.
Is there a way I can tackle this other than carefully-phrased inputs? It's for an After Effects expression so there aren't any users to deal with but I'd rather be able to include that hyphen, and if a double-barreled McSomething shows up (McGee-Smith for example) I won't have much choice.

The comment suggesting limited scope is correct, just the wrong way around: rather than defining which prefixes to handle I can easily define punctuation to protect. Dashes and apostrophes are the only characters I really have to worry about appearing mid-word, and dashes are always going to be between "words" within a word.
So instead of all-capsing the last match to the regular expression, I now all-caps both that and any match which ends in a dash.
Apostrophes are removed before the process starts, and re-inserted at the same index after capitalisation is complete.
/**
 * Puts a typographic apostrophe back at the position it was removed from.
 *
 * @param {string} text - Text without the apostrophe.
 * @param {number} index - Original position of the apostrophe, or a negative
 *   number when there was none.
 * @returns {string} The text with the apostrophe restored (or unchanged).
 */
function reinsertApostrophe(text, index) {
  if (index < 0) {
    return text;
  }
  return text.slice(0, index) + "’" + text.slice(index);
}

/**
 * Upper-cases a single word while keeping capitalised prefixes such as "Mc"
 * ("McGee-Smith" -> "McGEE-SMITH", "Free-Throw" -> "FREE-THROW").
 *
 * The first typographic apostrophe is removed before matching and restored at
 * the same index afterwards.
 *
 * @param {string} word - A single whitespace-free word.
 * @returns {string} The converted word.
 */
function func1(word) {
  // Everything is local now: the undeclared originals became globals that
  // func2 silently depended on, and they throw in strict mode.
  var apostropheIndex = word.indexOf("’");
  var stripped = word.replace("’", "");
  var matches = stripped.match(/([-!$%^&*()_+|~=`{}\[\]:"“”;'’<>?,.\/]*[A-Z][a-z]+[-!$%^&*()_+|~=`{}\[\]:"“”;'’<>?,.\/]*)/g);
  if (!matches || matches.length === 1) {
    return reinsertApostrophe(stripped.toUpperCase(), apostropheIndex);
  }
  return func2(matches, apostropheIndex);
}

/**
 * Upper-cases the last piece and every piece containing a dash, then joins
 * the pieces and restores the apostrophe.
 *
 * @param {string[]} matched - Pieces produced by func1 (modified in place).
 * @param {number} [apostropheIndex] - Position at which to restore the
 *   apostrophe; omitted or negative means "no apostrophe".
 * @returns {string} The re-joined word.
 */
function func2(matched, apostropheIndex) {
  if (apostropheIndex === undefined) {
    apostropheIndex = -1;
  }
  var lastIndex = matched.length - 1;
  for (var x = 0; x < matched.length; x++) {
    if (matched[x].indexOf('-') >= 0 || x === lastIndex) {
      matched[x] = matched[x].toUpperCase();
    }
  }
  return reinsertApostrophe(matched.join(''), apostropheIndex);
}
/**
 * Splits a phrase on whitespace, converts each word with func1 and joins the
 * converted words with single spaces.
 *
 * @param {string} isolated - The phrase to convert.
 * @returns {string} The converted phrase.
 */
function func3(isolated) {
  // Local declarations replace the undeclared (implicitly global) variables,
  // which throw a ReferenceError in strict mode.
  var words = isolated.split(/\s/);
  var converted = [];
  for (var i = 0; i < words.length; i++) {
    converted.push(func1(words[i]));
  }
  return converted.join(' ');
}

Related

Determine if an array has repeated element by sorting the array

<script>
var arr = [];
// Appends every character of `str` to the outer `arr`, sorts `arr`, then
// compares neighbouring entries.
function repeater(str){
for (var i=0; i<str.length;i++)
{arr.push(str[i])}
arr.sort()
for (var g=0;g<arr.length;g++) {
// NOTE: both branches return, so only the g = 0 comparison is ever made:
// false when the first two sorted entries are equal, true otherwise.
if (arr[g]==arr[g+1])
{return false}
else {return true}
}
// Reached only when `arr` is empty; the function then returns undefined.
}
document.write(repeater("jrtgrt"))
console.log(arr)
</script>
Create a function that takes a string and returns either true or false depending on whether or not it has repeating characters.
The array is working by console, but the second part doesn't seem to be running.
Your loop is terminating after the first comparison because either way return is called.
<script>
// Sorted characters of the string most recently passed to repeater().
var arr = [];

/**
 * Reports whether a string contains any character more than once.
 *
 * Side effect: `arr` is refilled with the sorted characters of `str`.
 *
 * @param {string} str - The string to inspect.
 * @returns {boolean} true when at least one character repeats.
 */
function repeater(str) {
  // Empty the shared array first; otherwise characters from earlier calls
  // stay in it and produce false positives.
  arr.length = 0;
  for (var i = 0; i < str.length; i++) {
    arr.push(str[i]);
  }
  // After sorting, equal characters sit next to each other.
  arr.sort();
  for (var g = 0; g < arr.length - 1; g++) {
    if (arr[g] === arr[g + 1]) {
      return true;
    }
  }
  return false;
}
document.write(repeater("12s35sd46"))
console.log(arr)
</script>
There are a few ways to simplify your code.
You don't need a for loop to create an array from a string. You can use the split function.
arr = str.split("");
Your entire function can literally be simplified to this using a regular expression.
/**
 * Reports whether a string contains any character more than once.
 *
 * @param {string} str - The string to inspect.
 * @returns {boolean} true when some character occurs at least twice.
 */
function repeater(str) {
  // `s` lets `.` match line terminators, so repeats separated by (or made of)
  // newlines are found. `u` matches whole code points, so two different
  // astral characters sharing a high surrogate are not reported as a repeat.
  return /(.).*\1/su.test(str);
}
Here's an explanation of each part of the regular expression:
(.) any character, capture to use later
.* any character any number of times
\1 first captured character
An approach might not necessarily rely on an array's sort method. One, for instance, could stepwise shorten the list of characters and then, via an array's some method, determine whether the current character has a duplicate counterpart in the yet remaining list of characters. Thus one also keeps the amount of iteration cycles low ... example ...
/**
 * Tells whether any character of the given string occurs more than once.
 *
 * @param {string} stringValue - The string to inspect.
 * @returns {boolean} true as soon as a character has a twin further right.
 */
function hasDuplicateChars(stringValue) {
  var units = stringValue.split('');
  for (var position = 0; position < units.length; position++) {
    // Only look to the right of the current position; everything to the left
    // has already been checked against this character.
    if (units.indexOf(units[position], position + 1) !== -1) {
      return true;
    }
  }
  return false;
}
console.log('hasDuplicateChars("") ? ', hasDuplicateChars(""));
console.log('hasDuplicateChars("x") ? ', hasDuplicateChars("x"));
console.log('hasDuplicateChars("abcdefghijklmnopqrstuvwxyz") ? ', hasDuplicateChars("abcdefghijklmnopqrstuvwxyz"));
console.log('hasDuplicateChars("TheQuickBrownFox") ? ', hasDuplicateChars("TheQuickBrownFox"));
console.log('hasDuplicateChars("Hallo, world.") ? ', hasDuplicateChars("Hallo, world."));
console.log('hasDuplicateChars(" ") ? ', hasDuplicateChars(" "));
.as-console-wrapper { max-height: 100%!important; top: 0; }

How to check whether word within array is included in string, without regex?

I’m trying to figure out why my code is not giving the right output.
My input shouldn’t be contained within the array elements.
I found an easy way to solve it with regex, so I am not using regex for that one.
Please, break down my code, and tell me what is the problem with the code.
// Lower-cases `input`, splits it on single spaces and compares every
// resulting token with every entry of `words`.
function checkInput(input, words) {
var arr = input.toLowerCase().split(" ");
var i, j;
var matches = 0;
for (i = 0; i < arr.length; i++) {
for (j = 0; j < words.length; j++) {
// NOTE: the counter grows for every token/word pair that DIFFERS, so
// `matches` counts mismatches rather than matches.
if (arr[i] != words[j]) {
matches++;
}
}
}
// Returns true as soon as a single pair differed.
if (matches > 0) {
return true;
} else {
return false;
}
};
console.log(checkInput("Move an array element from one array", ["from"])); // should be false
console.log(checkInput("Move an array element from one array", ["elem"])); // should be true
if (arr[i] != words[j]) will be true at some point or another most of the time.
You want to check the opposite and return the opposite logic, so:
if(arr[i] == words[j]) {
matches++;
}
and
if (matches > 0) {
return false;
} else {
return true;
}
But a simpler way would be:
/**
 * Checks that none of `words` appears as a whole, space-separated token of
 * the lower-cased `input`. Entries of `words` are compared case-sensitively.
 *
 * @param {string} input - Text to search.
 * @param {string[]} words - Words that must not occur.
 * @returns {boolean} true when no word was found, false otherwise.
 */
function checkInput(input, words) {
  const inputTokens = input.toLowerCase().split(" ");
  const firstHit = words.find(function (candidate) {
    return inputTokens.includes(candidate);
  });
  // `find` yields undefined when no candidate satisfied the callback.
  return firstHit === undefined;
}
Array.prototype.find will return undefined iff no element is found that satisfies a specific rule provided by the callback function, word => lowerCaseInput.includes(word) in this case. So we check whether its return value is undefined which will tell us whether a word has been matched in input.
Note that your function unnecessarily checks for the entire words array even though it only matters whether one word matches.
Also, the words in words are case-sensitive! If you don’t want that, replace .includes(word) by .includes(word.toLowerCase()).
Because you are using matches > 0, you think it will return true only when no matches is found. But what happens is that when you have input ab aa cc and word aa
first iteration matches = 1
second iteration matches = 1
third iteration matches = 2
So matches will contain how many times word is different from items of input. So as result, it will always return true as long as input is more than two words long, for at least one word of input will be different from word. You can rather consider increasing the value of matches if word is found instead.
/**
 * Counts how often an entry of `words` equals a space-separated token of the
 * lower-cased `input`.
 *
 * @param {string} input - Text to search.
 * @param {string[]} words - Words that must not occur.
 * @returns {boolean} true when the count is zero, false otherwise.
 */
function checkInput(input, words) {
  const tokens = input.toLowerCase().split(" ");
  let matchCount = 0;
  for (const token of tokens) {
    for (let k = 0; k < words.length; k += 1) {
      if (words[k] === token) {
        matchCount += 1;
      }
    }
  }
  return matchCount === 0;
}
console.log(checkInput("Move an array element from one array", ["from"]));
console.log(checkInput("Move an array element from one array", ["elem"]));

How to write a character matching algorithm in JavaScript?

Given this input s1 = "dadxx" s2 = "ddxx" I'd expect the output to contain a bunch of a,b pairs wherever each character in s1 matched a character in s2 and vice versa (duplicates allowed). Among those pairs would be 0,0 because s1[0] and s2[0] are both equal to d.
The problem is that my output doesn't contain 2,1 even though s1[2] and s2[1] are both equal to d.
Can someone fix my algorithm or make a better one?
Here's a JSFiddle if it helps.
Here's my code:
// For each char, see if other string contains it
s1 = 'dadxx'
s2 = 'ddxx'
matchChars(s1,s2)
matchChars(s2,s1)
// For each character of `a`, logs its index together with an index of the
// same character in `b`. The order of the two logged numbers depends on
// whether `a` equals the outer `s1`.
function matchChars(a,b) {
// NOTE: `i` and `found` are assigned without a declaration.
for (i = 0; i < a.length; i++) {
// indexOf reports only the FIRST position of a[i] in `b`, so later
// occurrences of the same character in `b` are never logged.
found = b.indexOf(a[i])
if (found >= 0) {
if (a===s1) console.log(i,found)
else console.log(found,i)
}
}
}
I believe the problem you're having is that you're only checking for a single match for s1[i] in s2 by using indexOf. That will find the first index of a matched value, not every index.
If you instead iterate through both strings and compare every character, you get the result I think you're trying to achieve.
// Define strings
s1 = 'dadxx'
s2 = 'ddxx'
matchChars(s1,s2)
matchChars(s2,s1)
/**
 * Logs an index pair for every position where a character of `a` equals a
 * character of `b`, ignoring case.
 *
 * Pairs are always logged as "index in s1, index in s2": when `a` is the
 * outer `s1` the pair is (i, j), otherwise it is (j, i).
 *
 * @param {string} a - First string to compare.
 * @param {string} b - Second string to compare.
 */
function matchChars(a, b) {
  // Decide the output order BEFORE lower-casing: comparing the lower-cased
  // copy with s1 fails as soon as s1 contains an upper-case letter.
  var aIsFirstString = (a === s1);
  // Lower-case copies give case-insensitive matching; compare `a`/`b`
  // directly instead if case-sensitive matching is required.
  var lowerA = a.toLowerCase();
  var lowerB = b.toLowerCase();
  // `i` and `j` are declared locally so they do not leak into (or collide
  // with) the global scope.
  for (var i = 0; i < lowerA.length; i++) {
    for (var j = 0; j < lowerB.length; j++) {
      if (lowerA[i] === lowerB[j]) {
        if (aIsFirstString) {
          console.log(i, j);
        } else {
          console.log(j, i);
        }
      }
    }
  }
}
Working JSBin example: https://jsbin.com/wecijopohi/edit?js,console
You say duplicates are allowed but not required. I'm submitting this as a more modern approach, not as a correction to the accepted solution, which looks good to me. https://jsfiddle.net/avc705zr/3/
/**
 * Lists every [indexInA, indexInB] pair at which a character of `a` equals a
 * character of `b`. Indices are UTF-16 code-unit positions.
 *
 * @param {string} a - String whose characters are looked up.
 * @param {string} b - String that is searched.
 * @returns {number[][]} Pairs ordered by index in `a`, then by index in `b`.
 */
const match = (a, b) => {
  const matches = [];
  for (let i = 0; i < a.length; i += 1) {
    // Plain indexOf instead of `new RegExp(char)`: a raw character such as
    // "." or "(" is a regex metacharacter and would mis-match or throw.
    let found = b.indexOf(a[i]);
    while (found !== -1) {
      matches.push([i, found]);
      found = b.indexOf(a[i], found + 1);
    }
  }
  return matches;
};
However, in my experience, when you actually need functionality like this, you only need one of the strings to be exhausted. In other words, you are looking for matches in string 2 of all instances in string 1 -- which is to say, of the unique characters in string 1. So a modification which might come up in the real world might instead be like:
// Adds Array.prototype.unique(): returns a new array holding the first
// occurrence of each value (compared with indexOf, i.e. strict equality).
// Defined as non-enumerable so the method does not show up as a key in
// `for...in` loops over arrays, which a plain assignment would cause.
Object.defineProperty(Array.prototype, 'unique', {
  value: function unique() {
    return this.filter(function (value, index, self) {
      // Keep an entry only at the first index where its value appears.
      return self.indexOf(value) === index;
    });
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
/**
 * For every distinct character of `a`, lists each position at which it
 * occurs in `b` as a [character, indexInB] pair.
 *
 * @param {string} a - String supplying the characters to look for.
 * @param {string} b - String that is searched.
 * @returns {Array} Pairs ordered by first appearance of the character in `a`,
 *   then by index in `b`.
 */
const match = (a, b) => {
  const matches = [];
  // A Set keeps the first-occurrence order of the characters and removes
  // duplicates without having to extend Array.prototype.
  for (const l of new Set(a.split(''))) {
    // indexOf instead of `new RegExp(l)`: characters such as "." or "(" are
    // regex metacharacters and would mis-match or throw.
    let found = b.indexOf(l);
    while (found !== -1) {
      matches.push([l, found]);
      found = b.indexOf(l, found + 1);
    }
  }
  return matches;
};

Javascript Palindrome Check

I have to write a script to check if a word entered by a user is a Palindrome. I've gotten as far as validating the word and displaying the number of characters. Also not supposed to use the reverse method.
I've looked at some of the examples here and think I need to turn the user input into a string and use a "for" loop and if/else statement. But how do I turn the user input into a string in order to check each character? This is a total mess but it's all I've got so far:
/**
 * Builds the answer text by appending the result of retrieveWord() to the
 * fixed prefix "Your word is".
 *
 * @param {*} userWord3 - Value handed on to retrieveWord().
 * @returns {string} The prefix followed by retrieveWord's result.
 */
function checkWord(userWord3) {
  return "Your word is" + retrieveWord(userWord3);
}
// Compares characters from both ends of a word towards the middle and
// returns false on the first mismatch.
function retrieveWord(userWord) {
var string = userWord;
// NOTE: `i` is declared here and again in the loop header below.
var i = userWord.length;
// NOTE: the loop reads `str`, but this function only declares `string` and
// `userWord`; `str` is not defined anywhere in this function.
for(var i = 0; i < str.length / 2; i++) {
alert(str[i], str[str.length -i -1]);
if( str[i] != str[str.length - i -1] ) {
return false;
}
}
// No return statement follows the loop, so the result is undefined when no
// mismatch was found.
}
You can try this function
/**
 * Recursively decides whether a string reads the same in both directions.
 *
 * @param {string} str - The string to test.
 * @returns {boolean} true for a palindrome (including "" and one character).
 */
function isPalindrome(str) {
  var isTrivial = str.length < 2;
  if (isTrivial) {
    return true;
  }
  var first = str[0];
  var last = str.slice(-1);
  // Differing outer characters settle the answer; otherwise strip them and
  // examine what is left.
  return first != last ? false : isPalindrome(str.slice(1, -1));
}
It uses recursion and its logic is as follows
The empty and 1 character string are considered palindromes
if(str.length == 0 || str.length == 1) return true;
if the first and last characters are not the same the word is not a palindrome
if(str[0] != str.slice(-1)) return false;
if the first and last are the same continue searching in the remaining string
return isPalindrome(str.slice(1,-1));
var result = document.querySelector(".result");
var palindrome = "<span class='palindrome'>it is a palindrome</span>";
var notpalindrome = "<span class='notpalindrome'>it is NOT a palindrome</span>";
/**
 * Recursively decides whether a string reads the same in both directions.
 *
 * @param {string} str - The string to test.
 * @returns {boolean} true for a palindrome (including "" and one character).
 */
function isPalindrome(str) {
  var size = str.length;
  if (size == 0 || size == 1) {
    return true;
  }
  var outerCharsDiffer = str[0] != str.slice(-1);
  if (outerCharsDiffer) {
    return false;
  }
  // Outer characters agree: continue with the string between them.
  var inner = str.slice(1, -1);
  return isPalindrome(inner);
}
// After every key release, show whether the input's current text is a
// palindrome by writing the matching markup into the result element.
document.querySelector("input").addEventListener("keyup", function () {
  result.innerHTML = isPalindrome(this.value) ? palindrome : notpalindrome;
});
.palindrome{color: green;}
.notpalindrome{color: red;}
<input type="text" />
<span class="result"></span>
How are you collecting the user input? In just about every case, it will come into the program as a string (i.e. textbox, prompt), so you don't have to worry about converting it into one.
This code simply takes the word, breaks it into an array, reverses the array and then compares that reversal against the original word. It works for me:
/**
 * Checks whether a string equals its own reversal.
 *
 * @param {string} input - The word to check.
 * @returns {boolean} true when `input` reads the same backwards.
 */
function test(input) {
  var letters = [];
  var position = 0;
  while (position < input.length) {
    letters.push(input.charAt(position));
    position += 1;
  }
  var backwards = letters.reverse().join('');
  return backwards === input;
}
// Pass the element's text, not the element itself: test() walks
// `input.length` and compares against a string, so a DOM element would
// always yield false.
// NOTE(review): assumes #userWord is a form control exposing `.value` - confirm.
var word = document.getElementById("userWord").value;
alert(test(word));
See working version at: https://jsfiddle.net/6cett0bc/6/
The most basic version I can think of is to split the word into letters and check the first against the last, until you end up in the middle, where it doesn't matter if there is an odd amount of letters.
UPDATE I've tested the performance of various implementations and changed my array based answer to a pure string based solution.
If you're curious, here are the performance benchmarks
The fastest solution (so far):
/**
 * Decides whether a word reads the same forwards and backwards by walking
 * two indices from the ends towards each other.
 *
 * @param {string} word - The word to check.
 * @returns {boolean} false at the first mismatching pair, true otherwise.
 */
function palindrome(word) {
  var left = 0;
  var right = word.length - 1;
  while (left < right) {
    // Compare the current letter with its counterpart on the opposite side.
    if (word[left] !== word[right]) {
      return false;
    }
    left += 1;
    right -= 1;
  }
  // Every pair matched (or there was nothing to compare).
  return true;
}
// listen for clicks on the button and send the entered value to the palindrom function
// On every button click, read the input field and switch its 'palindrome'
// CSS class on or off according to the result of palindrome().
document.querySelector('button').addEventListener('click', function () {
  var field = document.querySelector('input');
  // toggle(name, force) adds the class when `force` is true and removes it
  // when `force` is false.
  field.classList.toggle('palindrome', palindrome(field.value));
});
input {
color: red;
}
input.palindrome {
color: green;
}
<input name=check placeholder=palindrome><button>check</button>
The text turns green if you have entered a palindrome successfully, red (default) otherwise.

JavaScript strings outside of the BMP

BMP being Basic Multilingual Plane
According to JavaScript: the Good Parts:
JavaScript was built at a time when Unicode was a 16-bit character set, so all characters in JavaScript are 16 bits wide.
This leads me to believe that JavaScript uses UCS-2 (not UTF-16!) and can only handle characters up to U+FFFF.
Further investigation confirms this:
> String.fromCharCode(0x20001);
The fromCharCode method seems to only use the lowest 16 bits when returning the Unicode character. Trying to get U+20001 (CJK unified ideograph 20001) instead returns U+0001.
Question: is it at all possible to handle post-BMP characters in JavaScript?
2011-07-31: slide twelve from Unicode Support Shootout: The Good, The Bad, & the (mostly) Ugly covers issues related to this quite well:
Depends what you mean by ‘support’. You can certainly put non-UCS-2 characters in a JS string using surrogates, and browsers will display them if they can.
But, each item in a JS string is a separate UTF-16 code unit. There is no language-level support for handling full characters: all the standard String members (length, split, slice etc) all deal with code units not characters, so will quite happily split surrogate pairs or hold invalid surrogate sequences.
If you want surrogate-aware methods, I'm afraid you're going to have to start writing them yourself! For example:
// Length of the string in code points: every well-formed surrogate pair
// (high surrogate followed by low surrogate) is counted once instead of twice.
String.prototype.getCodePointLength = function () {
  var pairs = this.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
  var pairCount = pairs === null ? 0 : pairs.length;
  return this.length - pairCount;
};
// Fallback for engines without a native String.fromCodePoint. The guard keeps
// the built-in implementation (including its RangeError for invalid code
// points) where one exists instead of overwriting it.
if (typeof String.fromCodePoint !== 'function') {
  // Builds a string from any number of code points, encoding values at or
  // above 0x10000 as a UTF-16 surrogate pair.
  String.fromCodePoint = function () {
    var units = Array.prototype.slice.call(arguments);
    // Walk backwards so that splicing two units in place of one does not
    // shift entries that still have to be visited.
    for (var i = units.length; i-- > 0;) {
      var offset = units[i] - 0x10000;
      if (offset >= 0) {
        units.splice(i, 1, 0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
      }
    }
    return String.fromCharCode.apply(null, units);
  };
}
I came to the same conclusion as bobince. If you want to work with strings containing unicode characters outside of the BMP, you have to reimplement javascript's String methods. This is because javascript counts characters as each 16-bit code value. Symbols outside of the BMP need two code values to be represented. You therefore run into a case where some symbols count as two characters and some count only as one.
I've reimplemented the following methods to treat each unicode code point as a single character: .length, .charCodeAt, .fromCharCode, .charAt, .indexOf, .lastIndexOf, .splice, and .split.
You can check it out on jsfiddle: http://jsfiddle.net/Y89Du/
Here's the code without comments. I tested it, but it may still have errors. Comments are welcome.
if (!String.prototype.ucLength) {
  // Length of the string in code points: each well-formed surrogate pair is
  // counted as one character.
  // Approach taken from
  // http://stackoverflow.com/questions/3744721/javascript-strings-outside-of-the-bmp
  String.prototype.ucLength = function () {
    var pairs = this.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    var pairCount = pairs === null ? 0 : pairs.length;
    return this.length - pairCount;
  };
}
// Installed only when the engine has no String.prototype.codePointAt.
// NOTE(review): `ucPos` here counts code points (a surrogate pair advances the
// counter by one), whereas the standard ES2015 codePointAt takes a UTF-16
// code-unit index. Where the native method exists this body is skipped, so
// callers expecting code-point indexing get different results - confirm.
if (!String.prototype.codePointAt) {
String.prototype.codePointAt = function (ucPos) {
// A missing or non-numeric position means the first character.
if (isNaN(ucPos)){
ucPos = 0;
}
var str = String(this);
// Stays null when `ucPos` lies beyond the end of the string.
var codePoint = null;
var pairFound = false;
// Index of the current character counted in code points.
var ucIndex = -1;
// Index of the current character counted in UTF-16 code units.
var i = 0;
while (i < str.length){
ucIndex += 1;
var code = str.charCodeAt(i);
var next = str.charCodeAt(i + 1);
// True when `code` is a high surrogate directly followed by a low surrogate.
pairFound = (0xD800 <= code && code <= 0xDBFF && 0xDC00 <= next && next <= 0xDFFF);
if (ucIndex == ucPos){
// Combine the surrogate pair into one code point, or use the unit as is.
codePoint = pairFound ? ((code - 0xD800) * 0x400) + (next - 0xDC00) + 0x10000 : code;
break;
} else{
// Skip both units of a pair so it is counted as one character.
i += pairFound ? 2 : 1;
}
}
return codePoint;
};
}
// Installed only when the engine has no String.fromCodePoint.
// Builds a string from the code points passed as arguments.
if (!String.fromCodePoint) {
String.fromCodePoint = function () {
var strChars = [], codePoint, offset, codeValues, i;
for (i = 0; i < arguments.length; ++i) {
codePoint = arguments[i];
offset = codePoint - 0x10000;
if (codePoint > 0xFFFF){
// Above the BMP: encode as a high surrogate / low surrogate pair.
codeValues = [0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)];
} else{
// Inside the BMP: a single UTF-16 code unit is enough.
codeValues = [codePoint];
}
strChars.push(String.fromCharCode.apply(null, codeValues));
}
return strChars.join("");
};
}
if (!String.prototype.ucCharAt) {
  /**
   * Returns the character at a position counted in code points, so a
   * surrogate pair is one character.
   *
   * The string is scanned directly rather than through codePointAt(): the
   * native codePointAt takes a code-unit index, which made this method return
   * half of a surrogate pair (or throw past the end) on engines that ship it.
   *
   * @param {number} ucIndex - Zero-based code-point position; a missing or
   *   non-numeric value means 0.
   * @returns {string} The character, or "" when the position is out of range.
   */
  String.prototype.ucCharAt = function (ucIndex) {
    var str = String(this);
    var wanted = isNaN(ucIndex) ? 0 : Number(ucIndex);
    var seen = 0; // code points passed so far
    var i = 0;    // current position in UTF-16 code units
    while (i < str.length) {
      var code = str.charCodeAt(i);
      var next = str.charCodeAt(i + 1); // NaN at the end; comparisons are then false
      var isPair = code >= 0xD800 && code <= 0xDBFF && next >= 0xDC00 && next <= 0xDFFF;
      var width = isPair ? 2 : 1;
      if (seen === wanted) {
        return str.slice(i, i + width);
      }
      seen += 1;
      i += width;
    }
    return "";
  };
}
if (!String.prototype.ucIndexOf) {
  // Code-point-aware indexOf: returns the first position (counted in code
  // points, via ucLength/ucSlice) at or after `ucStart` where `searchStr`
  // occurs, or -1. A missing, non-numeric or negative `ucStart` means 0.
  String.prototype.ucIndexOf = function (searchStr, ucStart) {
    var from = (isNaN(ucStart) || ucStart < 0) ? 0 : ucStart;
    var haystack = String(this);
    var haystackLength = haystack.ucLength();
    var needle = String(searchStr);
    var needleLength = needle.ucLength();
    for (var pos = from; pos < haystackLength; pos++) {
      // Compare the window starting at `pos` with the search string.
      if (haystack.ucSlice(pos, pos + needleLength) == needle) {
        return pos;
      }
    }
    return -1;
  };
}
if (!String.prototype.ucLastIndexOf) {
  // Code-point-aware lastIndexOf: scans backwards from `ucStart` (counted in
  // code points, via ucLength/ucSlice) and returns the first position where
  // `searchStr` occurs, or -1. A missing, non-numeric or too large `ucStart`
  // means the last character.
  String.prototype.ucLastIndexOf = function (searchStr, ucStart) {
    var haystack = String(this);
    var haystackLength = haystack.ucLength();
    var from = (isNaN(ucStart) || ucStart >= haystackLength) ? haystackLength - 1 : ucStart;
    var needle = String(searchStr);
    var needleLength = needle.ucLength();
    for (var pos = from; pos >= 0; pos--) {
      // Compare the window starting at `pos` with the search string.
      if (haystack.ucSlice(pos, pos + needleLength) == needle) {
        return pos;
      }
    }
    return -1;
  };
}
if (!String.prototype.ucSlice) {
  /**
   * Code-point-aware counterpart of String.prototype.slice: positions are
   * counted in code points (via ucLength/ucCharAt), so surrogate pairs are
   * never cut in half.
   *
   * @param {number} [ucStart] - First position to include; negative values
   *   count from the end, missing/non-numeric means 0.
   * @param {number} [ucStop] - Position to stop before; negative values count
   *   from the end, omitted means "to the end of the string".
   * @returns {string} The selected part of the string.
   */
  String.prototype.ucSlice = function (ucStart, ucStop) {
    var str = String(this);
    var strUCLength = str.ucLength();
    var start = isNaN(ucStart) ? 0 : ucStart;
    if (start < 0) {
      start = Math.max(strUCLength + start, 0);
    }
    // An omitted end means the full length. The previous default of
    // `length - 1` silently dropped the last character.
    var stop = (typeof ucStop === 'undefined') ? strUCLength : ucStop;
    if (stop < 0) {
      stop = Math.max(strUCLength + stop, 0);
    }
    // Never read past the end of the string.
    if (stop > strUCLength) {
      stop = strUCLength;
    }
    var ucChars = [];
    for (var i = start; i < stop; i++) {
      ucChars.push(str.ucCharAt(i));
    }
    return ucChars.join("");
  };
}
if (!String.prototype.ucSplit) {
  /**
   * Code-point-aware counterpart of String.prototype.split. With an empty
   * delimiter the string is split into whole characters (surrogate pairs stay
   * together, via ucLength/ucCharAt); any other delimiter is passed to the
   * native split.
   *
   * @param {string|RegExp} delimeter - Separator; "" splits per character.
   * @param {number} [limit] - Maximum number of entries to return.
   * @returns {string[]} The pieces of the string.
   */
  String.prototype.ucSplit = function (delimeter, limit) {
    var str = String(this);
    if (delimeter != '') {
      return str.split(delimeter, limit);
    }
    var strUCLength = str.ucLength();
    var ucChars = [];
    for (var i = 0; i < strUCLength; i++) {
      ucChars.push(str.ucCharAt(i));
    }
    // Truncate only when a limit was given: `slice(0, 0 + undefined)` is
    // `slice(0, NaN)` and always produced an empty array. `>>> 0` mirrors the
    // unsigned conversion the native split applies to its limit.
    if (typeof limit !== 'undefined') {
      ucChars = ucChars.slice(0, limit >>> 0);
    }
    return ucChars;
  };
}
More recent JavaScript engines have String.fromCodePoint.
const ideograph = String.fromCodePoint( 0x20001 ); // outside the BMP
Also a code-point iterator, which gets you the code-point length.
/**
 * Counts the items produced by the value's iterator; for a string these are
 * its code points, so a surrogate pair counts once.
 *
 * @param {string} str - The string to measure.
 * @returns {number} Number of code points.
 */
function countCodePoints(str) {
  let total = 0;
  for (const _codePoint of str) {
    total += 1;
  }
  return total;
}
console.log( ideograph.length ); // gives '2'
console.log( countCodePoints(ideograph) ); // '1'
Yes, you can. Although support to non-BMP characters directly in source documents is optional according to the ECMAScript standard, modern browsers let you use them. Naturally, the document encoding must be properly declared, and for most practical purposes you would need to use the UTF-8 encoding. Moreover, you need an editor that can handle UTF-8, and you need some input method(s); see e.g. my Full Unicode Input utility.
Using suitable tools and settings, you can write var foo = '𠀁'.
The non-BMP characters will be internally represented as surrogate pairs, so each non-BMP character counts as 2 in the string length.
Using for (c of this) instruction, one can make various computations on a string that contains non-BMP characters. For instance, to compute the string length, and to get the nth character of the string:
// Length of the string in code points: spreading uses the string iterator,
// which yields each surrogate pair as a single item.
String.prototype.magicLength = function () {
  return [...this].length;
};
// Returns the n-th character of the string counted in code points, or ""
// when the string has no such position.
String.prototype.magicCharAt = function (n) {
  let position = 0;
  for (const symbol of this) {
    // Loose comparison on purpose: a numeric string such as "1" selects
    // position 1.
    if (position == n) {
      return `${symbol}`;
    }
    position += 1;
  }
  return "";
};
This old topic has now a simple solution in ES6:
Split characters into an array
simple version
[..."😴😄😃⛔🎠🚓🚇"] // ["😴", "😄", "😃", "⛔", "🎠", "🚓", "🚇"]
Then having each one separated you can handle them easily for most common cases.
Credit: DownGoat
Full solution
To handle special emojis such as the one in the comment, one can search for the joining character (U+200D ZERO WIDTH JOINER, char code 8205) and merge the pieces around it back together. Here is how:
// Splits a string into user-visible characters: first into code points, then
// re-joins the pieces of emoji sequences glued together by U+200D.
const myStr = "👩\u200D👩\u200D👧\u200D👧😃𝌆";
const arr = [...myStr];
const ZERO_WIDTH_JOINER = 0x200D; // char code 8205
// Walk backwards so that splicing never shifts entries still to be visited.
// Start at the second-to-last entry and stop at index 1: a joiner needs both
// a left and a right neighbour to merge with.
for (let i = arr.length - 2; i >= 1; i--) {
  if (arr[i].charCodeAt(0) === ZERO_WIDTH_JOINER) {
    // Fold "left + joiner + right" into the left entry ...
    arr[i - 1] += arr[i] + arr[i + 1];
    // ... and drop the joiner and the right entry.
    arr.splice(i, 2);
  }
}
console.log(arr.length) //3
Haven't found a case where this doesn't work. Comment if you do.
To conclude
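Char code 8205 (U+200D) is the Unicode ZERO WIDTH JOINER. It is not a JavaScript-specific way of representing UCS-2 characters as UTF-16 combinations: it is an ordinary Unicode character that tells the renderer to display the emoji on either side of it as one combined glyph, which is why such sequences have to be merged back together by hand after splitting a string into code points.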

Categories

Resources