Matching a letter from any language spoken

Matching a letter from any language spoken - javascript

.NET, Java, Perl, PHP, Python3(?) all support PCRE's \p{L} regex that matches unicode character representing a letter, but there is no such a shortcut in JavaScript (as far as I know)... I'm working on a library focused on string manipulations, and I badly need the equivalent for JavaScript. So far I've got the 1172 characters long regex bellow, built in a rather clunky way. I would appreciate if someone could confirm/deny if got it right, or better, how to make it more general and accurate. Here's the way I've got it, top to bottom:
//
// JavaScript synonym for (.NET/Java/Perl/PCRE)'s `\p{L}` regexp.
// get range of characters
function crange (a, z) {
var rng = [];
if (a <= z) {
for (
var cc = a.charCodeAt(0) - 1,
stop = z.charCodeAt(0),
fromcc = String.fromCharCode;
++cc <= stop;
rng.push(fromcc(cc))
);
}
return rng;
}
// maps a list to another 2D-list
// containing arrays with successive integers in it
Array.prototype.intranges = function () {
for (
var it = 0,
// filter unique and numericaly sorted
// list of integers from given array
self = this.filter(_intranges).sort(_nsort),
len = self.length,
res = [],
buff,
curr;
buff = curr = self[it], it < len;
// ignore update
) {
// increment while integers are successive
while (self[(++it)] == (++buff));
// save
res.push(
(self[(self.indexOf(curr) + 1)] == self[it]) ?
[curr] : [curr, self[(it - 1)]]
);
}
return res;
};
var letter_regex =
// get all characters < 0xffff;
crange('\u0000', '\uffff')
// create [(int) codePoint, (char) character] pairs
.map(function (c, i) {
return [i, c];
})
// this one is tricky...
// what holds true for a character
// that is regular letter,
// not punctuation, whitespace, number,
// or any other (fancy) unicode symbol?
//
// I'm sure this part can be improved.
// It checks if a character has it's
// lower/upper-case version,
// assuming it's true for letters only...
.filter(function (pair) {
var p1 = pair[1];
return p1.toUpperCase() != p1.toLowerCase();
})
// fetch those code-points
.map(function (pair) {
return pair[0];
})
// build integer subranges out of them
.intranges()
// build a string out of it
// that can be used by `RegExp`
.map(function (ccrange) {
return ccrange.map(function (cc) {
var c = cc.toString(16);
return (cc <= 0xff) ? ('\\x' + pad02(c)) : ('\\u' + pad04(c));
}).join('-');
})
.join('');
//
//
// and it generated this (10ft) long string:
//
// letter_regex = '\x41-\x5A\x61-\x7A\xB5\xC0-\xD6\xD8-\xDE\xE0-\xF6\xF8-\u0137\u0139-\u0148\u014A-\u018C\u018E-\u019A\u019C-\u01A9\u01AC-\u01B9\u01BC-\u01BD\u01BF\u01C4-\u01EF\u01F1-\u0220\u0222-\u0233\u023A-\u0254\u0256-\u0257\u0259\u025B\u0260\u0263\u0265-\u0266\u0268-\u0269\u026B\u026F\u0271-\u0272\u0275\u027D\u0280\u0283\u0288-\u028C\u0292\u0345\u0370-\u0373\u0376-\u0377\u037B-\u037D\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AF\u03B1-\u03D1\u03D5-\u03F2\u03F4-\u03F5\u03F7-\u03FB\u03FD-\u0481\u048A-\u0527\u0531-\u0556\u0561-\u0586\u10A0-\u10C5\u10C7\u10CD\u1D79\u1D7D\u1E00-\u1E95\u1E9B\u1E9E\u1EA0-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F51\u1F53\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB1\u1FB3\u1FB8-\u1FBC\u1FBE\u1FC3\u1FC8-\u1FCC\u1FD0-\u1FD1\u1FD8-\u1FDB\u1FE0-\u1FE1\u1FE5\u1FE8-\u1FEC\u1FF3\u1FF8-\u1FFC\u2126\u212A-\u212B\u2132\u214E\u2160-\u217F\u2183-\u2184\u24B6-\u24E9\u2C00-\u2C2E\u2C30-\u2C5E\u2C60-\u2C70\u2C72-\u2C73\u2C75-\u2C76\u2C7E-\u2CE3\u2CEB-\u2CEE\u2CF2-\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA640-\uA66D\uA680-\uA697\uA722-\uA72F\uA732-\uA76F\uA779-\uA787\uA78B-\uA78D\uA790-\uA793\uA7A0-\uA7AA\uFF21-\uFF3A\uFF41-\uFF5A';
//
//
function pad02 (c) {
return (Array(3).slice(c.length).join('0') + c).toUpperCase();
}
function pad04 (c) {
return (Array(5).slice(c.length).join('0') + c).toUpperCase();
}
// filter out unique integers
function _intranges (node, pos, self) {
return _isint(node) && (pos <= self.indexOf(node));
}
function _isint (n) {
return (n | 0) === n;
}
function _nsort (n1, n2) {
return n1 - n2;
}
// /eof

I think I have the regex equiv. for \p{L}, I've used BabelMap app to generate it. It covers 48k+ letter characters in {Ll, Lm, Lo, Lt, Lu} sets:
// JavaScript unicode letter regex: (4185 characters)
letter_regex = /[\x41-\x5A\x61-\x7A\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05D0-\u05EA\u05F0-\u05F2\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u0858\u08A0\u08A2-\u08AC\u0904-\u0939\u093D\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097F\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0\u0AE1\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3D\u0C58\u0C59\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0\u0CE1\u0CF1\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D60\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32\u0E33\u0E40-\u0E46\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD-\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0EC6\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065\u1066\u106E-\u1070\u1075-\u1081\u108E\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F4\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17D7\u17DC\u1820-\u1877\u1880-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191C\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19C1-\u19C7\u1A00-\u1A16\u1A20-\u1A54\u1AA7\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C7D\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5\u1CF6\u1D00-\u1DBF\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2183\u2184\u2C00-\u2C2E\u2C30-\u2C5E\u2C60-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2E2F\u3005\u3006\u3031-\u3035\u303B\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312D\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FCC\uA000-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA67F-\uA697\uA6A0-\uA6E5\uA717-\uA71F\uA722-\uA788\uA78B-\uA78E\uA790-\uA793\uA7A0-\uA7AA\uA7F8-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9CF\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA76\uAA7A\uAA80-\uAAAF\uAAB1\uAAB5\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEA\uAAF2-\uAAF4\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
//
I've posted the version that includes code points > 0xffff and characters it matches here, (it's to much text to dump in single SO post).

The below javascript solution catch the most common cases:
function(control) {
// copied from https://stackoverflow.com/questions/23577220/matching-a-letter-from-any-language-spoken
const isControlValid = control.value.match(/^[\x41-\x5A\x61-\x7A\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05D0-\u05EA\u05F0-\u05F2\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u0858\u08A0\u08A2-\u08AC\u0904-\u0939\u093D\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097F\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0\u0AE1\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3D\u0C58\u0C59\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0\u0CE1\u0CF1\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D60\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32\u0E33\u0E40-\u0E46\u0E81\u0E82\u0E84\u0E87\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA\u0EAB\u0EAD-\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0EC6\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065\u1066\u106E-\u1070\u1075-\u1081\u108E\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F4\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17D7\u17DC\u1820-\u1877\u1880-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191C\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19C1-\u19C7\u1A00-\u1A16\u1A20-\u1A54\u1AA7\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C7D\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5\u1CF6\u1D00-\u1DBF\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2183\u2184\u2C00-\u2C2E\u2C30-\u2C5E\u2C60-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2E2F\u3005\u3006\u3031-\u3035\u303B\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312D\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FCC\uA000-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA67F-\uA697\uA6A0-\uA6E5\uA717-\uA71F\uA722-\uA788\uA78B-\uA78E\uA790-\uA793\uA7A0-\uA7AA\uA7F8-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9CF\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA76\uAA7A\uAA80-\uAAAF\uAAB1\uAAB5\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEA\uAAF2-\uAAF4\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC,'-.\s]+$/g);
// (([^\x00-\x7F]+|[\w]+)?[.|\-|'|\s]{2,}([^\x00-\x7F]+|[\w]+)?)+ Validate cases like "asfagas--awgawg", "awfawf....asawgaw"
// ([^\x00-\x7F]+|[\w]+)?[\-|\s]{1}$ Validate cases like "awfwafaw ", "awfwafaw-"
// ^[\-|\s]{1}([^\x00-\x7F]+|[\w]+)?$ Validate cases like " awfwafaw ", "-awfwafaw"
const hasErrorCombinations = control.value.match(/(([^\x00-\x7F]+|[\w]+)?[.\s\-']{4,}|[.\-']{3,}|[.]{2,}|[\-]{2,}|[']{2,}|[\s]{2,}([^\x00-\x7F]+|[\w]+)?)+|([^\x00-\x7F]+|[\w]+)?[\-|\s]{1}$|^[\-|\s]{1}([^\x00-\x7F]+|[\w]+)?$|^[\s]+|[\s]$/g);
const hasAdditionalSymbolsCheckErrors = control.value.match(/[()+*]+/g);
return isControlValid && !hasErrorCombinations && !hasAdditionalSymbolsCheckErrors;
}

Related

How to build a function that searches for string occurrences?

I need help Writing a function subLength() that takes 2 parameters, a string and a single character. The function should search the string for the two occurrences of the character and return the length between them including the 2 characters. If there are less than 2 or more than 2 occurrences of the character the function should return 0. How can I solve this problem using loops?
subLength('Saturday', 'a'); // returns 6
subLength('summer', 'm'); // returns 2
subLength('digitize', 'i'); // returns 0
subLength('cheesecake', 'k'); // returns 0

Here I loop through the characters of the string to find each value that is the char.
if the length isn't 2, return 0.
using slice, get only the characters within the two found indexs and get that length adding one to fix the offset
const subLength = (str, char) => {
let strChars = str.toLowerCase().split(""),
found = [],
length = 0;
strChars.forEach((val, index) => {
if (val === char) {
found.push(index);
}
});
if (found.length != 2) {
return length;
}
return str.slice(found[0], found[1]).length + 1;
}
console.log(subLength('Saturday', 'a')); // returns 6
console.log(subLength('summer', 'm')); // returns 2
console.log(subLength('digitize', 'i')); // returns 0
console.log(subLength('cheesecake', 'k')); // returns 0

You can try this logic:
Loop over string and count number of occurance
if count is 2,
Create a regex to capture the string in between.
Return its length
Else return 0
function subLength(str, char) {
let length = 0;
const occuranceCount = Array
.from(str)
.filter((c) => c.toLowerCase() === char.toLowerCase())
.length
if (occuranceCount === 2) {
const regex = new RegExp(`${char}(.*)${char}`)
length = str.match(regex)[0].length
}
console.log(length)
return length;
}
subLength('Saturday', 'a'); // returns 6
subLength('summer', 'm'); // returns 2
subLength('digitize', 'i'); // returns 0
subLength('cheesecake', 'k'); // returns 0
Using just for loop:
function subLength(str, char) {
let count = 0;
let initPosition;
let lastPosition;
for (let i = 0; i < str.length; i++) {
if (str[i] === char) {
count++
if (count > 2) {
return 0;
}
if (initPosition === undefined) {
initPosition = i
} else {
lastPosition = i+1
}
}
}
return count < 2 ? 0 : lastPosition - initPosition;
}
console.log(subLength('Saturday', 'a')); // returns 6
console.log(subLength('summer', 'm')); // returns 2
console.log(subLength('digitize', 'i')); // returns 0
console.log(subLength('cheesecake', 'k')); // returns 0

I too am going through the Codecademy course where this question came up which led me to this post.
Using the RegExp solution provided by #Rajesh (thank you!!) I started to break it down to better understand what was going on and making notes/comments because I am still pretty new and haven't used or been exposed to some of these things.
At the end of it all I thought I'd share what I ended up with in case anyone found it helpful.
function subLength(str, char) {
// Outputting to the console what we are looking for given the value of the string and character from the test cases at the end of this script.
console.log(`Showing the subLength for the string: "${str}" between "${char}" and "${char}" including the "${char}" positions.`);
// create the length variable which will be returned by the function
let length = 0;
// ** Search the string for the two occurrences of the character and count them. Then assign the result to the occurrenceCount variable for use in the if else statement.
// The "Array" class is a global object that is used in the construction off arrays.
// The Array.from() static method creates a new, shallow-copied Array instance from an array-like or iterable object.
// The Array.filter() method creates a new array with all elements that pass the test implemented by the provided function. The "c" represents each element of the array/string which is then compared to the char variable. if it is a match it gets added to the Array. We use .toLowerCase on both to ensure case compatibility.
// Appending the Array with ".length" assigns occurrenceCount the numeric value of the array's length rather than the array of characters.
const occurrenceCount = Array.from(str).filter((c) => c.toLowerCase() === char.toLowerCase());
console.log(' The contents of the occurrenceCountArray = ' + occurrenceCount);
console.log(' The character occurrence count = ' + occurrenceCount.length);
// if the string has two occurrences : return the length between them including the two characters : else the string has less than 2 or more than 2 characters : return 0.
if (occurrenceCount.length === 2) {
// The RegExp object is used for matching text with a pattern. The "(.*)" in between the ${char}'s will match and capture as much as possible aka greedy match. "()" = capture anything matched. (" = start of group. "." = match any character. "*" = Greedy match that matches everything in place of the "*". ")" = end of group.
const regex = new RegExp(`${char}(.*)${char}`);
// log to console the pattern being matched
console.log(` regex pattern to find = ${regex}`);
// log to the console the [0] = index 0 pattern that was captured from the string using str.match(regex)[0]
console.log(` regex output = ${str.match(regex)[0]}`);
// Use".length" to count the number of characters in the regex string at index 0 of the regex array and assign that value to the length variable.
length = str.match(regex)[0].length;
// Output the results to the console
console.log(` The distance from "${char}" to "${char}" (including the "${char}" positions) in the string: ${str} = ${length}\n`);
// return the length value
return length;
} else {
// Output the results to the console
console.log(` The string either has too many or too few occurrences.\n The subLength = ${length}\n`);
// return the length value
return length;
}
}
// test cases
subLength('Saturday', 'a'); // returns 6
subLength('summer', 'm'); // returns 2
subLength('digitize', 'i'); // returns 0
subLength('cheesecake', 'k'); // returns 0

The answer I am getting is this:
const subLength = (str, char) => {
let charCount = 0;
let len = -1;
for (let i=0; i<str.length; i++) {
if (str[i] == char) {
charCount++;
if (charCount > 2) {
return 0;
}
if (len == -1) {
len = i;
} else {
len = i - len + 1
}
}
}
if (charCount < 2) {
return 0;
}
return len;
};

It is better to try yourself a solution first. It is a very bad practice to just ask a solution for your homework!!!
Even if the solution can be JUST a few lines of code i wrote for you with commments a working solution :
const subLength = (str,char) => {
// create an empty array
const strarr = [];
// push string into array
strarr.push(str);
//initiate a count variable
let count = 0;
// WRITE YOUR REGULAR EXPRESSION
// Using the regular expression constructor - new RegExp("ab{2}", "g") .
const regString = `[${char}]`;
const regex = new RegExp(regString, "g");
// iterate through the string array to
for (let i = 0; i < strarr.length; i++) {
// calculate how many time the character occurs
count = (strarr[i].match(regex) || []).length;
};
// check with if condition
//if count is 2
if (count === 2) {
// calculate the index of first ocurrance of the string
first = str.indexOf(char);
// calculate the index of second ocurrance of the string
second = str.lastIndexOf(char);
// calculate the distance between them
return second - first + 1;
// if count is greater than two return 0
}
else if (count > 2) {
return count = 0;
}
// if count is less than two return 0
else if (count < 2) {
return 0;
}
};
console.log(subLength("iiiiliiile","l"));

I just answered this problem in codeAcademy and this is the solution that I came up with, just using if-statements and string.indexOf
const subLength = (strng, char) => {
let firstIndex = strng.indexOf(char);
let secondIndex = strng.indexOf(char, (firstIndex + 1));
let thirdIndex = strng.indexOf(char, (secondIndex + 1));
if (firstIndex === -1){
return 0
} else if (secondIndex === -1){
return 0
} else if (thirdIndex === -1 ){
return (secondIndex - firstIndex + 1)
} else {
return 0
};
};

How to get odd and even position characters from a string?

I'm trying to figure out how to remove every second character (starting from the first one) from a string in Javascript.
For example, the string "This is a test!" should become "hsi etTi sats!"
I also want to save every deleted character into another array.
I have tried using replace method and splice method, but wasn't able to get them to work properly. Mostly because replace only replaces the first character.
function encrypt(text, n) {
if (text === "NULL") return n;
if (n <= 0) return text;
var encArr = [];
var newString = text.split("");
var j = 0;
for (var i = 0; i < text.length; i += 2) {
encArr[j++] = text[i];
newString.splice(i, 1); // this line doesn't work properly
}
}

You could reduce the characters of the string and group them to separate arrays using the % operator. Use destructuring to get the 2D array returned to separate variables
let str = "This is a test!";
const [even, odd] = [...str].reduce((r,char,i) => (r[i%2].push(char), r), [[],[]])
console.log(odd.join(''))
console.log(even.join(''))
Using a for loop:
let str = "This is a test!",
odd = [],
even = [];
for (var i = 0; i < str.length; i++) {
i % 2 === 0
? even.push(str[i])
: odd.push(str[i])
}
console.log(odd.join(''))
console.log(even.join(''))

It would probably be easier to use a regular expression and .replace: capture two characters in separate capturing groups, add the first character to a string, and replace with the second character. Then, you'll have first half of the output you need in one string, and the second in another: just concatenate them together and return:
function encrypt(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
console.log(encrypt('This is a test!'));

Pretty simple with .reduce() to create the two arrays you seem to want.
function encrypt(text) {
return text.split("")
.reduce(({odd, even}, c, i) =>
i % 2 ? {odd: [...odd, c], even} : {odd, even: [...even, c]}
, {odd: [], even: []})
}
console.log(encrypt("This is a test!"));
They can be converted to strings by using .join("") if you desire.

I think you were on the right track. What you missed is replace is using either a string or RegExp.
The replace() method returns a new string with some or all matches of a pattern replaced by a replacement. The pattern can be a string or a RegExp, and the replacement can be a string or a function to be called for each match. If pattern is a string, only the first occurrence will be replaced.
Source: String.prototype.replace()
If you are replacing a value (and not a regular expression), only the first instance of the value will be replaced. To replace all occurrences of a specified value, use the global (g) modifier
Source: JavaScript String replace() Method
So my suggestion would be to continue still with replace and pass the right RegExp to the function, I guess you can figure out from this example - this removes every second occurrence for char 't':
let count = 0;
let testString = 'test test test test';
console.log('original', testString);
// global modifier in RegExp
let result = testString.replace(/t/g, function (match) {
count++;
return (count % 2 === 0) ? '' : match;
});
console.log('removed', result);

like this?
var text = "This is a test!"
var result = ""
var rest = ""
for(var i = 0; i < text.length; i++){
if( (i%2) != 0 ){
result += text[i]
} else{
rest += text[i]
}
}
console.log(result+rest)

Maybe with split, filter and join:
const remaining = myString.split('').filter((char, i) => i % 2 !== 0).join('');
const deleted = myString.split('').filter((char, i) => i % 2 === 0).join('');

You could take an array and splice and push each second item to the end of the array.
function encrypt(string) {
var array = [...string],
i = 0,
l = array.length >> 1;
while (i <= l) array.push(array.splice(i++, 1)[0]);
return array.join('');
}
console.log(encrypt("This is a test!"));

function encrypt(text) {
text = text.split("");
var removed = []
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed.push(letter)
return false;
}
return true
}).join("")
return {
full: encrypted + removed.join(""),
encrypted: encrypted,
removed: removed
}
}
console.log(encrypt("This is a test!"))
Splice does not work, because if you remove an element from an array in for loop indexes most probably will be wrong when removing another element.

I don't know how much you care about performance, but using regex is not very efficient.
Simple test for quite a long string shows that using filter function is on average about 3 times faster, which can make quite a difference when performed on very long strings or on many, many shorts ones.
function test(func, n){
var text = "";
for(var i = 0; i < n; ++i){
text += "a";
}
var start = new Date().getTime();
func(text);
var end = new Date().getTime();
var time = (end-start) / 1000.0;
console.log(func.name, " took ", time, " seconds")
return time;
}
function encryptREGEX(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
function encrypt(text) {
text = text.split("");
var removed = "";
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed += letter;
return false;
}
return true
}).join("")
return encrypted + removed
}
var timeREGEX = test(encryptREGEX, 10000000);
var timeFilter = test(encrypt, 10000000);
console.log("Using filter is faster ", timeREGEX/timeFilter, " times")
Using actually an array for storing removed letters and then joining them is much more efficient, than using a string and concatenating letters to it.
I changed an array to string in filter solution to make it the same like in regex solution, so they are more comparable.

Javascript: Counting frequency of emojis in text

I'm trying to count the frequency of emojis in a block of text. For example:
"I love 🚀🚀🚀 so much 😍 " -> [{🚀:3}, {😍:1}]
In order to count the frequency of characters in a block of text, I'm using
function getFrequency(string) {
var freq = {};
for (var i=0; i<string.length;i++) {
var character = string.charAt(i);
if (freq[character]) {
freq[character]++;
} else {
freq[character] = 1;
}
}
return freq;
};
source: https://stackoverflow.com/a/18619975/4975358
^The above code works great, but it does not recognize emoji characters:
{�: 1, �: 3, �: 2}
Also, I'd prefer the output to be a list of json objects of length 1, as opposed to one long json object.

You can use the callback of the String.replace function and a unicode aware RegExp detecting everything from the unicode blocks "Miscellaneous Symbols" to "Pictographs Transport and Map Symbols" (0x1F300 to 0x1F6FF):
let str = "I love 🚀🚀🚀 so much 😍 ";
let freq = {};
str.replace(/[\u{1F300}-\u{1F6FF}]/gu, char => freq[char] = (freq[char] || 0) + 1);
console.log(freq);
If you prefer to avoid RegExp or String.replace, you can destructure the string into an array and reduce it to the frequencies as follows:
let str = "I love 🚀🚀🚀 so much 😍 ";
let freq = [...str].reduce((freq, char) => {
if (char >= '\u{1F300}' && char < '\u{1F700}') freq[char] = (freq[char] || 0) + 1;
return freq;
}, {});
console.log(freq);

charAt won't help you here. for...of will parse the string correctly into Unicode codepoints including those in the astral plane. We use character.length to determine whether or not this is a supplementary plane character. If you really want to know if it's an emoji, you'd need to tighten this up.
const input = "I love 🚀🚀🚀 so much 😍 ";
function getFrequency(string) {
var freq = {};
for (character of string) {
if (character.length === 1) continue;
if (freq[character]) {
freq[character]++;
} else {
freq[character] = 1;
}
}
return freq;
};
console.log(getFrequency(input));
To create an array of single-valued objects, run the output through this:
function breakProperties(obj) {
return Object.keys(obj).map(function(key) {
var result = {};
result[key] = obj[key];
return result;
});
}

How to write a character matching algorithm in JavaScript?

Given this input s1 = "dadxx" s2 = "ddxx" I'd expect the output to contain a bunch of a,b pairs wherever each character in s1 matched a character in s2 and vice versa (duplicates allowed). Among those pairs would be 0,0 because s1[0] and s2[0] are both equal to d.
The problem is that my output doesn't contain 2,1 even though s1[2] and s2[1] are both equal to d.
Can someone fix my algorithm or make a better one?
Here's a JSFiddle if it helps.
Here's my code:
// For each char, see if other string contains it
s1 = 'dadxx'
s2 = 'ddxx'
matchChars(s1,s2)
matchChars(s2,s1)
function matchChars(a,b) {
for (i = 0; i < a.length; i++) {
found = b.indexOf(a[i])
if (found >= 0) {
if (a===s1) console.log(i,found)
else console.log(found,i)
}
}
}

I believe the problem you're having is that you're only checking for a single match for s1[i] in s2 by using indexOf. That will find the first index of a matched value, not every index.
If you instead iterate through both strings and compare every character, you get the result I think you're trying to achieve.
// Define strings
s1 = 'dadxx'
s2 = 'ddxx'
matchChars(s1,s2)
matchChars(s2,s1)
function matchChars(a,b) {
// Convert strings to lower case for case insensitive matching
// Remove if case sensitive matching required
a = a.toLowerCase();
b = b.toLowerCase();
// Iterate through every letter in s1
for (i = 0; i < a.length; i++) {
// Iterate through every letter in s2
for (j = 0; j < b.length; j++) {
// Check if the letter in s1 matches letter in s2
if (a[i] === b[j]) {
// Changed per request of OP
(a === s1) ? console.log(i, j) : console.log(j, i);
// console.log([i, j]);
}
}
}
}
Working JSBin example: https://jsbin.com/wecijopohi/edit?js,console

You say duplicates are allowed but not required. I'm submitting this as a more modern approach, not as a correction to the accepted solution, which looks good to me. https://jsfiddle.net/avc705zr/3/
match = (a, b) => {
let re, match, matches = []
a.split('').forEach((l, i) => {
re = new RegExp(l, 'g')
while ((match = re.exec(b)) != null) {
matches.push([i, match.index])
}
})
return matches
}
However, in my experience when you actually need functionality like this, you only need one of the strings to exhausted. In other words, you are looking for matches in string 2 of all instances in string 1 -- which is to say, unique characters in string 1. So a modification which might come up in the real world might instead be like:
Array.prototype.unique = function() {
return this.filter(function (value, index, self) {
return self.indexOf(value) === index;
});
}
match = (a, b) => {
let re, match, matches = []
a.split('').unique().forEach(l => {
re = new RegExp(l, 'g')
while ((match = re.exec(b)) != null) {
matches.push([l, match.index])
}
})
return matches
}

How do get input 2^3 to Math.pow(2, 3)?

I have this simple calculator script, but it doesn't allow power ^.
function getValues() {
var input = document.getElementById('value').value;
document.getElementById('result').innerHTML = eval(input);
}
<label for="value">Enter: </label><input id="value">
<div id="result">Results</div>
<button onclick="getValues()">Get Results</button>
I tried using input = input.replace( '^', 'Math.pow(,)');
But I do not know how to get the values before '^' and after into the brackets.
Example: (1+2)^3^3 should give 7,625,597,484,987

Use a regular expression with capture groups:
input = '3 + 2 ^3';
input = input.replace(/(\d+)\s*\^\s*(\d+)/g, 'Math.pow($1, $2)');
console.log(input);
This will only work when the arguments are just numbers. It won't work with sub-expressions or when you repeat it, like
(1+2)^3^3
This will require writing a recursive-descent parser, and that's far more work than I'm willing to put into an answer here. Get a textbook on compiler design to learn how to do this.

I don't think you'll be able to do this with simple replace.
If you want to parse infix operators, you build two stacks, one for symbols, other for numbers. Then sequentially walk the formula ignoring everything else than symbols, numbers and closing parenthesis. Put symbols and numbers into their stacks, but when you encounter closing paren, take last symbol and apply it to two last numbers. (was invented by Dijkstra, I think)
const formula = '(1+2)^3^3'
const symbols = []
const numbers = []
function apply(n2, n1, s) {
if (s === '^') {
return Math.pow(parseInt(n1, 10), parseInt(n2, 10))
}
return eval(`${n1} ${s} ${n2}`)
}
const applyLast = () => apply(numbers.pop(), numbers.pop(), symbols.pop())
const tokenize = formula => formula.split(/(\d+)|([\^\/\)\(+\-\*])/).filter(t => t !== undefined && t !== '')
const solver = (formula) => {
const tf = tokenize(formula)
for (let l of formula) {
const parsedL = parseInt(l, 10)
if (isNaN(parsedL)) {
if (l === ')') {
numbers.push(applyLast())
continue
} else {
if (~['+', '-', '*', '/', '^'].indexOf(l))
symbols.push(l)
continue
}
}
numbers.push(l)
}
while (symbols.length > 0)
numbers.push(applyLast())
return numbers.pop()
}
console.log(solver(formula))

Get your input into a string and do...
var input = document.getElementById('value').value;
var values = input.split('^'); //will save an array with [value1, value 2]
var result = Math.pow(values[0], values[1]);
console.log(result);
This only if your only operation is a '^'
EDIT: Saw example after edit, this no longer works.

function getValues() {
var input = document.getElementById('value').value;
// code to make ^ work like Math.pow
input = input.replace( '^', '**');
document.getElementById('result').innerHTML = eval(input);
}
The ** operator can replace the Math.pow function in most modern browsers. The next version of Safari (v10.1) coming out any day supports it.

As said in other answers here, you need a real parser to solve this correctly. A regex will solve simple cases, but for nested statements you need a recursive parser. For Javascript one library that offers this is peg.js.
In your case, the example given in the online version can be quickly extended to handle powers:
Expression
= head:Term tail:(_ ("+" / "-") _ Term)* {
var result = head, i;
for (i = 0; i < tail.length; i++) {
if (tail[i][1] === "+") { result += tail[i][3]; }
if (tail[i][1] === "-") { result -= tail[i][3]; }
}
return result;
}
Term
= head:Pow tail:(_ ("*" / "/") _ Pow)* { // Here I replaced Factor with Pow
var result = head, i;
for (i = 0; i < tail.length; i++) {
if (tail[i][1] === "*") { result *= tail[i][3]; }
if (tail[i][1] === "/") { result /= tail[i][3]; }
}
return result;
}
// This is the new part I added
Pow
= head:Factor tail:(_ "^" _ Factor)* {
var result = 1;
for (var i = tail.length - 1; 0 <= i; i--) {
result = Math.pow(tail[i][3], result);
}
return Math.pow(head, result);
}
Factor
= "(" _ expr:Expression _ ")" { return expr; }
/ Integer
Integer "integer"
= [0-9]+ { return parseInt(text(), 10); }
_ "whitespace"
= [ \t\n\r]*
It returns the expected output 7625597484987 for the input string (1+2)^3^3.

Here is a Python-based version of this question, with solution using pyparsing: changing ** operator to power function using parsing?

Develop Reference

JavaScript is the programming language of the Web.

Matching a letter from any language spoken - javascript

Related

How to build a function that searches for string occurrences?

How to get odd and even position characters from a string?

Javascript: Counting frequency of emojis in text

How to write a character matching algorithm in JavaScript?

How do get input 2^3 to Math.pow(2, 3)?

Categories

Resources