Im trying to get the Text Statistics function from https://github.com/cgiffard/TextStatistics.js/blob/master/index.js
Working in Excel
I have minified the JavaScript code to shorten the concatenations
Function Text_Statistics1(textString As String)
Dim code As String
code = "function text_stats(teststringtoprocess){(function(e){function t(e){var t=['li','p','h1','h2','h3','h4','h5','h6','dd'];t.forEach(function(t){e=e.replace('</'+t+'>','.')});e=e.replace(/<[^>]+>/g,'').replace(/[,:;()\-]/,' ').replace(/[\.!?]/,'.').replace(/^\s+/,'').replace(/[ ]*(\n|\r\n|\r)[ ]*/,' ').replace(/([\.])[\. ]+/,'.').replace(/[ ]*([\.])/,'. ').replace(/\s+/,' ').replace(/\s+$/,'');e+='.';return e}function r(e){return new n(e)}var n=function(n){this.text=n?t(n):this.text};n.prototype.fleschKincaidReadingEase=function(e){e=e?t(e):this.text;return Math.round((206.835-1.015*this.averageWordsPerSentence(e)-84.6*this.averageSyllablesPerWord(e))*10)/10};n.prototype.fleschKincaidGradeLevel=function(e){e=e?t(e):this.text;return Math.round((.39*this.averageWordsPerSentence(e)+11.8*this.averageSyllablesPerWord(e)-15.59)*10)/10};n.prototype.gunningFogScore=function(e){e=e?t(e):this.text;"
code = code + "return Math.round((this.averageWordsPerSentence(e)+this.percentageWordsWithThreeSyllables(e,false))*.4*10)/10};n.prototype.colemanLiauIndex=function(e){e=e?t(e):this.text;return Math.round((5.89*(this.letterCount(e)/this.wordCount(e))-.3*(this.sentenceCount(e)/this.wordCount(e))-15.8)*10)/10};n.prototype.smogIndex=function(e){e=e?t(e):this.text;return Math.round(1.043*Math.sqrt(this.wordsWithThreeSyllables(e)*(30/this.sentenceCount(e))+3.1291)*10)/10};n.prototype.automatedReadabilityIndex=function(e){e=e?t(e):this.text;"
code = code + "return Math.round((4.71*(this.letterCount(e)/this.wordCount(e))+.5*(this.wordCount(e)/this.sentenceCount(e))-21.43)*10)/10};n.prototype.textLength=function(e){e=e?t(e):this.text;return e.length};n.prototype.letterCount=function(e){e=e?t(e):this.text;e=e.replace(/[^a-z]+/ig,'');return e.length};n.prototype.sentenceCount=function(e){e=e?t(e):this.text;return e.replace(/[^\.!?]/g,'').length||1};n.prototype.wordCount=function(e){e=e?t(e):this.text;return e.split(/[^a-z0-9]+/i).length||1};n.prototype.averageWordsPerSentence=function(e){e=e?t(e):this.text;"
code = code + "return this.wordCount(e)/this.sentenceCount(e)};n.prototype.averageSyllablesPerWord=function(e){e=e?t(e):this.text;var n=0,r=this.wordCount(e),i=this;e.split(/\s+/).forEach(function(e){n+=i.syllableCount(e)});return(n||1)/(r||1)};n.prototype.wordsWithThreeSyllables=function(e,n){e=e?t(e):this.text;var r=0,i=this;n=n===false?false:true;e.split(/\s+/).forEach(function(e){if(!e.match(/^[A-Z]/)||n){if(i.syllableCount(e)>2)r++}});return r};n.prototype.percentageWordsWithThreeSyllables=function(e,n){e=e?t(e):this.text;return this.wordsWithThreeSyllables(e,n)/this.wordCount(e)*100};n.prototype.syllableCount=function(e){var t=0,n=0,r=0;e=e.toLowerCase().replace(/[^a-z]/g,'');var i={simile:3,forever:3,shoreline:2};if(i.hasOwnProperty(e))return i[e];var s=[/cial/,/tia/,/cius/,/cious/,/giu/,/ion/,/iou/,/sia$/,/[^aeiuoyt]{2,}ed$/,/.ely$/,/[cg]h?e[rsd]?$/,/rved?$/,/[aeiouy][dt]es?$/,/[aeiouy][^aeiouydt]e[rsd]?$/,/^[dr]e[aeiou][^aeiou]+$/,/[aeiouy]rse$/];"
code = code + "var o=[/ia/,/riet/,/dien/,/iu/,/io/,/ii/,/[aeiouym]bl$/,/[aeiou]{3}/,/^mc/,/ism$/,/([^aeiouy])\1l$/,/[^l]lien/,/^coa[dglx]./,/[^gq]ua[^auieo]/,/dnt$/,/uity$/,/ie(r|st)$/];var u=[/^un/,/^fore/,/ly$/,/less$/,/ful$/,/ers?$/,/ings?$/];u.forEach(function(t){if(e.match(t)){e=e.replace(t,'');n++}});r=e.split(/[^aeiouy]+/ig).filter(function(e){return!!e.replace(/\s+/ig,'').length}).length;t=r+n;s.forEach(function(n){if(e.match(n))t--});o.forEach(function(n){if(e.match(n))t++});return t||1};typeof module!='undefined'&&module.exports?module.exports=r:typeof define!='undefined'?define('textstatistics',[],function(){return r}):e.textstatistics=r})(this);"
'code = code + " return textstatistics(s).fleschKincaidReadingEase();" & _
'"return stat.fleschKincaidReadingEase();" & _
code = code + "return textstatistics(teststringtoprocess).fleschKincaidReadingEase();}"
'code = code + "return textstatistics(teststringtoprocess);}"
Dim o As New ScriptControl
o.Language = "JScript"
With o
.AddCode code
Text_Statistics1 = .Run("text_stats", textString)
End With
End Function
I'm getting object doesn't support this property or method - I think its due to the instantiation of Text Statistics.
Do I need to convert the javascript to just be a set of functions?
UPDATE:
Slightly different approach using eval
Function Text_Stat(textString As String, textstat As String)
Dim code As String
code = "(function(e){function t(e){var t=['li','p','h1','h2','h3','h4','h5','h6','dd'];t.forEach(function(t){e=e.replace('</'+t+'>','.')});e=e.replace(/<[^>]+>/g,'').replace(/[,:;()\-]/,' ').replace(/[\.!?]/,'.').replace(/^\s+/,'').replace(/[ ]*(\n|\r\n|\r)[ ]*/,' ').replace(/([\.])[\. ]+/,'.').replace(/[ ]*([\.])/,'. ').replace(/\s+/,' ').replace(/\s+$/,'');e+='.';return e}function r(e){return new n(e)}var n=function(n){this.text=n?t(n):this.text};n.prototype.fleschKincaidReadingEase=function(e){e=e?t(e):this.text;return Math.round((206.835-1.015*this.averageWordsPerSentence(e)-84.6*this.averageSyllablesPerWord(e))*10)/10};n.prototype.fleschKincaidGradeLevel=function(e){e=e?t(e):this.text;" & _
"return Math.round((.39*this.averageWordsPerSentence(e)+11.8*this.averageSyllablesPerWord(e)-15.59)*10)/10};n.prototype.gunningFogScore=function(e){e=e?t(e):this.text;return Math.round((this.averageWordsPerSentence(e)+this.percentageWordsWithThreeSyllables(e,false))*.4*10)/10};n.prototype.colemanLiauIndex=function(e){e=e?t(e):this.text;return Math.round((5.89*(this.letterCount(e)/this.wordCount(e))-.3*(this.sentenceCount(e)/this.wordCount(e))-15.8)*10)/10};n.prototype.smogIndex=function(e){e=e?t(e):this.text;return Math.round(1.043*Math.sqrt(this.wordsWithThreeSyllables(e)*(30/this.sentenceCount(e))+3.1291)*10)/10};n.prototype.automatedReadabilityIndex=function(e){e=e?t(e):this.text;" & _
"return Math.round((4.71*(this.letterCount(e)/this.wordCount(e))+.5*(this.wordCount(e)/this.sentenceCount(e))-21.43)*10)/10};n.prototype.textLength=function(e){e=e?t(e):this.text;return e.length};n.prototype.letterCount=function(e){e=e?t(e):this.text;e=e.replace(/[^a-z]+/ig,'');return e.length};n.prototype.sentenceCount=function(e){e=e?t(e):this.text;return e.replace(/[^\.!?]/g,'').length||1};n.prototype.wordCount=function(e){e=e?t(e):this.text;return e.split(/[^a-z0-9]+/i).length||1};n.prototype.averageWordsPerSentence=function(e){e=e?t(e):this.text;return this.wordCount(e)/this.sentenceCount(e)};n.prototype.averageSyllablesPerWord=function(e){e=e?t(e):this.text;var n=0,r=this.wordCount(e),i=this;e.split(/\s+/).forEach(function(e){n+=i.syllableCount(e)});return(n||1)/(r||1)};n.prototype.wordsWithThreeSyllables=function(e,n){e=e?t(e):this.text;" & _
"var r=0,i=this;n=n===false?false:true;e.split(/\s+/).forEach(function(e){if(!e.match(/^[A-Z]/)||n){if(i.syllableCount(e)>2)r++}});return r};n.prototype.percentageWordsWithThreeSyllables=function(e,n){e=e?t(e):this.text;" & _
"return this.wordsWithThreeSyllables(e,n)/this.wordCount(e)*100};n.prototype.syllableCount=function(e){var t=0,n=0,r=0;e=e.toLowerCase().replace(/[^a-z]/g,'');var i={simile:3,forever:3,shoreline:2};if(i.hasOwnProperty(e))return i[e];var s=[/cial/,/tia/,/cius/,/cious/,/giu/,/ion/,/iou/,/sia$/,/[^aeiuoyt]{2,}ed$/,/.ely$/,/[cg]h?e[rsd]?$/,/rved?$/,/[aeiouy][dt]es?$/,/[aeiouy][^aeiouydt]e[rsd]?$/,/^[dr]e[aeiou][^aeiou]+$/,/[aeiouy]rse$/];var o=[/ia/,/riet/,/dien/,/iu/,/io/,/ii/,/[aeiouym]bl$/,/[aeiou]{3}/,/^mc/,/ism$/,/([^aeiouy])\1l$/,/[^l]lien/,/^coa[dglx]./,/[^gq]ua[^auieo]/,/dnt$/,/uity$/,/ie(r|st)$/];" & _
"var u=[/^un/,/^fore/,/ly$/,/less$/,/ful$/,/ers?$/,/ings?$/];u.forEach(function(t){if(e.match(t)){e=e.replace(t,'');n++}});r=e.split(/[^aeiouy]+/ig).filter(function(e){return!!e.replace(/\s+/ig,'').length}).length;t=r+n;s.forEach(function(n){if(e.match(n))t--});o.forEach(function(n){if(e.match(n))t++});return t||1};typeof module!='undefined'&&module.exports?module.exports=r:typeof define!='undefined'?define('textstatistics',[],function(){return r}):e.textstatistics=r})(this);" & _
"var stat = new textstatistics('Your text here');alert(stat.sentenceCount('This. dfgdfg. is. a. long. sentence.'));"
Dim o As New ScriptControl
o.Language = "JScript"
With o
.AllowUI = True
.AddCode code
.Eval "stat.sentenceCount('This. dfgdfg. is. a. long. sentence.')"
'result = .Eval(code)
'Debug.Print .Eval("'Hello World'.substring(1, 4);")
'result = .Eval(result)
'Text_Stat = .Run(result)
End With
End Function
JSFiddle shows it working here http://jsfiddle.net/hwr26dkf/
UPDATE: 01/10/2014
Final Working VBA version thanks to Michael Petch
Function Text_Statistics(statType As Integer, textString As String)
Dim wc, sc As Integer
Dim s1, s2, code As String
Dim oTextStats As Object
Dim o As New ScriptControl
code = "function cleanText(e){var t=['li','p','h1','h2','h3','h4','h5','h6','dd'];t.forEach(function(t){e=e.replace('</'+t+'>','.')});e=e.replace(/<[^>]+>/g,'').replace(/[,:;()\-]/,' ').replace(/[\.!?]/,'.').replace(/^\s+/,'').replace(/[ ]*(\n|\r\n|\r)[ ]*/,' ').replace(/([\.])[\. ]+/,'.').replace(/[ ]*([\.])/,'. ').replace(/\s+/,' ').replace(/\s+$/,'');e+='.';return e}function textStatistics(e){return new TextStatistics(e)}if(!Array.prototype.forEach){Array.prototype.forEach=function(e){var t=this.length;" & _
"if(typeof e!='function')throw new TypeError;var n=arguments[1];for(var r=0;r<t;r++){if(r in this)e.call(n,this[r],r,this)}}}if(!Array.prototype.filter){Array.prototype.filter=function(e){'use strict';if(this===void 0||this===null){throw new TypeError}var t=Object(this);var n=t.length>>>0;if(typeof e!=='function'){throw new TypeError}var r=[];var i=arguments.length>=2?arguments[1]:void 0;for(var s=0;s<n;s++){if(s in t){var o=t[s];if(e.call(i,o,s,t)){r.push(o)}}}return r}}var TextStatistics=function(t){this.text=t?cleanText(t):this.text};TextStatistics.prototype.fleschKincaidReadingEase=function(e){e=e?cleanText(e):this.text;return Math.round((206.835-1.015*this.averageWordsPerSentence(e)-84.6*this.averageSyllablesPerWord(e))*10)/10};TextStatistics.prototype.fleschKincaidGradeLevel=function(e){e=e?cleanText(e):this.text;" & _
"return Math.round((.39*this.averageWordsPerSentence(e)+11.8*this.averageSyllablesPerWord(e)-15.59)*10)/10};TextStatistics.prototype.gunningFogScore=function(e){e=e?cleanText(e):this.text;return Math.round((this.averageWordsPerSentence(e)+this.percentageWordsWithThreeSyllables(e,false))*.4*10)/10};TextStatistics.prototype.colemanLiauIndex=function(e){e=e?cleanText(e):this.text;return Math.round((5.89*(this.letterCount(e)/this.wordCount(e))-.3*(this.sentenceCount(e)/this.wordCount(e))-15.8)*10)/10};" & _
"TextStatistics.prototype.smogIndex=function(e){e=e?cleanText(e):this.text;return Math.round(1.043*Math.sqrt(this.wordsWithThreeSyllables(e)*(30/this.sentenceCount(e))+3.1291)*10)/10};TextStatistics.prototype.automatedReadabilityIndex=function(e){e=e?cleanText(e):this.text;return Math.round((4.71*(this.letterCount(e)/this.wordCount(e))+.5*(this.wordCount(e)/this.sentenceCount(e))-21.43)*10)/10};TextStatistics.prototype.textLength=function(e){e=e?cleanText(e):this.text;return e.length};TextStatistics.prototype.letterCount=function(e){e=e?cleanText(e):this.text;e=e.replace(/[^a-z]+/ig,'');return e.length};TextStatistics.prototype.sentenceCount=function(e){e=e?cleanText(e):this.text;" & _
"return e.replace(/[^\.!?]/g,'').length||1};TextStatistics.prototype.wordCount=function(e){e=e?cleanText(e):this.text;return e.split(/[^a-z0-9]+/i).length||1};TextStatistics.prototype.averageWordsPerSentence=function(e){e=e?cleanText(e):this.text;return this.wordCount(e)/this.sentenceCount(e)};TextStatistics.prototype.averageSyllablesPerWord=function(e){e=e?cleanText(e):this.text;" & _
"var t=0,n=this.wordCount(e),r=this;e.split(/\s+/).forEach(function(e){t+=r.syllableCount(e)});return(t||1)/(n||1)};TextStatistics.prototype.wordsWithThreeSyllables=function(e,t){e=e?cleanText(e):this.text;var n=0,r=this;t=t===false?false:true;e.split(/\s+/).forEach(function(e){if(!e.match(/^[A-Z]/)||t){if(r.syllableCount(e)>2)n++}});return n};TextStatistics.prototype.percentageWordsWithThreeSyllables=function(e,t){e=e?cleanText(e):this.text;return this.wordsWithThreeSyllables(e,t)/this.wordCount(e)*100};" & _
"TextStatistics.prototype.syllableCount=function(e){var t=0,n=0,r=0;e=e.toLowerCase().replace(/[^a-z]/g,'');var i={simile:3,forever:3,shoreline:2};if(i.hasOwnProperty(e))return i[e];var s=[/cial/,/tia/,/cius/,/cious/,/giu/,/ion/,/iou/,/sia$/,/[^aeiuoyt]{2,}ed$/,/.ely$/,/[cg]h?e[rsd]?$/,/rved?$/,/[aeiouy][dt]es?$/,/[aeiouy][^aeiouydt]e[rsd]?$/,/^[dr]e[aeiou][^aeiou]+$/,/[aeiouy]rse$/];var o=[/ia/,/riet/,/dien/,/iu/,/io/,/ii/,/[aeiouym]bl$/,/[aeiou]{3}/,/^mc/,/ism$/,/([^aeiouy])\1l$/,/[^l]lien/,/^coa[dglx]./,/[^gq]ua[^auieo]/,/dnt$/,/uity$/,/ie(r|st)$/];" & _
"var u=[/^un/,/^fore/,/ly$/,/less$/,/ful$/,/ers?$/,/ings?$/];u.forEach(function(t){if(e.match(t)){e=e.replace(t,'');n++}});r=e.split(/[^aeiouy]+/ig).filter(function(e){return!!e.replace(/\s+/ig,'').length}).length;t=r+n;s.forEach(function(n){if(e.match(n))t--});o.forEach(function(n){if(e.match(n))t++});return t||1}"
With o
.Language = "JScript"
.AddCode code
' Create a TextStatistics object initially with no text.
' textStatistics is a function that creates TextStatistics objects
Set oTextStats = .Eval("textStatistics()")
' Now simply call TextStatistics methods directly
wc = oTextStats.averageWordsPerSentence(textString)
sc = oTextStats.syllableCount(textString)
' Alternatively you can create a TextStatistics object with the text
' and call the methods with a blank string to return the values
' for the string passed in the constructor
'Set oTextStats = .Eval("textStatistics('" + textString + "')")
'wc = oTextStats.wordCount("")
'sc = oTextStats.sentenceCount("")
Select Case statType
Case 1
Text_Statistics = oTextStats.wordCount(textString)
Case 2
Text_Statistics = oTextStats.sentenceCount(textString)
Case 3
Text_Statistics = oTextStats.fleschKincaidReadingEase(textString)
Case 4
Text_Statistics = oTextStats.fleschKincaidGradeLevel(textString)
Case 5
Text_Statistics = oTextStats.gunningFogScore(textString)
Case 6
Text_Statistics = oTextStats.colemanLiauIndex(textString)
Case 7
Text_Statistics = oTextStats.smogIndex(textString)
Case 8
Text_Statistics = oTextStats.automatedReadabilityIndex(textString)
Case 9
Text_Statistics = oTextStats.textLength(textString)
Case 10
Text_Statistics = oTextStats.letterCount(textString)
Case 11
Text_Statistics = oTextStats.averageWordsPerSentence(textString)
Case 12
Text_Statistics = oTextStats.averageSyllablesPerWord(textString)
End Select
End With
End Function
I spent some time this afternoon learning Javascript and then trying to figure out what is going on with your TextStatistics class when run in Microsoft's ScriptControl object. Rather than start with the mangled code in VBA I went back to the code in github that the OP referenced. The first thing I discovered is that ScriptControl will parse and execute the anonymous global function however once that code is added by ScriptControl it seems to lose track of the TextStatistics object. So the first thing I did was simply remove the anonymous global function by removing this at the top:
(function(glob) {
and removing these line at the bottom:
(typeof module != "undefined" && module.exports) ? (module.exports = textStatistics) : (typeof define != "undefined" ? (define("textstatistics", [], function() { return textStatistics; })) : (glob.textstatistics = textStatistics));
})(this);
Once I removed that as a source of problems I discovered that I could create new instances of TextStatistics but I could not assign text to them properly. Neither as a parameter using new or through calling a method like sentenceCount(). This had me perplexed. Since creating an instance of TextStatistics couldn't be done properly I decided to review the constructor. It is simple but it called cleanText. One thing that stood out at me was the forEach. On a hunch I did some digging about Javascript / ScriptControl and then forEach. I learned that ScriptControl uses ECMAScript and not Javascript. Once I had that straight I found a link with this information which includes this comment: about the forEach method:
This method is a JavaScript extension to the ECMA-262 standard; as such it may not be present in other implementations of the standard. To make it work you need to add following code at the top of your script:
And this code:
if (!Array.prototype.forEach) {
Array.prototype.forEach = function (fun /*, thisp*/ ) {
var len = this.length;
if (typeof fun != 'function') throw new TypeError();
var thisp = arguments[1];
for (var i = 0; i < len; i++) {
if (i in this) fun.call(thisp, this[i], i, this);
}
};
}
After providing my original answer, the OP discovered that functions involving syllables didn't work. There is another function that showed up in a later ECMA specification that ScriptControl didn't support. That was the polyfill filter function on arrays. According to this Mozilla documentation:
filter was added to the ECMA-262 standard in the 5th edition; as such it may not be present in all implementations of the standard. You can work around this by inserting the following code at the beginning of your scripts, allowing use of filter in ECMA-262 implementations which do not natively support it.
The code provided that meets the specification:
if (!Array.prototype.filter) {
Array.prototype.filter = function(fun/*, thisArg*/) {
'use strict';
if (this === void 0 || this === null) {
throw new TypeError();
}
var t = Object(this);
var len = t.length >>> 0;
if (typeof fun !== 'function') {
throw new TypeError();
}
var res = [];
var thisArg = arguments.length >= 2 ? arguments[1] : void 0;
for (var i = 0; i < len; i++) {
if (i in t) {
var val = t[i];
// NOTE: Technically this should Object.defineProperty at
// the next index, as push can be affected by
// properties on Object.prototype and Array.prototype.
// But that method's new, and collisions should be
// rare, so use the more-compatible alternative.
if (fun.call(thisArg, val, i, t)) {
res.push(val);
}
}
}
return res;
};
}
Was it so simple? Was this the cause of those problems? Yes it was. I added that code to the top of the script and VBA and ScriptControl were content. So before minifying and converting all " to ' the Javascript code in its entirety looks like this:
if (!Array.prototype.forEach) {
Array.prototype.forEach = function (fun /*, thisp*/ ) {
var len = this.length;
if (typeof fun != 'function') throw new TypeError();
var thisp = arguments[1];
for (var i = 0; i < len; i++) {
if (i in this) fun.call(thisp, this[i], i, this);
}
};
}
if (!Array.prototype.filter) {
Array.prototype.filter = function(fun/*, thisArg*/) {
'use strict';
if (this === void 0 || this === null) {
throw new TypeError();
}
var t = Object(this);
var len = t.length >>> 0;
if (typeof fun !== 'function') {
throw new TypeError();
}
var res = [];
var thisArg = arguments.length >= 2 ? arguments[1] : void 0;
for (var i = 0; i < len; i++) {
if (i in t) {
var val = t[i];
// NOTE: Technically this should Object.defineProperty at
// the next index, as push can be affected by
// properties on Object.prototype and Array.prototype.
// But that method's new, and collisions should be
// rare, so use the more-compatible alternative.
if (fun.call(thisArg, val, i, t)) {
res.push(val);
}
}
}
return res;
};
}
function cleanText(text) {
// all these tags should be preceeded by a full stop.
var fullStopTags = ['li', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'dd'];
fullStopTags.forEach(function (tag) {
text = text.replace('</' + tag + '>', '.');
});
text = text.replace(/<[^>]+>/g, '') // Strip tags
.replace(/[,:;()\-]/, ' ') // Replace commans, hyphens etc (count them as spaces)
.replace(/[\.!?]/, '.') // Unify terminators
.replace(/^\s+/, '') // Strip leading whitespace
.replace(/[ ]*(\n|\r\n|\r)[ ]*/, ' ') // Replace new lines with spaces
.replace(/([\.])[\. ]+/, '.') // Check for duplicated terminators
.replace(/[ ]*([\.])/, '. ') // Pad sentence terminators
.replace(/\s+/, ' ') // Remove multiple spaces
.replace(/\s+$/, ''); // Strip trailing whitespace
text += '.'; // Add final terminator, just in case it's missing.
return text;
}
var TextStatistics = function TextStatistics(text) {
this.text = text ? cleanText(text) : this.text;
};
TextStatistics.prototype.fleschKincaidReadingEase = function (text) {
text = text ? cleanText(text) : this.text;
return Math.round((206.835 - (1.015 * this.averageWordsPerSentence(text)) - (84.6 * this.averageSyllablesPerWord(text))) * 10) / 10;
};
TextStatistics.prototype.fleschKincaidGradeLevel = function (text) {
text = text ? cleanText(text) : this.text;
return Math.round(((0.39 * this.averageWordsPerSentence(text)) + (11.8 * this.averageSyllablesPerWord(text)) - 15.59) * 10) / 10;
};
TextStatistics.prototype.gunningFogScore = function (text) {
text = text ? cleanText(text) : this.text;
return Math.round(((this.averageWordsPerSentence(text) + this.percentageWordsWithThreeSyllables(text, false)) * 0.4) * 10) / 10;
};
TextStatistics.prototype.colemanLiauIndex = function (text) {
text = text ? cleanText(text) : this.text;
return Math.round(((5.89 * (this.letterCount(text) / this.wordCount(text))) - (0.3 * (this.sentenceCount(text) / this.wordCount(text))) - 15.8) * 10) / 10;
};
TextStatistics.prototype.smogIndex = function (text) {
text = text ? cleanText(text) : this.text;
return Math.round(1.043 * Math.sqrt((this.wordsWithThreeSyllables(text) * (30 / this.sentenceCount(text))) + 3.1291) * 10) / 10;
};
TextStatistics.prototype.automatedReadabilityIndex = function (text) {
text = text ? cleanText(text) : this.text;
return Math.round(((4.71 * (this.letterCount(text) / this.wordCount(text))) + (0.5 * (this.wordCount(text) / this.sentenceCount(text))) - 21.43) * 10) / 10;
};
TextStatistics.prototype.textLength = function (text) {
text = text ? cleanText(text) : this.text;
return text.length;
};
TextStatistics.prototype.letterCount = function (text) {
text = text ? cleanText(text) : this.text;
text = text.replace(/[^a-z]+/ig, '');
return text.length;
};
TextStatistics.prototype.sentenceCount = function (text) {
text = text ? cleanText(text) : this.text;
// Will be tripped up by 'Mr.' or 'U.K.'. Not a major concern at this point.
return text.replace(/[^\.!?]/g, '').length || 1;
};
TextStatistics.prototype.wordCount = function (text) {
text = text ? cleanText(text) : this.text;
return text.split(/[^a-z0-9]+/i).length || 1;
};
TextStatistics.prototype.averageWordsPerSentence = function (text) {
text = text ? cleanText(text) : this.text;
return this.wordCount(text) / this.sentenceCount(text);
};
TextStatistics.prototype.averageSyllablesPerWord = function (text) {
text = text ? cleanText(text) : this.text;
var syllableCount = 0,
wordCount = this.wordCount(text),
self = this;
text.split(/\s+/).forEach(function (word) {
syllableCount += self.syllableCount(word);
});
// Prevent NaN...
return (syllableCount || 1) / (wordCount || 1);
};
TextStatistics.prototype.wordsWithThreeSyllables = function (text, countProperNouns) {
text = text ? cleanText(text) : this.text;
var longWordCount = 0,
self = this;
countProperNouns = countProperNouns === false ? false : true;
text.split(/\s+/).forEach(function (word) {
// We don't count proper nouns or capitalised words if the countProperNouns attribute is set.
// Defaults to true.
if (!word.match(/^[A-Z]/) || countProperNouns) {
if (self.syllableCount(word) > 2) longWordCount++;
}
});
return longWordCount;
};
TextStatistics.prototype.percentageWordsWithThreeSyllables = function (text, countProperNouns) {
text = text ? cleanText(text) : this.text;
return (this.wordsWithThreeSyllables(text, countProperNouns) / this.wordCount(text)) * 100;
};
TextStatistics.prototype.syllableCount = function (word) {
var syllableCount = 0,
prefixSuffixCount = 0,
wordPartCount = 0;
// Prepare word - make lower case and remove non-word characters
word = word.toLowerCase().replace(/[^a-z]/g, '');
// Specific common exceptions that don't follow the rule set below are handled individually
// Array of problem words (with word as key, syllable count as value)
var problemWords = {
'simile': 3,
'forever': 3,
'shoreline': 2
};
// Return if we've hit one of those...
if (problemWords.hasOwnProperty(word)) return problemWords[word];
// These syllables would be counted as two but should be one
var subSyllables = [
/cial/,
/tia/,
/cius/,
/cious/,
/giu/,
/ion/,
/iou/,
/sia$/,
/[^aeiuoyt]{2,}ed$/,
/.ely$/,
/[cg]h?e[rsd]?$/,
/rved?$/,
/[aeiouy][dt]es?$/,
/[aeiouy][^aeiouydt]e[rsd]?$/,
/^[dr]e[aeiou][^aeiou]+$/, // Sorts out deal, deign etc
/[aeiouy]rse$/ // Purse, hearse
];
// These syllables would be counted as one but should be two
var addSyllables = [
/ia/,
/riet/,
/dien/,
/iu/,
/io/,
/ii/,
/[aeiouym]bl$/,
/[aeiou]{3}/,
/^mc/,
/ism$/,
/([^aeiouy])\1l$/,
/[^l]lien/,
/^coa[dglx]./,
/[^gq]ua[^auieo]/,
/dnt$/,
/uity$/,
/ie(r|st)$/];
// Single syllable prefixes and suffixes
var prefixSuffix = [
/^un/,
/^fore/,
/ly$/,
/less$/,
/ful$/,
/ers?$/,
/ings?$/];
// Remove prefixes and suffixes and count how many were taken
prefixSuffix.forEach(function (regex) {
if (word.match(regex)) {
word = word.replace(regex, '');
prefixSuffixCount++;
}
});
wordPartCount = word.split(/[^aeiouy]+/ig)
.filter(function (wordPart) {
return !!wordPart.replace(/\s+/ig, '').length;
})
.length;
// Get preliminary syllable count...
syllableCount = wordPartCount + prefixSuffixCount;
// Some syllables do not follow normal rules - check for them
subSyllables.forEach(function (syllable) {
if (word.match(syllable)) syllableCount--;
});
addSyllables.forEach(function (syllable) {
if (word.match(syllable)) syllableCount++;
});
return syllableCount || 1;
};
function textStatistics(text) {
return new TextStatistics(text);
}
After taking this function and adding it to the code variable (See OP's Visual Basic code) I was able to create an instance of this control and call methods on it. There are a couple of different ways to use TextStatistics in VBA:
Dim wc, sc As Integer
Dim s1, s2, code As String
Dim oTextStats As Object
Dim o As New ScriptControl
code = "function cleanText(e){var t=['li','p','h1','h2','h3','h4','h5','h6','dd'];t.forEach(function(t){e=e.replace('</'+t+'>','.')});e=e.replace(/<[^>]+>/g,'').replace(/[,:;()\-]/,' ').replace(/[\.!?]/,'.').replace(/^\s+/,'').replace(/[ ]*(\n|\r\n|\r)[ ]*/,' ').replace(/([\.])[\. ]+/,'.').replace(/[ ]*([\.])/,'. ').replace(/\s+/,' ').replace(/\s+$/,'');e+='.';return e}function textStatistics(e){return new TextStatistics(e)}if(!Array.prototype.filter){Array.prototype.filter=function(e){'use strict';if(this===void 0||this===null){throw new TypeError}var t=Object(this);" & _
"var n=t.length>>>0;if(typeof e!=='function'){throw new TypeError}var r=[];var i=arguments.length>=2?arguments[1]:void 0;for(var s=0;s<n;s++){if(s in t){var o=t[s];if(e.call(i,o,s,t)){r.push(o)}}}return r}}if(!Array.prototype.forEach){Array.prototype.forEach=function(e){var t=this.length;if(typeof e!='function')throw new TypeError;var n=arguments[1];for(var r=0;r<t;r++){if(r in this)e.call(n,this[r],r,this)}}}var TextStatistics=function(t){this.text=t?cleanText(t):this.text};" & _
"TextStatistics.prototype.fleschKincaidReadingEase=function(e){e=e?cleanText(e):this.text;return Math.round((206.835-1.015*this.averageWordsPerSentence(e)-84.6*this.averageSyllablesPerWord(e))*10)/10};TextStatistics.prototype.fleschKincaidGradeLevel=function(e){e=e?cleanText(e):this.text;return Math.round((.39*this.averageWordsPerSentence(e)+11.8*this.averageSyllablesPerWord(e)-15.59)*10)/10};TextStatistics.prototype.gunningFogScore=function(e){e=e?cleanText(e):this.text;" & _
"return Math.round((this.averageWordsPerSentence(e)+this.percentageWordsWithThreeSyllables(e,false))*.4*10)/10};TextStatistics.prototype.colemanLiauIndex=function(e){e=e?cleanText(e):this.text;return Math.round((5.89*(this.letterCount(e)/this.wordCount(e))-.3*(this.sentenceCount(e)/this.wordCount(e))-15.8)*10)/10};TextStatistics.prototype.smogIndex=function(e){e=e?cleanText(e):this.text;return Math.round(1.043*Math.sqrt(this.wordsWithThreeSyllables(e)*(30/this.sentenceCount(e))+3.1291)*10)/10};" & _
"TextStatistics.prototype.automatedReadabilityIndex=function(e){e=e?cleanText(e):this.text;return Math.round((4.71*(this.letterCount(e)/this.wordCount(e))+.5*(this.wordCount(e)/this.sentenceCount(e))-21.43)*10)/10};TextStatistics.prototype.textLength=function(e){e=e?cleanText(e):this.text;return e.length};TextStatistics.prototype.letterCount=function(e){e=e?cleanText(e):this.text;e=e.replace(/[^a-z]+/ig,'');return e.length};TextStatistics.prototype.sentenceCount=function(e){e=e?cleanText(e):this.text;" & _
"return e.replace(/[^\.!?]/g,'').length||1};TextStatistics.prototype.wordCount=function(e){e=e?cleanText(e):this.text;return e.split(/[^a-z0-9]+/i).length||1};TextStatistics.prototype.averageWordsPerSentence=function(e){e=e?cleanText(e):this.text;return this.wordCount(e)/this.sentenceCount(e)};TextStatistics.prototype.averageSyllablesPerWord=function(e){e=e?cleanText(e):this.text;var t=0,n=this.wordCount(e),r=this;e.split(/\s+/).forEach(function(e){t+=r.syllableCount(e)});return(t||1)/(n||1)};" & _
"TextStatistics.prototype.wordsWithThreeSyllables=function(e,t){e=e?cleanText(e):this.text;var n=0,r=this;t=t===false?false:true;e.split(/\s+/).forEach(function(e){if(!e.match(/^[A-Z]/)||t){if(r.syllableCount(e)>2)n++}});return n};TextStatistics.prototype.percentageWordsWithThreeSyllables=function(e,t){e=e?cleanText(e):this.text;return this.wordsWithThreeSyllables(e,t)/this.wordCount(e)*100};TextStatistics.prototype.syllableCount=function(e){var t=0,n=0,r=0;e=e.toLowerCase().replace(/[^a-z]/g,'');" & _
"var i={simile:3,forever:3,shoreline:2};if(i.hasOwnProperty(e))return i[e];var s=[/cial/,/tia/,/cius/,/cious/,/giu/,/ion/,/iou/,/sia$/,/[^aeiuoyt]{2,}ed$/,/.ely$/,/[cg]h?e[rsd]?$/,/rved?$/,/[aeiouy][dt]es?$/,/[aeiouy][^aeiouydt]e[rsd]?$/,/^[dr]e[aeiou][^aeiou]+$/,/[aeiouy]rse$/];var o=[/ia/,/riet/,/dien/,/iu/,/io/,/ii/,/[aeiouym]bl$/,/[aeiou]{3}/,/^mc/,/ism$/,/([^aeiouy])\1l$/,/[^l]lien/,/^coa[dglx]./,/[^gq]ua[^auieo]/,/dnt$/,/uity$/,/ie(r|st)$/];var u=[/^un/,/^fore/,/ly$/,/less$/,/ful$/,/ers?$/,/ings?$/];" & _
"u.forEach(function(t){if(e.match(t)){e=e.replace(t,'');n++}});r=e.split(/[^aeiouy]+/ig).filter(function(e){return!!e.replace(/\s+/ig,'').length}).length;t=r+n;s.forEach(function(n){if(e.match(n))t--});o.forEach(function(n){if(e.match(n))t++});return t||1}"
s1 = "the quick brown fox jumps over the lazy dog"
s2 = "help me! Some Short sentence fragments. Just a test"
With o
.Language = "JScript"
.AddCode code
' Create a TextStatistics object initially with no text.
' textStatistics is a function that creates TextStatistics objects
Set oTextStats = .Eval("textStatistics()")
' Now simply call TextStatistics methods directly
wc = oTextStats.wordCount(s1)
sc = oTextStats.sentenceCount(s2)
' Alternatively you can create a TextStatistics object with the text
' and call the methods with a blank string to return the values
' for the string passed in the constructor
Set oTextStats = .Eval("textStatistics('" + s1 + "')")
wc = oTextStats.wordCount("")
sc = oTextStats.sentenceCount("")
End With
Related
I have this string:
Table {is {red|blue|orange|white|green-{yellow|black}} |has {{twenty|thirty}two|{{two hundered and |three hundered and }fourty |fifty }three|four} legs} and is placed { in corner | in the middle } of office and {printer|phone} is {{gray-|}black|white}.
I want some data structure that I would be able to work with it, Can you suggest something?
This is my try:
var matches = $scope.fileContent.match(/{([^}]+)}/g);
for (var i = 0; i < matches.length; i++) {
console.log(matches[i]);
}
I want random sentences
Possible results:
- Table is blue and is placed in corner of office and printer is black.
- Table has three hundered and fourty three legs and is placed in the middle of office and phone is gray-black.
A grammar for this sort of sentence structure would be:
SENTENCE := PARTIAL optional SENTENCE
PARTIAL := TEXT or BRANCH
BRANCH := '{' SENTENCE ALTERNATIVES '}'
ALTERNATIVES := '|' SENTENCE optional ALTERNATIVES
Maybe I could have used clearer names for the different stages, but you get the point. Any of your sentences can be broken down using the rules of this grammar. After parsing, you will get your sentence in a tree structure.
Once you have your string parsed into this tree structure, you can traverse it, and pick randomly as to which branch you take. An example in JavaScript:
var string = "A table {is {red|blue|green}|has {four|five} legs}"
var index = 0
var root = new Node()
var current = root
function Node() {
this.text = ""
this.options = []
this.next = null
}
Node.prototype.toString = function(){
var string = this.text;
if (this.options.length > 0) {
var rnd = Math.floor(Math.random() * this.options.length)
string += this.options[rnd].toString()
}
if (this.next != null)
string += this.next.toString()
return string
}
function parse() {
text()
if (index == string.length)
return
if (string[index] == "{") {
index++
options()
var next = new Node()
current.next = next
current = next
parse()
}
}
function options() {
var parent = current
while(true) {
current = new Node()
parent.options.push(current)
parse()
index++
if (string[index - 1] != '|')
break
}
current = new Node()
parent.next = current
}
function text() {
while (index < string.length && "{|}".indexOf(string[index]) == -1) {
current.text += string[index]
index++
}
}
parse()
alert(root.toString())
Just a heads up - this doesn't handle these strings:
"some text { without a closing curly brace"
"an unexpected | pipe symbol"
"an unexpected } closing curly brace"
I'll let you add that in yourself.
I know that console.log supports at least some of the basic features of printf from C through messing around, but I was curious of a way to take advantage of console.log's implementation to create something similar to sprintf. I know you can't simply use .bind or .apply since console.log doesn't actually return the string, so is there a way around this?
If this isn't actually possible, is there some other little-known native implementation that's only a few lines of code away from achieving sprintf in JavaScript?
For those who do not know what sprintf is exactly, here is some documentation from tutorialspoint. Example usage I'm looking for is below:
var string1 = sprintf("Hello, %s!", "world");
var string2 = sprintf("The answer to everything is %d.", 42);
Keep it simple
var sprintf = (str, ...argv) => !argv.length ? str :
sprintf(str = str.replace(sprintf.token||"$", argv.shift()), ...argv);
Since Javascript handles data types automatically, there is no need for type options.
If you need padding, "15".padStart(5,"0") = ("00000"+15).slice(-5) = "00015".
Usage
var sprintf = (str, ...argv) => !argv.length ? str :
sprintf(str = str.replace(sprintf.token||"$", argv.shift()), ...argv);
alert(sprintf("Success after $ clicks ($ seconds).", 15, 4.569));
sprintf.token = "_";
alert(sprintf("Failure after _ clicks (_ seconds).", 5, 1.569));
sprintf.token = "%";
var a = "%<br>%<br>%";
var b = sprintf("% plus % is %", 0, 1, 0 + 1);
var c = sprintf("Hello, %!", "world");
var d = sprintf("The answer to everything is %.", 42);
document.write(sprintf(a,b,c,d));
Try utilizing eval , .replace
var sprintf = function sprintf() {
// arguments
var args = Array.prototype.slice.call(arguments)
// parameters for string
, n = args.slice(1, -1)
// string
, text = args[0]
// check for `Number`
, _res = isNaN(parseInt(args[args.length - 1]))
? args[args.length - 1]
// alternatively, if string passed
// as last argument to `sprintf`,
// `eval(args[args.length - 1])`
: Number(args[args.length - 1])
// array of replacement values
, arr = n.concat(_res)
// `res`: `text`
, res = text;
// loop `arr` items
for (var i = 0; i < arr.length; i++) {
// replace formatted characters within `res` with `arr` at index `i`
res = res.replace(/%d|%s/, arr[i])
}
// return string `res`
return res
};
document.write(sprintf("%d plus %d is %d", 0, 1, 0 + 1)
+ "<br>"
+ sprintf("Hello, %s!", "world")
+ "<br>"
+ sprintf("The answer to everything is %d.", 42)
);
What's the JavaScript equivalent to this C# Method:
var x = "|f|oo||";
var y = x.Trim('|'); // "f|oo"
C# trims the selected character only at the beginning and end of the string!
One line is enough:
var x = '|f|oo||';
var y = x.replace(/^\|+|\|+$/g, '');
document.write(x + '<br />' + y);
^ beginning of the string
\|+ pipe, one or more times
| or
\|+ pipe, one or more times
$ end of the string
A general solution:
function trim (s, c) {
if (c === "]") c = "\\]";
if (c === "^") c = "\\^";
if (c === "\\") c = "\\\\";
return s.replace(new RegExp(
"^[" + c + "]+|[" + c + "]+$", "g"
), "");
}
chars = ".|]\\^";
for (c of chars) {
s = c + "foo" + c + c + "oo" + c + c + c;
console.log(s, "->", trim(s, c));
}
Parameter c is expected to be a character (a string of length 1).
As mentionned in the comments, it might be useful to support multiple characters, as it's quite common to trim multiple whitespace-like characters for example. To do this, MightyPork suggests to replace the ifs with the following line of code:
c = c.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&');
This part [-/\\^$*+?.()|[\]{}] is a set of special characters in regular expression syntax, and $& is a placeholder which stands for the matching character, meaning that the replace function escapes special characters. Try in your browser console:
> "{[hello]}".replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&')
"\{\[hello\]\}"
Update: Was curious around the performance of different solutions and so I've updated a basic benchmark here:
https://www.measurethat.net/Benchmarks/Show/12738/0/trimming-leadingtrailing-characters
Some interesting and unexpected results running under Chrome.
https://www.measurethat.net/Benchmarks/ShowResult/182877
+-----------------------------------+-----------------------+
| Test name | Executions per second |
+-----------------------------------+-----------------------+
| Index Version (Jason Larke) | 949979.7 Ops/sec |
| Substring Version (Pho3niX83) | 197548.9 Ops/sec |
| Regex Version (leaf) | 107357.2 Ops/sec |
| Boolean Filter Version (mbaer3000)| 94162.3 Ops/sec |
| Spread Version (Robin F.) | 4242.8 Ops/sec |
+-----------------------------------+-----------------------+
Please note; tests were carried out on only a single test string (with both leading and trailing characters that needed trimming). In addition, this benchmark only gives an indication of raw speed; other factors like memory usage are also important to consider.
If you're dealing with longer strings I believe this should outperform most of the other options by reducing the number of allocated strings to either zero or one:
function trim(str, ch) {
var start = 0,
end = str.length;
while(start < end && str[start] === ch)
++start;
while(end > start && str[end - 1] === ch)
--end;
return (start > 0 || end < str.length) ? str.substring(start, end) : str;
}
// Usage:
trim('|hello|world|', '|'); // => 'hello|world'
Or if you want to trim from a set of multiple characters:
function trimAny(str, chars) {
var start = 0,
end = str.length;
while(start < end && chars.indexOf(str[start]) >= 0)
++start;
while(end > start && chars.indexOf(str[end - 1]) >= 0)
--end;
return (start > 0 || end < str.length) ? str.substring(start, end) : str;
}
// Usage:
trimAny('|hello|world ', [ '|', ' ' ]); // => 'hello|world'
// because '.indexOf' is used, you could also pass a string for the 2nd parameter:
trimAny('|hello| world ', '| '); // => 'hello|world'
EDIT: For fun, trim words (rather than individual characters)
// Helper function to detect if a string contains another string
// at a specific position.
// Equivalent to using `str.indexOf(substr, pos) === pos` but *should* be more efficient on longer strings as it can exit early (needs benchmarks to back this up).
function hasSubstringAt(str, substr, pos) {
var idx = 0, len = substr.length;
for (var max = str.length; idx < len; ++idx) {
if ((pos + idx) >= max || str[pos + idx] != substr[idx])
break;
}
return idx === len;
}
function trimWord(str, word) {
var start = 0,
end = str.length,
len = word.length;
while (start < end && hasSubstringAt(str, word, start))
start += word.length;
while (end > start && hasSubstringAt(str, word, end - len))
end -= word.length
return (start > 0 || end < str.length) ? str.substring(start, end) : str;
}
// Usage:
trimWord('blahrealmessageblah', 'blah');
If I understood well, you want to remove a specific character only if it is at the beginning or at the end of the string (ex: ||fo||oo|||| should become foo||oo). You can create an ad hoc function as follows:
function trimChar(string, charToRemove) {
while(string.charAt(0)==charToRemove) {
string = string.substring(1);
}
while(string.charAt(string.length-1)==charToRemove) {
string = string.substring(0,string.length-1);
}
return string;
}
I tested this function with the code below:
var str = "|f|oo||";
$( "#original" ).html( "Original String: '" + str + "'" );
$( "#trimmed" ).html( "Trimmed: '" + trimChar(str, "|") + "'" );
You can use a regular expression such as:
var x = "|f|oo||";
var y = x.replace(/^\|+|\|+$/g, "");
alert(y); // f|oo
UPDATE:
Should you wish to generalize this into a function, you can do the following:
var escapeRegExp = function(strToEscape) {
// Escape special characters for use in a regular expression
return strToEscape.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
};
var trimChar = function(origString, charToTrim) {
charToTrim = escapeRegExp(charToTrim);
var regEx = new RegExp("^[" + charToTrim + "]+|[" + charToTrim + "]+$", "g");
return origString.replace(regEx, "");
};
var x = "|f|oo||";
var y = trimChar(x, "|");
alert(y); // f|oo
A regex-less version which is easy on the eye:
const trim = (str, chars) => str.split(chars).filter(Boolean).join(chars);
For use cases where we're certain that there's no repetition of the chars off the edges.
to keep this question up to date:
here is an approach i'd choose over the regex function using the ES6 spread operator.
function trimByChar(string, character) {
const first = [...string].findIndex(char => char !== character);
const last = [...string].reverse().findIndex(char => char !== character);
return string.substring(first, string.length - last);
}
Improved version after #fabian 's comment (can handle strings containing the same character only)
function trimByChar1(string, character) {
const arr = Array.from(string);
const first = arr.findIndex(char => char !== character);
const last = arr.reverse().findIndex(char => char !== character);
return (first === -1 && last === -1) ? '' : string.substring(first, string.length - last);
}
This can trim several characters at a time:
function trimChars (str, c) {
var re = new RegExp("^[" + c + "]+|[" + c + "]+$", "g");
return str.replace(re,"");
}
var x = "|f|oo||";
x = trimChars(x, '|'); // f|oo
var y = "..++|f|oo||++..";
y = trimChars(y, '|.+'); // f|oo
var z = "\\f|oo\\"; // \f|oo\
// For backslash, remember to double-escape:
z = trimChars(z, "\\\\"); // f|oo
For use in your own script and if you don't mind changing the prototype, this can be a convenient "hack":
String.prototype.trimChars = function (c) {
var re = new RegExp("^[" + c + "]+|[" + c + "]+$", "g");
return this.replace(re,"");
}
var x = "|f|oo||";
x = x.trimChars('|'); // f|oo
Since I use the trimChars function extensively in one of my scripts, I prefer this solution. But there are potential issues with modifying an object's prototype.
If you define these functions in your program, your strings will have an upgraded version of trim that can trim all given characters:
String.prototype.trimLeft = function(charlist) {
if (charlist === undefined)
charlist = "\s";
return this.replace(new RegExp("^[" + charlist + "]+"), "");
};
String.prototype.trim = function(charlist) {
return this.trimLeft(charlist).trimRight(charlist);
};
String.prototype.trimRight = function(charlist) {
if (charlist === undefined)
charlist = "\s";
return this.replace(new RegExp("[" + charlist + "]+$"), "");
};
var withChars = "/-center-/"
var withoutChars = withChars.trim("/-")
document.write(withoutChars)
Source
https://www.sitepoint.com/trimming-strings-in-javascript/
const trim = (str, char) => {
let i = 0;
let j = str.length-1;
while (str[i] === char) i++;
while (str[j] === char) j--;
return str.slice(i,j+1);
}
console.log(trim('|f|oo|', '|')); // f|oo
Non-regex solution.
Two pointers: i (beginning) & j (end).
Only move pointers if they match char and stop when they don't.
Return remaining string.
I would suggest looking at lodash and how they implemented the trim function.
See Lodash Trim for the documentation and the source to see the exact code that does the trimming.
I know this does not provide an exact answer your question, but I think it's good to set a reference to a library on such a question since others might find it useful.
This one trims all leading and trailing delimeters
const trim = (str, delimiter) => {
const pattern = `[^\\${delimiter}]`;
const start = str.search(pattern);
const stop = str.length - str.split('').reverse().join('').search(pattern);
return str.substring(start, stop);
}
const test = '||2|aaaa12bb3ccc|||||';
console.log(trim(test, '|')); // 2|aaaa12bb3ccc
I like the solution from #Pho3niX83...
Let's extend it with "word" instead of "char"...
function trimWord(_string, _word) {
var splitted = _string.split(_word);
while (splitted.length && splitted[0] === "") {
splitted.shift();
}
while (splitted.length && splitted[splitted.length - 1] === "") {
splitted.pop();
}
return splitted.join(_word);
};
The best way to resolve this task is (similar with PHP trim function):
function trim( str, charlist ) {
if ( typeof charlist == 'undefined' ) {
charlist = '\\s';
}
var pattern = '^[' + charlist + ']*(.*?)[' + charlist + ']*$';
return str.replace( new RegExp( pattern ) , '$1' )
}
document.getElementById( 'run' ).onclick = function() {
document.getElementById( 'result' ).value =
trim( document.getElementById( 'input' ).value,
document.getElementById( 'charlist' ).value);
}
<div>
<label for="input">Text to trim:</label><br>
<input id="input" type="text" placeholder="Text to trim" value="dfstextfsd"><br>
<label for="charlist">Charlist:</label><br>
<input id="charlist" type="text" placeholder="Charlist" value="dfs"><br>
<label for="result">Result:</label><br>
<input id="result" type="text" placeholder="Result" disabled><br>
<button type="button" id="run">Trim it!</button>
</div>
P.S.: why i posted my answer, when most people already done it before? Because i found "the best" mistake in all of there answers: all used the '+' meta instead of '*', 'cause trim must remove chars IF THEY ARE IN START AND/OR END, but it return original string in else case.
Another version to use regular expression.
No or(|) used and no global(g) used.
function escapeRegexp(s) {
return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}
function trimSpecific(value, find) {
const find2 = escapeRegexp(find);
return value.replace(new RegExp(`^[${find2}]*(.*?)[${find2}]*$`), '$1')
}
console.log(trimSpecific('"a"b"', '"') === 'a"b');
console.log(trimSpecific('""ab"""', '"') === 'ab');
console.log(trimSpecific('"', '"') === '');
console.log(trimSpecific('"a', '"') === 'a');
console.log(trimSpecific('a"', '"') === 'a');
console.log(trimSpecific('[a]', '[]') === 'a');
console.log(trimSpecific('{[a]}', '[{}]') === 'a');
expanding on #leaf 's answer, here's one that can take multiple characters:
var trim = function (s, t) {
var tr, sr
tr = t.split('').map(e => `\\\\${e}`).join('')
sr = s.replace(new RegExp(`^[${tr}]+|[${tr}]+$`, 'g'), '')
return sr
}
function trim(text, val) {
return text.replace(new RegExp('^'+val+'+|'+val+'+$','g'), '');
}
"|Howdy".replace(new RegExp("^\\|"),"");
(note the double escaping. \\ needed, to have an actually single slash in the string, that then leads to escaping of | in the regExp).
Only few characters need regExp-Escaping., among them the pipe operator.
const special = ':;"<>?/!`~##$%^&*()+=-_ '.split("");
const trim = (input) => {
const inTrim = (str) => {
const spStr = str.split("");
let deleteTill = 0;
let startChar = spStr[deleteTill];
while (special.some((s) => s === startChar)) {
deleteTill++;
if (deleteTill <= spStr.length) {
startChar = spStr[deleteTill];
} else {
deleteTill--;
break;
}
}
spStr.splice(0, deleteTill);
return spStr.join("");
};
input = inTrim(input);
input = inTrim(input.split("").reverse().join("")).split("").reverse().join("");
return input;
};
alert(trim('##This is what I use$%'));
String.prototype.TrimStart = function (n) {
if (this.charAt(0) == n)
return this.substr(1);
};
String.prototype.TrimEnd = function (n) {
if (this.slice(-1) == n)
return this.slice(0, -1);
};
To my knowledge, jQuery doesnt have a built in function the method your are asking about.
With javascript however, you can just use replace to change the content of your string:
x.replace(/|/i, ""));
This will replace all occurences of | with nothing.
try:
console.log(x.replace(/\|/g,''));
Try this method:
var a = "anan güzel mi?";
if (a.endsWith("?")) a = a.slice(0, -1);
document.body.innerHTML = a;
How can I insert a string at a specific index of another string?
var txt1 = "foo baz"
Suppose I want to insert "bar " after the "foo" how can I achieve that?
I thought of substring(), but there must be a simpler more straight forward way.
Inserting at a specific index (rather than, say, at the first space character) has to use string slicing/substring:
var txt2 = txt1.slice(0, 3) + "bar" + txt1.slice(3);
You could prototype your own splice() into String.
Polyfill
if (!String.prototype.splice) {
/**
* {JSDoc}
*
* The splice() method changes the content of a string by removing a range of
* characters and/or adding new characters.
*
* #this {String}
* #param {number} start Index at which to start changing the string.
* #param {number} delCount An integer indicating the number of old chars to remove.
* #param {string} newSubStr The String that is spliced in.
* #return {string} A new string with the spliced substring.
*/
String.prototype.splice = function(start, delCount, newSubStr) {
return this.slice(0, start) + newSubStr + this.slice(start + Math.abs(delCount));
};
}
Example
String.prototype.splice = function(idx, rem, str) {
return this.slice(0, idx) + str + this.slice(idx + Math.abs(rem));
};
var result = "foo baz".splice(4, 0, "bar ");
document.body.innerHTML = result; // "foo bar baz"
EDIT: Modified it to ensure that rem is an absolute value.
Here is a method I wrote that behaves like all other programming languages:
String.prototype.insert = function(index, string) {
if (index > 0)
{
return this.substring(0, index) + string + this.substring(index, this.length);
}
return string + this;
};
//Example of use:
var something = "How you?";
something = something.insert(3, " are");
console.log(something)
Reference:
http://coderamblings.wordpress.com/2012/07/09/insert-a-string-at-a-specific-index/
Just make the following function:
function insert(str, index, value) {
return str.substr(0, index) + value + str.substr(index);
}
and then use it like that:
alert(insert("foo baz", 4, "bar "));
Output: foo bar baz
It behaves exactly, like the C# (Sharp) String.Insert(int startIndex, string value).
NOTE: This insert function inserts the string value (third parameter) before the specified integer index (second parameter) in the string str (first parameter), and then returns the new string without changing str!
UPDATE 2016: Here is another just-for-fun (but more serious!) prototype function based on one-liner RegExp approach (with prepend support on undefined or negative index):
/**
* Insert `what` to string at position `index`.
*/
String.prototype.insert = function(what, index) {
return index > 0
? this.replace(new RegExp('.{' + index + '}'), '$&' + what)
: what + this;
};
console.log( 'foo baz'.insert('bar ', 4) ); // "foo bar baz"
console.log( 'foo baz'.insert('bar ') ); // "bar foo baz"
Previous (back to 2012) just-for-fun solution:
var index = 4,
what = 'bar ';
'foo baz'.replace(/./g, function(v, i) {
return i === index - 1 ? v + what : v;
}); // "foo bar baz"
This is basically doing what #Base33 is doing except I'm also giving the option of using a negative index to count from the end. Kind of like the substr method allows.
// use a negative index to insert relative to the end of the string.
String.prototype.insert = function (index, string) {
var ind = index < 0 ? this.length + index : index;
return this.substring(0, ind) + string + this.substr(ind);
};
Example:
Let's say you have full size images using a naming convention but can't update the data to also provide thumbnail urls.
var url = '/images/myimage.jpg';
var thumb = url.insert(-4, '_thm');
// result: '/images/myimage_thm.jpg'
If anyone is looking for a way to insert text at multiple indices in a string, try this out:
String.prototype.insertTextAtIndices = function(text) {
return this.replace(/./g, function(character, index) {
return text[index] ? text[index] + character : character;
});
};
For example, you can use this to insert <span> tags at certain offsets in a string:
var text = {
6: "<span>",
11: "</span>"
};
"Hello world!".insertTextAtIndices(text); // returns "Hello <span>world</span>!"
Instantiate an array from the string
Use Array#splice
Stringify again using Array#join
The benefits of this approach are two-fold:
Simple
Unicode code point compliant
const pair = Array.from('USDGBP')
pair.splice(3, 0, '/')
console.log(pair.join(''))
Given your current example you could achieve the result by either
var txt2 = txt1.split(' ').join(' bar ')
or
var txt2 = txt1.replace(' ', ' bar ');
but given that you can make such assumptions, you might as well skip directly to Gullen's example.
In a situation where you really can't make any assumptions other than character index-based, then I really would go for a substring solution.
my_string = "hello world";
my_insert = " dear";
my_insert_location = 5;
my_string = my_string.split('');
my_string.splice( my_insert_location , 0, my_insert );
my_string = my_string.join('');
https://jsfiddle.net/gaby_de_wilde/wz69nw9k/
I know this is an old thread, however, here is a really effective approach.
var tn = document.createTextNode("I am just to help")
t.insertData(10, "trying");
What's great about this is that it coerces the node content. So if this node were already on the DOM, you wouldn't need to use any query selectors or update the innerText. The changes would reflect due to its binding.
Were you to need a string, simply access the node's text content property.
tn.textContent
#=> "I am just trying to help"
You can do it easily with regexp in one line of code
const str = 'Hello RegExp!';
const index = 6;
const insert = 'Lovely ';
//'Hello RegExp!'.replace(/^(.{6})(.)/, `$1Lovely $2`);
const res = str.replace(new RegExp(`^(.{${index}})(.)`), `$1${insert}$2`);
console.log(res);
"Hello Lovely RegExp!"
Well, we can use both the substring and slice method.
String.prototype.customSplice = function (index, absIndex, string) {
return this.slice(0, index) + string+ this.slice(index + Math.abs(absIndex));
};
String.prototype.replaceString = function (index, string) {
if (index > 0)
return this.substring(0, index) + string + this.substr(index);
return string + this;
};
console.log('Hello Developers'.customSplice(6,0,'Stack ')) // Hello Stack Developers
console.log('Hello Developers'.replaceString(6,'Stack ')) //// Hello Stack Developers
The only problem of a substring method is that it won't work with a negative index. It's always take string index from 0th position.
You can use Regular Expressions with a dynamic pattern.
var text = "something";
var output = " ";
var pattern = new RegExp("^\\s{"+text.length+"}");
var output.replace(pattern,text);
outputs:
"something "
This replaces text.length of whitespace characters at the beginning of the string output.
The RegExp means ^\ - beginning of a line \s any white space character, repeated {n} times, in this case text.length. Use \\ to \ escape backslashes when building this kind of patterns out of strings.
another solution, cut the string in 2 and put a string in between.
var str = jQuery('#selector').text();
var strlength = str.length;
strf = str.substr(0 , strlength - 5);
strb = str.substr(strlength - 5 , 5);
jQuery('#selector').html(strf + 'inserted' + strb);
Using slice
You can use slice(0,index) + str + slice(index). Or you can create a method for it.
String.prototype.insertAt = function(index,str){
return this.slice(0,index) + str + this.slice(index)
}
console.log("foo bar".insertAt(4,'baz ')) //foo baz bar
Splice method for Strings
You can split() the main string and add then use normal splice()
String.prototype.splice = function(index,del,...newStrs){
let str = this.split('');
str.splice(index,del,newStrs.join('') || '');
return str.join('');
}
var txt1 = "foo baz"
//inserting single string.
console.log(txt1.splice(4,0,"bar ")); //foo bar baz
//inserting multiple strings
console.log(txt1.splice(4,0,"bar ","bar2 ")); //foo bar bar2 baz
//removing letters
console.log(txt1.splice(1,2)) //f baz
//remving and inseting atm
console.log(txt1.splice(1,2," bar")) //f bar baz
Applying splice() at multiple indexes
The method takes an array of arrays each element of array representing a single splice().
String.prototype.splice = function(index,del,...newStrs){
let str = this.split('');
str.splice(index,del,newStrs.join('') || '');
return str.join('');
}
String.prototype.mulSplice = function(arr){
str = this
let dif = 0;
arr.forEach(x => {
x[2] === x[2] || [];
x[1] === x[1] || 0;
str = str.splice(x[0] + dif,x[1],...x[2]);
dif += x[2].join('').length - x[1];
})
return str;
}
let txt = "foo bar baz"
//Replacing the 'foo' and 'bar' with 'something1' ,'another'
console.log(txt.splice(0,3,'something'))
console.log(txt.mulSplice(
[
[0,3,["something1"]],
[4,3,["another"]]
]
))
I wanted to compare the method using substring and the method using slice from Base33 and user113716 respectively, to do that I wrote some code
also have a look at this performance comparison, substring, slice
The code I used creates huge strings and inserts the string "bar " multiple times into the huge string
if (!String.prototype.splice) {
/**
* {JSDoc}
*
* The splice() method changes the content of a string by removing a range of
* characters and/or adding new characters.
*
* #this {String}
* #param {number} start Index at which to start changing the string.
* #param {number} delCount An integer indicating the number of old chars to remove.
* #param {string} newSubStr The String that is spliced in.
* #return {string} A new string with the spliced substring.
*/
String.prototype.splice = function (start, delCount, newSubStr) {
return this.slice(0, start) + newSubStr + this.slice(start + Math.abs(delCount));
};
}
String.prototype.splice = function (idx, rem, str) {
return this.slice(0, idx) + str + this.slice(idx + Math.abs(rem));
};
String.prototype.insert = function (index, string) {
if (index > 0)
return this.substring(0, index) + string + this.substring(index, this.length);
return string + this;
};
function createString(size) {
var s = ""
for (var i = 0; i < size; i++) {
s += "Some String "
}
return s
}
function testSubStringPerformance(str, times) {
for (var i = 0; i < times; i++)
str.insert(4, "bar ")
}
function testSpliceStringPerformance(str, times) {
for (var i = 0; i < times; i++)
str.splice(4, 0, "bar ")
}
function doTests(repeatMax, sSizeMax) {
n = 1000
sSize = 1000
for (var i = 1; i <= repeatMax; i++) {
var repeatTimes = n * (10 * i)
for (var j = 1; j <= sSizeMax; j++) {
var actualStringSize = sSize * (10 * j)
var s1 = createString(actualStringSize)
var s2 = createString(actualStringSize)
var start = performance.now()
testSubStringPerformance(s1, repeatTimes)
var end = performance.now()
var subStrPerf = end - start
start = performance.now()
testSpliceStringPerformance(s2, repeatTimes)
end = performance.now()
var splicePerf = end - start
console.log(
"string size =", "Some String ".length * actualStringSize, "\n",
"repeat count = ", repeatTimes, "\n",
"splice performance = ", splicePerf, "\n",
"substring performance = ", subStrPerf, "\n",
"difference = ", splicePerf - subStrPerf // + = splice is faster, - = subStr is faster
)
}
}
}
doTests(1, 100)
The general difference in performance is marginal at best and both methods work just fine (even on strings of length ~~ 12000000)
Take the solution. I have written this code in an easy format:
const insertWord = (sentence,word,index) => {
var sliceWord = word.slice(""),output = [],join; // Slicing the input word and declaring other variables
var sliceSentence = sentence.slice(""); // Slicing the input sentence into each alphabets
for (var i = 0; i < sliceSentence.length; i++)
{
if (i === index)
{ // checking if index of array === input index
for (var j = 0; j < word.length; j++)
{ // if yes we'll insert the word
output.push(sliceWord[j]); // Condition is true we are inserting the word
}
output.push(" "); // providing a single space at the end of the word
}
output.push(sliceSentence[i]); // pushing the remaining elements present in an array
}
join = output.join(""); // converting an array to string
console.log(join)
return join;
}
Prototype should be the best approach as many mentioned. Make sure that prototype comes earlier than where it is used.
String.prototype.insert = function (x, str) {
return (x > 0) ? this.substring(0, x) + str + this.substr(x) : str + this;
};
Here's a thing i've been trying to resolve...
We've got some data from an ajax call and the result data is between other stuff a huge string with key:value data. For example:
"2R=OK|2M=2 row(s) found|V1=1,2|"
Is it posible for js to do something like:
var value = someFunction(str, param);
so if i search for "V1" parameter it will return "1,2"
I got this running on Sql server no sweat, but i'm struggling with js to parse the string.
So far i'm able to do this by a VERY rudimentary for loop like this:
var str = "2R=OK|2M=2 row(s) found|V1=1,2|";
var param = "V1";
var arr = str.split("|");
var i = 0;
var value = "";
for(i = 0; i<arr.length; ++i){
if( arr[i].indexOf(param)>-1 ){
value = arr[i].split("=")[1];
}
}
console.log(value);
if i put that into a function it works, but i wonder if there's a more efficient way to do it, maybe some regex? but i suck at it. Hopefully somebody may shine a light on this for me?
Thanks!
This seems to work for your specific use-case:
function getValueByKey(haystack, needle) {
if (!haystack || !needle) {
return false;
}
else {
var re = new RegExp(needle + '=(.+)');
return haystack.match(re)[1];
}
}
var str = "2R=OK|2M=2 row(s) found|V1=1,2|",
test = getValueByKey(str, 'V1');
console.log(test);
JS Fiddle demo.
And, to include the separator in your search (in order to prevent somethingElseV1 matching for V1):
function getValueByKey(haystack, needle, separator) {
if (!haystack || !needle) {
return false;
}
else {
var re = new RegExp('\\' + separator + needle + '=(.+)\\' + separator);
return haystack.match(re)[1];
}
}
var str = "2R=OK|2M=2 row(s) found|V1=1,2|",
test = getValueByKey(str, 'V1', '|');
console.log(test);
JS Fiddle demo.
Note that this approach does require the use of the new RegExp() constructor (rather than creating a regex-literal using /.../) in order to pass variables into the regular expression.
Similarly, because we're using a string to create the regular expression within the constructor, we need to double-escape characters that require escaping (escaping first within the string and then escaping within in the created RegExp).
References:
RegExp.
String.match().
This should work for you and it's delimiters are configurable (if you wish to parse a similar string with different delimiters, you can just pass in the delimiters as arguments):
var parseKeyValue = (function(){
return function(str, search, keyDelim, valueDelim){
keyDelim = quote(keyDelim || '|');
valueDelim = quote(valueDelim || '=');
var regexp = new RegExp('(?:^|' + keyDelim + ')' + quote(search) + valueDelim + '(.*?)(?:' + keyDelim + '|$)');
var result = regexp.exec(str);
if(result && result.length > 1)
return result[1];
};
function quote(str){
return (str+'').replace(/([.?*+^$[\]\\(){}|-])/g, "\\$1");
}
})();
Quote function borrowed form this answer
Usage examples:
var str = "2R=OK|2M=2 row(s) found|V1=1,2|";
var param = "V1";
parseKeyValue(str, param); // "1,2"
var str = "2R=OK&2M=2 row(s) found&V1=1,2";
var param = "2R";
parseKeyValue(str, param, '&'); // "OK"
var str =
"2R=>OK\n\
2M->2 row(s) found\n\
V1->1,2";
var param = "2M";
parseKeyValue(str, param, '\n', '->'); // "2 row(s) found"
Here is another approach:
HTML:
<div id="2R"></div>
<div id="2M"></div>
<div id="V1"></div>
Javascript:
function createDictionary(input) {
var splittedInput = input.split(/[=|]/),
kvpCount = Math.floor(splittedInput.length / 2),
i, key, value,
dictionary = {};
for (i = 0; i < kvpCount; i += 1) {
key = splittedInput[i * 2];
value = splittedInput[i * 2 + 1];
dictionary[key] = value;
}
return dictionary;
}
var input = "2R=OK|2M=2 row(s) found|V1=1,2|",
dictionary = createDictionary(input),
div2R = document.getElementById("2R"),
div2M = document.getElementById("2M"),
divV1 = document.getElementById("V1");
div2R.innerHTML = dictionary["2R"];
div2M.innerHTML = dictionary["2M"];
divV1.innerHTML = dictionary["V1"];
Result:
OK
2 row(s) found
1,2