Total newbie + first time poster here with very little experience though I feel this problem is one I could solve with the help of some generous strangers.
I am querying a GDoc and attempting to create a function to count words between two strings for two possible end strings, for example:
Example #1
Definitive Title
*Count these words*
===============
OR
Example #2
Definitive Title
*Count these words*
Other words that are in a table
Definitive Title
*Count these other different words*
===============
In both of the above examples I looking to count the words between a pre-defined string and an end string.
If I ran the function that I am trying to create on Example #1 I am hoping it'd return 3 words. For Example #2 I'd hope that my function returns 8 words.
So far my function looks like this:
function doPost(e) {
var docUrl = e.parameter.docUrl
var text = DocumentApp.openByUrl(docUrl).getBody().getText()
var wordCount = text.split(" ").length
return ContentService.createTextOutput(wordCount.toString()).setMimeType(ContentService.MimeType.TEXT)
}
This returns a word count for the entire document. Any advice to point me in the right direction?
For more dynamic, appropriate and accurate solution, execute the following snippets before the split () function. Regular Expressions often used to provide dynamic solutions. It is a must have skill.
text = text.replace(/(^\s*)|(\s*$)/gi,""); // remove the start and end spaces of the string (like trim ())
text = text.replace(/[ ]{2,}/gi," "); // filter out one or more spaces
text = text.replace(/\n /,"\n"); // filter out news lines with spacing at beginning
wordCount = text.split(" ").length;
Here is a solution to your problem you can log the difference of characters and words or you can log the total amount of words or characters in the two sentaces. You are also going to want to put the bigger sentence on top, otherwise it will give you a negative number.
var x = "count these words";
var y = "count words";
function findCharDif(word1, word2) {
var word1length = word1.length;
var word2length = word2.length;
var difference = word1length - word2length;
var total = word1length + word2length;
console.log(difference);
console.log(total);
}
function findWordDif(sentence1, sentence2) {
var words1 = 0;
var words2 = 0;
for (var i = 0; i < sentence1.length; i++) {
if (sentence1[i] == " ") {
words1++;
} else {
continue
}
}
for (var a = 0; a < sentence2.length; a++) {
if (sentence2[a] == " ") {
words2++;
} else {
continue
}
}
var difference = (words1 + 1) - (words2 + 1); // this logs out the difference of words between the sentences
var totalWords = (words1 + 1) + (words2 + 1); // this logs out the total amount of words
console.log(difference);
console.log(totalWords);
}
findCharDif(x, y);
findWordDif(x, y);
The below code seems to have worked! Was able to sit down with someone and solve it with them:
function doPost(e) {
var docUrl = e.parameter.docUrl
/*
var text = DocumentApp.openByUrl(docUrl).getBody().getText()
var wordCount = text.split(" ").length
*/
var wordCount = countScenario2(docUrl);
return ContentService.createTextOutput(wordCount.toString()).setMimeType(ContentService.MimeType.TEXT)
}
/**
* Count the words from Start Test to a table or ====
*/
function countScenario2(docUrl) {
//var docUrl = 'https://docs.google.com/document/d/';
var doc = DocumentApp.openByUrl(docUrl);
var body = doc.getBody();
var reference = body.findText('Start Text');
var start = getIndex('Start Text', body);
var tables = body.getTables();
var count = 0;
for(var j = 1; j < tables.length ; j ++) {
var end = body.getChildIndex(tables[j]);
for (var i = start + 1; i < end; i++) {
var element = body.getChild(i);
var text = element.getText();
//if(text.length > 0) count += text.split(" ").filter(word => word !== ' ' && word !== '' && word !== ' ').length;
var match = text.match(/\b(\w+)\b/g);
count += (match) ? match.length : 0;
}
console.log(count);
var reference = body.findText('Start Text', reference);
var element = reference.getElement();
var start = body.getChildIndex(element.getParent());
}
var end = getIndex('=========================================================', body);
for (var i = start + 1; i < end; i++) {
var element = body.getChild(i);
var text = element.getText();
//if(text.length > 0) count += text.split(" ").filter(word => word !== ' ' && word !== '' && word !== ' ').length;
var match = text.match(/\b(\w+)\b/g);
count += (match) ? match.length : 0;
}
console.log(count);
return count ;
}
/**
* This will return the index of the element
*
* #param {string} keyword The text to be found
* #param {Body} body This is the body of the document
*/
function getIndex(keyword, body, previous) {
var reference = body.findText(keyword, previous);
var element = reference.getElement();
return body.getChildIndex(element.getParent());
}
/************ */
function testPost(){
var e = {parameter:{docUrl:'https://docs.google.com/document/d/'}};
var result = doPost(e);
console.log(JSON.stringify(result.getContent()));}
/**
* Count the words from Start Text to ====
*/
function countScenario1(docUrl) {
//var docUrl = 'https://docs.google.com/document/d/';
var doc = DocumentApp.openByUrl(docUrl);
var body = doc.getBody();
var start = getIndex('Start Text', body);
var end = getIndex('=========================================================', body);
var count = 0;
for (var i = start + 1; i < end; i++) {
var element = body.getChild(i);
var text = element.getText();
//if(text.length > 0) count += text.split(" ").filter(word => word !== ' ' && word !== '' && word !== ' ').length;
var match = text.match(/\b(\w+)\b/g);
count += (match) ? match.length : 0;
}
console.log(count);
return count;
}
function test(){
var docUrl = 'https://docs.google.com/document/d/';
var wordCount = countScenario2(docUrl);
console.log(wordCount);
}
As what #Rishabh K said in his answer, you should definitely want to replace trailing spaces and multiple spaces to avoid inaccurate results.
However on the other hand, I don't think it answers the OP's question. Correct me if I'm wrong but I think this is what you want:
var sample1 = `This is the start identifier
These words should be included
As well As these ones
Even this
Until it ends
now
Ending identifier
These words shouldn't be included
If any of these appears, the logic is wrong`;
var sample2 = sample1 + `
This is the start identifier
These some few words
should also be included in the result set
Ending identifier`;
var sample3 = sample2 + `
This is the start identifier
Although we have the start identifier above
These words shouldn't be included
because there is no corresponding end identifier`;
function getWordDiffBetween(source, str1, str2) {
// make sure newSource, str1 and str2 are all strings
var args = Array.prototype.slice.call(arguments);
args.forEach(function(str, idx) {
if (typeof str !== 'string') {
throw `Argument ${[idx + 1]} is not a string.`;
}
});
var startId = '<==start==>',
endId = '<==end==>';
var newSource = source.replace(new RegExp(str1, 'g'), startId) // replace the start identifier with our own
.replace(new RegExp(str2 + '|={2,}', 'g'), endId) // replace the end identifier with our own
.replace(/(^\s*)|(\s*$)/gi, "") // remove the start and end spaces of the string (like trim ())
.replace(/\s+/g, ' ') //replace all 1 or more spaces/newline/linefeed with a single space
//separate text into words which are separated by a space since we replaced all newlines with space
var words = newSource.split(' ');
// get the indexes where the start and end identifiers occured
var strOneIdx = getAllIndexes(words, startId, true);
var strTwoIdx = getAllIndexes(words, endId, true);
var results = [], // we will store our results here
i;
for (i = 0; i < strOneIdx.length; i++) {
var idxOne = strOneIdx[i]; // current index for str1
var idxTwo = strTwoIdx.find(x => x > idxOne);
//make sure that idxOne has a partner
if (idxTwo) {
var wordsInBetween = words.slice(idxOne + 1, idxTwo); //get range between idxOne and idxTwo
results = results.concat(wordsInBetween); // add the result
}
}
return results;
}
function getAllIndexes(arr, val) {
var indexes = [],
i;
for (i = 0; i < arr.length; i++) {
if (arr[i] === val) {
indexes.push(i);
}
}
return indexes;
}
var startIdentifier = 'This is the start identifier',
endIdentifier = 'Ending identifier',
wordResults = {
sample1: getWordDiffBetween(sample1, startIdentifier, endIdentifier),
sample2: getWordDiffBetween(sample2, startIdentifier, endIdentifier),
sample3: getWordDiffBetween(sample3, startIdentifier, endIdentifier) //should be equal to sample2
};
console.log(wordResults);
We have 2 functions - getWordDiffBetween and getAllIndexes. For explanation, check the comments I added in noteworthy lines.
Edit (updated snippet above):
It seems like you also want "====================" included as your end identifier. This can be done by changing the code:
.replace(new RegExp(str2, 'g'), endId) // replace the end identifier with our own
into
.replace(new RegExp(str2 + '|={2,}', 'g'), endId) // replace the end identifier with our own
which means match occurence of your <end string> or if there is 2 or more occurences of =. You can also change the number 2 in {2,} to your desired count.
I'm trying to figure out how to remove every second character (starting from the first one) from a string in Javascript.
For example, the string "This is a test!" should become "hsi etTi sats!"
I also want to save every deleted character into another array.
I have tried using replace method and splice method, but wasn't able to get them to work properly. Mostly because replace only replaces the first character.
function encrypt(text, n) {
if (text === "NULL") return n;
if (n <= 0) return text;
var encArr = [];
var newString = text.split("");
var j = 0;
for (var i = 0; i < text.length; i += 2) {
encArr[j++] = text[i];
newString.splice(i, 1); // this line doesn't work properly
}
}
You could reduce the characters of the string and group them to separate arrays using the % operator. Use destructuring to get the 2D array returned to separate variables
let str = "This is a test!";
const [even, odd] = [...str].reduce((r,char,i) => (r[i%2].push(char), r), [[],[]])
console.log(odd.join(''))
console.log(even.join(''))
Using a for loop:
let str = "This is a test!",
odd = [],
even = [];
for (var i = 0; i < str.length; i++) {
i % 2 === 0
? even.push(str[i])
: odd.push(str[i])
}
console.log(odd.join(''))
console.log(even.join(''))
It would probably be easier to use a regular expression and .replace: capture two characters in separate capturing groups, add the first character to a string, and replace with the second character. Then, you'll have first half of the output you need in one string, and the second in another: just concatenate them together and return:
function encrypt(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
console.log(encrypt('This is a test!'));
Pretty simple with .reduce() to create the two arrays you seem to want.
function encrypt(text) {
return text.split("")
.reduce(({odd, even}, c, i) =>
i % 2 ? {odd: [...odd, c], even} : {odd, even: [...even, c]}
, {odd: [], even: []})
}
console.log(encrypt("This is a test!"));
They can be converted to strings by using .join("") if you desire.
I think you were on the right track. What you missed is replace is using either a string or RegExp.
The replace() method returns a new string with some or all matches of a pattern replaced by a replacement. The pattern can be a string or a RegExp, and the replacement can be a string or a function to be called for each match. If pattern is a string, only the first occurrence will be replaced.
Source: String.prototype.replace()
If you are replacing a value (and not a regular expression), only the first instance of the value will be replaced. To replace all occurrences of a specified value, use the global (g) modifier
Source: JavaScript String replace() Method
So my suggestion would be to continue still with replace and pass the right RegExp to the function, I guess you can figure out from this example - this removes every second occurrence for char 't':
let count = 0;
let testString = 'test test test test';
console.log('original', testString);
// global modifier in RegExp
let result = testString.replace(/t/g, function (match) {
count++;
return (count % 2 === 0) ? '' : match;
});
console.log('removed', result);
like this?
var text = "This is a test!"
var result = ""
var rest = ""
for(var i = 0; i < text.length; i++){
if( (i%2) != 0 ){
result += text[i]
} else{
rest += text[i]
}
}
console.log(result+rest)
Maybe with split, filter and join:
const remaining = myString.split('').filter((char, i) => i % 2 !== 0).join('');
const deleted = myString.split('').filter((char, i) => i % 2 === 0).join('');
You could take an array and splice and push each second item to the end of the array.
function encrypt(string) {
var array = [...string],
i = 0,
l = array.length >> 1;
while (i <= l) array.push(array.splice(i++, 1)[0]);
return array.join('');
}
console.log(encrypt("This is a test!"));
function encrypt(text) {
text = text.split("");
var removed = []
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed.push(letter)
return false;
}
return true
}).join("")
return {
full: encrypted + removed.join(""),
encrypted: encrypted,
removed: removed
}
}
console.log(encrypt("This is a test!"))
Splice does not work, because if you remove an element from an array in for loop indexes most probably will be wrong when removing another element.
I don't know how much you care about performance, but using regex is not very efficient.
Simple test for quite a long string shows that using filter function is on average about 3 times faster, which can make quite a difference when performed on very long strings or on many, many shorts ones.
function test(func, n){
var text = "";
for(var i = 0; i < n; ++i){
text += "a";
}
var start = new Date().getTime();
func(text);
var end = new Date().getTime();
var time = (end-start) / 1000.0;
console.log(func.name, " took ", time, " seconds")
return time;
}
function encryptREGEX(text) {
let removedText = '';
const replacedText1 = text.replace(/(.)(.)?/g, (_, firstChar, secondChar) => {
// in case the match was at the end of the string,
// and the string has an odd number of characters:
if (!secondChar) secondChar = '';
// remove the firstChar from the string, while adding it to removedText:
removedText += firstChar;
return secondChar;
});
return replacedText1 + removedText;
}
function encrypt(text) {
text = text.split("");
var removed = "";
var encrypted = text.filter((letter, index) => {
if(index % 2 == 0){
removed += letter;
return false;
}
return true
}).join("")
return encrypted + removed
}
var timeREGEX = test(encryptREGEX, 10000000);
var timeFilter = test(encrypt, 10000000);
console.log("Using filter is faster ", timeREGEX/timeFilter, " times")
Using actually an array for storing removed letters and then joining them is much more efficient, than using a string and concatenating letters to it.
I changed an array to string in filter solution to make it the same like in regex solution, so they are more comparable.
I'm looking for [a, b, c, "d, e, f", g, h]to turn into an array of 6 elements: a, b, c, "d,e,f", g, h. I'm trying to do this through Javascript. This is what I have so far:
str = str.split(/,+|"[^"]+"/g);
But right now it's splitting out everything that's in the double-quotes, which is incorrect.
Edit: Okay sorry I worded this question really poorly. I'm being given a string not an array.
var str = 'a, b, c, "d, e, f", g, h';
And I want to turn that into an array using something like the "split" function.
Here's what I would do.
var str = 'a, b, c, "d, e, f", g, h';
var arr = str.match(/(".*?"|[^",\s]+)(?=\s*,|\s*$)/g);
/* will match:
(
".*?" double quotes + anything but double quotes + double quotes
| OR
[^",\s]+ 1 or more characters excl. double quotes, comma or spaces of any kind
)
(?= FOLLOWED BY
\s*, 0 or more empty spaces and a comma
| OR
\s*$ 0 or more empty spaces and nothing else (end of string)
)
*/
arr = arr || [];
// this will prevent JS from throwing an error in
// the below loop when there are no matches
for (var i = 0; i < arr.length; i++) console.log('arr['+i+'] =',arr[i]);
regex: /,(?=(?:(?:[^"]*"){2})*[^"]*$)/
const input_line = '"2C95699FFC68","201 S BOULEVARDRICHMOND, VA 23220","8299600062754882","2018-09-23"'
let my_split = input_line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/)[4]
Output:
my_split[0]: "2C95699FFC68",
my_split[1]: "201 S BOULEVARDRICHMOND, VA 23220",
my_split[2]: "8299600062754882",
my_split[3]: "2018-09-23"
Reference following link for an explanation: regexr.com/44u6o
Here is a JavaScript function to do it:
function splitCSVButIgnoreCommasInDoublequotes(str) {
//split the str first
//then merge the elments between two double quotes
var delimiter = ',';
var quotes = '"';
var elements = str.split(delimiter);
var newElements = [];
for (var i = 0; i < elements.length; ++i) {
if (elements[i].indexOf(quotes) >= 0) {//the left double quotes is found
var indexOfRightQuotes = -1;
var tmp = elements[i];
//find the right double quotes
for (var j = i + 1; j < elements.length; ++j) {
if (elements[j].indexOf(quotes) >= 0) {
indexOfRightQuotes = j;
break;
}
}
//found the right double quotes
//merge all the elements between double quotes
if (-1 != indexOfRightQuotes) {
for (var j = i + 1; j <= indexOfRightQuotes; ++j) {
tmp = tmp + delimiter + elements[j];
}
newElements.push(tmp);
i = indexOfRightQuotes;
}
else { //right double quotes is not found
newElements.push(elements[i]);
}
}
else {//no left double quotes is found
newElements.push(elements[i]);
}
}
return newElements;
}
Here's a non-regex one that assumes doublequotes will come in pairs:
function splitCsv(str) {
return str.split(',').reduce((accum,curr)=>{
if(accum.isConcatting) {
accum.soFar[accum.soFar.length-1] += ','+curr
} else {
accum.soFar.push(curr)
}
if(curr.split('"').length % 2 == 0) {
accum.isConcatting= !accum.isConcatting
}
return accum;
},{soFar:[],isConcatting:false}).soFar
}
console.log(splitCsv('asdf,"a,d",fdsa'),' should be ',['asdf','"a,d"','fdsa'])
console.log(splitCsv(',asdf,,fds,'),' should be ',['','asdf','','fds',''])
console.log(splitCsv('asdf,"a,,,d",fdsa'),' should be ',['asdf','"a,,,d"','fdsa'])
This works well for me. (I used semicolons so the alert message would show the difference between commas added when turning the array into a string and the actual captured values.)
REGEX
/("[^"]*")|[^;]+/
var str = 'a; b; c; "d; e; f"; g; h; "i"';
var array = str.match(/("[^"]*")|[^;]+/g);
alert(array);
Here's the regex we're using to extract valid arguments from a comma-separated argument list, supporting double-quoted arguments. It works for the outlined edge cases. E.g.
doesn't include quotes in the matches
works with white spaces in matches
works with empty fields
(?<=")[^"]+?(?="(?:\s*?,|\s*?$))|(?<=(?:^|,)\s*?)(?:[^,"\s][^,"]*[^,"\s])|(?:[^,"\s])(?![^"]*?"(?:\s*?,|\s*?$))(?=\s*?(?:,|$))
Proof: https://regex101.com/r/UL8kyy/3/tests (Note: currently only works in Chrome because the regex uses lookbehinds which are only supported in ECMA2018)
According to our guidelines it avoids non-capturing groups and greedy matching.
I'm sure it can be simplified, I'm open to suggestions / additional test cases.
For anyone interested, the first part matches double-quoted, comma-delimited arguments:
(?<=")[^"]+?(?="(?:\s*?,|\s*?$))
And the second part matches comma-delimited arguments by themselves:
(?<=(?:^|,)\s*?)(?:[^,"\s][^,"]*[^,"\s])|(?:[^,"\s])(?![^"]*?"(?:\s*?,|\s*?$))(?=\s*?(?:,|$))
I almost liked the accepted answer, but it didn't parse the space correctly, and/or it left the double quotes untrimmed, so here is my function:
/**
* Splits the given string into components, and returns the components array.
* Each component must be separated by a comma.
* If the component contains one or more comma(s), it must be wrapped with double quotes.
* The double quote must not be used inside components (replace it with a special string like __double__quotes__ for instance, then transform it again into double quotes later...).
*
* https://stackoverflow.com/questions/11456850/split-a-string-by-commas-but-ignore-commas-within-double-quotes-using-javascript
*/
function splitComponentsByComma(str){
var ret = [];
var arr = str.match(/(".*?"|[^",]+)(?=\s*,|\s*$)/g);
for (let i in arr) {
let element = arr[i];
if ('"' === element[0]) {
element = element.substr(1, element.length - 2);
} else {
element = arr[i].trim();
}
ret.push(element);
}
return ret;
}
console.log(splitComponentsByComma('Hello World, b, c, "d, e, f", c')); // [ 'Hello World', 'b', 'c', 'd, e, f', 'c' ]
Parse any CSV or CSV-String code based on TYPESCRIPT
public parseCSV(content:string):any[string]{
return content.split("\n").map(ar=>ar.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/).map(refi=>refi.replace(/[\x00-\x08\x0E-\x1F\x7F-\uFFFF]/g, "").trim()));
}
var str='"abc",jkl,1000,qwerty6000';
parseCSV(str);
output :
[
"abc","jkl","1000","qwerty6000"
]
I know it's a bit long, but here's my take:
var sample="[a, b, c, \"d, e, f\", g, h]";
var inQuotes = false, items = [], currentItem = '';
for(var i = 0; i < sample.length; i++) {
if (sample[i] == '"') {
inQuotes = !inQuotes;
if (!inQuotes) {
if (currentItem.length) items.push(currentItem);
currentItem = '';
}
continue;
}
if ((/^[\"\[\]\,\s]$/gi).test(sample[i]) && !inQuotes) {
if (currentItem.length) items.push(currentItem);
currentItem = '';
continue;
}
currentItem += sample[i];
}
if (currentItem.length) items.push(currentItem);
console.log(items);
As a side note, it will work both with, and without the braces in the start and end.
This takes a csv file one line at a time and spits back an array with commas inside speech marks intact. if there are no speech marks detected it just .split(",")s as normal... could probs replace that second loop with something but it does the job as is
function parseCSVLine(str){
if(str.indexOf("\"")>-1){
var aInputSplit = str.split(",");
var aOutput = [];
var iMatch = 0;
//var adding = 0;
for(var i=0;i<aInputSplit.length;i++){
if(aInputSplit[i].indexOf("\"")>-1){
var sWithCommas = aInputSplit[i];
for(var z=i;z<aInputSplit.length;z++){
if(z !== i && aInputSplit[z].indexOf("\"") === -1){
sWithCommas+= ","+aInputSplit[z];
}else if(z !== i && aInputSplit[z].indexOf("\"") > -1){
sWithCommas+= ","+aInputSplit[z];
sWithCommas.replace(new RegExp("\"", 'g'), "");
aOutput.push(sWithCommas);
i=z;
z=aInputSplit.length+1;
iMatch++;
}
if(z === aInputSplit.length-1){
if(iMatch === 0){
aOutput.push(aInputSplit[z]);
}
iMatch = 0;
}
}
}else{
aOutput.push(aInputSplit[i]);
}
}
return aOutput
}else{
return str.split(",")
}
}
Use the npm library csv-string to parse the strings instead of split: https://www.npmjs.com/package/csv-string
This will handle the empty entries
Something like a stack should do the trick. Here I vaguely use marker boolean as stack (just getting my purpose served with it).
var str = "a,b,c,blah\"d,=,f\"blah,\"g,h,";
var getAttributes = function(str){
var result = [];
var strBuf = '';
var start = 0 ;
var marker = false;
for (var i = 0; i< str.length; i++){
if (str[i] === '"'){
marker = !marker;
}
if (str[i] === ',' && !marker){
result.push(str.substr(start, i - start));
start = i+1;
}
}
if (start <= str.length){
result.push(str.substr(start, i - start));
}
return result;
};
console.log(getAttributes(str));
jsfiddle setting image code output image
The code works if your input string in the format of stringTocompare.
Run the code on https://jsfiddle.net/ to see output for fiddlejs setting.
Please refer to the screenshot.
You can either use split function for the same for the code below it and tweak the code according to you need.
Remove the bold or word with in ** from the code if you dont want to have comma after split attach=attach**+","**+actualString[t+1].
var stringTocompare='"Manufacturer","12345","6001","00",,"Calfe,eto,lin","Calfe,edin","4","20","10","07/01/2018","01/01/2006",,,,,,,,"03/31/2004"';
console.log(stringTocompare);
var actualString=stringTocompare.split(',');
console.log("Before");
for(var i=0;i<actualString.length;i++){
console.log(actualString[i]);
}
//var actualString=stringTocompare.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/);
for(var i=0;i<actualString.length;i++){
var flag=0;
var x=actualString[i];
if(x!==null)
{
if(x[0]=='"' && x[x.length-1]!=='"'){
var p=0;
var t=i;
var b=i;
for(var k=i;k<actualString.length;k++){
var y=actualString[k];
if(y[y.length-1]!=='"'){
p++;
}
if(y[y.length-1]=='"'){
flag=1;
}
if(flag==1)
break;
}
var attach=actualString[t];
for(var s=p;s>0;s--){
attach=attach+","+actualString[t+1];
t++;
}
actualString[i]=attach;
actualString.splice(b+1,p);
}
}
}
console.log("After");
for(var i=0;i<actualString.length;i++){
console.log(actualString[i]);
}
[1]: https://i.stack.imgur.com/3FcxM.png
I solved this with a simple parser.
It simply goes through the string char by char, splitting off a segment when it finds the split_char (e.g. comma), but also has an on/off flag which is switched by finding the encapsulator_char (e.g. quote). It doesn't require the encapsulator to be at the start of the field/segment (a,b","c,d would produce 3 segments, with 'b","c' as the second), but it should work for a well formed CSV with escaped encapsulator chars.
function split_except_within(text, split_char, encapsulator_char, escape_char) {
var start = 0
var encapsulated = false
var fields = []
for (var c = 0; c < text.length; c++) {
var char = text[c]
if (char === split_char && ! encapsulated) {
fields.push(text.substring(start, c))
start = c+1
}
if (char === encapsulator_char && (c === 0 || text[c-1] !== escape_char) )
encapsulated = ! encapsulated
}
fields.push(text.substring(start))
return fields
}
https://jsfiddle.net/7hty8Lvr/1/
const csvSplit = (line) => {
let splitLine = [];
var quotesplit = line.split('"');
var lastindex = quotesplit.length - 1;
// split evens removing outside quotes, push odds
quotesplit.forEach((val, index) => {
if (index % 2 === 0) {
var firstchar = (index == 0) ? 0 : 1;
var trimmed = (index == lastindex)
? val.substring(firstchar)
: val.slice(firstchar, -1);
trimmed.split(",").forEach(v => splitLine.push(v));
} else {
splitLine.push(val);
}
});
return splitLine;
}
this works as long as quotes always come on the outside of values that contain the commas that need to be excluded (i.e. a csv file).
if you have stuff like '1,2,4"2,6",8'
it will not work.
Assuming your string really looks like '[a, b, c, "d, e, f", g, h]', I believe this would be 'an acceptable use case for eval():
myString = 'var myArr ' + myString;
eval(myString);
console.log(myArr); // will now be an array of elements: a, b, c, "d, e, f", g, h
Edit: As Rocket pointed out, strict mode removes eval's ability to inject variables into the local scope, meaning you'd want to do this:
var myArr = eval(myString);
I've had similar issues with this, and I've found no good .net solution so went DIY. NOTE: This was also used to reply to
Splitting comma separated string, ignore commas in quotes, but allow strings with one double quotation
but seems more applicable here (but useful over there)
In my application I'm parsing a csv so my split credential is ",". this method I suppose only works for where you have a single char split argument.
So, I've written a function that ignores commas within double quotes. it does it by converting the input string into a character array and parsing char by char
public static string[] Splitter_IgnoreQuotes(string stringToSplit)
{
char[] CharsOfData = stringToSplit.ToCharArray();
//enter your expected array size here or alloc.
string[] dataArray = new string[37];
int arrayIndex = 0;
bool DoubleQuotesJustSeen = false;
foreach (char theChar in CharsOfData)
{
//did we just see double quotes, and no command? dont split then. you could make ',' a variable for your split parameters I'm working with a csv.
if ((theChar != ',' || DoubleQuotesJustSeen) && theChar != '"')
{
dataArray[arrayIndex] = dataArray[arrayIndex] + theChar;
}
else if (theChar == '"')
{
if (DoubleQuotesJustSeen)
{
DoubleQuotesJustSeen = false;
}
else
{
DoubleQuotesJustSeen = true;
}
}
else if (theChar == ',' && !DoubleQuotesJustSeen)
{
arrayIndex++;
}
}
return dataArray;
}
This function, to my application taste also ignores ("") in any input as these are unneeded and present in my input.
I am working an anagram generator and am trying to break off each new item in the array into a new line. The way this works is it slices each array item and loops through each character.
The output needs to be:
cat, cta, act, atc, tca, tac,
bat, bta, abt, atb, tba, tab,
rat, rta, art, atr, tra, tar,
But it is:
cat, cta, act, atc, tca, tac, bat, bta, abt, atb, tba, tab, rat, rta, art, atr, tra, tar, splat, splta, spalt, spatl,...
So far the code I have is this:
HTML:
<div id="anagrams"></div>
JS:
var arr = ['cat', 'bat', 'rat', 'splat'];
var allAnagrams = function(arr) {
var anagrams = {};
arr.forEach(function(str) {
var recurse = function(ana, str) {
if (str === '')
anagrams[ana] = 1;
for (var i = 0; i < str.length; i++)
recurse(ana + str[i], str.slice(0, i) + str.slice(i + 1));
};
recurse(' ', str);
});
return Object.keys(anagrams);
}
document.getElementById("anagrams").innerHTML = (allAnagrams(arr));
To accomplish a new line per array item I basically want to check if the amount of the characters exceeds the amount of characters in the string/array item and if it does, insert a break into the HTML. I tried doing that by:
var arr = ['cat', 'bat', 'rat', 'splat'];
var allAnagrams = function(arr) {
var anagrams = {};
arr.forEach(function(str) {
var recurse = function(ana, str) {
if (str === '')
anagrams[ana] = 1;
for (var i = 0; i < str.length; i++)
recurse(ana + str[i], str.slice(0, i) + str.slice(i + 1));
// check if string length is greater than the count and
// if it is, insert a break between the string
if (i > str.length) {
recurse(' <br>', str);
}
};
recurse(' ', str);
});
return Object.keys(anagrams);
}
document.getElementById("anagrams").innerHTML = (allAnagrams(arr));
However it still prints across a single line. Am I approaching this the correct way? I also tried using ana in place of i but I think I need to use i since that's the actual count - is that correct?
A jsfiddle can be seen here: https://jsfiddle.net/4eqhd1m4/1/
I would slightly restructure the anagram creation.
Anagrams is now a string.
Recurse no longer takes care of adding break lines. Considering you want a break per element, it's cleaner to add it in the Array.forEach
jsfiddle
Edit
Adding a second jsfiddle to demonstrate the same behavior, except instead of using strings directly it returns an array (which gets split and rejoined using breaklines). It may be preferable to have the anagrams returned as an array.
jsfiddle
Is this is what you need ? All anagrams in different lines
var arr = ['cat', 'bat', 'rat', 'splat'];
var allAnagrams = function(arr) {
var anagrams = {};
arr.forEach(function(str) {
var recurse = function(ana, str) {
if (str === '')
anagrams[ana] = 1;
for (var i = 0; i < str.length; i++)
recurse(ana + str[i], str.slice(0, i) + str.slice(i + 1));
// check if string length is greater than the count and
// if it is, insert a break between the string
if (i > str.length) {
recurse(' <br \/>', str);
}
};
recurse(' <br \/>', str);
});
return Object.keys(anagrams);
}
document.getElementById("anagrams").innerHTML = (allAnagrams(arr));
<div id="anagrams"></div>