javascript indexOf with millions of matches - javascript

I'm trying to extract a few lines representing some XML elements from a file.
The user provides a file using a simple <input type="file"> tag, and than this file is read as text with FileReader, and given as the parameter to this function:
var relevantDelimiters = [{"begin":"<header>","end":"</header>"}
,{"begin":" <someElement>","end":"</someElement>"}];
function dealWithString(invalidXML) {
var validXML = "";
for (var i=0; i<relevantDelimiters.length; i++) {
delimiter = relevantDelimiters[i];
while (invalidXML.indexOf(delimiter.begin) != -1) {
//while there are relevant elements of this kind left:
startPos = invalidXML.indexOf(delimiter.begin);
endPos = invalidXML.indexOf(delimiter.end);
//append to end result:
validXML+=invalidXML.substring(startPos,endPos+delimiter.end.length)+"\n";
//take this item out of the input to process next item
invalidXML = invalidXML.replace(invalidXML.substring(startPos,endPos+delimiter.end.length),"");
}
}
//return fixed data
return validXML;
}
This approach seems to work just fine with a small amount of matches in the input text file, but given a file of 1.5MB, script is stuck (Running with Google Chrome, making it's tab non-responsive). This file contains about a million "relevant elements", meaning matches from relevantDelimiters.
How can I optimize this?

Instead of repeatedly "taking the item out of the input" by calling replace on it, you should use the second argument to indexOf: fromIndex. That way, it'll search the next occurence after the given index, and you can loop through the very large input without needing to touch it.
function dealWithString(invalidXML) {
var validXML = "";
for (var i=0; i<relevantDelimiters.length; i++) {
var delimiter = relevantDelimiters[i],
pos = 0,
startPos;
while ((startPos = invalidXML.indexOf(delimiter.begin, pos)) != -1) {
//while there are relevant elements of this kind left:
var endPos = invalidXML.indexOf(delimiter.end, startPos);
// assert(endPos != -1) - otherwise this could go horribly wrong
pos = endPos+delimiter.end.length;
//append to end result:
validXML += invalidXML.slice(startPos, pos) + "\n";
}
}
return validXML;
}

Where's the time being spent? I assume you could break up this big synchronous action into a couple of async hopes. (Every couple of while-iterations, you could store your index and set-timeout before resuming. This way you don't lock the UI thread.

Related

Where/how to set increment on loop and update array only when condition found?

I'm writing a function to iterate through folders on Google Drive and match files (Google Sheets) with a variable string (a date specified on a table cell). When a matching file is found, the containing folder name string is assigned to folderItems[0] and the file URL to folderItems[1]. Once all matching files within a folder have been found, the next folder is iterated through in the same way. These "folderItems" arrays are stored in a parent array "folderItemsContainer" to create a 2 dimensional array which can then be output to a spreadsheet using .setValues().
I'm having trouble figuring out how or where to put the increment variable so that it will increment only when a filename match is made but not stop a loop when a match isn't found.
I've tried various structures including interchanging for and while loops and inserting if statements where seemingly useful. I've looked at a few different answers on Stackoverflow that come close to making sense but none seem to be applicable here. I'm fairly new to programming. I've got different variations of code I've tried, but this is where I'm up to so far:
function GetFolderData() {
var currentSheet = SpreadsheetApp.getActiveSpreadsheet();
var currentYearPeriod = currentSheet.getRange("C1!A4").getValue();
// Logger.log(currentYearPeriod);
//Get folder objects from parent folder
var parentFolderId = "17F0fcBH0jmxsk2sUq723AuIY0E2G_u0m";
var parentFolder = DriveApp.getFolderById(parentFolderId);
//Get folders from specified parent folder
var StaffFolders = parentFolder.getFolders();
//Create container array
var folderItemsContainer = [];
//Create Item Array
var folderItems = [];
var i = 0;
//For every staff folder, regardless of content, do:
while (StaffFolders.hasNext()) {
//Get current folder object
currentFolder = StaffFolders.next();
//Get files in current folder object as FileIterator
FolderFiles = currentFolder.getFiles();
//If folder empty, outer while loop will iterate
if (FolderFiles !== null) {
//Iterate through existing files
while (FolderFiles.hasNext()) {
//Get file object sequentially
file = FolderFiles.next();
//When filename matches currentYearPeriod, store URL next to name in folderItems
for (i = 0; file.getName().indexOf(currentYearPeriod) !== -1; i++) {
folderItems[i] = [];
folderItems[i][0] = currentFolder.getName();
// Logger.log(currentFolder.getName());
folderItems[i][1] = file.getUrl();
folderItemsContainer[i] = folderItems[i];
}
}
}
}
return folderItemsContainer;
}
function InsertFolderData() {
var sheet = SpreadsheetApp.getActiveSheet();
sheet.getRange("B4:Z1000").clearContent();
FolderData = GetFolderData();
Logger.log(FolderData);
sheet
.getRange(4, 2, FolderData.length, FolderData[0].length)
.setValues(FolderData);
Logger.log(FolderData);
/* var str = "";
for (var i = 0; i < FolderData.length; i++) {
str += FolderData[i] + "\r\n";
}
str = str.substr(0);
var ui = SpreadsheetApp.getUi();
ui.alert("DATA IMPORTED: " + "\r\n" + str);
*/
}
With the above code, I'm not entirely sure why but I seem to be getting stuck in an endless loop and the script doesn't finish. What I'm hoping to achieve is the folderItemsContainer array being populated with arrays containing file information (parent folder name[0] and file URL[1]) for files that match the currentYearPeriod variable. I've been refactoring the code and I've learned a lot but unfortunately not how to solve the problem.
You should check what's the deference between each loop, you are not fully undestending them. If you want to execute the instructions inside the for loop until a certain condition is met, in this case file.getName().indexOf(currentYearPeriod) !== -1, you should use a while loop. The bug is that the previous condition is never met because file never change while running the for loop. Thats why you are having an infinite loop. My solution:
// new variable
var cnt = 0;
while (StaffFolders.hasNext()) {
currentFolder = StaffFolders.next();
FolderFiles = currentFolder.getFiles();
if (FolderFiles !== null) {
while (FolderFiles.hasNext()) {
file = FolderFiles.next();
// You for loop started here
folderItems[cnt] = [];
folderItems[cnt][0] = currentFolder.getName();
folderItems[cnt][1] = file.getUrl();
folderItemsContainer[cnt] = folderItems[cnt];
// each time you read a new file you increment by 1
cnt++;
}
}
// this reset the counter for each new folder
cnt = 0;
}
Deferences between loops:
for loops
They are used when you know how many iteration will be needed. For example, if you want to print all the character of a string in the console:
const str = "hello";
for(let i = 0; i < str.length; i++) {
console.log(str.charAt(i));
}
let i = 0 is the starting point
i < str.length is when you want to stop. If you have to use a simbol which is not one of the follow <, <=, >, >=, you shouldn't be using a for loop.
i++ how you want to reach the stop property.
while loops
If you dont know when your loop is going to end, if it's going to have, 5 iteration, 100 iteration or 0 iteration. You should use while loops.
function getFirstL(str)
let i = 0;
while(i < str.length && str.charAt(i) !== "l"){
i++;
}
}
Your for loop. Here is syntax of for loop.
for (statement 1; statement 2; statement 3) {
// code block to be executed
}
Statement 1 is executed (one time) before the execution of the code block.
Statement 2 defines the condition for executing the code block.
Statement 3 is executed (every time) after the code block has been executed.
Your for loop doesn't define a condition for it to exit. A minimum or maximum value. something like
i<file.getName().indexOf(currentYearPeriod);
So it will check from 0-to that value.

Trouble creating unordered list with number of list items controlled by user input

I tried to make a function that would generate a number of list items based on the user input from a prompt. It does not work although I believe it should.
I'm looking for an explanation of what's wrong with my code even if an alternate solution is also provided, if possible.
On the HTML side I have entered <div class="freshList"></div> in the body so that it can be picked up by the function and have the list placed in that location
Code is below:
function makeAList()
{
var freshList = document.getElementsByClassName("freshList");
var listLength = prompt("Enter number of list items");
var listString = "<ul>";
for (var i=0; i < listLength; i++)
{
listString+= "<li>"+"</li>"
}
listString += "</ul>"
document.innerHTML = listString;
}
makeAList();
// end code
Now the only way I have been able to get this to work was by accident when using the document.Write method at various points in the code to see what was working (I tried console log first which said that the function was called and the loop was proceeding but no output was coming so I switched to doc.write instead). I used document.Write(listString); and this was able to forcibly print the bullet points onto the screen but that is not my desire. I want it in the HTML not just printed on the screen (so that I can manipulate it with other functions I have made).
Altogether I wanted to make a series of functions to perform the following action: Ask if the user would like to make a new list. Call the makeNewList function which would prompt the user for the number of items. Then ask the user if they would like to edit the list and call the editList function with new prompts for each list item. Finally leaving an output of # of bullet points with user input on each point. I am sure this is a ridiculous idea that nobody would use but it was more a lesson for myself to try an idea I had rather than something functional. Full (attempted) code below:
function makeAList()
{
var freshList = document.getElementsByClassName("freshList");
var listLength = prompt("Enter number of list items");
var listString = "<ul>";
for (var i=0; i < listLength; i++)
{
listString+= "<li>"+"</li>"
}
listString += "</ul>"
document.innerHTML = listString;
}
makeAList();
function editAList() {
var list = document.getElementsByTagName("li");
for (var i = 0; i < list.length; i++)
{
list[i].innerHTML = prompt("Place list text below","")
}
function checkList(){
var resp1 = confirm("Would you like to make a new list?")
if(resp1 == true)
{
makeAList();
}
else
{
}
if(resp1 === false){
var resp2 = prompt("Would you like to edit an existing list instead?")
}
else if(resp2 === true){
editAList();
}
else{
alert("You have chosen not to make a new list or edit an existing one")
}
}
checkList();
My friend looked at my code and made some changes as well as detailed comments with the places I went wrong. For anyone who views this question in the future here is his response. All credit to him but I don't know his stack overflow handle to tag him.
Here is his js bin updated and heavily commented code
Code below in case that link dies:
// hi
// i've changed a few things, i've left the original code in comments (//)
function makeAList()
{
// what does the following code return? a single element? a list of elements?
//var freshList = document.getElementsByClassName("freshList")
var freshList = document.getElementById("freshList");
var listLength = prompt("Enter number of list items");
// var listString = "<ul>";
// you can create a 'ul' element and append the list string later
// https://developer.mozilla.org/en-US/docs/Web/API/ParentNode/append
var ul = document.createElement('ul');
ul.setAttribute('id', 'theList');
// there's an even shorter way of doing all this, but since you're starting out, we can save that for later
for (var i=0; i < listLength; i++)
{
//i would probably append here too, but just demonstrating insertAdjacent
ul.insertAdjacentHTML('beforeend', '<li></li>');
}
// document.innerHtml = listString //this was the reason why this function didn't work
// document has no inner html, instead, you want to append the list to the .freshList div that you created
// and then append that to the listOfLists that you queried
// the reason why we don't want to manually set innerHTML is because the DOM has to be reparsed and recreated
// every time innerHTML is set. if you have 1000s of lists, this would be extremely slow
// there are DOM apis that create and insert html elements much more faster and efficient (appendChild)
// if you want to create html elements as strings, as you have done previously, use insertAdjacentHTML: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// it is faster and more efficient
freshList.appendChild(ul);
}
makeAList();
function editAList() {
var list = document.getElementsByTagName("li");
// there's a much more efficient way to do this, but keep this here for now
var insertText = function(i) {
var input = prompt("Place list text below", "");
console.log(i);
list[i].append(input);
}
for (var i = 0; i < list.length; i++)
{
// why would we use settimeout? http://www.w3schools.com/jsref/met_win_settimeout.asp
setTimeout(insertText.bind(null, i), 1000); // why bind? https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/bind
}
}
editAList();
// function checkList(){
// var resp1 = confirm("Would you like to make a new list?")
// if(resp1 == true)
// {
// makeAList();
// }
// else
// {
// }
// if(resp1 === false){
// var resp2 = prompt("Would you like to edit an existing list instead?")
// }
// else if(resp2 === true){
// editAList();
// }
// else{
// alert("You have chosen not to make a new list or edit an existing one")
// }
// }
// checkList();

Find and select next string in textarea

I'm trying to implement a find and replace functionality for a textarea in a javascript app.
I've got this pretty much working, but for an incredibly weird bug that I've no idea how to fix.
Essentially, 50% of the time this code works as expected. Every time I run the function, it selects the next instance of my string. However, the other 50% it just re-selects the same bit of text over and over.
This inconsistency is most confusing. I can relaunch the app and it works different each time. Here's my code. I'm guessing this might be performance related? Any ideas?
function selectNext() {
// collect variables
var findBox = document.getElementById('findBox');
var editable = document.getElementById('editable');
var txt = editable.innerText;
var strSearchTerm = findBox.value;
// find next index of searchterm, starting from current cursor position
var cursorPos = editable.selectionEnd;
var termPos = txt.indexOf(strSearchTerm, cursorPos);
// if found, select it
if (termPos != -1) {
editable.setSelectionRange(termPos, termPos + strSearchTerm.length)
} else {
// not found from cursor pos, so start from beginning
termPos = txt.indexOf(strSearchTerm);
if (termPos != -1) {
editable.setSelectionRange(termPos, termPos + strSearchTerm.length)
}
}
};

.length on array crashing when length is 1 (maybe issue with split)

I'm having trouble with this code. I've tried to troubleshoot it many times and seem to have isolated the issue, but can't figure out the cause.
If the variable called string is set to something in the form of "text v. text," the code runs fine and the first if-statement triggers the sentence. If the string contains text but no "v." i.e. nothing that meets the search separator value, the function fails and does not execute the second if-statement.
Link to Fiddle: http://jsfiddle.net/qsq4we99/
Snippet of code, there also would need to be a html div with ID "outputtext."
function brokenCode()
{
//Setting Relevant Variables
var string = "red";
var array = string.split("v.");
var number = array.length;
// Checking location of things
var findText1 = array[0].search("search text");
var findText2 = array[1].search("search text");
//Running Conditional Stuff
if(number > 1)
{
document.getElementById('outputtext').innerHTML = "2+ listed";
}
else if(number < 2)
{
document.getElementById('outputtext').innerHTML = "1 listed";
}
}
brokenCode();
In this simplified example there is no clear explanation why the search operations need to occur (they are there because in the real code they are needed... but something about them seems to be causing the problem (even in this simple example). If the two searches are removed, the code runs smoothly.
You can't start setting variables from the array without checking for length. Before setting findText1 & findText2, check to make sure the length of the array is greater than zero.
function brokenCode() {
//Setting Relevant Variables
var string = "red";
var array = string.split("v.");
var number = array.length;
if (number > 0) {
// Checking location of things
var findText1 = array[0].search("search text");
var findText2 = array[1].search("search text");
//Running Conditional Stuff
if(number > 1)
{
document.getElementById('outputtext').innerHTML = "2+ listed";
}
else if(number < 2)
{
document.getElementById('outputtext').innerHTML = "1 listed";
}
}
}
brokenCode();

How do I avoid looping through an array to find a partial match?

I am looping through an array of english phrases, and if i find a match, with the current text node, i replace it with it's translation in the non_english array. All of that works 100% for exact matches.
But for partial matches, I need to use the .match command, which allows for partial matches.
My code to search for exact matches is like this:
var found = $.inArray(value,en_lang);
Then if there is a found value, then do replacement of text. This method is fast and I love it.
However to do partial word/phrase matching, I have to use this looping code.
// loop thru language arrays
for (var x = en_count; x > 0; x--) {
// assign current from/to variables for replace
var from = en_lang[x];
var to = other_lang[x];
// if value match do translation
if (value.match(from)) {
content(node, value.replace(from, to));
}
// mark this node as translated
if ($.browser.msie == 'false') {
$(node).data('translated', 'yes');
}
}
This does the job but is pretty slow. After a lot of research, I have found that I can convert the english array to a list-based string via the join command.
But I am unable to come up with a function to search this list for a partial match, and return the position in the list.
I was trying out this old js function created in 2006. But I can't figure out how to get the position back, correctly.
function listfind(list, value, delimiters) {
if (!delimiters) {
var delimiters = ','
}
_TempListSplitArray = list.split(delimiters)
var FoundIdx = 0;
for (i = 0; i < _TempListSplitArray.length; i++) {
if (_TempListSplitArray[i] == value) {
FoundIdx = i + 1;
break
}
if (value.match(_TempListSplitArray[i])) {
FoundIdx = i + 1;
break
}
}
return FoundIdx
}
Thank you for your time.
Javascript has a foreach type of system but its still based on a loop
var array = ['hello', 'world'];
for(var key in array){
alert(array[key]);
}
Thats the best your getting for looping though an array but this way allso works with objects
var obj = {'one':'hello', 'two':'world'];
for(var key in obj){
alert("key: "+key+" value: "+obj[key]);
}
UPDATED for Comments on your question
You can just replace the text you know
var str = "hello World";
str = str.replace("hello", "Bye bye");
alert(str);

Categories

Resources