I am trying to have my entire sheet deduped and the script works fine however, it takes 60+ seconds to run. Am I over complicating this and really there is a simpler code to get what I need? It just seems like a long time to process such a simple task.
My data is only between 4-12k rows.
function removeDuplicates() {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheets()[1];
var data = sheet.getDataRange().getValues();
var newData = [];
for (var i in data) {
var row = data[i];
var duplicate = false;
for (var j in newData) {
if (row.join() == newData[j].join()) {
duplicate = true;
}
}
//If not a duplicate, put in newData array
if (!duplicate) {
newData.push(row);
}
}
//Delete the old Sheet and insert the newData array
sheet.clearContents();
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
If you use an object, you'll dramatically lower the number of iterations.
function removeDuplicates() {
const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheets()[0];
const data = sheet.getDataRange().getValues();
let newDataObject = {};
for (let row of data) {
newDataObject[row.join()] = row;
}
const newData = Object.values(newDataObject);
// Clear the old Sheet and insert the newData array
sheet.clearContents();
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
As other approach, how about using the method of removeDuplicates()? When your script is modified, it becomes as follows.
Modified script:
function removeDuplicates() {
SpreadsheetApp.getActiveSpreadsheet().getSheets()[1].getDataRange().removeDuplicates();
}
Reference:
removeDuplicates()
Now that we have V8 you can use the Set class. Didn't make any attempts to benchmark performance so I don't know if you'll fair any better where speed of execution is concerned, code is far more readable though. Try the following and tell me how it goes:
// V8 runtime version using Set
function removeDuplicates(sheetName) {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheetName);
var rows = sheet.getDataRange().getValues();
var dedupedValues = [];
var set = new Set();
rows.forEach(function(row) {
let key = row.join();
if (set.has(key)) return;
set.add(key);
dedupedValues.push(row);
});
//Delete the old Sheet and insert the dedupedValues array
sheet.clearContents();
sheet.getRange(1, 1, dedupedValues.length, dedupedValues[0].length).setValues(dedupedValues);
}
If you're not comfortable with V8 you can accomplish the same thing using #Diego's solution...but with a few tweaks as follows:
// ES5 version using object keys
function removeDuplicates(sheetName) {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheetName);
var rows = sheet.getDataRange().getValues();
var dedupedValues = [];
var keys = {};
rows.forEach(function(row) {
var key = row.join();
if (key in keys) return;
keys[key] = true;
dedupedValues.push(row);
});
//Delete the old Sheet and insert the dedupedValues array
sheet.clearContents();
sheet.getRange(1, 1, dedupedValues.length, dedupedValues[0].length).setValues(dedupedValues);
}
DISCLAIMER: I don’t know the Google Sheets API.
I suggested a few improvements and added the comments in the code.
A big thing in performance is caching. So don’t do things twice if not necessary (or DRY = Don’t repeat yourself!).
If you forget your wallet and have to go upstairs again, it doubles the time taking you to leave the front door. Same with code.
If you are generally interested in performance boosts (and everyone should be) I suggest you to take a look at How to write Performant JavaScript- Mark Nadal
function removeDuplicates() {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheets()[1];
var data = sheet.getDataRange().getValues();
var newData = [];
var newDataJoinedCache = {};
// help the loop to not have to read the same values over and over again
// if you prepare as much data as possible for the loop it will thankfully speed up
for (var i = 0, len = data.length; i < len; i++) {
// data won’t change in the loop, so we can cache it here once(!)
// as you can see, I added a semicolon to prevent unwanted results
// imagine a row with colums [ "a", "bc"] and another one with [ "ab", "c"]
// just joined without separator they will both be the same "abc", which is wrong
var joinedRow = data[i].join(";");
// no need as we will know it with one simple comparison
// var duplicate = false;
// Just make one simple comparison
// instead of over and over joining the arrays in newData just cache them
if (!newDataJoinedCache[joinedRow]) {
newData.push(data[i]);
// we push the joined string as a cache
newDataJoinedCache[joinedRow] = true;
}
}
//Delete the old Sheet and insert the newData array
sheet.clearContents();
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
Let me know how it works out and how much time this will save in comparison.
Related
So I wrote some code that goes to a book's ID based on a "dictionary" and takes the data from all the sheets in that book after the second sheet. That data is then aggregated into a single array. My question centers around the idea of improving the for loop in the if statement. That is adding the "supplier name" to the front of the subarray that represents a row in the sheet/data. I was told that this is inefficient as the code has to go through each subarray and add a value.
Is there a more efficient way of doing this? I did it this way because I am copying the values of a range which are stored as an array of arrays. So to add data, I have to reaccess the subarrays. Is it possible to add the new data (supplier name) at the same time that the values are being copied? would this be more efficient? It was recommended to use an arrow function, however, I am not familiar with their usage.
function aggregate() {
var combinedData = []
var idArray = {
"suppliername":"id",
};
for (var supplierName in idArray){
var sBook = SpreadsheetApp.openById(idArray[supplierName]);
var sheets = sBook.getSheets();
for (var index = 2; index <sheets.length; index++){
var sheet = sheets[index];
var dataLength = sheet.getRange("E5:E").getValues().filter(String).length;
if(dataLength != 0){
var dataRange = sheet.getRange(5,2,dataLength,14);
var dataValues = dataRange.getValues();
for (row in dataValues) {
dataValues[row].unshift(supplierName);
};
combinedData = combinedData.concat(dataValues);
};
};
};
var dataLength = combinedData.length;
const dSheet = SpreadsheetApp.openById("id").getSheets()[0];
dSheet.getRange(2,1,dSheet.getMaxRows(),dSheet.getMaxColumns()).clearContent();
var dRange = dSheet.getRange(2,1,dataLength,15);
dRange.setValues(combinedData);
};
In your script, how about the following modification?
From:
for (row in dataValues) {
dataValues[row].unshift(supplierName);
};
combinedData = combinedData.concat(dataValues);
To:
combinedData = combinedData.concat(dataValues.map(e => [supplierName].concat(e)));
or
combinedData = [...combinedData, ...dataValues.map(e => [supplierName, ...e])];
References:
map()
Spread syntax
My spreadsheet has a column (A) with over 1000 rows of values like 10.99€, 25.99 € and so on. for optimizing purposes, I am looping through this column and removing the "EUR" mark and replacing "." with ",". While the code works, my problem is that it takes super long to execute and for thousands of products it sometimes time outs. I know I am probably not following the best practices, but this was the best solution I could come up with because of my limited JavaScript skills. Any help?
function myFunction() {
var ss = SpreadsheetApp.getActive();
var sheet = ss.getSheetByName('Table');
var lastRow = sheet.getRange(1,1).getDataRegion(SpreadsheetApp.Dimension.ROWS).getLastRow();
for (var i = 1; i < lastRow +1; i++) {
var price = sheet.getRange(i,1).getValue();
var removeCur = price.toString().replace(" EUR","").replace(".",",");
sheet.getRange(i,1).setValue(removeCur);
}
}
It's a classic question. Classic answer -- you need to replace cell.getValue() with range.getValues(). To get this way 2D-array. Process the array with a loop (or map, etc). And then set all values of the array at once back on sheet with range.setValues()
https://developers.google.com/apps-script/guides/support/best-practices?hl=en
For this case it could be something like this:
function main() {
var ss = SpreadsheetApp.getActive();
var sheet = ss.getSheetByName('Table');
var range = sheet.getDataRange();
var data = range.getValues(); // get a 2d array
// process the array (make changes in first column)
const changes = x => x.toString().replace(" EUR","").replace(".",",");
data = data.map(x => [changes(x[0])].concat(x.slice(1,)));
range.setValues(data); // set the 2d array back to the sheet
}
Just in case here is the same code with loop for:
function main() {
var ss = SpreadsheetApp.getActive();
var sheet = ss.getSheetByName('Table');
var range = sheet.getDataRange();
var data = range.getValues();
for (var i=0; i<data.length; i++) {
data[i][0] = data[i][0].toString().replace(" EUR","").replace(".",",")
}
range.setValues(data);
}
Probably the loop for looks cleaner in this case than map.
And if you sure that all changes will be in column A you can make the script even faster if you change third line in the function this way:
var range = sheet.getRange("A1:A" + sheet.getLastRow());
It will narrow the range to one column.
Well, there's something you can do to improve your code, can't guarantee it will help you to make it faster, but we'll see.
Here's the updated version
function myFunction() {
var ss = SpreadsheetApp.getActive();
var sheet = ss.getSheetByName('Table');
var lastRow = sheet.getRange(1,1).getDataRegion(SpreadsheetApp.Dimension.ROWS).getLastRow() + 1;
var price;
var removeCur;
for (var i = 1; i < lastRow; i++) {
price = sheet.getRange(i,1).getValue();
removeCur = price.toString().replace(" EUR","").replace(".",",");
sheet.getRange(i,1).setValue(removeCur);
}
}
What I did:
Line 5: I removed the +1 in the loop and added on lastRow directly. If you have 1000 rows, you'll save 1000 assignments
Line 6-7: removed declarations in the loop. If you have 1000 rows, you'll save 2000 re-declarations (not sure if it does, but it's best practise anyway)
You could use regex for the replace, so you do it only once, but I think it's slower, so I kept the 2 replaces there
I've just switched from excel to Google sheets and I've had to go through a bit of a learning curve with moving on with "Macros" or scripts as they're now called.
Anyway, a short while later I've written a loop to go through everything in column B and if it's less than 50, delete the row.
It works and I'm happy but it's so slow. I have about 16,000 rows and I'll probably end with more. I let it run for about 4 minutes and it didn't even get rid of 1,000 rows. I refuse to believe that a popular programming language is that slow I can still read stuff as it's being deleted 20 rows up.
function grabData(){
let sheet = SpreadsheetApp.getActive().getSheetByName("Keywords");
var rangeData = sheet.getDataRange();
var lastColumn = rangeData.getLastColumn();
var lastRow = rangeData.getLastRow();
let range = sheet.getRange("B2:B16000");
let values=range.getValues();
for (var i = 0, len = values.length; i<len; i++){
if(values[i] <= 50 ){
sheet.deleteRow(i);
i--
len--
};
};
}
I keep seeing somewhere that something's not being reset, but I have no idea what that means.
Is it because the array length starts off at 16,000 and when I delete a row I'm not accounting for it properly?
Since I never use formulas I would do it this way:
function grabData() {
let ss = SpreadsheetApp.getActive();
let sh = ss.getSheetByName("Keywords");
let rg = s.getRange(2, 2, sh.getLastRow() - 1, sh.getLastColumn());
let values = rg.getValues();
let oA = [];
values.forEach((r, i) => {
if (r[0] > 50) {
oA.push(r);
}
});
rg.clearContent();
sh.getRange(2,1,oA.length,oA[0].length).setValues(oA);
}
It's much faster but it will probably mess up your formulas. Which is one of the reasons I never use formulas. Deleting lines is quite slow. Pretty much anything you do with the UI is slow.
Welcome to App Script and the community! App Script is actually very fast if follow the best practice of App Script.
Here is an example for you that will complete what you need in one second (*modify the variable value in config to fit your own application):
function myFunction() {
// config
const filterValue = 50
const targetSheetName = "Sheet1"
const targetColumn = "A"
const startRowNum = "2"
// get data from target sheet
const ss = SpreadsheetApp.getActiveSpreadsheet()
const sheet = ss.getSheetByName(targetSheetName)
const endRowNum = sheet.getLastRow()
const targetRange =`${targetColumn + startRowNum }:${targetColumn + endRowNum}`
const data = sheet.getRange(targetRange).getValues()
// filter data based on filterValue and set filtered result into new ary
const ary = data.filter(row=>row[0]>=filterValue)
//get max row number in the sheet
const maxRowNum = sheet.getMaxRows()
// break if nothing is filtered out
if(ary.length===0){
// remove all row and break
let deleteStartFromRowNum = parseInt(startRowNum,10) - 1
let deleteRowsCount = maxRowNum - deleteStartFromRowNum
sheet.deleteRows(deleteStartFromRowNum, deleteRowsCount)
return
}
// break if all is filtered out
if(ary.length===data.length){
// remove all trailing empty rows
if(endRowNum<maxRowNum){
let deletStartFromRowNum = endRowNum+1
let deleteRowsCount = maxRowNum-endRowNum
sheet.deleteRows(deletStartFromRowNum,deleteRowsCount)
}
return
}
// get lowerbound (the last row of filtered data in ary)
const lowerBound = parseInt(startRowNum,10) + ary.length - 1
// set ary into sheet range according to lowerBound value
sheet.getRange(`${targetColumn + startRowNum}:${targetColumn + lowerBound.toString()}`).setValues(ary)
// delete rest of the rows that are below lower bound
let deleteStartFromRowNum = lowerBound + 1
let deleteRowsCount = maxRowNum - lowerBound
sheet.deleteRows(deleteStartFromRowNum, deleteRowsCount)
return
}
Issue:
In Apps Script, you want to minimize calls to other service, including requests to Spreadsheets (see Minimize calls to other services). Calling other services in a loop will slow down your script considerably.
Because of this, it's much preferrable to filter out the undesired rows from values, remove all existing data in the range via Range.clearContent(), and then use setValues(values) to write the filtered values back to the spreadsheet (see Use batch operations).
Code snippet:
function grabData(){
let sheet = SpreadsheetApp.getActive().getSheetByName("Keywords");
const range = sheet.getRange("B2:B16000");
const values = range.getValues().filter(val => val[0] > 50);
range.clearContent();
sheet.getRange(2,2,values.length).setValues(values);
}
Reference:
Best Practices
I have a code that dynamically creates new sheets based on the first-row value in the main sheet.
I would like to have the code to check the existence of the sheet name and overwrite the sheet or delete it first if it exists and then creates it afresh from the new data in main sheet.
I will appreciate your help in restructuring.
function newSheet() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var templateSheet = ss.getActiveSheet();
var sheet1 = ss.getSheetByName("main")
var getNames = [...new Set(sheet1.getRange("A2:A").getValues().filter(String).toString().split(","))];
for (var i = 0; i < getNames.length; i++) {
var copy = ss.getSheetByName(getNames[i]);
if (copy) { // This is where I am kind lost on how to structure it.
// if a copy exists delete or overwrite existing sheet here
} else {
var rowIndexes = sheet1.getRange("A:A").getValues()
.map((value, index) => [value[0], (index + 1)])
.filter(value => value[0] === getNames[i]);
var namedSheet = ss.insertSheet(getNames[i]);
rowIndexes.map(index => {
var rowValues = sheet1.getRange(index[1], 1, 1, sheet1.getLastColumn()).getValues();
namedSheet.appendRow(rowValues[0]);
});
ss.setActiveSheet(ss.getSheetByName(getNames[i]));
ss.moveActiveSheet(ss.getNumSheets());
}
}
}
I think there are multiple ways to achieve the solutions you are looking for
First:
Yes, you can replace it.
// This example assumes there is a sheet named "first"
var ss = SpreadsheetApp.getActiveSpreadsheet();
var first = ss.getSheetByName("first");
first.setName("not first anymore");
So in your case,
var copy = ss.getSheetByName(getNames[i]);
if (copy) { // This is where I am kind lost on how to structure it.
copy.setName("New name")
// if a copy exists delete or overwrite existing sheet here
}
Second:
Yes, you can delete the sheet as well.
// The code below deletes the specified sheet.
var ss = SpreadsheetApp.getActive();
var sheet = ss.getSheetByName('My Sheet');
ss.deleteSheet(sheet);
So in your case,
if (copy) { // This is where I am kind lost on how to structure it.
ss.deleteSheet(copy)
// if a copy exists delete or overwrite existing sheet here
}
Sorry if I have misunderstood your problem.
I am completely new in writing scripts for google sheets, so I was hoping some of you could help/guide me a little bit.
So Ideally, I want a script to clear (not remove) ALL filters in my sheet. This is, however, complicated for me to do (If some of you have such a script, I would LOVE to see it :) )
Instead, I made this one (Used recorder):
function Clear_Filter() {
var spreadsheet = SpreadsheetApp.getActive();
spreadsheet.getRange('A5').activate();
spreadsheet.getActiveSheet().getFilter().removeColumnFilterCriteria(1);
spreadsheet.getRange('B5').activate();
spreadsheet.getActiveSheet().getFilter().removeColumnFilterCriteria(2);
spreadsheet.getRange('C5').activate();
spreadsheet.getActiveSheet().getFilter().removeColumnFilterCriteria(3);
spreadsheet.getRange('G5').activate();
spreadsheet.getActiveSheet().getFilter().removeColumnFilterCriteria(7);
spreadsheet.getRange('J5').activate();
spreadsheet.getActiveSheet().getFilter().removeColumnFilterCriteria(10);
spreadsheet.getRange('M5').activate();
spreadsheet.getActiveSheet().getFilter().removeColumnFilterCriteria(13);
};
So my filter is set in Row 5. First I made the above for all columns (I had 20), but the problem is, that the code is very slow :( So now I am using the columns, that I use the most, when filtering, but the code is still slow. Well the worst thing is, that the code is running one column at a time (which we see in the code), and when the code is finish, I end up in the last column.
Can I do something? I dont want my sheet window keep turning right, when I run the code, and then end up in column M.
I will appreciate any help!
Thanks
Here is mine. The function does not remove filters. Instead, it clears them as requested.
function clearFilter(sheet) {
sheet = SpreadsheetApp.getActiveSheet(); //for testing purpose only
var filter = sheet.getFilter();
if (filter !== null) { // tests if there is a filter applied
var range = filter.getRange(); // prevents exception in case the filter is not applied to all columns
var firstColumn = range.getColumn();
var lastColumn = range.getLastColumn();
for (var i = firstColumn; i < lastColumn; i++) {
filter.removeColumnFilterCriteria(i);
}
Logger.log('All filters cleared')
}
else {Logger.log('There is no filter')}
}
Reset filters criterea + sort by first column (as default state).
And add this action to main menu.
/** #OnlyCurrentDoc */
function onOpen() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var menuEntries = [
{name: "Reset filters", functionName: "ResetFilters"},
];
ss.addMenu("Custom actions", menuEntries); // add to main menu
}
function ResetFilters() {
var spreadsheet = SpreadsheetApp.getActive();
var lastColumn = spreadsheet.getActiveSheet().getLastColumn();
var filter = spreadsheet.getActiveSheet().getFilter();
var criteria = SpreadsheetApp.newFilterCriteria().build();
for (var i = 1; i <= lastColumn; i++) {
filter.setColumnFilterCriteria(i, criteria);
}
filter.sort(1, true); // remove this line for disable setting of sorting order
};
To clear all
`function turnOffFilter(sheet) {
for (var index = 1; index < sheet.getLastColumn(); index++) {
if (sheet.getFilter().getColumnFilterCriteria(index)) {
sheet.getFilter().removeColumnFilterCriteria(index);
}
}
}`
It seems that the answers (e.g. proposed by Birmin) work fine but the script is painfully slow. I find it much faster to reapply the filter:
function clearFilter(sheet) {
sheet = SpreadsheetApp.getActiveSheet(); //for testing purpose only
var filter = sheet.getFilter();
if (filter !== null) { // tests if there is a filter applied
var range = filter.getRange();
filter.remove();
range.createFilter();
Logger.log('All filters cleared')
}
else {Logger.log('There is no filter')}
}
I, have you tried :
function Clear_Filter() {
var spreadsheet = SpreadsheetApp.getActive();
spreadsheet.getFilter().remove();
}