Split multiple JSON string into strucutred table using Google App Script - javascript

I am trying to split a data set with an ID and JSON string into a structured table.
The difficult part is I need it to be dynamic, the JSON string varies often and I want headings to be determined by the unique values in the input column at that time. I need the script to be able to create headings if the string changes without needed to recode the script.
We have about 150 different JSON strings we are hoping to use this script on, without recoding it for each one. Each string has lots of data points.
I have a script working but it splits them one by one, need to build something that will do bulk in one go, by looping through all outputs in B and creating a column for each unique field in all the strings, then populating them.
The script works if I paste the additional info straight in, however I am having trouble reading from the sheet
var inputsheet = SpreadsheetApp.getActive().getSheetByName("Input");
var outputsheet = SpreadsheetApp.getActive().getSheetByName("Current Output");
var additionalinfo = inputsheet.getRange(1,1).getValue()
Logger.log(additionalinfo)
var rows = [],
data;
for (i = 0; i < additionalinfo.length; i++) {
for (j in additionalinfo[i]) {
dataq = additionalinfo[i][j];
Logger.log(dataq);
rows.push([j, dataq]);
}
dataRange = outputsheet.getRange(1, 1, rows.length, 2);
dataRange.setValues(rows);
}
}
Here is a link to the sample data. Note that in Sample 1 & 2 there are different headings, we need the script to identify this and create headings for both
https://docs.google.com/spreadsheets/d/1BMiVuAgDbibLw6yUG3IZ9iw4MZTaVVegkw_k3ItQ4mU/edit#gid=0

Try this script that produces dynamic headers based on the json that has been read. It collects all json data, get its keys, and remove the duplicates.
Script:
function JSON_SPLITTER() {
var spreadsheet = SpreadsheetApp.getActive();
var inputsheet = spreadsheet .getSheetByName("Input");
var outputsheet = spreadsheet .getSheetByName("Current Output");
var additionalinfo = inputsheet.getDataRange().getValues();
var keys = [];
// prepare the additionalInfo data to be parsed for later
var data = additionalinfo.slice(1).map(row => {
// collect all keys in an array
if (JSON.parse(row[1]).additionalInfo) {
keys.push(Object.keys(JSON.parse(row[1]).additionalInfo));
return JSON.parse(row[1]).additionalInfo;
}
else {
keys.push(Object.keys(JSON.parse(row[1])));
return JSON.parse(row[1]);
}
});
// unique values of keys, modified to form header
var headers = [...new Set(keys.flat())]
// Add A1 as the header for the ids
headers.unshift(additionalinfo[0][0]);
// set A1 and keys as headers
var output = [headers]
// build output array
additionalinfo.slice(1).forEach((row, index) => {
var outputRow = [];
headers.forEach(column => {
if(column == 'Contract Oid')
outputRow.push(row[0]);
else
outputRow.push(data[index][column]);
});
output.push(outputRow)
});
outputsheet.getRange(1, 1, output.length, output[0].length).setValues(output);
}
Output:
Update:
Modified script for no-additionalInfo key objects.

Related

Parsing Data in Google Sheets From an Object

I have thousands of rows of data in a Google Sheets File in a column that looks something like
[{"amountMax":49.99,"amountMin":49.99,"availability":"true","color":"Brown","currency":"USD","dateSeen":["2019-04-11T08:00:00Z"],"isSale":"false","offer":"Online only","sourceURLs":["https://www.walmart.com/ip/SadoTech-Model-CXR-Wireless-Doorbell-1-Remote-Button-2-Plugin-Receivers-Operating-500-feet-Range-50-Chimes-Batteries-Required-Receivers-Beige-Fixed-C/463989633"]}]
I would like to be able to return the max value, the currency, the color attributes. How can I do that in Google Sheets. Ideally would like to do something like being able to retrieve the data attributes how I would normally in javascript like in this link here https://repl.it/#alexhoy/WetSlateblueDribbleware
However this does not seem to work for me when creating a function in script.google.com
For example, here is a slugify function which takes an input (cell) and turns it into a slug/handle without the need for looping. In Google Sheets I can then call =slugify(b2) and turn that value into slug form
/**
* Converts value to slug
* #customfunction
*/
function slugify(value) {
/*
* Convert the the vs in a range of cells into slugs.
* #customfunction
*/
let slug = '';
slug = value.substring(0, 100).toLowerCase();
slug = slug.replace(/[^\w\s-]/g, '');
slug = slug.replace(/\s+/g, '-');
Logger.log(slug);
return slug;
}
I want to do the same thing without looping to parse the object data above or declaring a range of values and what not.
Any suggestions on how I can do this in a simple way like shown above without the need for declaring active spreadsheet, range values and looping.
The following script will give you an idea about how to approach this task.
It assumes that:
the json data described in your question is in Cell A2.
the max value will be inserted into cell D2
the currency will be inserted into cell E2
the color will be inserted into cell F2
The script uses temporary arrays to capture the values and then assign it to a 2d array.
If you have many rows of data, then you will need to create a loop. I suggest that you build the arraydata progressively, and only update the target range at the end of the loop. This will give you the most efficient outcome.
function so6031098604() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getActiveSheet()
var content = JSON.parse(sheet.getRange("A2").getValue());
// temp arrar to capture the data
var temparray = [];
temparray.push(content[0]["amountMax"]);
temparray.push(content[0]["currency"]);
temparray.push(content[0]["color"]);
// second array to accept the row data
var arraydata =[];
arraydata.push(temparray)
// define the target range
var targetrange = sheet.getRange(2, 4, 1, 3);
// update with the arraydata
targetrange.setValues(arraydata);
}
You want a custom function that will return certain fields from a JSON array.
In the following example, the target cell can be a single cell or an array.
This example does not use an arrayformula. The mechanics of using an arrayformula with a custom function may be something that you can research here Custom SHEETNAME function not working in Arrayformula.
Note: A 30 second quota applies to the execution of a Custom function
/**
* gets the MaxAmount, Current and Color from the data
*
* #param {cell reference or range} range The range to analyse.
* #return amountMax,currency and color
* #customfunction
*/
function getJsonData(range) {
//so6031098606
// Test whether range is an array.
if (range.map) {
// if yes, then loop through the rows and build the row values
var jsonLine = [];
for (var i = 0; i < range.length; i++) {
var jsonValues=[];
var v = JSON.parse(range[i][0]);
jsonValues.push(v.amountMax);
jsonValues.push(v.currency);
jsonValues.push(v.color);
// aggregate the row values
jsonLine.push(jsonValues);
} // end i
return jsonLine;
} else {
// if no, then just return a single set of values
var v = JSON.parse(range);
var jsonValues = [];
jsonValues.push(v.amountMax);
jsonValues.push(v.currency);
jsonValues.push(v.color);
return [jsonValues];
}
}

Paste values from one Google Sheet to another and remove duplicates based on ID column

I have a similar situation to the one described on this question: two worksheets, with input data coming into the Feed sheet using the importxml function and a Data sheet where new rows get copied thanks to a script set to run daily.
However, the current script is creating daily duplicates. As such, I would like to adapt the answer provided on the question above so that the script checks the IDs on column F and only copies the rows with new IDs.
How should I update the section below that creates a hash to one that looks for the IDs on column F instead? Also my rows are consistent, so is it correct to assume I can just remove the relevant code lines towards the end?
The sample Google Sheet is available here.
function appendUniqueRows() {
var ss = SpreadsheetApp.getActive();
var sourceSheet = ss.getSheetByName('Get Data');
var destSheet = ss.getSheetByName('Final Data');
var sourceData = sourceSheet.getDataRange().getValues();
var destData = destSheet.getDataRange().getValues();
// Check whether destination sheet is empty
if (destData.length === 1 && "" === destData[0].join('')) {
// Empty, so ignore the phantom row
destData = [];
}
// Generate hash for comparisons
var destHash = {};
destData.forEach(function(row) {
destHash[row.join('')] = true; // could be anything
});
// Concatentate source rows to dest rows if they satisfy a uniqueness filter
var mergedData = destData.concat(sourceData.filter(function (row) {
var hashedRow = row.join('');
if (!destHash.hasOwnProperty(hashedRow)) {
// This row is unique
destHash[hashedRow] = true; // Add to hash for future comparisons
return true; // filter -> true
}
return false; // not unique, filter -> false
}));
// Check whether two data sets were the same width
var sourceWidth = (sourceData.length > 0) ? sourceData[0].length : 0;
var destWidth = (destData.length > 0) ? destData[0].length : 0;
if (sourceWidth !== destWidth) {
// Pad out all columns for the new row
var mergedWidth = Math.max(sourceWidth,destWidth);
for (var row=0; row<mergedData.length; row++) {
for (var col=mergedData[row].length; col<mergedWidth; col++)
mergedData[row].push('');
}
}
// Write merged data to destination sheet
destSheet.getRange(1, 1, mergedData.length, mergedData[0].length)
.setValues(mergedData);
}
I'm a novice in this world of Google Apps scripts, so do please let me know if I'm missing any crucial information. Thanks in advance for the help.
You want to copy the values from "Feed" sheet to "Data" sheet.
When the values are copied, you want to copy only new values which are not included in "Data" sheet.
You want to choose the new values using the values of column "F".
If my understanding for your question is correct, how about this modification? In this modification, I modified the script in your shared spreadsheet.
Modification points:
In your script, all values of "Feed" sheet are copied to "Data" sheet. So in order to choose only new values, I used the following flow.
Retrieve the values from column "F". This is used for choosing the new values.
Retrieve the new values using the values from column "F".
Put the new values to "Data" sheet.
The script which reflected above flow is as follows.
Modified script:
From:
This is your script in the shared spreadsheet. Please modify this script to below one.
function Copy() {
var sss = SpreadsheetApp.openById('#####'); // this is your Spreadsheet key
var ss = sss.getSheetByName('Feed'); // this is the name of your source Sheet tab
var range = ss.getRange('A3:H52'); //assign the range you want to copy
var data = range.getValues();
var tss = SpreadsheetApp.openById('#####'); //replace with destination ID
var ts = tss.getSheetByName('Data'); //replace with destination Sheet tab name
ts.getRange(ts.getLastRow()+1, 1,50,8).setValues(data);// 49 value refers to number of rows, 8 to columns
}
To:
function Copy() {
var sss = SpreadsheetApp.openById('#####'); // this is your Spreadsheet key
var ss = sss.getSheetByName('Feed'); // this is the name of your source Sheet tab
var range = ss.getRange('A3:H52'); //assign the range you want to copy
var data = range.getValues();
var tss = SpreadsheetApp.openById('#####'); //replace with destination ID
var ts = tss.getSheetByName('Data'); //replace with destination Sheet tab name
// Below script was added.
var values = ts.getRange("F3:F").getValues().filter(String);
var copiedValues = data.filter(function(e) {return !values.some(function(f){return f[0] == e[5]}) && e.filter(String).length > 0});
ts.getRange(ts.getLastRow() + 1, 1, copiedValues.length, copiedValues[0].length).setValues(copiedValues);
}

Use google script to put 2 columns into a single multi dimensional array

I'm looking for a way to take 2 columns in a google spreadsheet and merge them into a single array in hopes that I can take these 2 columns and use setValues on a new sheet.
Why?
I'm eventually taking 2 different sheets and basically doing a large scale vlookup and transferring all results and desired columns into a single, new sheet. I can get the full dataRange, loop through each array, grabbing the values I want and pushing them to a new array. But is there an easier way? If I can look through just row1 and get the headers and their index, can I just put all of column A and column D in a multi-dimensional array?
Example
Header1 | H2 | H3
I want H1 and H3 and their rows so I can put them in a new sheet as such
Multi-Dimensional Array:
[ [H1, H3], [dataH1,dataH3] ]
Current Code
var freqArr = new Array(); //Array with sheet data
var myArray = new Array(); //Blank array to house header index
var freqSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('KEY_test_test');
var freqData = freqSheet.getDataRange(); //all data
var freqNumRows = freqData.getNumRows(); //number of rows
var freqNumCol = freqData.getNumColumns(); //number of columns
freqArr = freqSheet.getRange(1, 1, freqNumRows, freqNumCol).getValues();
for (i = 0;i<1;++i){
for (j = 0;j<freqNumCol;++j){
if (freqArr[i][j].toString() == 'Header1' || freqArr[i][j].toString() == 'Header3'){
myArray.push([j]);
}
}
}
Logger.log(myArray);
Where I'm Stuck
What I'm doing right now is looping through the first row to get the header indexes I want (should look like this [ 0, 2 ]) but all that is returning in my log is []. I plan to use this array of indexes to loop through my freqData and grab the indexes of each nested array.
Any advice would be great. I'm just starting to learn google script and I'm teaching myself. Thanks
UPDATE TO CODE:
It turns out that .toString() == 'Header1' will not return a match but after more google fu, I found .toString().match('Header1') == 'Header1' will return what I need. See below for update
var freqArr = new Array(); //Array with sheet data
var myArray = new Array(); //Blank array to house header index
var freqSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('KEY_test_test');
var freqData = freqSheet.getDataRange(); //all data
var freqNumRows = freqData.getNumRows(); //number of rows
var freqNumCol = freqData.getNumColumns(); //number of columns
freqArr = freqSheet.getRange(1, 1, freqNumRows, freqNumCol).getValues();
for (i = 0;i<1;++i){
for (j = 0;j<freqNumCol;++j){
if (freqArr[i][j].toString().match('Header1') == 'Header1' || freqArr[i][j].toString().match('Header3') == 'Header3'){
myArray.push(j);
}
}
}
Logger.log(myArray);
will return [ 0.0 , 2.0 ].
But still, my question remains, is there a faster way to get 2(n) columns that are not side-by-side and put them into an array so that you can use .setValues?
Answer
But still, my question remains, is there a faster way to get 2(n) columns that are not side-by-side and put them into an array so that you can use .setValues?
Yes, there are many ways. One of them is the use of a JavaScript method: array.prototype.forEach()
Code
function myFunction() {
var sheet = SpreadsheetApp.getActiveSheet();
var data = sheet.getDataRange().getValues();
var array = [];
data.forEach(function(row){
array.push([row[0],row[5]]);
});
sheet.getRange(1,10,array.length,2).setValues(array);
}
Explanation
Get the active sheet
var sheet = SpreadsheetApp.getActiveSheet();
Get the all the values on sheet
var data = sheet.getDataRange().getValues();
Initialize a variable to hold the array
var array = [];
Get the values of the first and sixth columns (A and F) (zero based index)
data.forEach(function(row){
array.push([row[0],row[5]]);
});
Return the values to a range starting on J1 and ending on column K and the required row (one based index)
sheet.getRange(1,10,array.length,2).setValues(array);
Take a look at the getRowsData() function on the Simple Mail Merge tutorial. It will get all the data in a sheet and return it as objects. You could then access the data as myData[i].header1 It will remove spaces and "normalize" the header. So a header such as My Header name will be myData[i].myHEaderName
You could limit the returned data to only the columns you need if you wish.

Paste values from one sheet to another and remove duplicates

I have two worksheets in my google spreadsheet:
Input data is coming into the Get Data worksheet via the importxml function.
However, I would like to copy all values of the Get Data sheet to the Final Data sheet and if there are duplicates(in terms of rows) append the unique row.
Here is what I tried:
function onEdit() {
//get the data from old Spreadsheet
var ss = SpreadsheetApp.openById("1bm2ia--F2b0495iTJotp4Kv1QAW-wGUGDUROwM9B-D0");
var dataRange = ss.getSheetByName("Get Data").getRange(1, 1, ss.getLastRow(), ss.getLastColumn());
var dataRangeFinalData = ss.getSheetByName("Final Data").getRange(1, 1, ss.getLastRow(), ss.getLastColumn());
var myData = dataRange.getValues();
//Open new Spreadsheet & paste the data
newSS = SpreadsheetApp.openById("1bm2ia--F2b0495iTJotp4Kv1QAW-wGUGDUROwM9B-D0");
Logger.log(newSS.getLastRow());
newSS.getSheetByName("Final Data").getRange(newSS.getLastRow()+1, 1, ss.getLastRow(), ss.getLastColumn()).setValues(myData);
//remove duplicates in the new sheet
removeDups(dataRangeFinalData)
}
function getId() {
Browser.msgBox('Spreadsheet key: ' + SpreadsheetApp.getActiveSpreadsheet().getId());
}
function removeDups(array) {
var outArray = [];
array.sort(lowerCase);
function lowerCase(a,b){
return a.toLowerCase()>b.toLowerCase() ? 1 : -1;// sort function that does not "see" letter case
}
outArray.push(array[0]);
for(var n in array){
Logger.log(outArray[outArray.length-1]+' = '+array[n]+' ?');
if(outArray[outArray.length-1].toLowerCase()!=array[n].toLowerCase()){
outArray.push(array[n]);
}
}
return outArray;
}
Below you can find the link to a sample spreadsheet:
Sample Sheet
My problem is that the data does not get pasted.
I appreciate your replies!
tl;dr: See script at bottom.
An onEdit() function is inappropriate for your use case, as cell contents modified by spreadsheet functions are not considered "edit" events. You can read more about that in this answer. If you want this to be automated, then a timed trigger function would be appropriate. Alternatively, you could manually invoke the function by a menu item, say. I'll leave that to you to decide, as the real meat of your problem is how to ensure row-level uniqueness in your final data set.
Merging unique rows
Although your original code is incomplete, it appears you were intending to first remove duplicates from the source data, utilizing case-insensitive string comparisons. I'll suggest instead that some other JavaScript magic would help here.
We're interested in uniqueness in our destination data, so we need to have a way to compare new rows to what we already have. If we had arrays of strings or numbers, then we could just use the techniques in How to merge two arrays in Javascript and de-duplicate items. However, there's a complication here, because we have an array of arrays, and arrays cannot be directly compared.
Hash
Fine - we could still compare rows element-by-element, which would require a simple loop over all columns in the rows we were comparing. Simple, but slow, what we would call an O(n2) solution (Order n-squared). As the number of rows to compare increased, the number of unique comparison operations would increase exponentially. So, let's not do that.
Instead, we'll create a separate data structure that mirrors our destination data but is very efficient for comparisons, a hash.
In JavaScript we can quickly access the properties of an object by their name, or key. Further, that key can be any string. We can create a simple hash table then, with an object whose properties are named using strings generated from the rows of our destination data. For example, this would create a hash object, then add the array row to it:
var destHash = {};
destHash[row.join('')] = true; // could be anything
To create our key, we're joining all the values in the row array with no separator. Now, to test for uniqueness of a row, we just check for existence of an object property with an identically-formed key. Like this:
var alreadyExists = destHash.hasOwnProperty(row.join(''));
One additional consideration: since the source data can conceivably contain duplicate rows that aren't yet in the destination data, we need to continuously expand the hash table as unique rows are identified.
Filter & Concatenate
JavaScript provides two built-in array methods that we'll use to filter out known rows, and concatenate only unique rows to our destination data.
In its simple form, that would look like this:
// Concatentate source rows to dest rows if they satisfy a uniqueness filter
var mergedData = destData.concat(sourceData.filter(function (row) {
// Return true if given row is unique
}));
You can read that as "create an array named mergedData that consists of the current contents of the array named destData, with filtered rows of the sourceData array concatenated to it."
You'll find in the final function that it's a little more complex due to the other considerations already mentioned.
Update spreadsheet
Once we have our mergedData array, it just needs to be written into the destination Sheet.
Padding rows: The source data contains rows of inconsistent width, which will be a problem when calling setValues(), which expects all rows to be squared off. This will require that we examine and pad rows to avoid this sort of error:
Incorrect range width, was 6 but should be 5 (line ?, file "Code")
Padding rows is done by pushing blank "cells" at the end of the row array until it reaches the intended length.
for (var col=mergedData[row].length; col<mergedWidth; col++)
mergedData[row].push('');
With that taken care of for each row, we're finally ready to write out the result.
Final script
function appendUniqueRows() {
var ss = SpreadsheetApp.getActive();
var sourceSheet = ss.getSheetByName('Get Data');
var destSheet = ss.getSheetByName('Final Data');
var sourceData = sourceSheet.getDataRange().getValues();
var destData = destSheet.getDataRange().getValues();
// Check whether destination sheet is empty
if (destData.length === 1 && "" === destData[0].join('')) {
// Empty, so ignore the phantom row
destData = [];
}
// Generate hash for comparisons
var destHash = {};
destData.forEach(function(row) {
destHash[row.join('')] = true; // could be anything
});
// Concatentate source rows to dest rows if they satisfy a uniqueness filter
var mergedData = destData.concat(sourceData.filter(function (row) {
var hashedRow = row.join('');
if (!destHash.hasOwnProperty(hashedRow)) {
// This row is unique
destHash[hashedRow] = true; // Add to hash for future comparisons
return true; // filter -> true
}
return false; // not unique, filter -> false
}));
// Check whether two data sets were the same width
var sourceWidth = (sourceData.length > 0) ? sourceData[0].length : 0;
var destWidth = (destData.length > 0) ? destData[0].length : 0;
if (sourceWidth !== destWidth) {
// Pad out all columns for the new row
var mergedWidth = Math.max(sourceWidth,destWidth);
for (var row=0; row<mergedData.length; row++) {
for (var col=mergedData[row].length; col<mergedWidth; col++)
mergedData[row].push('');
}
}
// Write merged data to destination sheet
destSheet.getRange(1, 1, mergedData.length, mergedData[0].length)
.setValues(mergedData);
}

How to remove quote mark from array

I have a script that will import data from a csv file in to a Google sheet. I would like to include the following to remove the quote marks from the data - .replace(/"/g, "") - but am not sure of the best place/syntax to add in to the existing script.
Do I add it in to the section where I convert the csv data to an array -
function CSVToArray_(strData){
var rows = strData.split("\n");
//Logger.log(rows.length);
var array = [];
for(n=0;n<rows.length;++n){
if(rows[n].split(',').length>1){
array.push(rows[n].split(','));
}
}
Logger.log(array);
return array;
}
...or to the section where the sheet gets updated with the array data ?
var impSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("CtrlSht").getRange("B8:B8").getValue();
var csvData = CSVToArray_(csvFile);// convert to 2D array
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName(impSheet);
sheet.clear();
sheet.getRange(1,1, csvData.length, csvData[0].length).setValues(csvData);// write to sheet in one single step
Any guidance/suggestions would be greatly appreciated.
The code above is dangerous and will break if there is a comma within an element even if it is in quotes which is valid csv. I suggest checking out this library https://code.google.com/p/jquery-csv/. Also, are you wanting to remove quotes or are you wanting to convert string values to numbers? That is, are you getting "2" and wanting it to be just 2, if so, you may just be wanting to parse it into a number as follows: parseInt("2").
If you don't care about the above then the following should work:
function CSVToArray_(strData){
var rows = strData.split("\n");
//Logger.log(rows.length);
var array = [];
var array_inner;
// iterate over rows
for (n=0; n<rows.length; ++n) {
array_inner = rows[n].split(',');
if (array_inner.length>1) {
// iterate over columns
for (m=0; m<array_inner.length; m++) {
array_inner[m] = array_inner[m].replace(/"/g, "");
}
array.push(array_inner);
}
}
Logger.log(array);
return array
}

Categories

Resources