I have a Google Spreadsheet with a few hundreds of sheets there. Users update those sheets with new data under a strict and specific template. I created a Google Apps Script where once every while, I run it to change several things on every sheet, by keeping the original data intact and export those in multiple CSV files I store in Google Drive. In more details,
- Iterate on every sheet
- Duplicate the current one
- Run a set of functions on every cell I have to update (~500 on each)
- Export the sheet to CSV and store it in Drive
- Delete the temp sheet
- Move to the next one
Number 3 is the most time consuming. It might takes 30-40 seconds for every sheet and the functions are simple math formulas or dictionaries.
Here is the code which I have removed functions that I just repeat for more cells.
function saveAsCSV() {
var maxSheetID = 100;
var sheetsFolder = DriveApp.getFoldersByName('sheetsFolder_CSV').next();
var folder = sheetsFolder.createFolder('sheetsFolder' + new Date().getTime());
for (var sheetID = 1; sheetID <= maxSheetID; sheetID++) {
createTempSheet();
copyRowsWithCopyTo(sheetID);
var tempSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('temp');
tempSheet.getDataRange().setDataValidation(null); //remove all validations I have
var cell1 = tempSheet.getRange(5,4);
cell1.setValue(function1(cell1.getValue()));
var cell2 = tempSheet.getRange(6,4);
cell2.setValue(function2(cell2.getValue()));
var cell3 = tempSheet.getRange(7,4);
cell3.setValue(function3(cell3.getValue()));
//continue for several cells like this on specific i,j indexes. No pattern.
//Table
for (var p = 9; p <= 30; p++) {
var tCell1 = tempSheet.getRange(p,2);
tCell1.setValue(function5(tCell1.getValue()));
var tCell2 = tempSheet.getRange(p,3);
tCell2.setValue(function6(tCell2.getValue()));
//continue for several cells like this with dynamic p and several columns
}
fileName = sheetID + ".csv";
var csvFile = convertRangeToCsvFile_(fileName, tempSheet);
folder.createFile(fileName, csvFile);
}
Browser.msgBox('Files are waiting in a folder named ' + folder.getName());
}
function createTempSheet() {
var activeSpreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var tempSheet = activeSpreadsheet.getSheetByName("temp");
if (tempSheet != null) {
activeSpreadsheet.deleteSheet(tempSheet);
}
tempSheet = activeSpreadsheet.insertSheet();
tempSheet.setName("temp");
}
function convertRangeToCsvFile_(csvFileName, sheet) {
// get available data range in the spreadsheet
var activeRange = sheet.getDataRange();
try {
var data = activeRange.getValues();
var csvFile = undefined;
// loop through the data in the range and build a string with the csv data
if (data.length > 1) {
var csv = "";
for (var row = 0; row < data.length; row++) {
for (var col = 0; col < data[row].length; col++) {
if (data[row][col].toString().indexOf(",") != -1) {
data[row][col] = "\"" + data[row][col] + "\"";
}
}
// join each row's columns
// add a carriage return to end of each row, except for the last one
if (row < data.length-1) {
csv += data[row].join(",") + "\r\n";
}
else {
csv += data[row];
}
}
csvFile = csv;
}
return csvFile;
}
catch(err) {
Logger.log(err);
Browser.msgBox(err);
}
}
function copyRowsWithCopyTo(sourceName) {
let spreadSheet = SpreadsheetApp.getActiveSpreadsheet();
let sourceSheet = spreadSheet.getSheetByName(sourceName);
let sourceRange = sourceSheet.getDataRange();
let targetSheet = spreadSheet.getSheetByName('temp');
sourceRange.copyTo(targetSheet.getRange(1, 1));
}
-
\$\begingroup\$ Check my answer here \$\endgroup\$TheMaster– TheMaster2022年08月17日 23:20:50 +00:00Commented Aug 17, 2022 at 23:20
2 Answers 2
As the documentation says,
Using JavaScript operations within your script is considerably faster than calling other services. Anything you can accomplish within Google Apps Script itself will be much faster than making calls that need to fetch data from Google's servers or an external server, such as requests to Spreadsheets, Docs, Sites, Translate, UrlFetch, and so on. Your scripts will run faster if you can find ways to minimize the calls the scripts make to those services.
The slowest part in the script is all those .getValue()
and .setValue()
calls. Not only is the script making those calls, it is alternating them as well, even in the same line(rg.setValue(function(rg.getValue()))
). Alternating read and write is slow:
Every time you do a read, we must first empty (commit) the write cache to ensure that you're reading the latest data (you can force a write of the cache by calling SpreadsheetApp.flush()). Likewise, every time you do a write, we have to throw away the read cache because it's no longer valid. Therefore if you can avoid interleaving reads and writes, you'll get full benefit of the cache.
The script here leaves out essential information. What does function1
do? More importantly, whether function1
does anything to the data. Does it modify the sheet again? Also, when getting the csv
, does the sheet make any calculations through formula?
Ideally, Your script structure should look like:
- INPUT: Get data from one sheet
- OUTPUT: Set modified data to Drive
This implies:
- no temporary sheet creation,
- no repeated get/set calls,
- no relying on sheet formulas after getting data
Other minor changes include:
Use
const
andlet
as needed instead of declaring all variables asvar
. This helps the javascript engine optimize the memory and processing of such variablesThe script repeats almost all the code multiple times. Practice
DRY
(Don't Repeat Yourself) principle. See eg below. Use loops where possible: Even in cases, where there isn't a pattern, you can create a list of indexes that you want and then loop over them.insertSheet
is able to create new sheet with old sheet as atemplate
. So, the functionscreateTempSheet
andcopyRowsWithCopyTo
are completely unnecessary.console
is a newer standard class. Use it instead ofLogger
Script with sample modifications:
function saveAsCSV() {
const maxSheetID = 100;
const sheetsFolder = DriveApp.getFoldersByName('sheetsFolder_CSV').next();
const folderName = 'sheetsFolder' + new Date().getTime();
const folder = sheetsFolder.createFolder(folderName);
const ss = SpreadsheetApp.getActiveSpreadsheet();
for (let sheetID = 1; sheetID <= maxSheetID; sheetID++) {
/*needed? why not directly get the data*/ const tempSheet = ss.insertSheet(
'temp',
1,
{
template: ss.getSheetByName(sheetID),
}
),
datarange = tempSheet.getDataRange(),
/*1 GET call*/data = datarange.getValues(),
indexes = [
[5, 4], // automatically use index to calculate function number
[6, 4],
[7, 4, 'function3'], // or specify a function
];
datarange.setDataValidation(null); //remove all validations I have
indexes.forEach(/*DRY loop*/
([i, j, func], funcIdx) =>
(data[i][j] = this[ func ?? `function${funcIdx + 1}`](data[i][j]))
);
//continue for several cells like this on specific i,j indexes. No pattern.
//Table
for (let p = 9; p <= 30; p++) {
data[p][2] = function5(data[p][2]);
data[p][3] = function5(data[p][3]);
//continue for several cells like this with dynamic p and several columns
}
/*needed?1 SET call*/ datarange.setValues(data);
const fileName = sheetID + '.csv';
const csvFile = convertRangeToCsvFile_(/*DRY*/data);
folder.createFile(fileName, csvFile);
ss.deleteSheet(tempSheet);
}
Browser.msgBox('Files are waiting in a folder named ' + folderName/*DRY*/);
}
function convertRangeToCsvFile_(data) {
try {
let csvFile;
// loop through the data in the range and build a string with the csv data
if (data.length > 1) {
let csv = '';
for (let row = 0; row < data.length; row++) {
for (let col = 0; col < data[row].length; col++) {
if (data[row][col].toString().indexOf(',') != -1) {
data[row][col] = '"' + data[row][col] + '"';
}
}
// join each row's columns
// add a carriage return to end of each row, except for the last one
if (row < data.length - 1) {
csv += data[row].join(',') + '\r\n';
} else {
csv += data[row];
}
}
csvFile = csv;
}
return csvFile;
} catch (err) {
console.log(err);
Browser.msgBox(err);
}
}
As this answer explains calling SpreadsheetApp.getActiveSpreadsheet()
in every single iteration can slow things down dramatically. Moving that outside the loops can likely increase performance.
const tempSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('temp');
tempSheet.getDataRange().setDataValidation(null); //remove all validations I have
for (var sheetID = 1; sheetID <= maxSheetID; sheetID++) {
Another thing to consider is the way the CSV strings are created in convertRangeToCsvFile_()
. It may be possible to speed up the conversion by adding the rows to an array and using .join()
with the new line characters instead of appending with +=
for each row.
As this site explains the nested for
loop could go in reverse order, since the order doesn't really matter, so instead of:
for (var col = 0; col < data[row].length; col++) {
it could be:
for (var col = data[row].length; col--; ) {
which would require fewer operations.
Explore related questions
See similar questions with these tags.