Batch upload using CSV to Azure Storage - javascript

I've come across a problem uploading a large CSV file to Azure Table Storage: the data appears to stream from the file so fast that the inserts can't keep up, so rows don't upload properly and I get a lot of timeout errors.
This is my current code:
var fs = require('fs');
var csv = require('csv');
var azure = require('azure');

var AZURE_STORAGE_ACCOUNT = "my storage account";
var AZURE_STORAGE_ACCESS_KEY = "my access key";
var tableService = azure.createTableService(AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_ACCESS_KEY);

var count = 150000;
var uploadCount = 1;
var counterror = 1;

tableService.createTableIfNotExists('newallactorstable', function(error){
    if (!error) {
        console.log("Table created / located");
    } else {
        console.log("error");
    }
});

csv()
.from.path(__dirname + '/actorsb-c.csv', {delimiter: '\t'})
.transform(function(row){
    // move the last column to the front
    row.unshift(row.pop());
    return row;
})
.on('record', function(row, index){
    //console.log('Actor: ' + row[0]);
    var actorsUpload = {
        PartitionKey: 'actors',
        RowKey: count.toString(),
        Actors: row[0]
    };
    tableService.insertEntity('newallactorstable', actorsUpload, function(error){
        if (!error) {
            console.log("Added: " + uploadCount);
        } else {
            console.log(error);
        }
    });
    count++;
})
.on('close', function(count){
    console.log('Number of lines: ' + count);
})
.on('error', function(error){
    console.log(error.message);
});
The CSV file is roughly 800 MB.
I know that to fix this I probably need to send the data in batches, but I have literally no idea how to do that.

I'm not familiar with the azure package or the csv package, but I would suggest uploading the file using a stream. If you have the file saved to your drive, you can create a read stream from it and then use that stream to upload to Azure with createBlockBlobFromStream. That question redirected me here; I suggest you take a look at it, as it handles the encoding. The code there converts the file to a base64 string, but I suspect that can be done more efficiently in Node. I will have to look into that, though.
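For illustration, here's a minimal sketch of that streaming approach, assuming the same azure package as in the question; the container name is a placeholder and the file path reuses the one from the question:
var fs = require('fs');
var azure = require('azure');
var blobService = azure.createBlobService(AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_ACCESS_KEY);
var filePath = __dirname + '/actorsb-c.csv';
var stream = fs.createReadStream(filePath);
var streamLength = fs.statSync(filePath).size; // the API needs the length up front
blobService.createBlockBlobFromStream('csv-uploads', 'actorsb-c.csv', stream, streamLength, function (error) {
    if (!error) {
        console.log('File uploaded to blob storage');
    } else {
        console.log(error);
    }
});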

What I would suggest is uploading your file to Blob Storage, and you can keep a reference to the blob URI in your Table Storage. The block blob option gives you an easy way to do a batch upload.
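For the batching part of the original question, here is a hedged sketch using the newer azure-storage package (not the azure package from the question). Table Storage batches accept up to 100 operations, and every entity in a batch must share one PartitionKey; the table name and fields mirror the question, and the rows array stands in for parsed CSV data:
var azure = require('azure-storage'); // newer package than the question's `azure`
var entGen = azure.TableUtilities.entityGenerator;
var tableService = azure.createTableService(AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_ACCESS_KEY);
var rows = ['Actor A', 'Actor B']; // stand-in for parsed CSV rows
var batch = new azure.TableBatch();
rows.forEach(function (actor, i) {
    batch.insertEntity({
        PartitionKey: entGen.String('actors'), // all entities in a batch share one PartitionKey
        RowKey: entGen.String(String(150000 + i)),
        Actors: entGen.String(actor)
    }, { echoContent: true });
});
tableService.executeBatch('newallactorstable', batch, function (error) {
    if (!error) {
        console.log('Batch of ' + rows.length + ' rows committed');
    } else {
        console.log(error);
    }
});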

Related

Not able to fetch text data from web URL using JavaScript

I need to extract text data from a web URL (http://www.africau.edu/images/default/sample.pdf).
I used two node modules:
1) crawler-request
it('Read Pdf Data using crawler', function(){
    const crawler = require('crawler-request');
    function response_text_size(response){
        response["size"] = response.text.length;
        return response;
    }
    crawler("http://www.africau.edu/images/default/sample.pdf", response_text_size).then(function(response){
        // handle response
        console.log("Response = " + response.size);
    });
});
When this runs, it prints nothing to the console.
2) pfd2json/pdfparser
it('Read Data from url', function(){
    var request = require('request');
    var pdf = require('pfd2json/pdfparser');
    var fs = require('fs');
    var pdfUrl = "http://www.africau.edu/images/default/sample.pdf";
    let databuffer = fs.readFileSync(pdfUrl);
    pdf(databuffer).then(function(data){
        var arr:Array<String> = data.text;
        var n = arr.includes('Thursday 02 May');
        console.log("Print Array " + n);
    });
});
Failed: ENOENT: no such file or directory, open 'http://www.africau.edu/images/default/sample.pdf'
I am able to extract the data from a local path, but not from a URL.
The issue here is that you are using the fs module (File System) to read a file that lives on a distant server.
You also mistyped the pdf2json module name, which should have given you an error.
You did require the request module, though, and that module makes it possible to access the distant file. Here's one way to do it:
it('Read Data from url', function () {
    var request = require('request');
    var PDFParser = require('pdf2json');
    var pdfUrl = 'http://unec.edu.az/application/uploads/2014/12/pdf-sample.pdf';
    var pdfParser = new PDFParser(this, 1);
    // executed if the parser fails for any reason
    pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError));
    // executed when the parser has finished
    pdfParser.on("pdfParser_dataReady", pdfData => console.log(pdfParser.getRawTextContent()));
    // request the pdf's file content, then call the pdf parser on the retrieved buffer
    request({ url: pdfUrl, encoding: null }, (error, response, body) => pdfParser.parseBuffer(body));
});
This makes it possible to load the distant .pdf file into your program. It simply outputs the textual content of the .pdf once the parser has finished reading it; I'd recommend looking at the pdf2json documentation if you want to do more.
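If you'd rather avoid the request package (now deprecated), here is a minimal sketch of the same flow with Node's built-in http module; the pdf2json usage is unchanged from the answer above:
var http = require('http');
var PDFParser = require('pdf2json');
var pdfParser = new PDFParser(null, 1);
pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError));
pdfParser.on("pdfParser_dataReady", () => console.log(pdfParser.getRawTextContent()));
// download the pdf into a buffer, then hand it to the parser
http.get('http://unec.edu.az/application/uploads/2014/12/pdf-sample.pdf', res => {
    var chunks = [];
    res.on('data', chunk => chunks.push(chunk));
    res.on('end', () => pdfParser.parseBuffer(Buffer.concat(chunks)));
});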

Upload a file stream to S3 without a file and from memory

I'm trying to create a CSV from a string and upload it to my S3 bucket. I don't want to write a file; I want it all to be in memory.
I don't want to read from a file to get my stream. I would like to make a stream without a file: something like the createReadStream method, but instead of a file, I'd pass a string with my stream's contents.
var AWS = require('aws-sdk');
var zlib = require('zlib');
var fs = require('fs');
var s3Stream = require('s3-upload-stream')(new AWS.S3());

// Set the client to be used for the upload.
AWS.config.loadFromPath('./config.json');

// Create the streams
var read = fs.createReadStream('/path/to/a/file');
var upload = s3Stream.upload({
    "Bucket": "bucket-name",
    "Key": "key-name"
});

// Handle errors.
upload.on('error', function (error) {
    console.log(error);
});
upload.on('part', function (details) {
    console.log(details);
});
upload.on('uploaded', function (details) {
    console.log(details);
});

read.pipe(upload);
You can create a Readable stream and push your string directly to it; that stream can then be consumed by your s3Stream instance.
const Readable = require('stream').Readable
let data = 'this is your data'
let read = new Readable()
read.push(data) // Push your data string
read.push(null) // Signal that you're done writing
// Create the s3Stream upload instance and attach its listeners here
read.pipe(upload)
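On newer Node versions (v12+), stream.Readable.from() builds the same one-shot stream in a single call; a minimal sketch, assuming the `upload` instance from the question is already set up:
const { Readable } = require('stream')
const read = Readable.from('this is your data') // stream that emits the string once, then ends
read.pipe(upload)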

Convert base64 to png in meteor app

I have a Meteor application in which I receive a base64 image. I want to save the image on a DigitalOcean instance, so I would like to convert it to a PNG or another image format and send it to the server to get a URL for the image.
But I didn't find a Meteor package that does this.
Do you know how I can do that?
I was running into a similar issue.
Run the following:
meteor npm install --save file-api
This will allow, for example, the following code on the server:
import FileAPI from 'file-api';
const { File } = FileAPI;

const getFile = function(name, image){
    const i = image.indexOf('base64,');
    const buffer = Buffer.from(image.slice(i + 7), 'base64');
    const file = new File({ buffer: buffer, name, type: 'image/jpeg' });
    return file;
}
Simply call it with whatever file name you prefer and the base64 string as the image parameter.
I hope this helps. I have tested this on the server and it works; I have not tested it on the client, but I don't see why it wouldn't work.
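A hypothetical usage sketch, assuming file-api mirrors the browser File interface's name and size properties; dataUrl stands in for the base64 string your app receives:
const dataUrl = 'data:image/jpeg;base64,/9j/4AAQSkZJRg==';
const file = getFile('avatar.jpg', dataUrl);
console.log(file.name, file.size);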
I solved my problem using fs.writeFile from the File System module.
This is my JavaScript code on the client side: I get a base64 image (img) from a plugin, and when I click my save button I do this:
$("#saveImage").click(function() {
var img = $image.cropper("getDataURL")
preview.setAttribute('src', img);
insertionImage(img);
});
var insertionImage = function(img){
//some things...
Meteor.call('saveTileImage', img);
//some things...
}
And on the server side, I have :
Meteor.methods({
    saveTileImage: function(fileData) {
        var fs = Npm.require('fs');
        var path = process.env.PWD + '/var/uploads/';
        var base64Data = fileData.replace(/^data:image\/png;base64,/, "");
        base64Data = base64Data.replace(/ /g, '+'); // restore any '+' characters decoded as spaces
        var binaryData = Buffer.from(base64Data, 'base64').toString('binary');
        var imageName = "tileImg_" + currentTileId + ".png";
        fs.writeFile(path + imageName, binaryData, "binary", Meteor.bindEnvironment(function (err) {
            if (err) {
                throw (new Meteor.Error(500, 'Failed to save file.', err));
            } else {
                insertionTileImage(imageName);
            }
        }));
    }
});

var insertionTileImage = function(fileName){
    tiles.update({_id: currentTileId}, {$set: {image: "upload/" + fileName}});
};
So the Meteor method saveTileImage transforms the base64 image into a PNG file on the server, and insertionTileImage saves the file's path to the tiles collection.
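For reference, a shorter sketch of the same server-side conversion that decodes straight to bytes and skips the intermediate 'binary' string; it assumes the fileData, path and imageName variables from the method above:
var base64Data = fileData.replace(/^data:image\/png;base64,/, "");
fs.writeFile(path + imageName, Buffer.from(base64Data, 'base64'), Meteor.bindEnvironment(function (err) {
    if (err) {
        throw new Meteor.Error(500, 'Failed to save file.', err);
    }
}));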
Would a blob URL be a better option for you?
Save the images to a server as you like, in base64 or whatever, and then when you are viewing an image on a page, generate a blob URL for it. The URL is only usable at that time, which prevents others from reusing your URL on other websites and from overloading your image server.
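A minimal client-side sketch of that blob-URL idea; the image path is hypothetical, matching the answer above:
fetch('/upload/tileImg_1.png')
    .then(function (res) { return res.blob(); })
    .then(function (blob) {
        // the object URL is temporary and scoped to this page
        document.querySelector('#preview').src = URL.createObjectURL(blob);
    });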

How to download multiple files from Google Drive as .zip using JSZip on Salesforce

The case:
On the Salesforce platform I use Google Drive to store files (images, in this case) through the configured Apex Google Drive API Framework, so the Google Drive API handles the authToken and so on. I can upload and browse images in my application. Now I want to select multiple files and download them in a single zip file. So far I'm trying to do that with the JSZip and FileSaver libraries. With the same code below I can zip and download multiple files stored elsewhere with the proper response header, but not from GDrive, because of a CORS error:
https://xxx.salesforce.com/contenthub/download/XXXXXXXXXX%3Afile%XXXXXX_XXXXXXXXX. No 'Access-Control-Allow-Origin' header is present on the requested resource. Origin 'https://xxx.visual.force.com' is therefore not allowed access. If I just click on this link, the file starts to download.
Is there any way to configure GDrive to return the response header Access-Control-Allow-Origin: * or Access-Control-Allow-Origin: https://*.mydomain.com, or do I have to use something else, maybe server-side compression? Right now I am using the download link provided by the Apex Google Drive API (it looks like this:
https://xxx.salesforce.com/contenthub/download/XXXXXXXXXXX%3Afile%XXXXXXXX); it works fine when used as src="fileURL" or when pasted directly into the browser. The GDrive connector adds the 'access_token' and so on.
My code:
//ajax request to get files using JSZipUtils
let urlToPromise = (url) => {
    return new Promise(function(resolve, reject) {
        JSZipUtils.getBinaryContent(url, function (err, data) {
            if (err) {
                reject(err);
            } else {
                resolve(data);
            }
        });
    });
};

this.downloadAssets = () => {
    let zip = new JSZip();
    // 'selectedAssets' is an array of objects, each with 'assetFiles'
    // containing a fileURL. Download and add them to 'zip' one by one.
    for (let a of this.selectedAssets){
        for (let f of a.assetFiles){
            let url = f.fileURL;
            let name = a.assetName + "." + f.fileType;
            let filename = name.replace(/ /g, "");
            zip.file(filename, urlToPromise(url), {binary: true});
        }
    }
    // generate the zip and download it using 'FileSaver.js'
    zip.generateAsync({type: "blob"})
        .then(function callback(blob) {
            saveAs(blob, "test.zip");
        });
};
I also tried changing let url = f.fileURL to let url = f.fileURL + '?alt=media'; with &access_token=CURRENT_TOKEN added by the GDrive connector.
This link is handled by the GDrive connector, so if I just enter it in the browser it downloads the image. However, for multiple downloads via JS I get the CORS error.
I think this feature is not yet supported. If you check the Download Files guide from the Drive API, there's no mention of downloading multiple files at once; you have to make an individual API request for each file. This is confirmed in this SO thread.
But converting the selected files into a single zip file and downloading that zip should be possible with the Google Drive API. So how can I combine them into a single zip file? Please tell me.
My suggestion: download all the files and store them in a temporary directory, then add that directory to a zip file and save the zip to the device.
public Entity.Result<Entity.GoogleDrive> DownloadMultipleFile(string[] fileidList)
{
var result = new Entity.Result<Entity.GoogleDrive>();
ZipFile zip = new ZipFile();
try
{
var service = new DriveService(new BaseClientService.Initializer()
{
HttpClientInitializer = credential,
ApplicationName = "Download File",
});
FilesResource.ListRequest listRequest = service.Files.List();
//listRequest.PageSize = 10;
listRequest.Fields = "nextPageToken, files(id, name, mimeType, fullFileExtension)";
IList<File> files = listRequest.Execute().Files;
if (files != null && files.Count > 0)
{
foreach (var fileid in fileidList)
{
foreach (var file in files)
{
if (file.Id == fileid)
{
result.Data = new Entity.GoogleDrive { FileId = fileid };
FilesResource.GetRequest request = service.Files.Get(fileid);
request.ExecuteAsync();
var stream = new System.IO.FileStream(HttpContext.Current.Server.MapPath(#"~\TempFiles") + "\\" + file.Name, System.IO.FileMode.Create, System.IO.FileAccess.Write);
request.MediaDownloader.ProgressChanged += (IDownloadProgress progress) =>
{
switch (progress.Status)
{
case DownloadStatus.Downloading:
{
break;
}
case DownloadStatus.Completed:
{
break;
}
case DownloadStatus.Failed:
{
break;
}
}
};
request.Download(stream);
stream.Close();
break;
}
}
}
}
zip.AddDirectory(HttpContext.Current.Server.MapPath(#"~\TempFiles"), "GoogleDrive");
string pathUser = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
string pathDownload = System.IO.Path.Combine(pathUser, "Downloads");
zip.Save(pathDownload + "\\GoogleDrive.zip");
System.IO.DirectoryInfo di = new System.IO.DirectoryInfo(HttpContext.Current.Server.MapPath(#"~\TempFiles"));
foreach (var file in di.GetFiles())
{
file.Delete();
}
result.IsSucceed = true;
result.Message = "File downloaded suceessfully";
}
catch (Exception ex)
{
result.IsSucceed = false;
result.Message = ex.ToString();
}
return result;
}
My previously published code works; I forgot to post the solution. Instead of using the content hub link, I started using a direct link to Google Drive, and the CORS issue was solved. I'm still not sure whether CORS could be fixed on the Salesforce side; I tried different setups with no luck.
The direct download link to GDrive works fine in my case. The only thing I had to change was the prefix to the GDrive file ID.
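A sketch of that change in the context of the question's loop; f.fileId is a hypothetical field, so extract the file ID however your connector exposes it:
// build a direct Drive download URL from the file ID instead of
// the Salesforce content hub link
let url = 'https://drive.google.com/uc?export=download&id=' + f.fileId;
zip.file(filename, urlToPromise(url), {binary: true});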

Create image from ArrayBuffer in Nodejs

I'm trying to create an image file from chunks of ArrayBuffers.
var all = fs.createWriteStream("out." + imgtype);
for (var i = 0; i < end; i++) {
    all.write(picarray[i]);
}
all.end();
where picarray contains ArrayBuffer chunks. However, I get the error TypeError: Invalid non-string/buffer chunk.
How can I convert ArrayBuffer chunks into an image?
Have you tried first converting it into a Node.js Buffer? (Buffer is the native Node.js binary interface, whereas ArrayBuffer is the browser interface and isn't fully supported for Node.js write operations.)
Something along these lines should help:
var all = fs.createWriteStream("out." + imgtype);
for (var i = 0; i < end; i++) {
    // wrap each ArrayBuffer chunk in a Node.js Buffer before writing
    var buffer = Buffer.from(new Uint8Array(picarray[i]));
    all.write(buffer);
}
all.end();
After spending some time on it I got this; it worked perfectly for me.
As mentioned by @Nick, you will have to convert the buffer array you received from the browser into a Node.js Buffer.
var readWriteFile = function (req) {
    var fs = require('fs');
    var data = Buffer.from(req);
    fs.writeFile('fileName.png', data, 'binary', function (err) {
        if (err) {
            console.log("There was an error writing the image");
        }
        else {
            console.log("The file was written");
        }
    });
};
ArrayBuffer is a browser-side type that isn't supported for writing files; we need to convert it to Buffer, the native API of the Node.js runtime.
These few lines of code will create the image:
const fs = require('fs');
let data = arrayBuffer; // your image, stored in the arrayBuffer variable
data = Buffer.from(data);
fs.writeFile(`Assets/test.png`, data, err => { // Assets is a folder in your root directory
    if (err) {
        console.log(err);
    } else {
        console.log('File created successfully!');
    }
});
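A self-contained sketch tying the answers together; note that Buffer.from(arrayBuffer) wraps the same memory without copying, so it can be written out directly (the bytes here are purely illustrative):
const fs = require('fs');
const bytes = new Uint8Array([0x89, 0x50, 0x4e, 0x47]); // illustrative bytes only
fs.writeFileSync('out.png', Buffer.from(bytes.buffer)); // Buffer view over the same memory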
