Download and upload image without saving to disk - javascript

Using Node.js, I am trying to get an image from a URL and upload that image to another service without saving image to disk. I have the following code that works when saving the file to disk and using fs to create a readablestream. But as I am doing this as a cron job on a read-only file system (webtask.io) I'd want to achieve the same result without saving the file to disk temporarily. Shouldn't that be possible?
request(image.Url)
.pipe(
fs
.createWriteStream(image.Id)
.on('finish', () => {
client.assets
.upload('image', fs.createReadStream(image.Id))
.then(imageAsset => {
resolve(imageAsset)
})
})
)
Do you have any suggestions of how to achieve this without saving the file to disk? The upload client will take the following
client.asset.upload(type: 'file' | image', body: File | Blob | Buffer | NodeStream, options = {}): Promise<AssetDocument>
Thanks!

How about passing the buffer down to the upload function? Since as per your statement it'll accept a buffer.
As a side note... This will keep it in memory for the duration of the method execution, so if you call this numerous times you might run out of resources.
request.get(url, function (res) {
var data = [];
res.on('data', function(chunk) {
data.push(chunk);
}).on('end', function() {
var buffer = Buffer.concat(data);
// Pass the buffer
client.asset.upload(type: 'buffer', body: buffer);
});
});

I tried some various libraries and it turns out that node-fetch provides a way to return a buffer. So this code works:
fetch(image.Url)
.then(res => res.buffer())
.then(buffer => client.assets
.upload('image', buffer, {filename: image.Id}))
.then(imageAsset => {
resolve(imageAsset)
})

well I know it has been a few years since the question was originally asked, but I have encountered this problem now, and since I didn't find an answer with a comprehensive example I made one myself.
i'm assuming that the file path is a valid URL and that the end of it is the file name, I need to pass an apikey to this API endpoint, and a successful upload sends me back a response with a token.
I'm using node-fetch and form-data as dependencies.
const fetch = require('node-fetch');
const FormData = require('form-data');
const secretKey = 'secretKey';
const downloadAndUploadFile = async (filePath) => {
const fileName = new URL(filePath).pathname.split("/").pop();
const endpoint = `the-upload-endpoint-url`;
const formData = new FormData();
let jsonResponse = null;
try {
const download = await fetch(filePath);
const buffer = await download.buffer();
if (!buffer) {
console.log('file not found', filePath);
return null;
}
formData.append('file', buffer, fileName);
const response = await fetch(endpoint, {
method: 'POST', body: formData, headers: {
...formData.getHeaders(),
"Authorization": `Bearer ${secretKey}`,
},
});
jsonResponse = await response.json();
} catch (error) {
console.log('error on file upload', error);
}
return jsonResponse ? jsonResponse.token : null;
}

Related

Downloading Sqlite backup file from google drive ends up with a corrupt file nodejs + Vue

I am working on a backup system with Google Drive v3 REST API. I upload a sqlite backup file to a folder on Google Drive. That works. I am able to upload and list the files in the folder.
So now I am working on restoring that file in my application. But when I download the file and write the response stream I end up with a corrupted db file. When I open it up in notepad ++ I see that any special characters such as ^ etc are all squares and the file is like 2 mb bigger than what was uploaded.
I have been at this for two days now and cannot figure out why the file is not writing 1:1 copy of what is downloaded
this is my current code for the download ..
async restoreGoogleDriveDatabase(fileId){
const url = encodeURI(`https://www.googleapis.com/drive/v3/files/${fileId}?alt=media`);
const options = {
method: 'GET',
headers: {
'Authorization': 'Bearer ' + this.getToken()
},
}
await fetch(url, options).then(async response => response.text())
.then(arrayBuffer => {
const path = require('path').join(window.process.env.ProgramData, 'Home Inventory');
fs.writeFile(`${path}\\HomeInventory1.db`, arrayBuffer, function () {
console.warn('file created');
});
});
},
Looking for any kind of help that would get me back on the right track.
The issue seems to be with character encoding
The simplest fix is to fetch the result as an arrayBuffer, and write out the arrayBuffer using "binary" encoding
Like so
const fs = require('node:fs/promises');
async restoreGoogleDriveDatabase(fileId) {
const url = encodeURI(`https://www.googleapis.com/drive/v3/files/${fileId}?alt=media`);
const options = {
method: 'GET',
headers: {
'Authorization': 'Bearer ' + this.getToken()
},
}
try {
const response = await fetch(url, options);
const arrayBuffer = await response.arrayBuffer();
const path = require('path').join(window.process.env.ProgramData, 'Home Inventory');
await fs.writeFile(`${path}/HomeInventory1.db`, Buffer.from(arrayBuffer, "binary"));
console.warn('file created');
} catch (err) {
console.error(err);
}
},
The code above uses async/await correctly, and also uses fs/promises instead of fs, since already using async/await it makes sense to use the Promise version of fs methods
The important changes are
response.arrayBuffer()
To get the response as an arrayBuffer
and in fs.writeFile use
Buffer.from(arrayBuffer, "binary")
To write the data "as-is" to the file

multipart/form-data not being automatically set with axios in React Native

When attempting to upload a file to Amazon S3 using axios, I have been encountering a very strange issue. Normally, in a web browser, when FormData has binary data in it, the Content-Type header automatically gets set to multipart/form-data; boundary=<some random string>. However, I have been completely unable to achieve that in React Native (testing on an iOS device). The Content-Type is automatically set to application/json, and thus not being detected as a correctly formatted body when uploading to Amazon S3. I have tried specifying a blob in the file parameter in FormData instead of the URI to the file as well to no avail. I have appended my code below, any advice would be very much appreciated.
const uploadFileToS3 = (
presignedPostData,
file) => {
// create a form obj
const formData = new FormData();
// append the fields in presignedPostData in formData
Object.keys(presignedPostData.fields).forEach(
key => {
formData.append(
key,
presignedPostData.fields[key],
);
},
);
// append the file and uplaod
const getBlob = async () => {
const img_url = previewPath;
let result = await fetch(img_url);
const blob = await result.blob();
formData.append('Content-Type', 'image/jpeg');
formData.append('file', {
uri: previewPath,
type: 'image/jpeg',
name: 'test.jpeg',
});
console.log(formData, 'wild');
// post the data on the s3 url
axios
.post(presignedPostData.url, formData)
.then(function (response) {
console.log(response);
})
.catch(function (error) {
console.log(error.response);
});
};
getBlob();
};

Node.js save audio file from mediaserver [duplicate]

i want download a pdf file with axios and save on disk (server side) with fs.writeFile, i have tried:
axios.get('https://xxx/my.pdf', {responseType: 'blob'}).then(response => {
fs.writeFile('/temp/my.pdf', response.data, (err) => {
if (err) throw err;
console.log('The file has been saved!');
});
});
the file is saved but the content is broken...
how do I correctly save the file?
Actually, I believe the previously accepted answer has some flaws, as it will not handle the writestream properly, so if you call "then()" after Axios has given you the response, you will end up having a partially downloaded file.
This is a more appropriate solution when downloading slightly larger files:
export async function downloadFile(fileUrl: string, outputLocationPath: string) {
const writer = createWriteStream(outputLocationPath);
return Axios({
method: 'get',
url: fileUrl,
responseType: 'stream',
}).then(response => {
//ensure that the user can call `then()` only when the file has
//been downloaded entirely.
return new Promise((resolve, reject) => {
response.data.pipe(writer);
let error = null;
writer.on('error', err => {
error = err;
writer.close();
reject(err);
});
writer.on('close', () => {
if (!error) {
resolve(true);
}
//no need to call the reject here, as it will have been called in the
//'error' stream;
});
});
});
}
This way, you can call downloadFile(), call then() on the returned promise, and making sure that the downloaded file will have completed processing.
Or, if you use a more modern version of NodeJS, you can try this instead:
import * as stream from 'stream';
import { promisify } from 'util';
const finished = promisify(stream.finished);
export async function downloadFile(fileUrl: string, outputLocationPath: string): Promise<any> {
const writer = createWriteStream(outputLocationPath);
return Axios({
method: 'get',
url: fileUrl,
responseType: 'stream',
}).then(response => {
response.data.pipe(writer);
return finished(writer); //this is a Promise
});
}
You can simply use response.data.pipe and fs.createWriteStream to pipe response to file
axios({
method: "get",
url: "https://xxx/my.pdf",
responseType: "stream"
}).then(function (response) {
response.data.pipe(fs.createWriteStream("/temp/my.pdf"));
});
The problem with broken file is because of backpressuring in node streams. You may find this link useful to read: https://nodejs.org/es/docs/guides/backpressuring-in-streams/
I'm not really a fan of using Promise base declarative objects in JS codes as I feel it pollutes the actual core logic & makes the code hard to read. On top of it, you have to provision event handlers & listeners to make sure the code is completed.
A more cleaner approach on the same logic which the accepted answer proposes is given below. It uses the concepts of stream pipelines.
const util = require('util');
const stream = require('stream');
const pipeline = util.promisify(stream.pipeline);
const downloadFile = async () => {
try {
const request = await axios.get('https://xxx/my.pdf', {
responseType: 'stream',
});
await pipeline(request.data, fs.createWriteStream('/temp/my.pdf'));
console.log('download pdf pipeline successful');
} catch (error) {
console.error('download pdf pipeline failed', error);
}
}
exports.downloadFile = downloadFile
I hope you find this useful.
// This works perfectly well!
const axios = require('axios');
axios.get('http://www.sclance.com/pngs/png-file-download/png_file_download_1057991.png', {responseType: "stream"} )
.then(response => {
// Saving file to working directory
response.data.pipe(fs.createWriteStream("todays_picture.png"));
})
.catch(error => {
console.log(error);
});
The following code taken from https://gist.github.com/senthilmpro/072f5e69bdef4baffc8442c7e696f4eb?permalink_comment_id=3620639#gistcomment-3620639 worked for me
const res = await axios.get(url, { responseType: 'arraybuffer' });
fs.writeFileSync(downloadDestination, res.data);
node fileSystem writeFile encodes data by default to UTF8. which could be a problem in your case.
Try setting your encoding to null and skip encoding the received data:
fs.writeFile('/temp/my.pdf', response.data, {encoding: null}, (err) => {...}
you can also decalre encoding as a string (instead of options object) if you only declare encoding and no other options. string will be handled as encoding value. as such:
fs.writeFile('/temp/my.pdf', response.data, 'null', (err) => {...}
more read in fileSystem API write_file
I have tried, and I'm sure that using response.data.pipe and fs.createWriteStream can work.
Besides, I want to add my situation and solution
Situation:
using koa to develop a node.js server
using axios to get a pdf via url
using pdf-parse to parse the pdf
extract some information of pdf and return it as json to browser
Solution:
const Koa = require('koa');
const app = new Koa();
const axios = require('axios')
const fs = require("fs")
const pdf = require('pdf-parse');
const utils = require('./utils')
app.listen(process.env.PORT || 3000)
app.use(async (ctx, next) => {
let url = 'https://path/name.pdf'
let resp = await axios({
url: encodeURI(url),
responseType: 'arraybuffer'
})
let data = await pdf(resp.data)
ctx.body = {
phone: utils.getPhone(data.text),
email: utils.getEmail(data.text),
}
})
In this solution, it doesn't need to write file and read file, it's more efficient.
This is what worked for me and it also creates a temporary file for the image file in case the output file path is not specified:
const fs = require('fs')
const axios = require('axios').default
const tmp = require('tmp');
const downloadFile = async (fileUrl, outputLocationPath) => {
if(!outputLocationPath) {
outputLocationPath = tmp.fileSync({ mode: 0o644, prefix: 'kuzzle-listener-', postfix: '.jpg' });
}
let path = typeof outputLocationPath === 'object' ? outputLocationPath.name : outputLocationPath
const writer = fs.createWriteStream(path)
const response = await axios.get(fileUrl, { responseType: 'arraybuffer' })
return new Promise((resolve, reject) => {
if(response.data instanceof Buffer) {
writer.write(response.data)
resolve(outputLocationPath.name)
} else {
response.data.pipe(writer)
let error = null
writer.on('error', err => {
error = err
writer.close()
reject(err)
})
writer.on('close', () => {
if (!error) {
resolve(outputLocationPath.name)
}
})
}
})
}
Here is a very simple Jest test:
it('when downloadFile should downloaded', () => {
downloadFile('https://i.ytimg.com/vi/HhpbzPMCKDc/hq720.jpg').then((file) => {
console.log('file', file)
expect(file).toBeTruthy()
expect(file.length).toBeGreaterThan(10)
})
})
Lorenzo's response is probably the best answer as it's using the axios built-in.
Here's a simple way to do it if you just want the buffer:
const downloadFile = url => axios({ url, responseType: 'stream' })
.then(({ data }) => {
const buff = []
data.on('data', chunk => buff.push(chunk))
return new Promise((resolve, reject) => {
data.on('error', reject)
data.on('close', () => resolve(Buffer.concat(buff)))
})
})
// better
const downloadFile = url => axios({ url, responseType: 'arraybuffer' }).then(res => res.data)
const res = await downloadFile(url)
fs.writeFileSync(downloadDestination, res)
I'd still probably use the 'arraybuffer' responseType
There is a much simpler way that can be accomplished in a couple of lines:
const fileResponse = await axios({
url: fileUrl,
method: "GET",
responseType: "stream",
});
// Write file to disk (here I use fs.promise but you can use writeFileSync it's equal
await fsPromises.writeFile(filePath, fileResponse.data);
Axios has internal capacity of handling streams and you don't need to necessarily meddle with low-level Node APIs for that.
Check out https://axios-http.com/docs/req_config (find the responseTypepart in the docs for all the types you can use).
If you just want the file use this
const media_data =await axios({url: url, method: "get", responseType: "arraybuffer"})
writeFile("./image.jpg", Buffer.from(media_data.data), {encoding: "binary"}, console.log)
import download from "downloadjs";
export const downloadFile = async (fileName) => {
axios({
method: "get",
url: `/api/v1/users/resume/${fileName}`,
responseType: "blob",
}).then(function (response) {
download(response.data, fileName);
});
};
it's work fine to me
This is my example code run with node js
There is a synctax error
should be writeFile not WriteFile
const axios = require('axios');
const fs = require('fs');
axios.get('http://www.africau.edu/images/default/sample.pdf', {responseType: 'blob'}).then(response => {
fs.writeFile('./my.pdf', response.data, (err) => {
if (err) throw err;
console.log('The file has been saved!');
});
});
After the file is saved it might look like in a text editor, but the file was saved properly
%PDF-1.3
%����
1 0 obj
<<
/Type /Catalog
/Outlines 2 0 R
/Pages 3 0 R
>>
endobj
2 0 obj
<<
/Type /Outlines
/Count 0
>>
endobj
3 0 obj
<<
/Type /Pages
/Count 2
/Kids [ 4 0 R 6 0 R ]
>>
endobj

how to embed an image in a JSON response

I'm using Jimp to read in a JSON string that looks like this:
As you can see the image node is a base64-encoded JPEG.
I'm able to succesfully convert it to a TIFF and save it:
Jimp.read(Buffer.from(inputImage, "base64"), function(err, image) {
image.getBuffer(Jimp.MIME_TIFF, function(error, tiff) {
context.bindings.outputBlob = tiff
...}
However, when I attempted to embed the tiff inside of a JSON object, the TIFF gets all garbled up:
const response = {
image: tiff.toString('base64'),
correlation: correlation
};
context.bindings.outputBlob = response;
Here's the full code:
const Jimp = require("jimp");
module.exports = function(context, myBlob) {
const correlation = context.bindings.inputBlob.correlation;
const inputImage = context.bindings.inputBlob.image;
const imageName = context.bindings.inputBlob.imageName;
context.log(
correlation + "Attempting to convert this image to a tiff: " + imageName
);
Jimp.read(Buffer.from(inputImage, "base64"), function(err, image) {
image.getBuffer(Jimp.MIME_TIFF, function(error, tiff) {
const response = {
image: tiff.toString('base64'),
correlation: correlation
};
context.bindings.outputBlob = response;
context.log(
correlation + "Succesfully converted " + imageName + " to tiff."
);
context.done();
});
});
};
How do we embed the tiff inside of a JSON payload?
If this output is non-negotiable, how would I render the tiff from the saved payload?
Well since you confirmed you are looking for output with context.res here is my working sample.. note that there is a maximum response size, so you can't return every image/file the way I am returning the image here
const Jimp = require('jimp')
module.exports = async function (context, req)
{
let response = {}
try
{
let url = 'https://noahwriting.com/wp-content/uploads/2018/06/APPLE-300x286.jpg'
//call function to download and resize image
response = await resizeImage(url)
}
catch (err)
{
response.type = 'application/json'
if (err.response == undefined)
{
context.log(err)
response.status = 500
response.data = err
}
else
{
response.data = err.response.data
response.status = err.response.status
context.log(response)
}
}
//response
context.res =
{
headers: { 'Content-Type': `${response.type}` },
body: response.buf
}
}
async function resizeImage(url)
{
//read image to buffer
let image = await Jimp.read(url)
//resize image
image.resize(300, Jimp.AUTO)
//save to buffer
let image_buf = await image.getBufferAsync(image.getMIME())
//image.getMIME() returns something like `image/jpeg` which is a valid Content-Type for responses.
return { 'buf': image_buf, 'type': image.getMIME() }
}
(Offtopic but I saw that you are using blob storage so..) if you plan on storing photos/files/anything in Azure Blob Storage and you want to retrieve them in some systematic way you will find out very fast that you can't query the storage and you have to deal with ugly XML. My work around to avoid this way to create a function that stores photos/files in Blob Storage but then saves the url path to the file along with the file name and any other attributes to a mongo storage. So then I can make super fast queries to retrieve an array of links, which point to the respective files.

Getting the pdf blob from url and insert to drive directly using puppeteer library and fetch

I´m trying to use puppeteer to log in a website and "download" a pdf directly to my drive. I've managed to reach the pdf page with puppeteer and I tried (between other tries) to get the blob using fetch with the cookies to send to drive. I can´t post the login information here, but if you could help me looking for an error (or more) in the code it would be great! For now, it goes to the page before pdf, gets the link, fetch with cookies and insert a pdf in drive, but the pdf is corrupted with 0 kb.
I tried setRequestInterception, getPdf (from puppeteer) and using buffer with some stuff I found on my research.
//Page before pdfPage. Here I got the link: urlPdf
//await page.goto(urlPdf);
//await page.waitForNavigation();
//const htmlPdf = await page.content();
const cookies = await page.cookies()
const opts = {
headers: {
cookie: cookies
}
};
let blob = await fetch(urlPdf,opts).then(r => r.blob());
console.log("pegou o blob")
// upload file in specific folder
var file ;
console.log("driveApi upload reached")
function blobToFile(req){
file = req.body.blob
//A Blob() is almost a File() - it's just missing the two properties below which we will add
file.lastModifiedDate = new Date();
file.name = teste.pdf;//req.body.word;
return file;
}
var folderId = myFolderId;
var fileMetadata = {
'name': 'teste.pdf',
parents: [folderId]
};
var media = {
mimeType: 'application/pdf',
body: file
};
drive.files.create({
auth: jwToken,
resource: fileMetadata,
media: media,
fields: 'id'
}, function(err, file) {
if (err) {
// Handle error
console.error(err);
} else {
console.log('File Id: ', file.data.id);
}
});
I tried many things, but the final solution I came with is posted here:
Puppeteer - How can I get the current page (application/pdf) as a buffer or file?
await page.setRequestInterception(true);
page.on('request', async request => {
if (request.url().indexOf('exibirFat.do')>0) { //This condition is true only in pdf page (in my case of course)
const options = {
encoding: null,
method: request._method,
uri: request._url,
body: request._postData,
headers: request._headers
}
/* add the cookies */
const cookies = await page.cookies();
options.headers.Cookie = cookies.map(ck => ck.name + '=' + ck.value).join(';');
/* resend the request */
const response = await request_client(options);
//console.log(response); // PDF Buffer
buffer = response;
let filename = 'file.pdf';
fs.writeFileSync(filename, buffer); //Save file
} else {
request.continue();
}
});
This solution needs: const request_client = require('request-promise-native');

Categories

Resources