I have a large list of URLs (8,000+) from which I want to download images.
I created a script that downloads the files, as long as I limit it to about 100.
If I try to download more, I get errors like:
(node:6740) UnhandledPromiseRejectionWarning: Error: read ECONNRESET
(node:3808) UnhandledPromiseRejectionWarning: Error: connect ETIMEDOUT
(node:7052) UnhandledPromiseRejectionWarning: Error: Client network socket disconnected before secure TLS connection was established
This is the code that reads my CSV of URLs:
const fotoDownload = require('./async-foto');
const csv = require('csv-parser');
const fs = require('fs');
const results = [];
fs.createReadStream('\\\\hk-nas02\\import\\Partij\\Files\\partij.csv')
  .pipe(csv({ separator: ';' }))
  .on('data', (data) => results.push(data))
  .on('end', () => {
    console.log(results.length);
    let NoPartijen = results.length;
    for (let i = 0; i < 50; i++) {
      //console.log(results[i]);
      let itemno = i;
      let path = '..\\files\\images\\' + results[i]['partij.VPARTIJNR'] + '_' + results[i]['partij.PARTIJNR'] + '_H.jpg';
      console.log(path);
      fotoDownload.fotoDownload(results[i]['partij.EXFOTOURL'], path, itemno);
    }
    console.log('Test');
  });
and calls the following code to download:
const util = require('util')
const fs = require('fs')
const axios = require("axios").default;
module.exports = {
  fotoDownload: async (url, path, itemno) => {
    try {
      const response = await axios({
        method: "GET",
        url: url,
        responseType: "stream",
      });
      await response.data.pipe(fs.createWriteStream(path));
      console.log('Start foto download' + itemno);
      return;
    } catch (err) {
      throw new Error(err);
    }
  }
}
I'm assuming I need to chop the data into chunks or something, but I'm a bit lost here. Can someone point me in the right direction?
You need to set a timeout for the image downloads and add connection keep-alive.
Something like this:
const https = require('https');

axios.defaults.timeout = 30000; // or whatever timeout you want
axios.defaults.httpsAgent = new https.Agent({ keepAlive: true });
module.exports = {
  fotoDownload: async (url, path, itemno) => {
    try {
      const response = await axios({
        method: "GET",
        url: url,
        responseType: "stream",
      });
      console.log('Start foto download ' + itemno);
      // pipe() does not return a promise, so wrap the stream events
      // to actually wait until the file has finished writing.
      await new Promise((resolve, reject) => {
        const writer = fs.createWriteStream(path);
        response.data.pipe(writer);
        writer.on('finish', resolve);
        writer.on('error', reject);
      });
    } catch (err) {
      throw new Error(err);
    }
  }
}
Note: http and https are built-in Node.js modules, so there is nothing to install; just add require('https') at the top of the file.
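As for chopping the data into chunks: the errors above are typical of opening thousands of sockets at once, so on top of the timeout it helps to limit how many downloads run in parallel. A minimal sketch of sequential batches, assuming the fotoDownload module above (the batch size of 50 is an arbitrary example):

// Download in batches so only `batchSize` sockets are open at a time.
async function downloadAll(results, batchSize = 50) {
  for (let start = 0; start < results.length; start += batchSize) {
    const batch = results.slice(start, start + batchSize);
    // Wait for the whole batch to finish before starting the next one.
    await Promise.all(batch.map((row, i) => {
      const path = '..\\files\\images\\' + row['partij.VPARTIJNR'] + '_' + row['partij.PARTIJNR'] + '_H.jpg';
      return fotoDownload.fotoDownload(row['partij.EXFOTOURL'], path, start + i)
        .catch((err) => console.error('Item ' + (start + i) + ' failed: ' + err.message));
    }));
  }
}

You can then call downloadAll(results) inside the .on('end', ...) handler instead of the for loop.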
I am trying to use capture-website, which saves screenshots of webpages to a file.
It used to work perfectly until I restarted the server.
Now the code runs without errors, but it does NOT save a screenshot to disk.
Here is my code:
import http from 'http';
import url from 'url';
import querystring from 'querystring';
import captureWebsite from 'capture-website';

const hostname = 'localhost';
const port = 8080;
var mainURL;

const server = http.createServer((req, res) => {
  res.statusCode = 200;
  res.setHeader('Content-Type', 'text/plain');
  res.end('Hello World!\n');

  var url_parts = url.parse(req.url, true);
  var query = url_parts.query;
  mainURL = query.url;
  console.log(mainURL);

  (async () => {
    try {
      await captureWebsite.file('https://' + mainURL, mainURL + ".jpg", {
        overwrite: true,
        type: 'jpeg',
        quality: 0.5,
        width: 1480,
        height: 800,
        delay: 1
      });
    } catch (err) {
      console.log(err);
    }
  })();
});

server.listen(port, hostname);
There are no errors. I have also tried running pm2 logs - no errors there either.
Here is the file-writing code that belongs to the capture-website package:
captureWebsite.file = async (url, filePath, options = {}) => {
const screenshot = await internalCaptureWebsite(url, options);
await fs.writeFile(filePath, screenshot, {
flag: options.overwrite ? 'w' : 'wx',
});
};
Your problem is most likely that you cannot use slashes (/) in file names, because slashes separate folders in a path. mainURL comes straight from the query string, so any URL that contains a path (for example example.com/some/page) turns into a file name pointing into folders that don't exist.
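A minimal sketch of one workaround, flattening the URL into a single safe file name inside your async IIFE (the underscore replacement is an arbitrary choice):

// Replace every character that is unsafe in a file name.
const safeName = mainURL.replace(/[^a-z0-9.-]/gi, '_') + '.jpg';
await captureWebsite.file('https://' + mainURL, safeName, {
  overwrite: true,
  type: 'jpeg',
  quality: 0.5,
  width: 1480,
  height: 800,
  delay: 1
});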
Here is how my API works:
You can find SeaweedFS on GitHub.
And here is my code:
// /drivers/seaweedfs.js Defines how the API interacts with SeaweedFS
const { error } = require("console");
const http = require("http");

module.exports = class Weed {
  constructor(mserver) {
    this.mserver = new URL(mserver);
  }

  get(fileId) {
    return new Promise((resolve, reject) => {
      let options = {
        hostname: this.mserver.hostname,
        port: this.mserver.port,
        method: "GET",
        path: `/${fileId}`,
        timeout: 6000,
      };
      let data;
      const fileReq = http.request(options, (res) => {
        console.log(`Statuscode ${res.statusCode}`);
        res.on("data", (response) => {
          data += response;
        });
        res.on("end", () => {
          resolve(data);
        });
      });
      fileReq.on("error", () => {
        console.error(error);
        reject();
      });
      fileReq.end();
    });
  }
};
// /routes/file.js An Express router
const express = require("express");
const router = express.Router();
const Weed = require("../drivers/seaweedfs");

let weedClient = new Weed("http://localhost:60002");

router.get("/:fileId", (req, res) => {
  weedClient.get(req.params.fileId)
    .then((data) => {
      res.write(data);
      res.end();
    });
});

module.exports = router;
MongoDB driver not yet implemented.
When I try to GET a file (using Firefox; Hoppscotch says "Could not send request: Unable to reach the API endpoint. Check your network connection and try again."), I get something whose MIME type is application/octet-stream for some reason, and it's bigger than the original file. I know there must be some problems with my code, but I don't know where they are or how to fix them.
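For what it's worth, one likely culprit in the driver above (an educated guess, not a confirmed fix): data starts out undefined, and data += response coerces each Buffer chunk to a string, which both prepends the literal text "undefined" and corrupts binary bytes. That would explain an application/octet-stream body that is larger than the original file. A sketch of a buffer-safe get method (also rejecting with the actual error):

get(fileId) {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: this.mserver.hostname,
      port: this.mserver.port,
      method: "GET",
      path: `/${fileId}`,
      timeout: 6000,
    };
    const chunks = [];
    const fileReq = http.request(options, (res) => {
      res.on("data", (chunk) => {
        chunks.push(chunk); // keep raw Buffers; no string coercion
      });
      res.on("end", () => {
        resolve(Buffer.concat(chunks)); // reassemble the binary body
      });
    });
    fileReq.on("error", (err) => {
      reject(err); // surface the real error
    });
    fileReq.end();
  });
}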
At present I'm performing the trick of piping a request req to a destination url, and piping the response back to res, like so:
const request = require('request');

const url = 'http://some.url.com' + req.originalUrl;
const destination = request(url);

// pipe req to destination...
const readableA = req.pipe(destination);
readableA.on('end', function () {
  // do optional stuff on end
});

// pipe response to res...
const readableB = readableA.pipe(res);
readableB.on('end', function () {
  // do optional stuff on end
});
Since request is now officially deprecated (boo hoo), is this trick at all possible using the gaxios library? I thought that setting responseType: 'stream' on the request would do something similar to the above, but it doesn't seem to work.
Similarly, can gaxios be used in the following context:
request
  .get('https://some.data.com')
  .on('error', function (err) {
    console.log(err);
  })
  .pipe(unzipper.Parse())
  .on('entry', myEntryHandlerFunction);
Install gaxios:
npm install gaxios
Then use request with the Readable type parameter and with responseType set to 'stream':
// script.ts
import { request } from 'gaxios';
import { Readable } from 'stream';
import unzipper from 'unzipper';

(await request<Readable>({
  url: 'https://some.data.com',
  responseType: 'stream'
})).data
  .on('error', function (err) {
    console.log(err);
  })
  .pipe(unzipper.Parse())
  .on('entry', myEntryHandlerFunction);
// first-example.ts
import { request } from 'gaxios';
import { Readable } from 'stream';

// pipe req to destination...
const readableA = (await request<Readable>({
  url: 'http://some.url.com',
  method: 'POST',
  data: req, // suppose `req` is a readable stream
  responseType: 'stream'
})).data;
readableA.on('end', function () {
  // do optional stuff on end
});

// pipe response to res...
const readableB = readableA.pipe(res);
readableB.on('end', function () {
  // do optional stuff on end
});
Gaxios is a stable tool used in the official Google API client libraries. It's built on the stable node-fetch and comes with TypeScript definitions out of the box. I switched to it from the deprecated request and from the plain node-fetch library.
I guess if you provide responseType as stream and use res.data, you will get a stream that you can pipe, like this:
const { request } = require("gaxios");
const fs = require("fs");
const { createGzip } = require("zlib");

const gzip = createGzip();

(async () => {
  const res = await request({
    "url": "https://www.googleapis.com/discovery/v1/apis/",
    "responseType": "stream"
  });
  const fileStream = fs.createWriteStream("./input.json.gz");
  res.data.pipe(gzip).pipe(fileStream);
})();
Looks like you are basically trying to forward requests from your Express server to the client. This worked for me:
import { request } from "gaxios";
import express from "express";

const app = express();
const port = 3000;

app.get("/", async (req: any, res: any) => {
  const readableA = (await request({
    url: "https://google.com",
    responseType: "stream",
  })) as any;
  console.log(req, readableA.data);
  const readableB = await readableA.data.pipe(res);
  console.log(res, readableB);
});

app.listen(port, () => {
  console.log(`Example app listening at http://localhost:${port}`);
});
I imagine more complicated responses from A will require more nuance in how you pipe them. But then you can probably just interact with Express's response object directly and set the appropriate fields.
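For instance, a sketch of forwarding the upstream status and headers before piping (assuming gaxios exposes status and headers as plain values, as it did at the time of writing):

app.get("/", async (req: any, res: any) => {
  const upstream = await request<any>({
    url: "https://google.com",
    responseType: "stream",
  });
  res.status(upstream.status); // mirror the upstream status code
  res.set(upstream.headers);   // copy the upstream headers
  upstream.data.pipe(res);     // stream the body through unchanged
});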
I have a problem with my code. I need to fetch a lot of data from an API. First I had a problem with memory, and now I see a connect ETIMEDOUT error.
My code:
var fetch = require('node-fetch');
const async = require('async');
const request = require('request');
var jsonfile = require('jsonfile');
var file = '/temp/data.json';

let urls = [];
for (var i = 1; i <= 608469; i++) {
  let url = "https://api.demo.com/v2.1/people?user_key=auth_key&page=" + i + "&sort_order=created_at%20DESC";
  urls.push(url);
}

function httpGet(url, callback) {
  const options = {
    url: url,
    json: true
  };
  request(options, function (err, res, body) {
    callback(err, body);
  });
}

async.map(urls, httpGet, function (err, res) {
  if (err) return console.log(err);
  console.log(typeof (res));
  jsonfile.writeFileSync(file, res);
});
Just replace async's map method with mapLimit (or eachLimit if you don't need the collected results; since your code writes res to a file, you want mapLimit here). The key is the limit: it still makes an HTTP request for each link, but no more than limit of them run in parallel. Here is the doc: https://caolan.github.io/async/docs.html#eachLimit
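A minimal sketch against the code above (the limit of 10 is an arbitrary example):

// Run at most 10 requests at a time; results are collected in input order.
async.mapLimit(urls, 10, httpGet, function (err, res) {
  if (err) return console.log(err);
  jsonfile.writeFileSync(file, res);
});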
How do I download an audio file from a URL and store it in a local directory?
I'm using Node.js, and I tried the following code:
var http = require('http');
var fs = require('fs');

var dest = 'C./test';
var url = 'http://static1.grsites.com/archive/sounds/comic/comic002.wav';

function download(url, dest, callback) {
  var file = fs.createWriteStream(dest);
  var request = http.get(url, function (response) {
    response.pipe(file);
    file.on('finish', function () {
      file.close(callback); // close() is async; call callback after close completes.
    });
    file.on('error', function (err) {
      fs.unlink(dest); // Delete the file async. (But we don't check the result.)
      if (callback) callback(err.message);
    });
  });
}
No error occurred, but the file is nowhere to be found.
Duplicate of How to download a file with Node.js (without using third-party libraries)?, but here is the code specific to your question:
var http = require('http');
var fs = require('fs');

var file = fs.createWriteStream("file.wav");
var request = http.get("http://static1.grsites.com/archive/sounds/comic/comic002.wav", function (response) {
  response.pipe(file);
});
Your code is actually fine; you just never call the download function. Try adding this to the end:
download(url, dest, function (err) {
  if (err) {
    console.error(err);
  } else {
    console.log("Download complete");
  }
});
Also, change the value of dest to something else, like just "test.wav". 'C./test' is a bad path: it points at a file named test inside a folder literally called "C.", which almost certainly doesn't exist.
I tried it on my machine, and your code works fine after adding the call and changing dest.
Here is an example using Axios with an API that may require authorization:
const Fs = require('fs');
const Path = require('path');
const Axios = require('axios');

async function download(url) {
  let filename = "filename";
  const username = "user";
  const password = "password";
  // Basic auth header: base64 of "user:password"
  const key = Buffer.from(username + ':' + password).toString("base64");
  const path = Path.resolve(__dirname, "audio", filename);

  const response = await Axios({
    method: 'GET',
    url: url,
    responseType: 'stream',
    headers: { 'Authorization': 'Basic ' + key }
  });

  response.data.pipe(Fs.createWriteStream(path));

  // Resolve only once the stream has been fully consumed.
  return new Promise((resolve, reject) => {
    response.data.on('end', () => {
      resolve();
    });
    response.data.on('error', (err) => {
      reject(err);
    });
  });
}
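A usage sketch (the filename, username, and password above are placeholders, so substitute your own before running):

download('http://static1.grsites.com/archive/sounds/comic/comic002.wav')
  .then(() => console.log('Download complete'))
  .catch((err) => console.error('Download failed:', err));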