Can't download multiple images from array using axios - javascript

const Fs = require('fs')
const Path = require('path')
const Axios = require('axios')

var dir = './tmp';

async function downloadImage () {
  if (!Fs.existsSync(dir)) {
    Fs.mkdirSync(dir);
  }
  var arr = [
    'https://reaperscans.com/wp-content/uploads/WP-manga/data/manga_6295b8da2aa90/5461fc34b58cd174c806625056c6e0dc/01-copy.jpg',
    'https://reaperscans.com/wp-content/uploads/WP-manga/data/manga_6295b8da2aa90/5461fc34b58cd174c806625056c6e0dc/02-copy.jpg',
    'https://reaperscans.com/wp-content/uploads/WP-manga/data/manga_6295b8da2aa90/5461fc34b58cd174c806625056c6e0dc/03-copy.jpg'
  ]
  for (var i = 0; i < arr.length; i++) {
    var url = arr[i]
    var name = i + '.jpg'
    var path = Path.resolve(__dirname, dir, name)
    var writer = Fs.createWriteStream(path)
    var response = await Axios({
      url,
      method: 'GET',
      responseType: 'stream'
    })
    response.data.pipe(writer)
    return new Promise((resolve, reject) => {
      writer.on('finish', resolve)
      writer.on('error', reject)
    })
  }
}
downloadImage()
This is the code I am using to download the images. When I try to download multiple images whose links are in an array, it only downloads the first one, i.e. arr[0], and I can't figure out what the problem is. It doesn't give any error, and I can download single images individually, just not in bulk.

So, I played around with this code snippet and was able to fix it by moving the completion promise inside the loop instead of returning it. You were right, it would only complete one GET request and then terminate: `return` inside the for loop exits the whole function after the first image. Awaiting each write before moving on runs through all three and saves them in your ./tmp directory.

const Fs = require("fs");
const Path = require("path");
const Axios = require("axios");

var dir = "./tmp";

async function downloadImage() {
  if (!Fs.existsSync(dir)) {
    Fs.mkdirSync(dir);
  }
  var arr = [
    "https://reaperscans.com/wp-content/uploads/WP-manga/data/manga_6295b8da2aa90/5461fc34b58cd174c806625056c6e0dc/01-copy.jpg",
    "https://reaperscans.com/wp-content/uploads/WP-manga/data/manga_6295b8da2aa90/5461fc34b58cd174c806625056c6e0dc/02-copy.jpg",
    "https://reaperscans.com/wp-content/uploads/WP-manga/data/manga_6295b8da2aa90/5461fc34b58cd174c806625056c6e0dc/03-copy.jpg"
  ];
  for (var i = 0; i < arr.length; i++) {
    var url = arr[i];
    // for debugging
    console.log(url);
    var name = i + ".jpg";
    var path = Path.resolve(__dirname, dir, name);
    var writer = Fs.createWriteStream(path);
    var response = await Axios({
      url,
      method: "GET",
      responseType: "stream"
    });
    response.data.pipe(writer);
    // await this download before starting the next one, instead of returning
    await new Promise((resolve, reject) => {
      writer.on("finish", resolve);
      writer.on("error", reject);
    });
  }
}
downloadImage();
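If sequential downloads are too slow, a variant of the same idea (a sketch, not part of the original answer) fires all the requests at once with Promise.all and resolves once every write stream has finished:

// A sketch: download all images concurrently; reuses Fs, Path,
// Axios, and dir from the snippet above.
async function downloadImagesConcurrently(urls) {
  await Promise.all(urls.map(async (url, i) => {
    const writer = Fs.createWriteStream(Path.resolve(__dirname, dir, i + ".jpg"));
    const response = await Axios({ url, method: "GET", responseType: "stream" });
    response.data.pipe(writer);
    // resolve when this file is fully written, reject on stream errors
    await new Promise((resolve, reject) => {
      writer.on("finish", resolve);
      writer.on("error", reject);
    });
  }));
}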

Related

Iteration over createReadStream in Node.js using WebDAV downloads empty files

I'm trying to download some files from a folder on Nextcloud, using WebDAV. I want to iterate over the folder and download all the files in it. Here is my code:
let dir = "/foo"
let folder = await WebDav.getDirectoryContents("bar")
folder is returned as an array.
for (let i = 0; i < folder.length; i++) {
  await WebDav.createReadStream(folder[0].filename).pipe(
    fs.createWriteStream(`${dir}/${folder[0].basename}`)
  );
}
The files are created with the correct names, but they have no content and their sizes are zero KB. But when I put the pipe outside of the for loop, it works fine and downloads the file.
Use this code, it worked for me:
let fs = require('fs');
let { createClient } = require("webdav");

let start = async () => {
  let client = createClient(
    "https://*/on/demandware.servlet/webdav/Sites/Logs",
    {
      username: "*",
      password: "*"
    }
  );
  let directories = await client.getDirectoryContents("/");
  let files = await directories;
  //console.log(files);
  for (let i = 0; i < files.length; i++) {
    let fileName = files[i].filename;
    if (fileName.includes('.log')) {
      let readStream = client.createReadStream(fileName);
      // wait for 5 seconds, then pipe
      setTimeout(function() {
        let writeStream = readStream.pipe(fs.createWriteStream("C:/Users/*" + fileName));
        writeStream.on('finish', function() {
          console.log('all done!');
        });
        writeStream.on('error', function(err) {
          console.error(err);
        });
      }, 5000);
    }
  }
}
start();
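A cleaner alternative to the fixed 5-second delay (a sketch, assuming the same webdav client setup as above and Node 15+ for stream/promises) is to await each download explicitly:

const fs = require("fs");
const { pipeline } = require("stream/promises");

async function downloadLogs(client, destDir) {
  const files = await client.getDirectoryContents("/");
  for (const file of files) {
    if (!file.filename.includes(".log")) continue;
    // pipeline resolves when the write side finishes and rejects
    // if either stream errors, so no arbitrary timeout is needed
    await pipeline(
      client.createReadStream(file.filename),
      fs.createWriteStream(`${destDir}/${file.basename}`)
    );
    console.log(`downloaded ${file.filename}`);
  }
}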

Nodejs Scraper isn't moving to next page(s)

Hey guys, this is a follow-on from my other question. I have created a Node.js scraper that doesn't seem to want to go through the pages; it stays on the first. My source code is below.
const rp = require('request-promise');
const request = require('request');
const otcsv = require('objects-to-csv');
const cheerio = require('cheerio');

// URL to scrape
const baseURL = 'xxx';
const searchURL = 'xxxx';

// scrape info
const getCompanies = async () => {
  // Pagination test
  for (let index = 1; index <= 20; index = index + 1) {
    const html = await rp.get(baseURL + searchURL + index);
    const $ = await cheerio.load(html);
    console.log("Loading Pages....");
    console.log("At page number " + index);
    // end pagination test
    //const htmls = await rp(baseURL + searchURL);
    const businessMap = cheerio('a.business-name', html).map(async (i, e) => {
      const link = baseURL + e.attribs.href;
      const innerHtml = await rp(link);
      const emailAddress = cheerio('a.email-business', innerHtml).prop('href');
      const name = e.children[0].data || cheerio('h1', innerHtml).text();
      const phone = cheerio('p.phone', innerHtml).text();
      return {
        // link,
        name,
        emailAddress: emailAddress ? emailAddress.replace('mailto:', '') : '',
        phone,
      }
    }).get();
    return Promise.all(businessMap);
  }
};

console.log("Finished Scraping.... Now Saving!")

// save to CSV
getCompanies()
  .then(result => {
    const transformed = new otcsv(result);
    return transformed.toDisk('./output.csv');
  })
  .then(() => console.log('Scrape Complete :D '));
As you can see, I have tried a few different ways to make this happen, so any help will be gratefully appreciated.
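For reference, the likely culprit is the same pattern as in the axios question above: `return Promise.all(businessMap)` sits inside the for loop, so getCompanies returns after the first page. A sketch (not from the original thread; same URLs and selectors assumed) that accumulates results and returns only after the loop:

const getCompanies = async () => {
  const results = [];
  for (let index = 1; index <= 20; index++) {
    const html = await rp.get(baseURL + searchURL + index);
    console.log("At page number " + index);
    const businessMap = cheerio('a.business-name', html).map(async (i, e) => {
      const innerHtml = await rp(baseURL + e.attribs.href);
      const emailAddress = cheerio('a.email-business', innerHtml).prop('href');
      return {
        name: e.children[0].data || cheerio('h1', innerHtml).text(),
        emailAddress: emailAddress ? emailAddress.replace('mailto:', '') : '',
        phone: cheerio('p.phone', innerHtml).text(),
      };
    }).get();
    // wait for this page's detail requests, then keep paginating
    results.push(...(await Promise.all(businessMap)));
  }
  return results; // return only once every page has been scraped
};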

Asynchronous request for web crawler

I have an array of URLs, and from each of them I want to crawl an HTML table and save it in another array, in the same order as the original array.
Due to the asynchronous nature of Node I assume it's not working as I expect; the results are in a different order every time.
I googled a lot and tried different things, like using a custom async forEach function or request-promise instead of request, but nothing worked.
const request = require('request');
const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require('fs');

let verbs = [];
let conjugations = [];

fs.readFileSync('verbs.txt', 'utf-8').split(/\r?\n/).forEach(function(line) {
  verbs.push(line);
});

verbs.forEach((verb) => {
  const URI = encodeURI("https://ru.wiktionary.org/wiki/" + verb);
  var options = {
    uri: URI,
    transform: function (body) {
      return cheerio.load(body);
    }
  };
  rp(options).then(function ($) {
    let table = $('span#Русский.mw-headline').parent().nextAll('table').first();
    conjugations.push(table.text());
    console.log(conjugations[0]);
  })
  .catch(function (err) {
  });
})
Use Promise.all if the order is important.
The Promise.all() method returns a single Promise that resolves when all of the promises passed as an iterable have resolved or when the iterable contains no promises. It rejects with the reason of the first promise that rejects.
Example of keeping things in order:
const verbs = ["hello", "world", "example"];
let timeout = 2000;
const promises = verbs.map(verb => {
  timeout -= 500;
  return new Promise((resolve, reject) => {
    setTimeout(function() {
      resolve(verb);
    }, timeout);
  });
});
Promise.all(promises).then(dataArray => console.log(dataArray));
Solution with your code:
const promises = verbs.map((verb) => {
  const URI = encodeURI("https://ru.wiktionary.org/wiki/" + verb);
  var options = {
    uri: URI,
    transform: function(body) {
      return cheerio.load(body);
    }
  };
  return rp(options);
});
Promise.all(promises).then(dataArray => {
  dataArray.forEach(function($) {
    let table = $('span#Русский.mw-headline').parent().nextAll('table').first();
    conjugations.push(table.text());
    console.log(conjugations[0]);
  })
}).catch(function(err) {});
Downside: if one request fails, they all fail.
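If losing everything on a single failure is a concern, one mitigation (a sketch, not in the original answer; Promise.allSettled needs Node 12.9+) is to collect per-request outcomes instead:

// A sketch: allSettled never rejects; each entry is either
// { status: 'fulfilled', value } or { status: 'rejected', reason }.
Promise.allSettled(promises).then(results => {
  results.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      const $ = result.value;
      let table = $('span#Русский.mw-headline').parent().nextAll('table').first();
      conjugations[index] = table.text(); // order preserved via index
    } else {
      console.error('request ' + index + ' failed:', result.reason);
    }
  });
});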
Alternatively, you could do something like this by using the index of each verb (using Promise.all only to determine when everything is done; that step can be ignored...):
const verbs = ["hello", "world", "example"];
const conjugations = [];
let timeout = 2000;
const promises = verbs.map((verb, index) => {
  return new Promise((resolve, reject) => {
    setTimeout(function() {
      conjugations[index] = verb;
      resolve();
    }, timeout);
    timeout -= 500;
  });
});
Promise.all(promises).then(() => console.log(conjugations));
Example with your code.
const request = require('request');
const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require('fs');

let verbs = [];
let conjugations = [];

fs.readFileSync('verbs.txt', 'utf-8').split(/\r?\n/).forEach(function(line) {
  verbs.push(line);
});

verbs.forEach((verb, index) => {
  const URI = encodeURI("https://ru.wiktionary.org/wiki/" + verb);
  var options = {
    uri: URI,
    transform: function(body) {
      return cheerio.load(body);
    }
  };
  rp(options).then(function($) {
    let table = $('span#Русский.mw-headline').parent().nextAll('table').first();
    conjugations[index] = table.text();
    console.log(conjugations[index]);
  })
  .catch(function(err) {});
});

module.exports is returning undefined

I am new to Node.js, and today I was trying to read data from a file, data.json.
Here is the JSON file:
{"username":"rahul_v7","password":"9673"} {"username":"7vik","password":"3248"} {"username":"pradypot_2","password":"6824"} {"username":"ad_1","password":"9284"} {"username":"premchand_4","password":"4346"}
And I was using the code below, in a file GetData.js, to read the data present in data.json:
'use strict';
const fs = require('fs');

let res = '', resObjs = [];
let fin = fs.createReadStream('F:/RahulVerma/NodeJS/data.json', 'utf-8');
fin.on('data', data => {
  if (data.length > 0) res += data;
}).on('end', () => {
  if (res.length > 0) {
    let resArr = res.trim().split(' ');
    for (let i = 0; i < resArr.length; i++) {
      resObjs.push(JSON.parse(resArr[i]));
    }
    module.exports.objects = resObjs;
  }
});
As you can see, I am exporting the resObjs array, which is actually an array of objects, to another file named AppendData.js, which is given below:
'use strict';
const fs = require('fs');
const getObjs = require('./GetData');
console.log(getObjs.objects);
But when I run AppendData.js in Node.js 9.3.0 (ia32), it just prints undefined.
You're trying to use the objects before they've been read. Remember that your code reading the stream runs asynchronously, and nothing in your code attempts to coordinate it with module loading. So AppendData.js isn't seeing the objects export because it doesn't exist yet as of when that code runs.
Instead, return a promise of the objects that AppendData.js can consume; see *** comments:
'use strict';
const fs = require('fs');

// *** Export the promise
module.exports.objectsPromise = new Promise((resolve, reject) => {
  let res = '', resObjs = [];
  let fin = fs.createReadStream('F:/RahulVerma/NodeJS/data.json', 'utf-8');
  fin.on('data', data => {
    if (data.length > 0) res += data;
  }).on('end', () => {
    if (res.length > 0) {
      let resArr = res.trim().split(' ');
      for (let i = 0; i < resArr.length; i++) {
        resObjs.push(JSON.parse(resArr[i]));
      }
      resolve(resObjs); // *** Resolve the promise
    }
  }).on('error', error => {
    reject(error); // *** Reject the promise
  });
});
Note I added a handler for errors.
And then:
'use strict';
const fs = require('fs');
const getObjs = require('./GetData');
getObjs.objectsPromise
  .then(console.log)
  .catch(error => {
    // Do something
  });
Again note the error handler.
The problem happens because you're trying to use the objects in AppendData.js before they are loaded in GetData.js, since fs.createReadStream is asynchronous. To fix this, just make module.exports a function that expects a callback in GetData.js, like:
'use strict';
const fs = require('fs');

module.exports = function(callback) {
  let res = '', resObjs = [];
  let fin = fs.createReadStream('F:/RahulVerma/NodeJS/data.json', 'utf-8');
  fin.on('data', data => {
    if (data.length > 0) res += data;
  }).on('end', () => {
    if (res.length > 0) {
      let resArr = res.trim().split(' ');
      for (let i = 0; i < resArr.length; i++) {
        resObjs.push(JSON.parse(resArr[i]));
      }
      callback(resObjs); // call the callback with the array of results
    }
  });
}
Which you can then use like this in AppendData.js:
'use strict';
const fs = require('fs');
const getObjs = require('./GetData'); // getObjs is now a function
getObjs(function(objects) {
  console.log(objects);
});
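Since the whole file ends up in memory anyway, a shorter variant (a sketch, assuming Node 10+ where fs.promises is available) reads it in one call and exports the resulting promise:

'use strict';
const fs = require('fs');

// A sketch: read the whole file, then split on spaces and parse
// each chunk, exactly as the answers above do.
module.exports.objectsPromise = fs.promises
  .readFile('F:/RahulVerma/NodeJS/data.json', 'utf-8')
  .then(res => res.trim().split(' ').map(s => JSON.parse(s)));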

Node.js: parse a JSON file, transform the input, and write to a file as a JSON array

I need to read a very big location-history file, extract some data, and write it to a file as JSON. How can I do that?
The following code doesn't generate any output.
Edit:
I expect string output in the file, because it's piped into fileOutputStream.
const fs = require('fs')
var JSONStream = require('JSONStream');
var es = require('event-stream');

const filePath = './location-history.json'
const fileOutputPath = './transform-location-history.json'

fileStream = fs.createReadStream(filePath);
fileOutputStream = fs.createWriteStream(fileOutputPath)

const transformer = (data) => {
  const location = {
    latitude: data.latitudeE7 / 10000000,
    longitude: data.longitudeE7 / 10000000
  }
  return JSON.stringify(location);
}

fileStream
  .pipe(JSONStream.parse('locations.*'))
  .pipe(es.through(transformer))
  .pipe(fileOutputStream)
This is my solution to my problem. JSONStream parses the input file and emits JSON objects. The es.through(transformer) takes each JSON object and writes it to the file as a string. To make the output file importable as an ES module, 'export default locationHistory' is appended.
https://gist.github.com/tuncatunc/35e5449905159928e718d82c06bc66da
const fs = require('fs')
const JSONStream = require('JSONStream');
var es = require('event-stream');

const filePath = './location-history.json'
const fileOutputPath = './transform-location-history.js'
const fileStream = fs.createReadStream(filePath);
const fileOutputStream = fs.createWriteStream(fileOutputPath)

let index = 0;

const transformer = (data) => {
  const location = {
    latitude: data.latitudeE7 / 10000000,
    longitude: data.longitudeE7 / 10000000
  };
  let result = JSON.stringify(location) + ',';
  if (index === 0) {
    result = 'const locationHistory = [' + result
  }
  index++;
  if (index < 100)
    fileOutputStream.write(result);
}

const end = () => {
  const finish = ']; export default locationHistory\n'
  fileOutputStream.write(finish, () => {
    fileOutputStream.close()
  })
  console.log(`${index} objects are written to file`)
}

fileStream
  .pipe(JSONStream.parse('locations.*'))
  .pipe(es.through(transformer, end))
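As an aside on why the original snippet stayed silent: if I read event-stream's API correctly, es.through(write) ignores write's return value and expects the function to emit data itself, whereas es.mapSync forwards the return value downstream. So a minimal sketch along the original pipe-through design might be:

const fs = require('fs');
const JSONStream = require('JSONStream');
const es = require('event-stream');

// A sketch: es.mapSync pushes the transformer's return value
// downstream, so the stringified locations reach the output file.
fs.createReadStream('./location-history.json')
  .pipe(JSONStream.parse('locations.*'))
  .pipe(es.mapSync(data => JSON.stringify({
    latitude: data.latitudeE7 / 10000000,
    longitude: data.longitudeE7 / 10000000
  }) + '\n'))
  .pipe(fs.createWriteStream('./transform-location-history.json'));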
