Related
I've been working on a scraper. Now I've created two scrapers which are creating one array of objects each.
Now I've managed to scrape data, put them in seperate array, created component parseData where I'm sending these two arrays of objects inside it. And I now want to create a final one array of objects which will be sent to DB for inserting data.
Now here is where I'm trying to merge these two arrays:
const result = []
exports.parseData = (apartments) => {
result.push(apartments);
console.log(result);
};
And this doesn't give me output that I expected. I expected two arrays to be merged not pushed into one. And I understand why is that happening - because of push method
Individual outputs of these two are:
apartments in olxScraper
[
{
id: '41676279',
link: 'www.link.com',
description: 'Description of apartment',
price: 387000,
picture: 'link to picture',
squaremeters: 86,
pricepersquaremeter: 4500
},
{
//same object as before but different data
}
]
apartments in santScraper
[
{
id: '3524',
link: 'www.linkSecondScraper.com',
description: 'Description',
price: 150000,
picture: 'picture',
squaremeters: 55,
pricepersquaremeter: 2727
},
{
//same object as before but different data
}
]
They are the same arrays with same key:value pairs in the objects. Only difference is number of objects inside of a array.
Now What would be proper way to merge these two? I just want one array that contains objects from these two arrays.
Also here is from where arrays are created:
const parsing = require('../data-functions/parseData');
const axios = require('axios');
const cheerio = require('cheerio');
exports.olxScraper = (count) => {
const url = `https://www.olx.ba/pretraga?vrsta=samoprodaja&kategorija=23&sort_order=desc&kanton=9&sacijenom=sacijenom&stranica=${count}`;
const apartments = [];
const getRawData = async () => {
try {
await axios.get(url).then((res) => {
const $ = cheerio.load(res.data);
$('div[id="rezultatipretrage"] > div')
.not('div[class="listitem artikal obicniArtikal i index"]')
.not('div[class="obicniArtikal"]')
.each((index, element) => {
$('span[class="prekrizenacijena"]').remove();
const getLink = $(element)
.find('div[class="naslov"] > a')
.attr('href');
const getDescription = $(element)
.find('div[class="naslov"] > a > p')
.text();
const getPrice = $(element)
.find('div[class="datum"] > span')
.text()
.replace(/\.| ?KM$/g, '')
.replace(' ', '');
const getPicture = $(element)
.find('div[class="slika"] > img')
.attr('src');
apartments[index] = {
id: getLink.substring(27, 35),
link: getLink,
description: getDescription,
price: parseFloat(getPrice),
picture: getPicture,
};
});
const fetchSquaremeters = Promise.all(
apartments.map((item) => {
return axios.get(item.link).then((response) => {
const $ = cheerio.load(response.data);
const getSquaremeters = $('div[class="df2 "]')
.first()
.text()
.replace('m2', '')
.replace(',', '.')
.split('-')[0];
item.squaremeters = parseFloat(getSquaremeters);
item.pricepersquaremeter = Math.round(
parseFloat(item.price) / parseFloat(getSquaremeters)
);
});
})
);
fetchSquaremeters.then(() => {
//console.log(olxApartments);
parsing.parseData(apartments);
});
});
} catch (error) {
console.log(error);
}
};
getRawData();
};
const parsing = require('../data-functions/parseData');
const axios = require('axios');
const cheerio = require('cheerio');
exports.santScraper = (count) => {
const url = `https://www.sant.ba/pretraga/prodaja-1/tip-2/cijena_min-20000/stranica-${count}`;
const apartments= [];
const getRawData = async () => {
try {
await axios.get(url).then((response) => {
const $ = cheerio.load(response.data);
$('div[class="col-xxs-12 col-xss-6 col-xs-6 col-sm-6 col-lg-4"]').each(
(index, element) => {
const getLink = $(element).find('a[class="re-image"]').attr('href');
const getDescription = $(element).find('a[class="title"]').text();
const getPrice = $(element)
.find('div[class="prices"] > h3[class="price"]')
.text()
.replace(/\.| ?KM$/g, '')
.replace(',', '.');
const getPicture = $(element).find('img').attr('data-original');
const getSquaremeters = $(element)
.find('span[class="infoCount"]')
.first()
.text()
.replace(',', '.')
.split('m')[0];
const pricepersquaremeter =
parseFloat(getPrice) / parseFloat(getSquaremeters);
apartments[index] = {
id: getLink.substring(42, 46),
link: getLink,
description: getDescription,
price: Math.round(getPrice),
picture: getPicture,
squaremeters: Math.round(getSquaremeters),
pricepersquaremeter: Math.round(pricepersquaremeter),
};
}
);
parsing.parseData(apartments);
});
} catch (error) {
console.log(console.log(error));
}
};
getRawData();
};
EDIT
Also one problem: I need somehow to tell this function to wait for both arrays to come in, because it first pushes the one array alone, then when second gets in, they are both pushed in.
[
[
{//objects of sant}
]
]
[
[
{//objects of sant}
],
[
{//objects of olx}
]
]
Your parseData function excepts 2 arguments.
exports.parseData = (olxApartments, santApartments) => {
const result = [...olxApartments, ...santApartments];
console.log(result);
};
But in both cases where you call the function, you are only passing it one argument.
parsing.parseData(olxApartments);
parsing.parseData(santApartments);
So you are getting this error because inside the parseData function, santApartments is undefined and therefore not iterable. Both times you are calling the function, you are only passing one argument to it, which is then referenced as olxApartments.
parseData does not store the values of these arguments. If you want to combine olxApartments and santApartments, you need to create an array at a higher scope than this function, and push the relevant data to that array.
Adding info based on questions. The following code might help you:
const apartmentsData = [];
const getOlxApartments = () => {
let olxApartments = [];
olxApartments.push({
id: "41676279",
});
olxApartments.push({
id: "41676280",
});
parseData(olxApartments);
};
const getSantApartments = () => {
let santApartments = [];
santApartments.push({
id: "3524",
});
santApartments.push({
id: "3521",
});
parseData(santApartments);
};
const parseData = (apartments) => {
apartmentsData.push(...apartments);
};
getOlxApartments();
getSantApartments();
console.log(apartmentsData);
// Array(4)
// 0: {id: "41676279"}
// 1: {id: "41676280"}
// 2: {id: "3524"}
// 3: {id: "3521"}
I need to create a new array from another with the condition:
for example from an array
mainArr: [
{
"id":1,
"name":"root"
},
{
"id":2,
"parentId":1,
"name":"2"
},
{
"id":148,
"parentId":2,
"name":"3"
},
{
"id":151,
"parentId":148,
"name":"4"
},
{
"id":152,
"parentId":151,
"name":"5"
}
]
I need to make an array ['1','2','148','151'] which means the path from "parentId"'s to "id":152 - (argument for this function).
I think main logic can be like this:
const parentsArr = [];
mainArr.forEach((item) => {
if (item.id === id) {
parentsArr.unshift(`${item.parentId}`);
}
and the result {item.parentId} should be used to iterate again. But I don't understand how to do it...
You could use a recursive function for this. First you can transform your array to a Map, where each id from each object points to its object. Doing this allows you to .get() the object with a given id efficiently. For each object, you can get the parentId, and if it is defined, rerun your traverse() object again searching for the parent id. When you can no longer find a parentid, then you're at the root, meaning you can return an empty array to signify no parentid object exist:
const arr = [{"id":1,"name":"root"},{"id":2,"parentId":1,"name":"2"},{"id":148,"parentId":2,"name":"3"},{"id":151,"parentId":148,"name":"4"},{"id":152,"parentId":151,"name":"5"}];
const transform = arr => new Map(arr.map((o) => [o.id, o]));
const traverse = (map, id) => {
const startObj = map.get(+id);
if("parentId" in startObj)
return [...traverse(map, startObj.parentId), startObj.parentId];
else
return [];
}
console.log(traverse(transform(arr), "152"));
If you want to include "152" in the result, you can change your recursive function to use the id argument, and change the base-case to return [id] (note that the + in front of id is used to convert it to a number if it is a string):
const arr = [{"id":1,"name":"root"},{"id":2,"parentId":1,"name":"2"},{"id":148,"parentId":2,"name":"3"},{"id":151,"parentId":148,"name":"4"},{"id":152,"parentId":151,"name":"5"}];
const transform = arr => new Map(arr.map((o) => [o.id, o]));
const traverse = (map, id) => {
const startObj = map.get(+id);
if("parentId" in startObj)
return [...traverse(map, startObj.parentId), +id];
else
return [+id];
}
console.log(traverse(transform(arr), "152"));
I would start by indexing the data by id using reduce
var byId = data.reduce( (acc,i) => {
acc[i.id] = i
return acc;
},{});
And then just go through using a loop and pushing the id to a result array
var item = byId[input];
var result = []
while(item.parentId) {
result.push(item.parentId)
item = byId[item.parentId];
}
Live example:
const input = 152;
const data = [ { "id":1, "name":"root" }, { "id":2, "parentId":1, "name":"2" }, { "id":148, "parentId":2, "name":"3" }, { "id":151, "parentId":148, "name":"4" }, { "id":152, "parentId":151, "name":"5" } ]
var byId = data.reduce( (acc,i) => {
acc[i.id] = i
return acc;
},{});
var item = byId[input];
var result = []
while(item.parentId) {
result.push(item.parentId)
item = byId[item.parentId];
}
console.log(result.reverse());
Try changing this line
parentsArr.unshift(`${item.parentId}`);
To this
parentsArr.push(`${item.parentId}`);
Then try
console.log(parentsArr);
This is what I ended up with. Basically a mix of Janek and Nicks answers. It's just 2 steps:
transform code to a map.
extract the ancester_id's with a little function
let data = [
{"id":1,"name":"root"},
{"id":2,"parentId":1,"name":"2"},
{"id":148,"parentId":2,"name":"3"},
{"id":151,"parentId":148,"name":"4"},
{"id":152,"parentId":151,"name":"5"}
];
data = data.reduce( (acc, value) => {
// could optionally filter out the id here
return acc.set(value.id, value)
}, new Map());
function extract_ancestors( data, id ) {
let result = [];
while( data.get( id ).parentId ) {
id = data.get( id ).parentId;
result.push(id)
}
return result;
}
// some visual tests
console.log( extract_ancestors( data, 152 ) );
console.log( extract_ancestors( data, 148 ) );
console.log( extract_ancestors( data, 1 ) );
PS: My OOP tendencies start to itch so much from this haha.
arr = ["sadik", "arif", "rahman"]
I want to create a nested object with the same key but different value like:
{
subcategory: {
name: sadik
subcategory: {
name: arif
subcategory: {
name: rahman
}
}
}
}
my code:
let arr = ['sadik', 'arif', 'babor']
let obj = {}
arr.forEach((elem) => {
let a = {}
a["subcategory"] = {name:elem}
Object.assign(obj, a)
})
i get only last value:
{
subcategory: {
name:"babor"
}
}
Your code did not work because you were replacing your "subcategory" key value in each iteration. You have change the reference object to the next nested level each time to get the expected output, see the working snippet below:
const arr = ['level-1', 'level-2', 'level-3']
const obj = {}
let refObj = obj;
arr.forEach( ele => {
refObj = refObj['subcategory'] || refObj;
refObj['subcategory'] = { 'name': ele};
})
console.log('output', obj);
Let's say I have an Object myBook and an array allCategories.
const allCategories = ["sciencefiction", "manga", "school", "art"];
const myBook = {
isItScienceFiction: true,
isItManga: false,
isItForKids: false
}
What I want : Loop over categories to check the value of Book, for example, check if "sciencefiction" exists in my Book Object and then check it's value
What I have tried :
1) With indexOf
allCategories.map((category) => {
Object.keys(myBook).indexOf(category)
// Always returns -1 because "sciencefiction" doesn't match with "isItScienceFiction"
});
2) With includes
allCategories.map((category) => {
Object.keys(myBook).includes(category)
// Always returns false because "sciencefiction" doesn't match with "isItScienceFiction"
});
Expected output :
allCategories.map((category) => {
// Example 1 : Returns "sciencefiction" because "isItScienceFiction: true"
// Example 2 : Returns nothing because "isItManga: false"
// Example 3 : Returns nothing because there is not property in myBook with the word "school"
// Example 4 : Returns nothing because there is not property in myBook with the word "art"
// If category match with myBook categories and the value is true then
return (
<p>{category}</p>
);
});
If you need more information, just let me know, I'll edit my question.
You could use filter and find methods to return new array of categories and then use map method to return array of elements.
const allCategories = ["sciencefiction", "manga", "school", "art"];
const myBook = {isItScienceFiction: true, isItManga: false, isItForKids: false}
const result = allCategories.filter(cat => {
const key = Object.keys(myBook).find(k => k.slice(4).toLowerCase() === cat);
return myBook[key]
}).map(cat => `<p>${cat}</p>`)
console.log(result)
You can also use reduce instead of filter and map and endsWith method.
const allCategories = ["sciencefiction", "manga", "school", "art"];
const myBook = {isItScienceFiction: true,isItManga: false,isItForKids: false}
const result = allCategories.reduce((r, cat) => {
const key = Object.keys(myBook).find(k => k.toLowerCase().endsWith(cat));
if(myBook[key]) r.push(`<p>${cat}</p>`)
return r;
}, [])
console.log(result)
You can use
Object.keys(myBook).forEach(function(key){console.log(myBook[key])})
... place you code instead of console.log. This can do the trick without hard coding and also the best practice.
You should really not keep a number of properties containing booleans. While that might work for 1, 2 or 3 categories, for a few hundred it won't work well. Instead, just store the categories in an array:
const myBook = {
categories: ["sciencefiction", "manga", "kids"],
};
If you got some object with the old structure already, you can easily convert them:
const format = old => {
const categories = [];
if(old.isItScienceFiction)
categories.push("sciencefiction");
if(old.isItManga)
categories.push("manga");
if(old.isItForKids)
categories.push("kids");
return { categories };
};
Now to check wether a book contains a certain category:
const isManga = myBook.categories.includes("manga");
And your rendering is also quite easy now:
myBook.categories.map(it => <p>{it}</p>)
Use Array.filter() and Array.find() with a RegExp to find categories that have matching keys. Use Array.map() to convert the categories to strings/JSX/etc...
const findMatchingCategories = (obj, categories) => {
const keys = Object.keys(obj);
return allCategories
.filter(category => {
const pattern = new RegExp(category, 'i');
return obj[keys.find(c => pattern.test(c))];
})
.map(category => `<p>${category}</p>`);
};
const allCategories = ["sciencefiction", "manga", "school", "art"];
const myBook = {
isItScienceFiction: true,
isItManga: false,
isItForKids: false
};
const result = findMatchingCategories(myBook, allCategories);
console.log(result);
You can modify the key names in myBook object for easy lookup like:
const allCategories = ["sciencefiction", "manga", "school", "art"];
const myBook = {
isItScienceFiction: true,
isItManga: false,
isItForKids: false
}
const modBook = {}
Object.keys(myBook).map((key) => {
const modKey = key.slice(4).toLowerCase()
modBook[modKey] = myBook[key]
})
const haveCategories = allCategories.map((category) => {
if (modBook[category]) {
return <p>{category}</p>
}
return null
})
console.log(haveCategories)
Converting sciencefiction to isItScienceFiction is not possible and looping all the keys of myBook for every category is not optimal.
But converting isItScienceFiction to sciencefiction is pretty easy, so you can create newMyBook from yourmyBook and use it instead to check.
Creating newMyBook is a one time overhead.
const allCategories = ["sciencefiction", "manga", "school", "art"];
const myBook = {isItScienceFiction: true,isItManga: false,isItForKids: false};
const newMyBook = Object.keys(myBook).reduce((a, k) => {
return { ...a, [k.replace('isIt', '').toLowerCase()]: myBook[k] };
}, {});
console.log(
allCategories.filter(category => !!newMyBook[category]).map(category => `<p>${category}</p>`)
);
You can try like this:
const allCategories = ["sciencefiction", "manga", "school", "art"];
const myBook = {
isItScienceFiction: true,
isItManga: false,
isItForKids: false
};
const myBookKeys = Object.keys(myBook);
const result = allCategories.map(category => {
const foundIndex = myBookKeys.findIndex(y => y.toLowerCase().includes(category.toLowerCase()));
if (foundIndex > -1 && myBook[myBookKeys[foundIndex]])
return `<p>${category}</p>`;
});
console.log(result);
You could create a Map for the the categories and keys of object:
const allCategories = ["sciencefiction", "manga", "school", "art"],
myBook = { isItScienceFiction:true, isItManga:false, isItForKids:false }
const map = Object.keys(myBook)
.reduce((r, k) => r.set(k.slice(4).toLowerCase(), k), new Map);
/* map:
{"sciencefiction" => "isItScienceFiction"}
{"manga" => "isItManga"}
{"forkids" => "isItForKids"}
*/
allCategories.forEach(key => {
let keyInObject = map.get(key); // the key name in object
let value = myBook[keyInObject]; // value for the key in object
console.log(key, keyInObject, value)
if(keyInObject && value) {
// do something if has the current key and the value is true
}
})
I have to remove unwanted object properties that do not match my model. How can I achieve it with Lodash?
My model is:
var model = {
fname: null,
lname: null
}
My controller output before sending to the server will be:
var credentials = {
fname: "xyz",
lname: "abc",
age: 23
}
I am aware I can use
delete credentials.age
but what if I have lots of unwanted properties? Can I achieve it with Lodash?
You can approach it from either an "allow list" or a "block list" way:
// Block list
// Remove the values you don't want
var result = _.omit(credentials, ['age']);
// Allow list
// Only allow certain values
var result = _.pick(credentials, ['fname', 'lname']);
If it's reusable business logic, you can partial it out as well:
// Partial out a "block list" version
var clean = _.partial(_.omit, _, ['age']);
// and later
var result = clean(credentials);
Note that Lodash 5 will drop support for omit
A similar approach can be achieved without Lodash:
const transform = (obj, predicate) => {
return Object.keys(obj).reduce((memo, key) => {
if(predicate(obj[key], key)) {
memo[key] = obj[key]
}
return memo
}, {})
}
const omit = (obj, items) => transform(obj, (value, key) => !items.includes(key))
const pick = (obj, items) => transform(obj, (value, key) => items.includes(key))
// Partials
// Lazy clean
const cleanL = (obj) => omit(obj, ['age'])
// Guarded clean
const cleanG = (obj) => pick(obj, ['fname', 'lname'])
// "App"
const credentials = {
fname:"xyz",
lname:"abc",
age:23
}
const omitted = omit(credentials, ['age'])
const picked = pick(credentials, ['age'])
const cleanedL = cleanL(credentials)
const cleanedG = cleanG(credentials)
Get a list of properties from model using _.keys(), and use _.pick() to extract the properties from credentials to a new object:
var model = {
fname:null,
lname:null
};
var credentials = {
fname:"xyz",
lname:"abc",
age:23
};
var result = _.pick(credentials, _.keys(model));
console.log(result);
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.16.4/lodash.min.js"></script>
If you don't want to use Lodash, you can use Object.keys(), and Array.prototype.reduce():
var model = {
fname:null,
lname:null
};
var credentials = {
fname:"xyz",
lname:"abc",
age:23
};
var result = Object.keys(model).reduce(function(obj, key) {
obj[key] = credentials[key];
return obj;
}, {});
console.log(result);
You can easily do this using _.pick:
var model = {
fname: null,
lname: null
};
var credentials = {
fname: 'abc',
lname: 'xyz',
age: 2
};
var result = _.pick(credentials, _.keys(model));
console.log('result =', result);
<script src="https://cdn.jsdelivr.net/lodash/4.16.4/lodash.min.js"></script>
But you can simply use pure JavaScript (specially if you use ECMAScript 6), like this:
const model = {
fname: null,
lname: null
};
const credentials = {
fname: 'abc',
lname: 'xyz',
age: 2
};
const newModel = {};
Object.keys(model).forEach(key => newModel[key] = credentials[key]);
console.log('newModel =', newModel);
Lodash unset is suitable for removing a few unwanted keys.
const myObj = {
keyOne: "hello",
keyTwo: "world"
}
unset(myObj, "keyTwo");
console.log(myObj); /// myObj = { keyOne: "hello" }
Here I have used omit() for the respective 'key' which you want to remove... by using the Lodash library:
var credentials = [{
fname: "xyz",
lname: "abc",
age: 23
}]
let result = _.map(credentials, object => {
return _.omit(object, ['fname', 'lname'])
})
console.log('result', result)
You can use _.omit() for emitting the key from a JSON array if you have fewer objects:
_.forEach(data, (d) => {
_.omit(d, ['keyToEmit1', 'keyToEmit2'])
});
If you have more objects, you can use the reverse of it which is _.pick():
_.forEach(data, (d) => {
_.pick(d, ['keyToPick1', 'keyToPick2'])
});
To select (or remove) object properties that satisfy a given condition deeply, you can use something like this:
function pickByDeep(object, condition, arraysToo=false) {
return _.transform(object, (acc, val, key) => {
if (_.isPlainObject(val) || arraysToo && _.isArray(val)) {
acc[key] = pickByDeep(val, condition, arraysToo);
} else if (condition(val, key, object)) {
acc[key] = val;
}
});
}
https://codepen.io/aercolino/pen/MWgjyjm
This is my solution to deep remove empty properties with Lodash:
const compactDeep = obj => {
const emptyFields = [];
function calculateEmpty(prefix, source) {
_.each(source, (val, key) => {
if (_.isObject(val) && !_.isEmpty(val)) {
calculateEmpty(`${prefix}${key}.`, val);
} else if ((!_.isBoolean(val) && !_.isNumber(val) && !val) || (_.isObject(val) && _.isEmpty(val))) {
emptyFields.push(`${prefix}${key}`);
}
});
}
calculateEmpty('', obj);
return _.omit(obj, emptyFields);
};
For array of objects
model = _.filter(model, a => {
if (!a.age) { return a }
})
Recursively removing paths.
I just needed something similar, not removing just keys, but keys by with paths recursively.
Thought I'd share.
Simple readable example, no dependencies
/**
* Removes path from an object recursively.
* A full path to the key is not required.
* The original object is not modified.
*
* Example:
* const original = { a: { b: { c: 'value' } }, c: 'value' }
*
* omitPathRecursively(original, 'a') // outputs: { c: 'value' }
* omitPathRecursively(original, 'c') // outputs: { a: { b: {} } }
* omitPathRecursively(original, 'b.c') // { a: { b: {} }, c: 'value' }
*/
export const omitPathRecursively = (original, path, depth = 1) => {
const segments = path.split('.')
const final = depth === segments.length
return JSON.parse(
JSON.stringify(original, (key, value) => {
const match = key === segments[depth - 1]
if (!match) return value
if (!final) return omitPathRecursively(value, path, depth + 1)
return undefined
})
)
}
Working example: https://jsfiddle.net/webbertakken/60thvguc/1/
While looking for a solution that would work for both arrays and objects, I didn't find one and so I created it.
/**
* Recursively ignore keys from array or object
*/
const ignoreKeysRecursively = (obj, keys = []) => {
const keyIsToIgnore = (key) => {
return keys.map((a) => a.toLowerCase()).includes(key)
}
const serializeObject = (item) => {
return Object.fromEntries(
Object.entries(item)
.filter(([key, value]) => key && value)
.reduce((prev, curr, currIndex) => {
if (!keyIsToIgnore(curr[0]))
prev[currIndex] =
[
curr[0],
// serialize array
Array.isArray(curr[1])
? // eslint-disable-next-line
serializeArray(curr[1])
: // serialize object
!Array.isArray(curr[1]) && typeof curr[1] === 'object'
? serializeObject(curr[1])
: curr[1],
] || []
return prev
}, []),
)
}
const serializeArray = (item) => {
const serialized = []
for (const entry of item) {
if (typeof entry === 'string') serialized.push(entry)
if (typeof entry === 'object' && !Array.isArray(entry)) serialized.push(serializeObject(entry))
if (Array.isArray(entry)) serialized.push(serializeArray(entry))
}
return serialized
}
if (Array.isArray(obj)) return serializeArray(obj)
return serializeObject(obj)
}
// usage
const refObject = [{name: "Jessica", password: "ygd6g46"}]
// ignore password
const obj = ignoreKeysRecursively(refObject, ["password"])
// expects returned array to only have name attribute
console.log(obj)
let asdf = [{"asd": 12, "asdf": 123}, {"asd": 121, "asdf": 1231}, {"asd": 142, "asdf": 1243}]
asdf = _.map(asdf, function (row) {
return _.omit(row, ['asd'])
})