I have some huge files which are difficult to read in memory. I need to read each line and then replace double quotes if found and edit the same file. Right now, I am reading the file line by line, storing in an array and overwriting the same file. But, that's giving memory issue for big files. Any pointers ?
Here is my present implementation :
var allData = fs.readFileSync(fileName, { encoding: 'utf8' }).toString().split("\n");
var finalString = "";
for (i in allData) {
allData[i] = allData[i].replace(/"/g, '""');
finalString = finalString.concat(allData[i]);
finalString = finalString.concat("\n");
}
fs.writeFileSync(fileName, finalString);
Is there a way to edit by reading one line at a time and changing that in the file?
I have seen the similar question with scramjet, but that gives an error and is not compatible with all nodejs versions : node.js modify file data stream?
After going through a lot of answers, this worked for me which took care of the required synchronous and asynchronous behaviour, large file and keeping the name same.
function format_file(fileName) {
return new Promise((resolve, reject) => {
if (fs.existsSync(fileName)) {
var fields = fileName.split('/');
var tempFile = "";
var arrayLength = fields.length;
for (var i = 0; i < arrayLength - 1; i++) {
tempFile = tempFile + fields[i] + "/";
}
tempFile = tempFile + "tempFile" + fields[arrayLength - 1];
console.log("tempfile name is : " + tempFile + " actualFileName is :" + fileName);
var processStream = new ProcessStream();
fs.createReadStream(fileName, { bufferSize: 128 * 4096 })
.pipe(processStream)
.pipe(fs.createWriteStream(tempFile)).on('finish', function() { // finished
fs.renameSync(tempFile, fileName);
console.log('done encrypting');
resolve('done');
});
} else {
reject('path not found')
}
});
}
Please see my code below, I am interrogating active directory and retrieving back two fields, "name" and "cn". I want to concatenate these in an array and then assign to my drop down list. i.e. name + ' ' + cn. The code below publishes my results wrongly and is displaying all names and cn as individual results i.e. not concatenated.
Can someone advise me and put me in the right direction?
thanks,
George
try
{
// Get LDAP Context
ctx = LdapServices.getLdapContext();
//Specify the search scope
ctls.setSearchScope(SearchControls.ONELEVEL_SCOPE);
var searchFilter = "(&(objectClass=group))";
//Specify the Base for the search
var searchBase = "ou=Licensed Applications,ou=SCCM Apps,ou=Applications,ou=Groups,dc=XXX,dc=XX,dc=XX";
//initialize counter to total the group members
var totalResults = 0;
//Specify the attributes to return
var returnedAtts=["name", "cn"];
ctls.setReturningAttributes(returnedAtts);
//Search for objects using the filter
var answer = ctx.search(searchBase, searchFilter, ctls);
//Loop through the search results
while (answer.hasMoreElements())
{
var sr = answer.next();
var attrs = sr.getAttributes();
if (attrs != null)
{
try
{
for (var ae = attrs.getAll();ae.hasMore();)
{
var attr = ae.next();
var pos = attr.toString().indexOf(":",0);
var attributeName = attr.toString().substring(0,pos);
var name = "";
var cn = "";
for (var e = attr.getAll();e.hasMore();totalResults++)
{
if(attributeName == "name")
{
name = e.next().replace('SCCM_','');
}
if(attributeName == "cn")
{
cn = e.next();
}
}
listItems.push(name + ' (' + cn + ')');
}
}
catch (e)
{
log("Problem listing items: " + e);
}
}
}
}
catch (e)
{
log("Problem searching directory: " + e);
}
finally
{
// Close LDAP Context
ctx.close();
}
I don't know what javascript library you use to have JNDI like code into, but from the purely LDAP point of view :
An attribute can be multi-valued, so every attributes value are returned in a array, even if single valued (possible exception for the dn), for example :
{
"dn":"cn=user,dc=example,dc=com",
"name":["username"],
"cn":["commonname"]
}
If your library works like JNDI, a way to do it could be :
After the line : var attrs = sr.getAttributes();
if (attrs != null) {
try {
log ("name: " + attrs.get("name").get());
log ("cn: " + attrs.get("cn").get());
} catch (e) {
log ("Problem listing attributes from Global Catalog: " + e);
}
}
I'm trying to load in two txt files and compare the differences between the two. More specifically I'm looping through one file per line and comparing it to every line in another txt file.
For the most part everything is working but I have found that I'm only able to access the array within the lr.on('line') function. However I have declared the array in the global scope.
Here is the code:
var LineByLineReader = require('line-by-line');
var lr = new LineByLineReader('phones.txt');
var lr2 = new LineByLineReader('wor.txt');
var phoneArray = [];
var worArray = [];
lr.on('error', function(err){
if(err){
console.log("We have found the following error: " + err);
}
});
lr2.on('error', function(err){
if(err){
console.log("We have found the following error: " + err);
}
});
lr.on('line', function(line){
phoneArray.push(line);
});
lr2.on('line', function(line){
worArray.push(line);
});
for(var i = 0; i < phoneArray.length; i++){
for(var x = 0; x < worArray.length; x++){
if(array1[i] === array2[x]){
console.log("Found Match: " + array2[x]);
}
}
}
Maybe you just forgot to use the right variables names inside your for loop?
And you just need this? :
if(phoneArray[i] === worArray[x]){
console.log("Found Match: " + worArray[x]);
I am attempting to create a web scraper (in node.js) that will pull down information from a site, and write it to a file. I have it built to correctly work for one page, but when I try to use the function in a for loop, to iterate through multiple games, I get bad data in all of the games.
I understand that this is related to Javascript's asynchronous nature, and I have read about callback functions, but I'm not sure I understand how to apply it to my code. Any help would be GREATLY appreciated:
for(x = 4648; x < 4650; x++){ //iterate over a few gameIDs, used in URL for request
scrapeGame(x);
}
function scrapeGame(gameId){
//request from URL, scrape HTML to arrays as necessary
//write final array to file
}
Essentially, what I am looking to do, is within the for loop, tell it to WAIT to finish the scrapeGame(x) function before incrementing x and running it for the next game -- otherwise, the arrays start to overwrite each other and the data becomes a huge mess.
EDIT: I've now included the full code which I am attempting to run! I'm getting errors when looking in the files after they are written. For example, the first file is 8kb, second is ~16, 3rd is ~32, etc. It seems things aren't getting cleared before running the next game.
Idea of the program is to pull Jeopardy questions/answers from the archive site in order to eventually build a quiz app for myself.
//Iterate over arbitrary number of games, scrape each
for(x = 4648; x < 4650; x++){
scrapeGame(x, function(scrapeResult) {
if(scrapeResult){
console.log('Scrape Successful');
} else {
console.log('Scrape ERROR');
}
});
}
function scrapeGame(gameId, callback){
var request = require('request');
cheerio = require('cheerio');
fs = require('fs');
categories = [];
categorylist = [];
ids = [];
clues = [];
values = ['0','$200','$400','$600','$800','$1000','$400','$800','$1200','$1600','$2000'];
valuelist = [];
answers = [];
array = [];
file = [];
status = false;
var showGameURL = 'http://www.j-archive.com/showgame.php?game_id=' + gameId;
var showAnswerURL = 'http://www.j-archive.com/showgameresponses.php?game_id=' + gameId;
request(showGameURL, function(err, resp, body){
if(!err && resp.statusCode === 200){
var $ = cheerio.load(body);
//add a row to categories to avoid starting at 0
categories.push('Category List');
//pull all categories to use for later
$('td.category_name').each(function(){
var category = $(this).text();
categories.push(category);
});
//pull all clue IDs (coordinates), store to 1d array
//pull any id that has "stuck" in the string, to prevent duplicates
$("[id*='stuck']").each(function(){
var id = $(this).attr('id');
id = id.toString();
id = id.substring(0, id.length - 6);
ids.push(id);
//if single J, pick category 1-6
if (id.indexOf("_J_") !== -1){
var catid = id.charAt(7);
categorylist.push(categories[catid]);
var valId = id.charAt(9);
valuelist.push(values[valId]);
}
//if double J, pick category 7-12
else if (id.indexOf("_DJ_") !== -1){
var catid = parseInt(id.charAt(8)) + 6;
categorylist.push(categories[catid]);
var valId = parseInt(id.charAt(10)) + 5;
valuelist.push(values[valId]);
}
//if final J, pick category 13
else {
categorylist.push(categories[13]);
}
});
//pull all clue texts, store to 1d array
$('td.clue_text').each(function(){
var clue = $(this).text();
clues.push(clue);
});
//push pulled values to big array
array.push(ids);
array.push(categorylist);
array.push(valuelist);
array.push(clues);
//new request to different URL to pull responses
request(showAnswerURL, function(err, resp, body){
if(!err && resp.statusCode === 200){
var $ = cheerio.load(body);
$('.correct_response').each(function(){
var answer = $(this).text();
answers.push(answer);
});
//push answers to big array
array.push(answers);
//combine arrays into 1-d array to prep for writing to file
for(var i = 0; i < array[0].length; i++){
var print = array[0][i] + "|" + array[1][i] + "|" + array[2][i] + "|" + array[3][i] + "|" + array[4][i];
var stringPrint = print.toString();
file.push(stringPrint);
}
//update string, add newlines, etc.
var stringFile = JSON.stringify(file);
stringFile = stringFile.split('\\').join('');
stringFile = stringFile.split('","').join('\n');
//write to file, eventually will append to end of one big file
fs.writeFile('J_GAME_' + gameId +'.txt', stringFile, function(err) {
if(err) {
console.log(err);
} else {
console.log("Game #" + gameId + " has been scraped.");
status = true;
}
});
}
});
}
});
//clear arrays used
valuelist = [];
answers = [];
categories = [];
categorylist = [];
ids = [];
clues = [];
array = [];
file = [];
//feed callback status
callback(status);
}
// Iterate over a few gameIDs, used in URL for request.
for (x = 4648; x < 4650; x++) {
// Pass in the callback as an anonymous function.
// So below I am passing in the id and the function I want to execute.
// AND, defining the results I am expecting as passed in arguments.
scrapeGame(x, function(scrapeResult, err) {
// This will *NOT* execute *UNTIL* you call it in the function below.
// That means that the for loop's execution is halted.
// This function receives the status that is passed in,
// in this case, a boolean true/false and an error if any.
if (scrapeResult) {
// Scrape was true, nothing to do.
// The for loop will now move on to the next iteration.
console.log('Scrape Successful');
} else {
// Scrape was false, output error to console.log and
// break loop to handle error.
console.log('Scrape ERROR :: ' + err);
// Notice we are calling break while in the
// scope of the callback function
// Remove the break if you want to just move onto
// the next game ID and not stop the loop
break;
}
});
}
// This function now accepts two arguments.
function scrapeGame(gameId, callback) {
// ************************************************
// ** Do Your Work Here **
// Request from URL, scrape HTML to arrays as necessary.
// Write final array to file.
// After file creation, execute the callback and pass bool
// status (true/false).
// ************************************************
var request = require('request'),
cheerio = require('cheerio'),
fs = require('fs'),
categories = [],
categorylist = [],
ids = [],
clues = [],
values = [
'0',
'$200',
'$400',
'$600',
'$800',
'$1000',
'$400',
'$800',
'$1200',
'$1600',
'$2000'
],
valuelist = [],
answers = [],
array = [],
file = [],
showGameURL = 'http://www.j-archive.com/showgame.php?game_id=' + gameId,
showAnswerURL = 'http://www.j-archive.com/showgameresponses.php?game_id=' + gameId;
request(showGameURL, function(err, resp, body) {
if (!err && resp.statusCode === 200) {
var $ = cheerio.load(body);
//add a row to categories to avoid starting at 0
categories.push('Category List');
//pull all categories to use for later
$('td.category_name').each(function() {
var category = $(this).text();
categories.push(category);
});
//pull all clue IDs (coordinates), store to 1d array
//pull any id that has "stuck" in the string, to prevent duplicates
$("[id*='stuck']").each(function() {
var id = $(this).attr('id');
id = id.toString();
id = id.substring(0, id.length - 6);
ids.push(id);
//if single J, pick category 1-6
if (id.indexOf("_J_") !== -1) {
var catid = id.charAt(7);
categorylist.push(categories[catid]);
var valId = id.charAt(9);
valuelist.push(values[valId]);
}
//if double J, pick category 7-12
else if (id.indexOf("_DJ_") !== -1) {
var catid = parseInt(id.charAt(8)) + 6;
categorylist.push(categories[catid]);
var valId = parseInt(id.charAt(10)) + 5;
valuelist.push(values[valId]);
}
//if final J, pick category 13
else {
categorylist.push(categories[13]);
}
});
//pull all clue texts, store to 1d array
$('td.clue_text').each(function() {
var clue = $(this).text();
clues.push(clue);
});
//push pulled values to big array
array.push(ids);
array.push(categorylist);
array.push(valuelist);
array.push(clues);
//new request to different URL to pull responses
request(showAnswerURL, function(err, resp, body) {
if (!err && resp.statusCode === 200) {
var $ = cheerio.load(body);
$('.correct_response').each(function() {
var answer = $(this).text();
answers.push(answer);
});
//push answers to big array
array.push(answers);
//combine arrays into 1-d array to prep for writing to file
for (var i = 0; i < array[0].length; i++) {
var print = array[0][i] + "|" + array[1][i] + "|" + array[2][i] + "|" + array[3][i] + "|" + array[4][i];
var stringPrint = print.toString();
file.push(stringPrint);
}
//update string, add newlines, etc.
var stringFile = JSON.stringify(file);
stringFile = stringFile.split('\\').join('');
stringFile = stringFile.split('","').join('\n');
//write to file, eventually will append to end of one big file
fs.writeFile('J_GAME_' + gameId + '.txt', stringFile, function(err) {
//clear arrays used
valuelist = [];
answers = [];
categories = [];
categorylist = [];
ids = [];
clues = [];
array = [];
file = [];
if (err) {
// ******************************************
// Callback false with error.
callback(false, err);
// ******************************************
} else {
console.log("Game #" + gameId + " has been scraped.");
// ******************************************
// Callback true with no error.
callback(true);
// ******************************************
}
});
}
});
}
});
}
My assumption is that you want them to be scraped one after one, not in parallel. So, for loop won't help. The following approach should do the trick:
var x = 4648;
var myFunc = scrapeGame(x, function cb(){
if(x >= 4650){
return;
}
x++;
return myFunc(x, cb);
});
function scrapeGame(gameId){
//request from URL, scrape HTML to arrays as necessary
//write final array to file
}
For nested async function, where you want them be executed in serial manner, you should just forget about for loop.
An example of correct request handling with http client:
function scrapeGame(gameId, cb){
//your code and set options
http.request(options, function(response){
var result = "";
response.on('data', function (chunk) {
result += chunk;
});
response.on('end',function(){
//write data here;
//do the callback
cb();
});
});
}
I solved the ROOT cause of the issue that I was seeing, though I do believe without the callback assistance from red above, I would have been just as lost.
Turns out the data was processing correctly, but the file write was scrambling. Turns out that there is a different method to call instead of writeFile or appendFile:
fs.appendFileSync();
Calling the Synchronous version processed the writes to the file IN THE ORDER they got appended to the file, instead of just going for it. This, in addition to the callback help above, solved the issue.
Thanks to everyone for the assistance!
I am trying to get a list of files and directories in Firefox Add-ons.
var io_file = require("sdk/io/file");
var HomePath = require('sdk/system').pathFor("Home");
var list_files = io_file.list(HomePath);
How to determine is the element of the array a file or a directory?
Do console.log('debug:', list_files) and then press Ctrl + Shift + J and then click on the link next to list_files, then it opens in varaible viewer and you can explore it there.
like this, notice the "Browser Console" box and see the "Properties" section below, tha comes up when I clicked on "[object MouseEvent]"s you see in the logs above.
const fileIO = require("sdk/io/file");
let path = "/Users/Work/";
let list = fileIO.list(path);
for (i = 0; i < list.length; i++) {
let item = fileIO.join(path, list[i]);
if (fileIO.isFile(item)) {
console.log(item + " is a file");
}
else {
console.log(item + " is a directory");
}
}
or
const {Cu} = require("chrome");
const {TextEncoder, OS} = Cu.import("resource://gre/modules/osfile.jsm", {});
var iterator = new OS.File.DirectoryIterator('/home/user');
var promise = iterator.forEach(
function onEntry(entry) {
if (entry.isDir) { // is Directory
console.log(JSON.stringify(entry));
} else { // is File
console.log(JSON.stringify(entry));
}
}
);
promise.then(
function onSuccess() {
iterator.close();
return subdirs;
},
function onFailure(reason) {
iterator.close();
throw reason;
}
);