Overwrite a line in a file using node.js - javascript

What's the best way to overwrite a line in a large (2MB+) text file using node.js?
My current method involves:
copying the entire file into a buffer,
splitting the buffer into an array on the newline character (\n),
overwriting the line at the relevant array index,
then overwriting the file with the array joined back together with \n.
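For reference, a minimal sketch of the read/split/join method described above (an illustration of the question's own approach, assuming the file fits comfortably in memory and uses \n line endings; the function name and parameters are illustrative):
var fs = require('fs');

// lineIndex is zero-based; newText replaces that line in place
function overwriteLine(filename, lineIndex, newText) {
    var lines = fs.readFileSync(filename, 'utf8').split('\n');
    lines[lineIndex] = newText;
    fs.writeFileSync(filename, lines.join('\n'));
}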

First, you need to find where the line starts and where it ends. Then you need a function to replace the line. Here is a solution for the first part, using one of my libraries: Node-BufferedReader.
var lineToReplace = "your_line_to_replace";
var startLineOffset = 0;
var endLineOffset = 0;

new BufferedReader ("your_file", { encoding: "utf8" })
    .on ("error", function (error){
        console.log (error);
    })
    .on ("line", function (line, byteOffset){
        startLineOffset = endLineOffset;
        endLineOffset = byteOffset - 1; // byteOffset is the offset of the NEXT byte; it is -1 at the end of the file, in which case endLineOffset = <the file size>
        if (line === lineToReplace){
            console.log ("start: " + startLineOffset + ", end: " + endLineOffset +
                ", length: " + (endLineOffset - startLineOffset));
            this.interrupt (); // interrupts the reading and finishes
        }
    })
    .read ();
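The snippet above only finds the offsets. As a possible follow-up (not part of the original answer), once the offsets are known the line can be overwritten in place with fs.write, provided the replacement has exactly the same byte length as the original line; otherwise the rest of the file would have to be rewritten:
var fs = require ("fs");

// Hypothetical helper: writes `replacement` at byte position `startLineOffset`.
// Only safe when Buffer.byteLength(replacement) equals the old line's length.
function overwriteAt (filename, startLineOffset, replacement, callback){
    fs.open (filename, "r+", function (error, fd){
        if (error) return callback (error);
        var buffer = Buffer.from (replacement, "utf8");
        fs.write (fd, buffer, 0, buffer.length, startLineOffset, function (error){
            fs.close (fd, function (){
                callback (error);
            });
        });
    });
}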

Maybe you can try the package replace-in-file.
Suppose we have a txt file as below:
// file.txt
"line1"
"line2"
"line5"
"line6"
"line1"
"line2"
"line5"
"line6"
and we want to replace:
line1 -> line3
line2 -> line4
Then, we can do it like this:
const replace = require('replace-in-file');

const options = {
    files: "./file.txt",
    from: [/line1/g, /line2/g],
    to: ["line3", "line4"]
};

replace(options)
    .then(result => {
        console.log("Replacement results: ", result);
    })
    .catch(error => {
        console.log(error);
    });
The result is as below:
// file.txt
"line3"
"line4"
"line5"
"line6"
"line3"
"line4"
"line5"
"line6"
For more details, please refer to its docs: https://www.npmjs.com/package/replace-in-file
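One note (an assumption about the use case, not covered in the example above): the patterns in from match substrings anywhere in the file, so if the intent is to replace whole lines only, line-anchored multiline regular expressions can be used with the same options shape:
const options = {
    files: "./file.txt",
    from: [/^line1$/gm, /^line2$/gm], // ^ and $ with the m flag anchor to line boundaries
    to: ["line3", "line4"]
};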

This isn't a solution focused on file size, but it does overwrite a line in a file using node.js. It may help other people whom search engines redirect to this post, like me.
import * as fs from 'fs'

const filename = process.argv[2]
const lineIndexToUpdate = parseInt(process.argv[3]) - 1
const textUpdate = process.argv[4]

function filterLine(indexToUpdate, dataString) {
    return dataString
        .split('\n')
        .map((val, index) => {
            if (index === indexToUpdate)
                return textUpdate
            else
                return val
        })
        .join('\n')
}

fs.readFile(filename, 'utf8', (err, data) => {
    if (err) throw err
    fs.writeFile(filename, filterLine(lineIndexToUpdate, data), (err) => {
        if (err) throw err
        console.log("Line updated")
    })
})
Script usage example:
node update_line.js file 10 "te voglio benne"

Related

csvtojson node.js (combine two codes)

How can I combine these two pieces of code, so that it doesn't just convert CSV to JSON (first code), but also saves the result as a JSON array in an extra file (second code)?
This (first) code converts a CSV file to a JSON array:
const fs = require("fs");
let fileReadStream = fs.createReadStream("myCsvFile.csv");
let invalidLineCount = 0;
const csvtojson = require("csvtojson");

csvtojson({ "delimiter": ";", "fork": true })
    .preFileLine((fileLineString, lineIdx) => {
        let invalidLinePattern = /^['"].*[^"'];/;
        if (invalidLinePattern.test(fileLineString)) {
            console.log(`Line #${lineIdx + 1} is invalid, skipping:`, fileLineString);
            fileLineString = "";
            invalidLineCount++;
        }
        return fileLineString
    })
    .fromStream(fileReadStream)
    .subscribe((dataObj) => {
        console.log(dataObj);
        // I added the second code here, but it writes only the last object of the array (because of the loop?)
    });
and this (second) code saves the JSON array to an external file:
fs.writeFile('example.json', JSON.stringify(dataObj, null, 4), err => { if (err) throw err });
The question is how to put the second code into the first code (combine them)?
You can use the .on('done', (error) => { ... }) method (csvtojson). Push the data into a variable in the subscribe method and write that data as JSON in .on('done'). (The test was successful.)
Check it out:
const fs = require("fs");
const csvtojson = require("csvtojson");

let fileReadStream = fs.createReadStream("username-password.csv");
let invalidLineCount = 0;
let data = []

csvtojson({ "delimiter": ";", "fork": true })
    .preFileLine((fileLineString, lineIdx) => {
        let invalidLinePattern = /^['"].*[^"'];/;
        if (invalidLinePattern.test(fileLineString)) {
            console.log(`Line #${lineIdx + 1} is invalid, skipping:`, fileLineString);
            fileLineString = "";
            invalidLineCount++;
        }
        return fileLineString
    })
    .fromStream(fileReadStream)
    .subscribe((dataObj) => {
        // console.log(dataObj)
        data.push(dataObj)
    })
    .on('done', (error) => {
        fs.writeFileSync('example.json', JSON.stringify(data, null, 4))
    })
Not sure if you are able to change the library but I would definitely recommend Papaparse for this - https://www.npmjs.com/package/papaparse
Your code would then look something like this:
const fs = require('fs'), papa = require('papaparse');

var readFile = fs.createReadStream(file);
papa.parse(readFile, {
    complete: function (results, file) {
        fs.writeFile('example.json', JSON.stringify(results.data), function (err) {
            if (err) console.log(err);
            // callback etc
        })
    }
});

I want to read a certain number of lines from a text file and, after reading them, create an object from them using JavaScript

Okay, so this is my requirement: the application must read each line from the text file. A class can be used so that after reading a certain number of lines an object is created (in this way the application is more clearly structured). After reading a data set that corresponds to one student's data, it should add this data set to a string (separated so that it is presented in consecutive rows).
So I have this information for 2 students, one entry under the other as shown below, but without the Name, Address, etc. labels:
Ebonie Rangel
7175 Yukon Street
(507) 833-3567
Geography
Keenan Ellwood
2 Elm Lane
(894) 831-6482
History
which are in that file. After reading every line, I am supposed to add Name in front of the first line, Address in front of the second, then Phone and Course, and so on.
The result should be the same list with each line labelled (Name Ebonie Rangel, Address 7175 Yukon Street, and so on).
This is what I have for now (I have to use fetch to get the file, with async and await, or with a Promise):
let button = document.getElementById("text-button");
let textArea = document.getElementById("text-area");

button.addEventListener("click", function () {
    getData();
});

// fetch code
async function getData() {
    try {
        let response = await fetch('fileName.txt');
        if (response.status !== 200) {
            throw new Error("Error while reading file");
        }
        let text = await response.text();
        textArea.innerHTML = text;
    } catch (err) {
        textArea.innerHTML = 'Problem occurred: ' + err.message;
    }
}
Please help! I have been stuck on this forever.
Since you're pulling from a .txt file, I think it's important to understand the line breaks being used in the file. Here's a decent link I found that says all you need at the top of the article: End of Line or Newline Characters
I opened up the .txt file in Notepad++ as the article recommended; the [CR][LF] displayed after each line means that the newline characters used are \r\n.
Once you understand that, you can use those line breaks to split your string at each line break.
Here's the MDN for String.split() String.prototype.split()
String.split('\r\n') will return an Array of items, specifically the strings that were between but not including the \r\n characters.
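A small aside, not from the original answer: if the file might also arrive with plain \n line endings (for example, when produced on Linux or macOS), a regular-expression split handles both cases:
// \r? makes the carriage return optional, so both \r\n and \n files split correctly
let arrayOfText = text.split(/\r?\n/);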
Let's add this to the getData function:
let button = document.getElementById("text-button");
let textArea = document.getElementById("text-area");

button.addEventListener("click", function () {
    getData();
});

// fetch code
async function getData() {
    try {
        let response = await fetch('fileName.txt');
        if (response.status !== 200) {
            throw new Error("Error while reading file");
        }
        let text = await response.text();

        // New stuff:
        let arrayOfText = text.split('\r\n');

        // Now we can add what we want before each line.
        // We need to do every 4 lines, so let's use this as a chance to learn % better.
        arrayOfText = arrayOfText.map((textItem, index) => {
            let remainder = index % 4; // This will be 0, 1, 2 or 3
            // switch, but you could use anything
            switch (remainder) {
                case 0:
                    textItem = 'Name: ' + textItem + '\r\n';
                    break;
                case 1:
                    textItem = 'Address: ' + textItem + '\r\n';
                    break;
                case 2:
                    textItem = 'Phone: ' + textItem + '\r\n';
                    break;
                case 3:
                    textItem = 'Course: ' + textItem + '\r\n\r\n'; // two here to separate the groups
                    break;
                // We need a default, so just leave textItem unchanged if something goes wrong
                default:
                    break;
            }
            return textItem;
        });

        // Our new array has all the info, so we can use
        // Array.prototype.join('') with an empty string to make it a string.
        // The old line breaks were kept by adding them back in the switch cases above.
        text = arrayOfText.join('');
        // End of my changes

        textArea.innerHTML = text;
    } catch (err) {
        textArea.innerHTML = 'Problem occurred: ' + err.message;
    }
}
I hope this works out for you. It's not the most glamorous solution, but it's a good learning solution because it uses only things you learn early on in your studies.
Let me know if I can clarify anything!
async function getData() {
    try {
        let response = await fetch('https://v-dresevic.github.io/Advanced-JavaScript-Programming/data/students.txt');
        if (response.status !== 200) {
            throw new Error("Error while reading file");
        }
        let text = await response.text();

        const lines = text.split('\n');
        const CHUNK_SIZE = 4;
        textArea.innerHTML = new Array(Math.ceil(lines.length / CHUNK_SIZE))
            .fill()
            .map(_ => lines.splice(0, CHUNK_SIZE))
            .map(chunk => {
                const [Name, Address, Phone, Course] = chunk;
                return {Name, Address, Phone, Course};
            })
            .reduce((text, record) => {
                text += Object.keys(record).map(key => `${key} ${record[key]}`).join('\n') + '\n';
                return text;
            }, '');
    } catch (err) {
        textArea.innerHTML = 'Problem occurred: ' + err.message;
    }
}
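The requirement in the question also mentions using a class so that an object is created after every few lines. A minimal sketch of that idea (the Student class and the parseStudents helper are illustrative, not taken from either answer; the field labels come from the question):
class Student {
    constructor([name, address, phone, course]) {
        this.name = name;
        this.address = address;
        this.phone = phone;
        this.course = course;
    }
    toString() {
        return `Name ${this.name}\nAddress ${this.address}\nPhone ${this.phone}\nCourse ${this.course}\n`;
    }
}

function parseStudents(text) {
    // split on \r\n or \n, drop empty lines, then group every 4 lines into one Student
    const lines = text.split(/\r?\n/).filter(line => line.trim() !== '');
    const students = [];
    for (let i = 0; i + 3 < lines.length; i += 4) {
        students.push(new Student(lines.slice(i, i + 4)));
    }
    return students;
}

// e.g. textArea.innerHTML = parseStudents(text).map(s => s.toString()).join('\n');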

fs readfile position if already at end of file

I'm writing something that is effectively "tailing" a file - every now and then the browser polls back and says "I read up to position 23123, give me anything more". I've written this function which is supposed to give me anything new:
async function readNew( filename, mark )
{
    var fileDescriptor = await fs.open( filename, "r" );
    var buffer = new Buffer( 32768 );
    var result = await fs.read( fileDescriptor, buffer, 0, buffer.length, mark );
    await fs.close( fileDescriptor );
    return result;
}
and it works perfectly if there is something new. If there's not, for some reason it goes back to the beginning and reads the entire file! So for instance, if mark is 16427 - and the file is of size 16427 - I get 16427 bytes read - and what I want is 0.
Any ideas how I can fix this reliably, and with some level of performance?
Most of the time, there won't be any changes - so I want that case to be as fast as possible.
I found a really horrible workaround, which is to go back one byte and read two bytes; if I only get one, I decide that I'm at the end. But it seems there must be a better way?
async function readPart( filename, mark )
{
    var fileDescriptor = await fs.open( filename, "r" );
    var buffer = new Buffer( 32768 );
    var result = await fs.read( fileDescriptor, buffer, 0, 2, Math.max( 0, mark-1 ));
    if (result[0] == 1) {
        await fs.close( fileDescriptor );
        return [0, new Buffer(0)];
    }
    result = await fs.read( fileDescriptor, buffer, 0, buffer.length-1, mark );
    await fs.close( fileDescriptor );
    return result;
}
I can't reproduce this behavior. From what I've tested, your function seems to work fine. Please correct me if I misunderstand but after running this test:
readNew(path.join(__dirname, 'test.txt'), 0).then((result) => {
    console.log('Read: ' + result.bytesRead + ' bytes');
    readNew(path.join(__dirname, 'test.txt'), result.bytesRead).then((result) => {
        console.log('Read: ' + result.bytesRead + ' bytes');
    });
});
The file test.txt in this case contains: A few bytes!
And this is my output:
Read: 13 bytes
Read: 0 bytes
Perhaps something goes wrong elsewhere in your code, such as supplying a position that is out of range.
EDIT:
Here's the full test using the mz library. It gives the same output as the other test using fs-extra.
const fs = require('mz/fs');
const path = require('path');

async function readNew(filename, mark) {
    var fileDescriptor = await fs.open(filename, "r");
    var buffer = new Buffer(32768);
    var result = await fs.read(fileDescriptor, buffer, 0, buffer.length, mark);
    await fs.close(fileDescriptor);
    return result;
}

readNew(path.join(__dirname, 'test.txt'), 0).then(([bytesRead, buffer]) => {
    console.log('Read: ' + bytesRead + ' bytes');
    readNew(path.join(__dirname, 'test.txt'), bytesRead).then(([bytesRead, buffer]) => {
        console.log('Read: ' + bytesRead + ' bytes');
    });
});
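Not part of the answer above, but one way to address the question's performance concern directly: stat the file first and skip the read entirely when nothing has been appended past mark. A sketch, assuming the same mz/fs promise wrappers used in the test above:
async function readNew(filename, mark) {
    const stats = await fs.stat(filename);
    if (stats.size <= mark) {
        // nothing new past `mark`, so avoid opening and reading the file at all
        return [0, Buffer.alloc(0)];
    }
    const fileDescriptor = await fs.open(filename, "r");
    const buffer = Buffer.alloc(32768);
    const result = await fs.read(fileDescriptor, buffer, 0, buffer.length, mark);
    await fs.close(fileDescriptor);
    return result;
}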

sheet js xlsx writeFile callback

I am trying to write multiple csv files from a set of data that I have loaded using the sheet js library. My first attempt was like:
for (let i = 0; i < dataSetDivided.length; i++) {
    let exportSet = dataSetDivided[i]
    console.log(exportSet)
    let ws = XLSX.utils.json_to_sheet(exportSet, {header: finalHeaders})
    let wb = XLSX.utils.book_new()
    XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
    let todayDate = this.returnFormattedDate()
    let originalFileName = this.state.fileName
    let exportFileName = 'import_' + originalFileName + '_' + todayDate + '(part_' + (i + 1) + ').csv'
    XLSX.writeFile(wb, exportFileName)
}
With this code only some files are written, I guess because the for loop doesn't wait for each file to be written before continuing.
So I am trying to write each file within a promise like below:
Promise.all(
    dataSetDivided.map((exportSet, i) => {
        return new Promise((resolve, reject) => {
            console.log(exportSet)
            let ws = XLSX.utils.json_to_sheet(exportSet, {header: finalHeaders})
            let wb = XLSX.utils.book_new()
            XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
            let todayDate = this.returnFormattedDate()
            let originalFileName = this.state.fileName
            let exportFileName = 'import_' + originalFileName + '_' + todayDate + '(part_' + (i + 1) + ').csv'
            XLSX.writeFile(wb, exportFileName, (err) => {
                if (err) {
                    console.log(err)
                    reject(err)
                } else {
                    console.log('Created ' + exportFileName)
                    resolve()
                }
            })
        })
    })
)
    .then(() => {
        console.log('Created multiple files successfully')
    })
    .catch((err) => {
        console.log('ERROR: ' + err)
    })
But... this isn't working; again only some files are written and nothing is logged to the console. Can anyone give me any ideas on how to make this work, or a better way to achieve the goal of writing multiple files like this? There is an XLSX.writeFileAsync method, but I can't find any examples of how it works and I'm not sure if that is what I need.
With thanks,
James
UPDATE:
I am now using setTimeout to delay the next writeFile call... this is working for my test cases, but I am aware it isn't a good solution; it would be much better to have a callback for when the file is successfully written:
writeFileToDisk(dataSetDivided, i) {
    if (dataSetDivided.length > 0) {
        let exportSet = dataSetDivided[0]
        let ws = XLSX.utils.json_to_sheet(exportSet, {header: finalHeaders})
        let wb = XLSX.utils.book_new()
        XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
        let todayDate = this.returnFormattedDate()
        let originalFileName = this.state.fileName
        let exportFileName = 'import_' + originalFileName + '_' + todayDate + '(part_' + i + ').csv'
        XLSX.writeFile(wb, exportFileName)
        dataSetDivided.splice(0, 1)
        i += 1
        setTimeout(() => {this.writeFileToDisk(dataSetDivided, i)}, 2500)
    }
}

this.writeFileToDisk(dataSetDivided, 1)
Any suggestions how to get this working without simulating the file write time would be much appreciated.
I just tried this XLSX code (for the first time) and can confirm that it writes the expected workbooks and runs synchronously...
'use strict'
const XLSX = require('xlsx');

let finalHeaders = ['colA', 'colB', 'colC'];
let data = [
    [ { colA: 1, colB: 2, colC: 3 }, { colA: 4, colB: 5, colC: 6 }, { colA: 7, colB: 8, colC: 9 } ],
    [ { colA:11, colB:12, colC:13 }, { colA:14, colB:15, colC:16 }, { colA:17, colB:18, colC:19 } ],
    [ { colA:21, colB:22, colC:23 }, { colA:24, colB:25, colC:26 }, { colA:27, colB:28, colC:29 } ]
];

data.forEach((array, i) => {
    let ws = XLSX.utils.json_to_sheet(array, {header: finalHeaders});
    let wb = XLSX.utils.book_new()
    XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
    let exportFileName = `workbook_${i}.xls`;
    XLSX.writeFile(wb, exportFileName)
});
Running this yields workbook_0.xls, workbook_1.xls, and workbook_2.xls, each with a single sheet entitled "SheetJS". They all look good in Excel; for example, workbook_0 contains the three rows from data[0].
I think you should do the writing asynchronously, and would suggest the following adaptation of the above ...
function writeFileQ(workbook, filename) {
    return new Promise((resolve, reject) => {
        // the interface wasn't clearly documented, but this reasonable guess worked...
        XLSX.writeFileAsync(filename, workbook, (error, result) => {
            (error) ? reject(error) : resolve(result);
        })
    })
}
let promises = data.map((array, i) => {
    let ws = XLSX.utils.json_to_sheet(array, {header: finalHeaders});
    let wb = XLSX.utils.book_new()
    XLSX.utils.book_append_sheet(wb, ws, "SheetJS")
    let exportFileName = `workbook_${i}.xls`;
    return writeFileQ(wb, exportFileName)
});

Promise.all(promises).then(result => console.log(result)).catch(error => console.log(error));
Running this async code, I found that it produced the same expected results and did so asynchronously.
So your original loop looks right, and should work synchronously. The fact that you aren't getting expected results must be caused by something apart from timing (or maybe some timing issue induced by react?).
In any event, if you do want to use the async approach, which I highly recommend, I've shown how to do that (but I worry that might not fully solve the problem unless you sort out what's happening with your first attempt).
XLSX.writeFileAsync does have a callback with the following syntax.
xlsx.writeFileAsync(workbookName, workbook, (err) => {
    // It's a callback
});
But this will handle writing only one file asynchronously.
Your case is typical: if you want to do a series of things in which each item is asynchronous, then you should not just use iterative methods like loops/map/forEach.
One library I would suggest for this is 'async'.
The 'async.parallel' function takes an array of functions that execute asynchronously and calls its callback after all of them have finished.
https://caolan.github.io/async/docs.html#parallel
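A rough sketch of that suggestion (assuming the same data, finalHeaders and workbook-building code shown in the earlier answer; the async package has to be installed separately):
const async = require('async');
const XLSX = require('xlsx');

// one task per workbook; each task calls its callback when writeFileAsync finishes
const tasks = data.map((array, i) => (callback) => {
    const ws = XLSX.utils.json_to_sheet(array, { header: finalHeaders });
    const wb = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(wb, ws, "SheetJS");
    XLSX.writeFileAsync(`workbook_${i}.xls`, wb, callback);
});

async.parallel(tasks, (err) => {
    if (err) console.log(err);
    else console.log('Created multiple files successfully');
});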
If the concern is using the library asynchronously so as not to block the server, you should know that this library's implementation appears to be synchronous, and you should check out the library's server demos README, since it has several proposals for working around this problem: https://github.com/SheetJS/sheetjs/tree/master/demos/server

How to get complete chunk of data using split from file system?

I have a search feature implemented on fs, so when I receive an input string from the client I split the file's data on each line. But, as you can see in server.log, if I pull data line by line
I miss data from any chunk that spans multiple lines. For example, the first event is two lines, so a line-based search would only return [2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.actorRouter|Adding event to queue: { queue: 'd-email',
and it would miss the second line of that event. How can I get the complete data, maybe based on the time variable?
searchService.js
async.eachSeries(filesData.logFiles, function(logfile, done) {
    // read file
    console.log('SearchEnv in eachSeries', filesData.searchEnv);
    fs.createReadStream('./logs/' + filesData.searchEnv + '/' + logfile.filename)
        .pipe(split())
        .on('data', function(line) {
            if (line.toLowerCase().indexOf(searchStr.toLowerCase()) != -1) parseLog(line, prevLine);
            else prevLine = line;
        });

    function parseLog(line, prev) {
        // Very rudimentary check...
        if (line.indexOf('|') === -1) line = prev + line;
        // Parse as you were doing
        var messageDateInfo = line.split('|')[0].replace(/[\[\]']+/g, '');
        console.log('1st message date is', messageDateInfo)
        messageDateInfo = new Date(messageDateInfo).getTime();
        searchStartDate = new Date(searchStartDate).getTime();
        searchEndDate = new Date(searchEndDate).getTime();
        console.log('message date is', messageDateInfo)
        console.log('start date is ', messageDateInfo - searchStartDate);
        console.log('end date is ', searchEndDate - messageDateInfo);
        if (messageDateInfo - searchStartDate > 0 && searchEndDate - messageDateInfo > 0) {
            // console.log("message date is within this time range");
            results.push({
                filename: logfile.filename,
                value: line
            });
        }
    }
    done();
}, function(err) {
    if (err) {
        console.log('error', err);
    }
    // wrong: results.map(result, function (result){
    results.map(function(result) {
        console.log('results');
    });
    // send back results
    callback(results);
    results = [];
    logFiles = null;
});
}
server.log
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.actorRouter|Adding event to queue: { queue: 'd-email',
msgId: '7eec01e9-6395-4fee-b44f-f09a40e56978' }
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.templateActor|Filter match for actor/rule (d-email/email_service) with msgId: 7eec01e9-6395-4fee-b44f-f09a40e56978
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: 7eec01e9-6395-4fee-b44f-f09a40e56978|mailDispatcher|Received mail event. msgId=7eec01e9-6395-4fee-b44f-f09a40e56978
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|mailDispatcher|Mail event with msgId 7eec01e9-6395-4fee-b44f-f09a40e56978 successful: 3 messages delivered
[2017-03-22T20:25:05Z]|zldv6658|verbose|bmid: n/a|routes.event|Received Event from IP (::ffff:130.9.137.139): 74609753-143b-4e06-845c-9a5721575c19
{"event":{"header":{"eventSource":"AOTSvTM","timestamp":1481966987000,"eventType":"http://aotsvtm.eventing.att.com/SendEscalationsEvent/V1","entityId":"ENTITYID_1"}
You can use the split module (as I demonstrated in my other answer to your very similar question) together with the fs module.
fs.createReadStream(file)
    .pipe(split())
    .on('data', function (line) {
        // each chunk is now a separate line!
    });
See the docs: https://www.npmjs.com/package/split
If your log actually has multiline events, you could just keep the previous line(s) in memory while parsing. Also, don't just load the whole thing at once in memory. Use streams to reduce the strain on your machine.
let prevLine;

fs.createReadStream(file)
    .pipe(split())
    .on('data', function (line) {
        if (line.toLowerCase().indexOf(searchStr.toLowerCase()) != -1) parseLog(line, prevLine);
        else prevLine = line;
    });

function parseLog(line, prev) {
    // Very rudimentary check...
    if (line.indexOf('|') === -1) line = prev + line;
    // Parse as you were doing
}
As a rule of thumb for the future, log files are much easier to manage when built with single-line JSON.
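For illustration (not from the original answer, reusing the file variable from the snippets above): with one JSON object per log line, each chunk emitted by split() can be parsed directly, and multi-line events never occur.
const fs = require('fs');
const split = require('split');

fs.createReadStream(file)
    .pipe(split())
    .on('data', function (line) {
        if (!line) return; // skip the empty trailing line
        const entry = JSON.parse(line); // every field of the event is now directly accessible on `entry`
    });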
