read CAR file using js-car - javascript

I have a CAR file object in JavaScript and want to read it using js-car (GitHub), but I keep getting an "unexpected end of file" error. Here is the code I am trying:
let arrayBuffer = await files[0].arrayBuffer();
let bytes = new Uint8Array(arrayBuffer);
const reader = await CarReader.fromBytes(bytes)   // throws error here
const indexer = await CarIndexer.fromBytes(bytes) // throws error here
I also tried this:
let str = await files[0].stream()
const reader = await CarReader.fromIterable(str) // throws error here
Neither of them works. However, with the same file this Node.js code works:
const inStream = fs.createReadStream('test.car')
const reader = await CarReader.fromIterable(inStream)
I checked and I know that CarReader.fromBytes needs a Uint8Array, and I am sure files[0] is not null. Does anyone know what I am missing here?
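For reference, a minimal sketch of the fromBytes path in the browser, assuming @ipld/car's CarReader and that files[0] really holds a complete CAR archive (an "unexpected end of data" style error usually means the bytes passed in are truncated):

import { CarReader } from '@ipld/car';

async function readCarFile(file) {
  // Read the File's full contents and wrap that same buffer in a Uint8Array
  const buffer = await file.arrayBuffer();
  const bytes = new Uint8Array(buffer);
  const reader = await CarReader.fromBytes(bytes);
  return reader.getRoots();
}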

For people who might face a similar issue in the future, this is my solution:
I used res.body directly, converted it to an async iterator, and read it with fromIterable:
async function* streamAsyncIterator(stream) {
  // Get a lock on the stream
  const reader = stream.getReader();
  try {
    while (true) {
      // Read from the stream
      const { done, value } = await reader.read();
      // Exit if we're done
      if (done) return;
      // Else yield the chunk
      yield value;
    }
  } finally {
    reader.releaseLock();
  }
}
const info = await w3StorageClient.status(response)
if (info) {
  // Fetch and verify files from web3.storage
  const res = await w3StorageClient.get(response);
  const reader = await CarReader.fromIterable(streamAsyncIterator(res.body))
  // read the list of roots from the header
  const roots = await reader.getRoots()
  // retrieve a block, as a { cid: CID, bytes: Uint8Array } pair, from the archive
  const got = await reader.get(roots[0])
  // also possible: for await (const { cid, bytes } of CarIterator.fromIterable(inStream)) { ... }
  let decoded = cbor.decode(got.bytes)
  console.log('Retrieved [%s] from example.car with CID [%s]',
    decoded,
    roots[0].toString())
}
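As a side note, in runtimes where ReadableStream is already async-iterable (for example Node.js's web streams and recent Firefox/Chromium versions), the wrapper above should not be needed and the body can be passed straight through; a hedged sketch:

const reader = await CarReader.fromIterable(res.body); // assumes res.body supports Symbol.asyncIterator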

Related

How to wait till I get the secret values from Keyvault in Node JS?

I am fairly new to JavaScript and I understand that it executes asynchronously. I tried using a callback to fetch the secret values and then execute the next block of code, but it is not waiting.
This is the function that fetches the Key Vault secret values:
function getsecret_values(client, secret_name, callback) {
  let val = []
  for (let i = 0; i < secret_name.length; i++) {
    client.getSecret(secret_name[i]).then((latestSecret) => {
      val[i] = latestSecret.value;
    })
  }
  callback(val)
}
I am calling the getsecret_values function from the main block:
let vaultName = result.database;
const url = `https://${vaultName}.vault.azure.net`;
const credential = new ClientSecretCredential(result.host, result.user, result.password);
const client = new SecretClient(url, credential);
let secret_values = []
getsecret_values(client, secrets, function(result) {
  secret_values = result
  console.log(secret_values)
});
console.log(secret_values)
// next code block
Both console.log calls print an empty array.
I want my code to wait until the secret values are fetched and put into the secret_values array, and only then proceed to the next block of code. How do I achieve this?
The easiest way is to use the async/await pattern, which uses promises under the hood. Trying not to change your code too much:
async function getsecret_values(client, secret_name) {
  let val = []
  for (let i = 0; i < secret_name.length; i++) {
    const latestSecret = await client.getSecret(secret_name[i])
    val[i] = latestSecret.value;
  }
  return val
}
In your main block:
getsecret_values(client, secrets).then(function(result) {
  secret_values = result
  console.log(secret_values)
});
console.log(secret_values) // will still be an empty array, as the then callback has not been executed yet
My approach would be:
async function getsecret_values(client, secret_name) {
  let val = []
  for (let i = 0; i < secret_name.length; i++) {
    const latestSecret = await client.getSecret(secret_name[i])
    val[i] = latestSecret.value;
  }
  return val
}

// main:
async function main() {
  let vaultName = result.database;
  const url = `https://${vaultName}.vault.azure.net`;
  const credential = new ClientSecretCredential(result.host, result.user, result.password);
  const client = new SecretClient(url, credential);
  const secret_values = await getsecret_values(client, secrets)
  console.log(secret_values)
}
main()
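If the secrets are independent of one another, they can also be fetched in parallel instead of one at a time. A sketch, assuming the same @azure/keyvault-secrets client as above:

async function getsecret_values(client, secret_names) {
  // Start all getSecret requests at once, then wait for every one of them
  const secrets = await Promise.all(secret_names.map((name) => client.getSecret(name)));
  return secrets.map((secret) => secret.value);
}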

Fetch text from url then stringify it

So I have just started to learn JavaScript today, and I'm trying to fetch text from a URL, then split it and stringify each entry as JSON.
Then I'm trying to pick a random proxy from the stringified JSON and log it in my self-invoking function, but it logs as undefined and I can't work out what I'm doing wrong. I was hoping someone could tell me.
My code:
const doFetch = async () => {
  try {
    let proxies = [];
    let socks = await fetch("https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt");
    let response = await socks.text();
    let list = response.toString().split('\n');
    for (let i = 0; i < list.length; i++) {
      let splitText = list[i].split(':');
      proxies.push(JSON.stringify({'type': 'socks4', 'ip': splitText[0], 'port': splitText[1]}));
    }
    return proxies
  } catch (error) {
    console.log('fetch error:', error.message);
  }
}
(async () => {
  let proxies = await doFetch();
  let proxie = proxies[Math.floor(Math.random() * proxies.length)];
  console.log(proxie); // logs fine
  console.log(proxie.type, proxie.ip, proxie.port); // logs as undefined
})();
proxie.type, proxie.ip and proxie.port are undefined because proxie is a string, not an object. Rewrite your code this way:
(async () => {
  let proxies = await doFetch();
  let proxie = proxies[Math.floor(Math.random() * proxies.length)];
  console.log(proxie); // logs fine
  let proxieObj = JSON.parse(proxie);
  console.log(proxieObj.type, proxieObj.ip, proxieObj.port);
})();
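Alternatively, you could skip JSON.stringify altogether and keep plain objects in the array, so there is nothing to parse later. A sketch of the loop under that assumption:

for (const line of list) {
  const [ip, port] = line.split(':');
  proxies.push({ type: 'socks4', ip, port });
}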

Node does not wait for loop to complete

I have tried async/await and using promises, but I cannot get this code to execute in order.
The code iterates through a folder of documents, parses each one, and saves the result to an array, then saves the array to a .json file.
However, the code continues to run before the loop finishes, which means it writes an empty file because the parsing has not completed.
Turning it into an async function and awaiting it does not solve the issue, nor does returning a promise and using .then() to execute the final code. It still runs straight away.
const fs = require('fs');
const cheerio = require('cheerio');
const mammoth = require("mammoth");

const articleFolder = './Articles/';
var allArticles = [];

const extractDocuments = async () => {
  let files = fs.readdirSync(articleFolder);
  for (const file of files) {
    await convertToHTML(file);
  }
  completedExtraction();
}

async function convertToHTML(filename) {
  var filepath = articleFolder + filename;
  mammoth.convertToHtml({path: filepath})
    .then(function(result) {
      let html = result.value;        // The generated HTML
      let messages = result.messages; // Any messages, such as warnings during conversion
      updateArticles(filename, html);
    })
    .done();
}

function updateArticles(filename, html) {
  var article = {
    file: filename,
    content: parseHTML(html)
  }
  allArticles.push(article);
}

function parseHTML(html) {
  let $ = cheerio.load(html);
  let title = $('h3').first().text();
  let date = $('h3:eq(1)').text();
  $('h3').slice(0, 2).remove()
  let content = $('body').html();
  let parsedArticle = {
    title: title,
    date: date,
    content: content
  }
  return parsedArticle;
}

function completedExtraction() {
  fs.writeFile('./articles.json', JSON.stringify(allArticles), (err) => {
    if (err) throw err;
    console.log('File Written.');
  });
  console.log('Finished.');
}

extractDocuments();
To solve this with map I would do something similar to:
const extractDocuments = async () => {
  let files = fs.readdirSync(articleFolder);
  const articlePromises = files.map(async file => {
    const html = await convertToHTML(file)
    return {
      filename: file,
      html: html
    }
  })
  allArticles = await Promise.all(articlePromises)
  completedExtraction();
}
async function convertToHTML(filename) {
  var filepath = articleFolder + filename;
  // Return the promise chain so the caller can await the generated HTML
  return mammoth.convertToHtml({path: filepath})
    .then(function(result) {
      let html = result.value;        // The generated HTML
      let messages = result.messages; // Any messages, such as warnings during conversion
      return html
    });
}
So to wrap up: extractDocuments uses a map to iterate over the files and create the articles, and convertToHTML only returns the generated HTML and nothing more. We no longer need updateArticles, since that work is now handled inside extractDocuments.
Hope this helps a bit and points you in the right direction.
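If you would rather keep the original for...of loop, the core problem is that convertToHTML never returns the promise it creates, so await has nothing to wait for. A minimal sketch of that fix, keeping the rest of the original code unchanged:

async function convertToHTML(filename) {
  const filepath = articleFolder + filename;
  // Awaiting (and thereby returning) the mammoth promise is what makes
  // `await convertToHTML(file)` in the loop actually wait
  const result = await mammoth.convertToHtml({ path: filepath });
  updateArticles(filename, result.value);
}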

Making any reference to Nodejs' process.argv causes errors in unexpected place (reading a file)

I am writing code that generates a very large JSON object, saves it to a file, then loads the file and inserts the data into a Mongo collection. I want to pass a string from the command line when calling the script, which I use to set the file name as well as the collection name. I call it like so: node --max-old-space-size=8192 data_generator.js foo 1000000.
The code fails with error ENOENT: no such file or directory, open 'foo.json' on the third line of the function gen_collection() where I set the variable data. This error does not appear when a file foo.json already exists, even if it is empty. Before it fails, the code successfully creates a file foo.json but it contains only an empty array [].
The code fails with this same exact error when I include any reference to process.argv. This includes when I try to set any variable to a value from the process.argv array. The code works when I set the variables as const fname = "foo" and const size = 0. However, even if the only reference I have to process.argv is in a console.log, i.e. adding console.log(process.argv[2]) to main(), it fails with the exact same error as above.
Here is the code I am trying to run:
const { MongoClient } = require("mongodb");
const fs = require('fs');
const bjson = require('big-json');

async function main() {
  const uri = "my db uri";
  const client = new MongoClient(uri);
  const fname = process.argv[2];
  const size = parseInt(process.argv[3]);
  // const fname = 'small'
  // const size = 1
  try {
    await client.connect({ useUnifiedTopology: true });
    await write_data_to_disk(fname, size);
    await gen_collection(client, fname);
  } catch (e) {
    console.error(e);
  } finally {
    await client.close();
  }
};

// generate data as a JSON array and write it to a local file
async function write_data_to_disk(fname, size) {
  let arr = [];
  for (let i = 0; i < size; i++) {
    let doc = gen_document();
    arr.push(doc);
  }
  const strStream = bjson.createStringifyStream({
    body: arr
  })
  let logger = fs.createWriteStream(`${fname}.json`);
  strStream.on('data', (d) => {
    logger.write(d);
  })
};

async function gen_collection(client, fname) {
  let db = client.db('test');
  let collection = db.collection(fname);
  let data = JSON.parse(fs.readFileSync(`${fname}.json`, 'utf8')); // ERROR APPEARS ON THIS LINE
  bulkUpdateOps = [];
  data.forEach((doc) => {
    bulkUpdateOps.push({"insertOne": {"document": doc}});
    if (bulkUpdateOps.length === 1000) {
      collection.bulkWrite(bulkUpdateOps);
      bulkUpdateOps = [];
    }
  })
  if (bulkUpdateOps.length > 0) {
    collection.bulkWrite(bulkUpdateOps);
  }
};

function gen_document() {
  // returns a json object
};
You're doing
await write_data_to_disk(...)
but that function doesn't return a promise that is connected to when it's done. So, you're trying to read the resulting file BEFORE it has been created or before it has valid content in it, and thus the ENOENT error, because the file doesn't yet exist when you try to read from it in the following function.
Write streams do not play nicely with promises unless you wrap them in your own promise that resolves only when you are completely done writing to the stream and the file has been closed.
Also, you probably want to just .pipe() strStream to the logger stream. That is much easier, and you can then just monitor when that pipe() operation is done to resolve the promise you wrap around the whole operation.
You can promisify write_data_to_disk() like this:
// generate data as a JSON array and write it to a local file
function write_data_to_disk(fname, size) {
  return new Promise((resolve, reject) => {
    const arr = [];
    for (let i = 0; i < size; i++) {
      let doc = gen_document();
      arr.push(doc);
    }
    const strStream = bjson.createStringifyStream({ body: arr });
    const dest = fs.createWriteStream(`${fname}.json`, { emitClose: true });
    // monitor for completion and errors
    dest.on('error', reject).on('close', resolve);
    strStream.on('error', reject);
    // pipe all the content from strStream to the dest writeStream
    strStream.pipe(dest);
  });
}
Since this returns a promise that is truly tied to when the write operation is done, you can then use await write_data_to_disk(...).
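On Node.js 15+, the hand-rolled Promise wrapper can also be replaced by pipeline from stream/promises, which resolves once everything has been flushed and the file is closed. A sketch under that assumption:

const { pipeline } = require('stream/promises');

async function write_data_to_disk(fname, size) {
  const arr = [];
  for (let i = 0; i < size; i++) {
    arr.push(gen_document());
  }
  const strStream = bjson.createStringifyStream({ body: arr });
  // Resolves when the destination write stream has finished
  await pipeline(strStream, fs.createWriteStream(`${fname}.json`));
}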

How to use ES8 async/await with streams?

https://stackoverflow.com/a/18658613/779159 has an example of how to calculate the hash of a file using the built-in crypto library and streams.
var fs = require('fs');
var crypto = require('crypto');

// the file you want to get the hash of
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
hash.setEncoding('hex');

fd.on('end', function() {
  hash.end();
  console.log(hash.read()); // the desired sha1sum
});

// read all the file and pipe it (write it) to the hash object
fd.pipe(hash);
But is it possible to convert this to ES8 async/await instead of using the callback seen above, while still keeping the efficiency of using streams?
The await keyword only works on promises, not on streams. There are ideas to make an extra stream-like data type that would get its own syntax, but those are highly experimental, if they exist at all, and I won't go into details.
Anyway, your callback is only waiting for the end of the stream, which is a perfect fit for a promise. You'd just have to wrap the stream:
var fd = fs.createReadStream('/some/file/name.txt');
var hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read all the file and pipe it (write it) to the hash object
fd.pipe(hash);

var end = new Promise(function(resolve, reject) {
  hash.on('end', () => resolve(hash.read()));
  fd.on('error', reject); // or something like that; might need to close `hash`
});
There is also a helper function to do just that in more recent versions of Node.js: pipeline from the stream/promises module:
import { pipeline } from 'node:stream/promises';

const fd = fs.createReadStream('/some/file/name.txt');
const hash = crypto.createHash('sha1');
hash.setEncoding('hex');
// read all the file, pipe it (write it) to the hash object, then read out the digest
const end = pipeline(fd, hash).then(() => hash.read());
Now you can await that promise:
(async function() {
  let sha1sum = await end;
  console.log(sha1sum);
}());
If you are using node version >= v10.0.0 then you can use stream.pipeline and util.promisify.
const fs = require('fs');
const crypto = require('crypto');
const util = require('util');
const stream = require('stream');

const pipeline = util.promisify(stream.pipeline);

const hash = crypto.createHash('sha1');
hash.setEncoding('hex');

async function run() {
  await pipeline(
    fs.createReadStream('/some/file/name.txt'),
    hash
  );
  console.log('Pipeline succeeded');
}

run().catch(console.error);
Node v15 now has a promisified pipeline in stream/promises.
This is the cleanest and most official way:
const fs = require('fs');
const zlib = require('zlib');
const { pipeline } = require('stream/promises');

async function run() {
  await pipeline(
    fs.createReadStream('archive.tar'),
    zlib.createGzip(),
    fs.createWriteStream('archive.tar.gz')
  );
  console.log('Pipeline succeeded.');
}

run().catch(console.error);
We should all appreciate how much work is done here:
Capture errors in all the streams.
Destroy unfinished streams when an error is raised.
Only return when the last writable stream is finished.
This pipe mechanism is one of the most powerful features Node.js has. Making it fully async is not easy. Now we have it.
Something like this works:
for (var res of fetchResponses) { // node-fetch package responses
  const dest = fs.createWriteStream(filePath, { flags: 'a' });
  totalBytes += Number(res.headers.get('content-length'));
  await new Promise((resolve, reject) => {
    res.body.pipe(dest);
    res.body.on("error", (err) => {
      reject(err);
    });
    dest.on("finish", function() {
      resolve();
    });
  });
}
2021 Update:
New example from Node documentation:
async function print(readable) {
  readable.setEncoding('utf8');
  let data = '';
  for await (const chunk of readable) {
    data += chunk;
  }
  console.log(data);
}
see https://nodejs.org/api/stream.html#stream_readable_symbol_asynciterator
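The same async-iteration idea also covers the original hashing example without any pipe at all. A sketch using only the built-in fs and crypto modules:

const fs = require('fs');
const crypto = require('crypto');

async function sha1sum(path) {
  const hash = crypto.createHash('sha1');
  // A Readable stream is an async iterable, so feed the hash chunk by chunk
  for await (const chunk of fs.createReadStream(path)) {
    hash.update(chunk);
  }
  return hash.digest('hex');
}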
I would comment, but don't have enough reputation.
A WORD OF CAUTION:
If you have an application that is passing streams around AND doing async/await, be VERY CAREFUL to connect ALL pipes before you await. You can end up with streams not containing what you thought they did. Here's a minimal example:
const { PassThrough } = require('stream');

async function main() {
  const initialStream = new PassThrough();

  const otherStream = new PassThrough();
  const data = [];
  otherStream.on('data', dat => data.push(dat));
  const resultOtherStreamPromise = new Promise(resolve =>
    otherStream.on('end', () => { resolve(Buffer.concat(data)) }));

  const yetAnotherStream = new PassThrough();
  const data2 = [];
  yetAnotherStream.on('data', dat => data2.push(dat));
  const resultYetAnotherStreamPromise = new Promise(resolve =>
    yetAnotherStream.on('end', () => { resolve(Buffer.concat(data2)) }));

  initialStream.pipe(otherStream);
  initialStream.write('some ');

  await Promise.resolve(); // Completely unrelated await

  initialStream.pipe(yetAnotherStream);
  initialStream.end('data');

  const [resultOtherStream, resultYetAnotherStream] = await Promise.all([
    resultOtherStreamPromise,
    resultYetAnotherStreamPromise,
  ]);

  console.log('other stream:', resultOtherStream.toString());          // other stream: some data
  console.log('yet another stream:', resultYetAnotherStream.toString()); // yet another stream: data
}

main();
I believe it will be helpful for someone:
async function readFile(filename) {
  let records = []
  return new Promise((resolve, reject) => {
    fs.createReadStream(filename)
      .on("data", (data) => {
        records.push(data);
      })
      .on("error", reject)
      .on("end", () => {
        resolve(records)
      });
  })
}
