Read PDF file with Node.js - javascript

I am trying to read a PDF file from a URL as follows:
const axios = require("axios");
const jsdom = require("jsdom");
const PdfReader = require('pdfreader').PdfReader;
const { JSDOM } = jsdom;

axios.get("https://url-to-pdf.pdf").then(function (result) {
    new PdfReader().parseBuffer(result.data, function (err, item) {
        if (err)
            console.log(err);
        else if (item.text)
            console.log(item.text);
    });
}).catch(function (err) {
});
It shows:

An error occurred while parsing the PDF: stream must have data
{
    parserError: 'An error occurred while parsing the PDF: stream must have data'
}

How do I solve this issue?

The key point is to ask for a responseType of arraybuffer, but then you have to transform the result into a Buffer.
// (This snippet runs inside an async function; `url` and `extract_pdf` are
// the answerer's own variable and helper module.)
const http = require('http');

try {
    var options = {
        method: 'get',
        url: url,
        headers: { 'User-Agent': 'PostmanRuntime/7.26.8' },
        timeout: 2000,
        responseEncoding: 'utf8',
        maxRedirects: 5,
        httpAgent: new http.Agent({ keepAlive: true }),
        responseType: 'arraybuffer' // raw bytes, not a decoded string
    }
    let response = await axios(options);
    console.log(response.status);
    console.log(response.headers['content-type']);
    if (response.headers['content-type'].indexOf('pdf') != -1) {
        console.log("pdf");
        console.log(typeof response.data);
        var buff = Buffer.alloc(0); // Buffer.alloc is not a constructor, so no `new`
        buff = Buffer.concat([buff, Buffer.from(response.data)]);
        let temp = await extract_pdf.readlines(buff).catch(function (o) { console.log(o); return; });
        console.log(temp);
    }
} catch (error) {
    if (error.response) {
        // handle HTTP errors here
    } else {
        console.log(error);
    }
}
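Applied to the original snippet, a minimal end-to-end sketch (assuming the same pdfreader package from the question; the URL is the question's placeholder) could look like this:
const axios = require("axios");
const { PdfReader } = require("pdfreader");

async function readPdfFromUrl(url) {
    // Ask for raw bytes; the default response type decodes the body as a string
    // and corrupts the PDF, which is what triggers "stream must have data".
    const response = await axios.get(url, { responseType: "arraybuffer" });
    // Normalize to a Node Buffer before handing it to pdfreader.
    const buffer = Buffer.from(response.data);
    new PdfReader().parseBuffer(buffer, (err, item) => {
        if (err) console.error(err);
        else if (!item) console.log("done"); // a falsy item signals end of file
        else if (item.text) console.log(item.text);
    });
}

readPdfFromUrl("https://url-to-pdf.pdf").catch(console.error);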

Related

Return a response's body when using the await/async operators

I'm trying to convert a function that returns a Promise to one that uses async/await (which returns an implicit Promise, I understand).
This function "wraps" a call to the https.request object, the key part being the resolve(body) that returns the response's body:
promise_it = (data) => {
    // throws FATAL
    if (!data) { throw new Error('data is missing'); }
    return new Promise((resolve, reject) => {
        let postData = JSON.stringify(data);
        let options = {
            hostname: 'httpbin.org',
            port: 443,
            path: '/post',
            method: 'POST',
            headers: {
                // 'Content-Type': 'application/json',
                // Accept: 'application/json'
                'Content-Type': 'application/x-www-form-urlencoded',
                'Content-Length': postData.length
            }
        };
        const https = require('https');
        let req = https.request(options, (res) => {
            let body = [];
            res.on('data', function (chunk) {
                body.push(chunk);
            });
            res.on('end', function () {
                try {
                    body = JSON.parse(Buffer.concat(body).toString());
                } catch (e) {
                    console.error(e.message);
                    reject(e);
                }
                resolve(body);
            });
        });
        req.on('error', (e) => {
            console.error(e);
            reject(e);
        });
        req.write(postData);
        req.end();
    });
};

try {
    data = { firstName: 'Donald', lastName: 'Duck' };
    const who = promise_it(data);
    who
        .then(r => console.info('INFO:', r))
        .catch(e => console.error('ERROR:', e));
} catch (error) {
    console.error('FATAL:', error);
}
The function works as expected.
I translated this into an async/await function:
async_it = async (data) => {
    if (!data) { throw new Error('data is missing'); }
    let postData = JSON.stringify(data);
    let options = {
        hostname: 'httpbin.org',
        port: 443,
        path: '/post',
        method: 'POST',
        headers: {
            // 'Content-Type': 'application/json',
            // Accept: 'application/json'
            'Content-Type': 'application/x-www-form-urlencoded',
            'Content-Length': postData.length
        }
    };
    const https = require('https');
    let req = https.request(options, (res) => {
        let body = [];
        res.on('data', function (chunk) {
            body.push(chunk);
        });
        res.on('end', function () {
            try {
                body = JSON.parse(Buffer.concat(body).toString());
                // how do i return the body?
            } catch (e) {
                console.error('HERE:', e.message);
                throw e;
            }
        });
    });
    req.on('error', (e) => {
        console.error('THERE:', e.message);
    });
    req.write(postData);
    req.end();
}; // note the semicolon: the IIFE below starts with '(' and would otherwise be parsed as a call

(async () => {
    try {
        data = { firstName: 'Donald', lastName: 'Duck' };
        const who = await async_it(data);
        console.info('INFO:', who);
    } catch (error) {
        console.error('FATAL:', error);
    }
})();
But I can't seem to find a way to return the body. What am I missing?
Unfortunately https.request uses the "last parameter callback" Node.js style, so it cannot be used as-is with async/await.
You could write a wrapper like the one in "nodejs - How to promisify http.request? reject got called two times", and then you can use it like:
try {
    const body = await httpsRequest(options, postData);
    return body;
} catch (e) {
    console.error('THERE:', e.message);
}
But the mentioned wrapper is not much shorter than your current one (the Promise version).
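For reference, a minimal sketch of such a wrapper (the name httpsRequest follows the snippet above; this is one possible shape, not the exact code from the linked answer):
const https = require('https');

// Promisified https.request: resolves with the parsed JSON body,
// rejects on request or parse errors, and settles exactly once.
function httpsRequest(options, postData) {
    return new Promise((resolve, reject) => {
        const req = https.request(options, (res) => {
            const body = [];
            res.on('data', (chunk) => body.push(chunk));
            res.on('end', () => {
                try {
                    resolve(JSON.parse(Buffer.concat(body).toString()));
                } catch (e) {
                    reject(e);
                }
            });
        });
        req.on('error', reject);
        if (postData) req.write(postData);
        req.end();
    });
}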

How to get multiple values out of resolve() and reject()?

I would like resolve() to return {valid_to: cert.valid_to, statusCode, statusMessage} and reject() to return {error: -1, statusCode, statusMessage}.
Question
How can I do that, when statusCode and statusMessage are in a different scope?
const https = require('https');

(async () => {
    const options = {
        hostname: "github.com",
        port: 443,
        path: '/',
        method: 'GET',
        timeout: 1000
    };
    options.agent = new https.Agent(options);

    let valid_to = await new Promise((resolve, reject) => {
        const req = https.request({
            ...options, checkServerIdentity: function (host, cert) {
                resolve(cert.valid_to);
            }
        }).on('error', error => {
            reject(-2);
        });
        req.on("timeout", () => {
            reject(-1);
        });
        req.on('response', response => {
            console.log(response.statusCode);
            console.log(response.statusMessage);
        });
        req.end();
    }).catch(error => {
        console.log(error);
        return -3;
    });
})();
I would do something like this.
Edit: You need to attach a res.on('data') handler in the https.request callback. Otherwise, the timeout event will always fire because there is no activity on the stream.
You can resolve in res.on("data") or res.on("end"); which one is up to your use case.
res is an IncomingMessage object, created by http.ClientRequest and passed as the first argument to the 'request' and 'response' events respectively.
req is a reference to the original http.ClientRequest.
Both streams can emit events, and you may handle them separately.
Also, when you reject the Promise, you cannot get the statusCode and statusMessage from req, because the request errored and the 'response' event will not be emitted. So you need to supply the statusCode and statusMessage yourself.
const https = require("https");
// {valid_to: cert.valid_to, statusCode, statusMessage}
// {error: -1, statusCode, statusMessage}.
(async () => {
const options = {
hostname: "githubasdfa.com",
port: 443,
path: "/",
method: "GET",
timeout: 1000,
};
options.agent = new https.Agent(options);
try {
const response = await new Promise((resolve, reject) => {
let valid_to;
let statusCode;
let statusMessage;
const req = https
.request(
{
...options,
checkServerIdentity: function (host, cert) {
valid_to = cert.valid_to;
},
},
res => {
res.on("data", chunk => {
resolve({
valid_to,
statusCode,
statusMessage,
});
});
res.on("end", () => {
console.log("No more data in response.");
});
}
)
.on("error", err => {
console.log(err);
reject({
error: -2,
statusCode: "custom code",
statusMessage: "unhandled error",
});
})
.on("timeout", chunk => {
reject({
error: -1,
statusCode: "custom code",
statusMessage: "unhandled error",
});
})
.on("response", response => {
statusCode = response.statusCode;
statusMessage = response.statusMessage;
})
.end();
});
console.log(response);
} catch (error) {
console.log(error);
}
})();
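Note that, as written, the promise resolves on the first 'data' chunk and the body itself is discarded; if you also need the response body, accumulate the chunks and resolve inside res.on("end") instead, as mentioned above.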

How to upload file to Dropbox with axios

I need to upload a file to Dropbox with axios. Here is my code:
const uploadToExternalService = async function uploadToExternalService(token, content) {
    try {
        let res = await axios({
            url: 'https://api-content.dropbox.com/1/files_put/auto/' + 'file_name',
            method: 'put',
            // timeout: 8000,
            headers: {
                Authorization: 'Bearer ' + token,
                'Content-Type': 'text/plain',
                body: content
            }
        })
        if (res.status == 200) {
            // test for status you want, etc
            console.log(res.status)
        }
        if (res.status == 400) {
            console.log(res)
        }
        return res.data
    } catch (err) {
        console.error(err);
    }
}
uploadToExternalService(SECRET_KEY, req.file).then(res => console.log(res));
I'm getting the error Request failed with status code 400.
Eventually I managed to find a solution using dropbox-v2-api. Hopefully this answer will provide a helpful code example for other community members, although the solution was implemented without axios.
import dropboxV2Api from "dropbox-v2-api";
import fs from "fs";

// authentication
const dropbox = dropboxV2Api.authenticate({
    token: DROPBOX_SECRET_KEY
});

// configuring parameters
const params = Object.freeze({
    resource: 'files/upload',
    parameters: {
        path: '/file_name.docx'
    },
    readStream: fs.createReadStream(filePath)
    // filePath: path to the local file that we want to upload to Dropbox
});

let dropboxPromise = new Promise(function (resolve, reject) {
    dropbox(params, function (err, result) {
        if (err) {
            reject(err);
        } else {
            resolve(result);
        }
    });
});

await dropboxPromise.then(function (resultObj) {
    console.log("fileUpload_OK")
}).catch(function (err) {
    console.log(err.message)
});
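(The import syntax and the top-level await assume this runs as an ES module on a Node version that supports top-level await, or inside an enclosing async function.)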
You are using Dropbox v1 APIs, which are officially retired. Why not use v2?
For your problem, try sending the body outside of headers. (Note: axios takes the request body from the data field; an unknown body field is ignored, so use data here.)
headers: {
    Authorization: 'Bearer ' + token,
    'Content-Type': 'text/plain'
},
data: content
corrected code:
const uploadToExternalService = async function uploadToExternalService(token, content) {
    try {
        let res = await axios({
            url: 'https://api-content.dropbox.com/1/files_put/auto/' + 'file_name',
            method: 'put',
            // timeout: 8000,
            headers: {
                Authorization: 'Bearer ' + token,
                'Content-Type': 'text/plain'
            },
            data: content // axios sends the request body from `data`
        })
        if (res.status == 200) {
            // test for status you want, etc
            console.log(res.status)
        }
        if (res.status == 400) {
            console.log(res)
        }
        return res.data
    } catch (err) {
        console.error(err);
    }
}
uploadToExternalService(SECRET_KEY, req.file).then(res => console.log(res));
The Issue
The example cURL from the Dropbox documentation is:
curl -X POST https://content.dropboxapi.com/2/files/upload \
    --header "Authorization: Bearer " \
    --header "Dropbox-API-Arg: {\"path\": \"/Homework/math/Matrices.txt\",\"mode\": \"add\",\"autorename\": true,\"mute\": false,\"strict_conflict\": false}" \
    --header "Content-Type: application/octet-stream" \
    --data-binary @local_file.txt
--data-binary means the /upload endpoint requires the file to be sent as binary data. In Axios, it seems the only way to do this is with the FormData() interface.
But, using the FormData() interface requires using Content-Type: multipart/form-data. The /upload endpoint requires Content-Type: application/octet-stream.
Therefore, I do not think uploading using Axios is possible in this situation.
The Alternative Solution
dropbox-v2-api is not an official API for Dropbox, and there's no explanation I could find for uploading files larger than 150 MB. So instead I would use dropbox-sdk-js. The example they give for /upload is:
function uploadFile() {
    const UPLOAD_FILE_SIZE_LIMIT = 150 * 1024 * 1024;
    var ACCESS_TOKEN = document.getElementById('access-token').value;
    var dbx = new Dropbox.Dropbox({ accessToken: ACCESS_TOKEN });
    var fileInput = document.getElementById('file-upload');
    var file = fileInput.files[0];
    if (file.size < UPLOAD_FILE_SIZE_LIMIT) { // File is smaller than 150 MB - use filesUpload API
        dbx.filesUpload({ path: '/' + file.name, contents: file })
            .then(function (response) {
                var results = document.getElementById('results');
                var br = document.createElement("br");
                results.appendChild(document.createTextNode('File uploaded!'));
                results.appendChild(br);
                console.log(response);
            })
            .catch(function (error) {
                console.error(error);
            });
    } else { // File is bigger than 150 MB - use filesUploadSession* API
        const maxBlob = 8 * 1000 * 1000; // 8 MB - Dropbox JavaScript API suggested max file / chunk size
        var workItems = [];
        var offset = 0;
        while (offset < file.size) {
            var chunkSize = Math.min(maxBlob, file.size - offset);
            workItems.push(file.slice(offset, offset + chunkSize));
            offset += chunkSize;
        }
        const task = workItems.reduce((acc, blob, idx, items) => {
            if (idx == 0) {
                // Starting multipart upload of file
                return acc.then(function () {
                    return dbx.filesUploadSessionStart({ close: false, contents: blob })
                        .then(response => response.session_id)
                });
            } else if (idx < items.length - 1) {
                // Append part to the upload session
                return acc.then(function (sessionId) {
                    var cursor = { session_id: sessionId, offset: idx * maxBlob };
                    return dbx.filesUploadSessionAppendV2({ cursor: cursor, close: false, contents: blob }).then(() => sessionId);
                });
            } else {
                // Last chunk of data, close session
                return acc.then(function (sessionId) {
                    var cursor = { session_id: sessionId, offset: file.size - blob.size };
                    var commit = { path: '/' + file.name, mode: 'add', autorename: true, mute: false };
                    return dbx.filesUploadSessionFinish({ cursor: cursor, commit: commit, contents: blob });
                });
            }
        }, Promise.resolve());
        task.then(function (result) {
            var results = document.getElementById('results');
            results.appendChild(document.createTextNode('File uploaded!'));
        }).catch(function (error) {
            console.error(error);
        });
    }
    return false;
}
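The reduce chain runs the chunks strictly in sequence: each step waits on the accumulated promise, threads the session_id through to the next step, and the final chunk commits the session with the target path. The cursor's offset must equal the number of bytes already uploaded, which is why the last chunk uses file.size - blob.size.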

Trying to call a function and returning the data in a post method

I am new to Node.js, so I am finding it difficult to achieve this task: calling an async function and returning its value in the response for a POST method.
// (assumed imports; the chromium.* usage below matches the chrome-aws-lambda package)
const fs = require("fs");
const path = require("path");
const handlebars = require("handlebars");
const chromium = require("chrome-aws-lambda");
const AWS = require("aws-sdk");
const s3 = new AWS.S3();

module.exports.pdf = async (event, context, callBack) => {
    const data = {
        title: " Pdf generation using puppeteer",
        text: " Handlebar is awesome!"
    }
    const executablePath = event.isOffline ?
        "./node_modules/puppeteer/.local-chromium/mac-674921/chrome-mac/Chromium.app/Contents/MacOS/Chromium" :
        await chromium.executablePath;
    const file = fs.readFileSync(path.resolve(__dirname, "template.hbs"), 'utf8')
    const template = handlebars.compile(file)
    const html = template(data)
    let browser = null;
    try {
        browser = await chromium.puppeteer.launch({
            args: chromium.args,
            defaultViewport: chromium.defaultViewport,
            executablePath,
            headless: chromium.headless
        });
        const page = await browser.newPage();
        await page.setContent(html); // setContent returns a promise, so await it
        const pdf = await page.screenshot({ encoding: "base64" })
        // const pdf = await page.pdf({
        //     format: "A4",
        //     printBackground: true,
        //     margin: { top: "1cm", right: "1cm", bottom: "1cm", left: "1cm" }
        // });
        // TODO: Response with PDF (or error if something went wrong)
        const response = {
            headers: {
                "Content-type": "application/json",
                "content-disposition": "attachment; filename=test.pdf"
            },
            statusCode: 200,
            body: pdf.toString("base64"),
            isBase64Encoded: true
        };
        const output_filename = 'pdf-demo.json';
        const s3Params = {
            Bucket: "pdf-demo-screenshot",
            Key: `public/pdfs/${output_filename}`,
            Body: pdf,
            ContentType: "application/json",
            ServerSideEncryption: "AES256"
        };
        s3.putObject(s3Params, err => {
            if (err) {
                console.log("err", err);
                return callBack(null, { error: err });
            }
        });
        context.succeed(response);
    } catch (error) {
        return context.fail(error);
    } finally {
        if (browser !== null) {
            await browser.close();
        }
    }
};
I have the above function, and a global variable let img_arr = []. In the POST method below, I want to call the above function, push the base64 data into the array, and return the array in the response.
app.use('/screenshot', function (req, res) {
    res.send(img_arr)
})
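One way to approach this, as a rough sketch: refactor the Lambda-style handler into a plain async function that returns the base64 string, then await it inside the route. The names generatePdf and img_arr below are illustrative, not from an actual API:
const express = require('express');
const app = express();
let img_arr = [];

app.post('/screenshot', async function (req, res) {
    try {
        // generatePdf is a hypothetical refactor of the handler above that
        // resolves with the base64 screenshot instead of calling context.succeed.
        const base64 = await generatePdf();
        img_arr.push(base64);
        res.send(img_arr);
    } catch (err) {
        res.status(500).send({ error: err.message });
    }
});

app.listen(3000);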

Amazon S3 Remote File Upload with Axios

I am trying to write a function that would:
Take a remote URL as a parameter,
Get the file using axios
Upload the stream to amazon s3
And finally, return the uploaded url
I found help here on stackoverflow. So far, I have this:
/*
 * Method to pipe the stream
 */
const uploadFromStream = (file_name, content_type) => {
    const pass = new stream.PassThrough();
    const obj_key = generateObjKey(file_name);
    const params = { Bucket: config.bucket, ACL: config.acl, Key: obj_key, ContentType: content_type, Body: pass };
    s3.upload(params, function (err, data) {
        if (!err) {
            return data.Location;
        } else {
            console.log(err, data);
        }
    });
    return pass;
}
/*
 * Method to upload remote file to s3
 */
const uploadRemoteFileToS3 = async (remoteAddr) => {
    axios({
        method: 'get',
        url: remoteAddr,
        responseType: 'stream'
    }).then((response) => {
        if (response.status === 200) {
            const file_name = remoteAddr.substring(remoteAddr.lastIndexOf('/') + 1);
            const content_type = response.headers['content-type'];
            response.data.pipe(uploadFromStream(file_name, content_type));
        }
    });
}
But uploadRemoteFileToS3 does not return anything (because it's an asynchronous function). How can I get the uploaded URL?
UPDATE
I have further improved upon the code and wrote a class. Here is what I have now:
const config = require('../config.json');
const stream = require('stream');
const axios = require('axios');
const AWS = require('aws-sdk');

class S3RemoteUploader {
    constructor(remoteAddr) {
        this.remoteAddr = remoteAddr;
        this.stream = stream;
        this.axios = axios;
        this.config = config;
        this.AWS = AWS;
        this.AWS.config.update({
            accessKeyId: this.config.api_key,
            secretAccessKey: this.config.api_secret
        });
        this.spacesEndpoint = new this.AWS.Endpoint(this.config.endpoint);
        this.s3 = new this.AWS.S3({ endpoint: this.spacesEndpoint });
        this.file_name = this.remoteAddr.substring(this.remoteAddr.lastIndexOf('/') + 1);
        this.obj_key = this.config.subfolder + '/' + this.file_name;
        this.content_type = 'application/octet-stream';
        this.uploadStream();
    }
    uploadStream() {
        const pass = new this.stream.PassThrough();
        this.promise = this.s3.upload({
            Bucket: this.config.bucket,
            Key: this.obj_key,
            ACL: this.config.acl,
            Body: pass,
            ContentType: this.content_type
        }).promise();
        return pass;
    }
    initiateAxiosCall() {
        axios({
            method: 'get',
            url: this.remoteAddr,
            responseType: 'stream'
        }).then((response) => {
            if (response.status === 200) {
                this.content_type = response.headers['content-type'];
                response.data.pipe(this.uploadStream());
            }
        });
    }
    dispatch() {
        this.initiateAxiosCall();
    }
    async finish() {
        //console.log(this.promise); /* return Promise { Pending } */
        return this.promise.then((r) => {
            console.log(r.Location);
            return r.Location;
        }).catch((e) => {
            console.log(e);
        });
    }
    run() {
        this.dispatch();
        this.finish();
    }
}
But I still have no clue how to catch the result when the promise is resolved. So far, I have tried these:
testUpload = new S3RemoteUploader('https://avatars2.githubusercontent.com/u/41177');
testUpload.run();
//console.log(testUpload.promise); /* Returns Promise { Pending } */
testUpload.promise.then(r => console.log); // does nothing
But none of the above works. I have a feeling I am missing something very subtle. Any clue, anyone?
After the upload you can call the getSignedUrl function in the S3 SDK to get the URL, where you can also specify the expiry of the URL. You need to pass the object's key to that function.
To generate a simple pre-signed URL that allows any user to view the contents of a private object in a bucket you own, you can use the following call to getSignedUrl():
var s3 = new AWS.S3();
var params = { Bucket: 'myBucket', Key: 'myKey' };
s3.getSignedUrl('getObject', params, function (err, url) {
    console.log("The URL is", url);
});
Official documentation link: http://docs.amazonaws.cn/en_us/AWSJavaScriptSDK/guide/node-examples.html
The code should be something like this:
function uploadFileToS3AndGenerateUrl(cb) {
    // I have generated streams from a file. Using this since this is what you have used. Must be a valid stream.
    const pass = new stream.PassThrough();
    var params = {
        Bucket: "your-bucket", // required
        Key: key, // required
        Body: pass,
        ContentType: 'your content type',
    };
    s3.upload(params, function (s3Err, data) {
        if (s3Err) {
            return cb(s3Err); // stop here on upload failure
        }
        console.log(`File uploaded successfully at ${data.Location}`)
        const params = {
            Bucket: 'your-bucket',
            Key: data.key,
            Expires: 180
        };
        s3.getSignedUrl('getObject', params, (urlErr, urlData) => {
            if (urlErr) {
                console.log('There was an error getting your files: ' + urlErr);
                cb(urlErr);
            } else {
                console.log(`url: ${urlData}`);
                cb(null, urlData);
            }
        })
    })
}
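A hypothetical call site for the callback-style helper above:
uploadFileToS3AndGenerateUrl(function (err, url) {
    if (err) return console.error(err);
    console.log('Pre-signed URL:', url);
});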
Please check; I have updated your code, it might help you:
/*
 * Method to upload remote file to s3
 */
const uploadRemoteFileToS3 = async (remoteAddr) => {
    const response = await axios({
        method: 'get',
        url: remoteAddr,
        responseType: 'stream'
    })
    if (response.status === 200) {
        const file_name = remoteAddr.substring(remoteAddr.lastIndexOf('/') + 1);
        const content_type = response.headers['content-type'];
        const { pass, promise } = uploadFromStream(file_name, content_type);
        // Pipe the download into the PassThrough stream that s3.upload consumes.
        response.data.pipe(pass);
        // Resolves with the uploaded URL once s3.upload finishes.
        return promise;
    }
    throw new Error('Unexpected status ' + response.status + ' for ' + remoteAddr);
};

/*
 * Method to pipe the stream
 */
const uploadFromStream = (file_name, content_type) => {
    const pass = new stream.PassThrough();
    const obj_key = generateObjKey(file_name);
    const params = { Bucket: config.bucket, ACL: config.acl, Key: obj_key, ContentType: content_type, Body: pass };
    // Wrap s3.upload in a promise that settles with the uploaded URL, and hand
    // back the PassThrough stream so the caller can pipe data into it.
    const promise = new Promise((resolve, reject) => {
        s3.upload(params, function (err, data) {
            if (!err) {
                console.log(data)
                resolve(data.Location);
            } else {
                console.log(err)
                reject(err);
            }
        });
    });
    return { pass, promise };
}

// call uploadRemoteFileToS3
uploadRemoteFileToS3(remoteAddr)
    .then((finalResponse) => {
        console.log(finalResponse)
    })
    .catch((err) => {
        console.log(err);
    });
