Node.JS JavaScript fetching PDF contents prints PDFPage Formats instead - javascript

I'm trying to fetch the text contents of the first page of a PDF file using NPM node module 'PDF-lib'.
However when I fetch the contents and print the results, I instead get an array of data that looks something like below;
Could you please help me spot the problem?
Thanks in advance!
The results I get after printing look like this. What I want to fetch are the actual text contents of the PDF page.
PDFPage {
fontSize: 24,
fontColor: { type: 'RGB', red: 0, green: 0, blue: 0 },
lineHeight: 24,
x: 0,
y: 0,
node: PDFPageLeaf {
dict: Map(8) {
[PDFName] => [PDFName],
[PDFName] => [PDFRef],
[PDFName] => [PDFDict],
[PDFName] => [PDFArray],
[PDFName] => [PDFRef],
[PDFName] => [PDFDict],
[PDFName] => [PDFName],
[PDFName] => [PDFNumber]
},
...
...
...
The Code:
const { resolve } = require('path');
const { PDFDocument } = require('pdf-lib'); // Library for reading PDF file
const fs = require('fs');

/**
 * Loads the PDF and returns its first page as a PDFPage object.
 * NOTE(review): pdf-lib cannot extract plain page text (see the answer below);
 * this returns the page wrapper object, not its text content.
 * @returns {Promise<import('pdf-lib').PDFPage>} the first page of the document
 * @throws propagates any read/parse failure instead of returning it as a value
 */
async function readDataset() {
    // Read the file bytes and let pdf-lib parse them.
    const bytes = fs.readFileSync(resolve(`./app/assets/pdfs/np.pdf`));
    const content = await PDFDocument.load(bytes);
    // Return data found on first page
    const [firstPage] = content.getPages();
    return firstPage;
}

// Read data from dataset.
// FIX(review): top-level `await` is illegal in a CommonJS script (this file
// uses require()), so drive the async call from an IIFE instead. The original
// `catch { return err; }` also swallowed failures by returning the error as a
// value; surface it explicitly here.
let dataset;
(async () => {
    dataset = await readDataset();
})().catch((err) => {
    console.error('Failed to read PDF:', err);
});

Not generally possible at present (2021) with this library — see the current Limitations section; this info is also on the npm page at https://www.npmjs.com/package/pdf-lib#limitations
#1
pdf-lib can extract the content of text fields (see PDFTextField.getText), but it cannot extract plain text on a page outside of a form field. This is a difficult feature to implement, but it is within the scope of this library and may be added to pdf-lib in the future. See #93, #137, #177, #329, and #380.
For future visitors always check the link above for current status.

Related

Render docx in React js

I would like to properly render a docx file in React JS with the correct formatting, as it would appear in Word or a similar service. Currently, when displaying the text, all formatting is removed and appears as plain text. I obtain the file from the server, and process it, by:
// Fetch the .docx as raw bytes so PizZip can unzip it (a docx is a zip archive).
const url = "http://localhost:8080/files/aboutme.docx";
axios.get(url, {
// arraybuffer is required: the response is binary, not text.
responseType: 'arraybuffer',
}).then(response => {
// Docxtemplater parses the zipped XML; the custom delimiters mark template
// placeholders (NOTE(review): both start and end are 'ran' here — confirm intended).
var doc = new Docxtemplater(new PizZip(response.data), {
delimiters: {
start: 'ran',
end: 'ran'
}
});
// getFullText() strips all formatting — this is why the output is plain text.
var text = doc.getFullText();
setAboutMe(text);
})
I am using the Docxtemplater and PizZip libraries.
Docxtemplater is
a library to generate docx/pptx documents from a docx/pptx template
If you need to render a docx file I think you should use react-doc-viewer. Then you could write something like:
import DocViewer from "react-doc-viewer";
// Renders the docx with formatting, unlike Docxtemplater's getFullText().
function App() {
// DocViewer takes an array of { uri } document descriptors.
const doc = [{ uri: "http://localhost:8080/files/aboutme.docx" }];
return <DocViewer documents={doc} />;
}

How to commit a folder and open a Pull Request via Github API?

I want to commit a pipeline config for a user via the Github API.
So far I am able to commit just a file named main.yaml into the root directory of the repo but I need the file to sit within .github/workflows/main.yaml.
The code I have so far is this:
// FIX(review): the original post pasted this function twice back-to-back;
// a single copy is kept here. The logic itself is what the answers below
// critique — it is preserved as-is so their commentary still applies.
/**
 * Attempts to commit a pipeline config to a user's repo and open a PR.
 * @param {{ownerName: string, repoName: string}} target repo coordinates
 */
const commitWorkflowConfig = async ({
    ownerName,
    repoName
}) => {
    // Read the workflow template from disk.
    const pipelineConfig = fs.readFileSync(__dirname + '/../../../templates/main.yaml', { encoding: "utf-8" })
    // Create Blob from the content
    const blob = await axios.post(`https://api.github.com/repos/${ownerName}/${repoName}/git/blobs`, {
        content: pipelineConfig,
        encoding: "utf-8"
    })
    // Get last commit hash of Master
    const {
        commit: {
            sha: masterSha
        }
    } = await axios.get(`https://api.github.com/repos/${ownerName}/${repoName}/branches/master`)
    // Create new branch from master
    const branch = await axios.post(`https://api.github.com/repos/${ownerName}/${repoName}/git/refs`,
        {
            "ref": "refs/heads/workflow-pipeline",
            "sha": masterSha
        })
    // Create commit to new branch
    // NOTE(review): with axios the response body is under `.data`, so the blob
    // sha would be `blob.data.sha`; as used, this PUT also targets the repo
    // root (`contents/main.yaml`), not `.github/workflows/` — see answers.
    const commit = await axios.put(`https://api.github.com/repos/${ownerName}/${repoName}/contents/main.yaml`, {
        message: "New commit",
        content: pipelineConfig,
        sha: blob.sha,
        branch: "workflow-pipeline"
    })
    // Open Pull Request
    const response = await axios.post(`https://api.github.com/repos/${ownerName}/${repoName}/pulls`, {
        title: "New PR",
        head: "workflow-pipeline",
        base: "master"
    })
}
This feels like a lot of API calls all for just committing one file and opening a PR, I suspect I am doing something wrong?
When I create the new commit, it doesn't allow me to add any path or add .github/workflows/main.yaml to the end of the URL as I get a 404. Is there any way I can update this commit to commit to a folder instead of the root directory?
To summarise, How can I make a simple commit of .github/workflows/main.yaml to a new branch and open a PR for it?
Create tree example:
// Create each directory level as its own tree entry, then the file itself.
const tree = await this.post(`https://api.github.com/repos/${ownerName}/${repoName}/git/trees`, {
// base_tree keeps every existing path from master in the new tree.
base_tree: masterSha,
tree: [
{
// mode "040000" = directory (subtree) entry.
path: ".github",
mode: "040000",
type: "tree"
},
{
path: ".github/workflow",
mode: "040000",
type: "tree"
},
{
// mode "100755" = executable file; content is inlined instead of a blob sha.
path: ".github/workflow/main.yaml",
mode: "100755",
type: "tree",
content: pipelineConfig
},
]
})
Creating the tree
// First request: build a subtree that places the blob at workflows/main.yaml.
const { tree } = await axios.post(`https://api.github.com/repos/${ownerName}/${repoName}/git/trees`, {
base_tree: masterSha,
tree: [
{
// mode "100644" = regular file; `sha` references the previously created blob.
path: "workflows/main.yaml",
mode: "100644",
type: "blob",
sha
}
]
})
// Locate the generated "workflows" subtree so it can be grafted under .github.
const workflowTree = tree.find(t => t.path === "workflows");
// Second request: attach that subtree at .github/workflows in a new root tree.
await axios.post(`https://api.github.com/repos/${ownerName}/${repoName}/git/trees`, {
base_tree: masterSha,
tree: [
{
path: ".github/workflows",
mode: workflowTree.mode, // "040000"
type: workflowTree.type, // "tree"
sha: workflowTree.sha
}
]
})
The following code does not use the GitHub API properly
// Create commit to new branch
// (Quoted from the question.) NOTE(review): this PUT targets `contents/main.yaml`
// — the repository root — and passes `blob.sha`, which the answer below argues
// is not how the contents endpoint should be used here.
const commit = await axios.put(`https://api.github.com/repos/${ownerName}/${repoName}/contents/main.yaml`, {
message: "New commit",
content: pipelineConfig,
sha: blob.sha,
branch: "workflow-pipeline"
})
You cannot directly edit the contents of a file. You need to use the trees API to create an entirely new tree based off of the original tree.
Steps
Create the blob for the new file (https://docs.github.com/en/rest/reference/git#create-a-blob). You already did this step good job.
Get the tree that you want to make your new branch based off of (https://docs.github.com/en/rest/reference/repos#get-a-branch). Note that you have to get the tree sha, not the commit sha.
Create a new tree with that file added (https://docs.github.com/en/rest/reference/git#create-a-tree). I think this step will be the most complicated, because for each 'folder' tree created, a parent 'folder' tree will also need to be created to contain the newly created 'folder' tree. So if you want to modify the .github/workflows folder, you'll first have to create a new tree based on .github/workflows. Let's say that tree's sha was abc.... Then you'll need to create a new tree based on the .github folder, and make its workflows dir point to abc..., not the old one.
Create a commit (https://docs.github.com/en/rest/reference/git#create-a-commit). Use the sha of the root tree you created in the previous step.
Create a new branch (https://docs.github.com/en/rest/reference/git#create-a-reference). In the question code, you created it before you created the commit, which doesn't make sense. You need to create it after you create the commit, so that it's head will point to the sha of the commit you created.
Create the pull request (https://docs.github.com/en/rest/reference/pulls#create-a-pull-request). You already have this step in your code.
Here is a visual that explains the steps 2 and 3 for adding a main.yml file to .github/workflows:
Original tree | New Tree (but the '.github' tree references ^b, not b)
- sha: a --> - sha: ^a
- files: - files:
- .github --> - .github (but the 'workflows' tree references ^c, not c)
- sha: b - sha: ^b
- files: - files
- workflows --> - workflows (but with the main.yml)
- sha: c - sha: ^c
- files: - files:
- main.yml (reference the sha of the blob you created)
...
...
...
There are three -->s in the visual. Each --> is a request to make.
Start with creating the ^c tree, which is based off of the c tree and has the added main.yml file.
Create the ^b tree, which is based off of b but has ^c in it.
Create the ^a tree, which is based off of a but has ^b in it.
And those are the steps to creating a simple — but complicated-to-create — pull request.
It's surprising how many API calls are needed for this. 5 + {how deep the file you want to add is}
I hope this can help you. I am working on the same topic, where I need to update files inside the .github/workflows folder via the API, and I think I found a better and easier way.
I don't know since when but, with this endpoint https://docs.github.com/en/rest/reference/repos#create-or-update-file-contents with the right permissions (scope workflow in your authToken), you can create and update files into the .github/workflows folder.
// to create a new file
await octokit.request('PUT /repos/{owner}/{repo}/contents/{path}', {
    owner: 'octocat',
    repo: 'hello-world',
    path: '.github/workflows/your-file.yml',
    message: 'Your commit message',
    content: 'contentInBase64' // content must be base64-encoded for this endpoint
});
// to update a file
// get the sha of the file that you want to update
const {
    data: contentData
} = await octokit.request('GET /repos/{owner}/{repo}/contents/{path}', {
    owner: 'octocat',
    repo: 'hello-world',
    path: '.github/workflows/your-file.yml',
});
const shaOfCurrentFileToUpdate = contentData.sha;
await octokit.request('PUT /repos/{owner}/{repo}/contents/{path}', {
    owner: 'octocat',
    repo: 'hello-world',
    path: '.github/workflows/your-file.yml',
    message: 'Your new commit message',
    // FIX(review): the original was missing the comma after `content`,
    // which made this object literal a syntax error.
    content: 'newContentInBase64',
    sha: shaOfCurrentFileToUpdate
});
With this, I removed a lot of lines of code and solved the issue on my side, I hope is helpful for you too.

Slash Commands Attachments

for those who still use v12, I updated the function a while ago, it sends files with it.
// Sends a slash-command (interaction) response that can include file
// attachments on discord.js v12, by using the raw API client.
async function send(interaction, content) {
// APIMessage resolves the payload and attachments into wire format.
const { data, files } = await Discord.APIMessage.create(client.channels.resolve(interaction.channel_id), content)
.resolveData()
.resolveFiles();
// type 4 = respond to the interaction with a channel message.
return client.api.interactions(interaction.id, interaction.token).callback.post({
data: {
type: 4,
data: { ...data, files: files }
}, files
});
}
Could someone help me with this function? I am working with slash commands (commands with /) and I am not able to send files, nor files in embeds. I would like to know how to adapt it to include files, or whether it can already send them as-is. (I am still very new to these commands.)
// Builds (but does not send) an interaction reply payload.
// i = interaction, c = content; the caller posts the returned data itself.
async function msg(i, c) {
const m = await Discord.APIMessage.create(client.channels.resolve(i.channel_id), c)
.resolveData()
.resolveFiles();
// Merge the resolved message data with its resolved file attachments.
return { ...m.data, files: m.files }
}
i = interaction, c = content
this function is part of a handler of mine, it works to send embeds and collect arguments, but I wanted to implement image manipulation in my commands.

Excel file contains {"sharedString":0} instead of actual value

I am trying to read from an Excel file, manipulate it, and create another Excel file from it; I am using stream support for this. Most of it is working fine, but I see the resultant Excel file containing {"sharedString":0} instead of actual values.
Below is my relevant code
// Stream the output workbook to disk while streaming the input workbook in.
let ws = fs.createWriteStream(fpath);
const workbook = new ExcelJS.stream.xlsx.WorkbookWriter({stream: ws, useStyles: true, useSharedStrings: true});
const myworksheet = workbook.addWorksheet('sheet1');
// NOTE(review): per the accepted fix below, `options` must set
// sharedStrings: 'cache', otherwise row values arrive as
// {"sharedString":n} placeholders instead of resolved text.
const workbookReader = new ExcelJS.stream.xlsx.WorkbookReader(sheet.path, options);
workbookReader.read();
workbookReader.on('worksheet', worksheet => {
worksheet.on('row', row => {
// Copy each incoming row straight into the output sheet.
myworksheet.addRow(row.values);
});
});
workbookReader.on('shared-strings', sharedString => {
console.log('not coming here');
});
workbookReader.on('end', async () => {
console.log('processing done...');
// Flush and finalize the streamed output workbook.
await workbook.commit();
});
Please see the attached file for your reference.
Any help on how to fix this will be really great, Thanks.
Once i created the WorkbookReader with below options
// WorkbookReader options: caching shared strings is the fix — rows are then
// resolved to their actual text instead of {"sharedString":n} references.
const options = {
sharedStrings: 'cache',
hyperlinks: 'cache',
worksheets: 'emit',
styles: 'cache',
};
it worked!

File upload blocking with bigger files

Intro
What I'm trying to achieve is a simple file upload with a progress indication (with redux-saga and React). I'm having problems getting this indication because the file upload seems to be blocking — which it shouldn't be.
Expected behaviour
before the file upload starts a re render is triggered and the spinner is shown and the window is not blocked.
Current behaviour
What I have at the moment is a component with a table that show a file per row. A optimistic row gets added with a spinner as the content when the users uploads a file. As soon as the file is uploaded the optimistic row will be replaced by a real row with the file's name etc. When I'm uploading a file around 50MB the window gets blocked and shortly before the file is uploaded (around 0.5s before) the spinner appears and then the file is already uploaded and the spinner disappears again.
side notes
If you replace the file upload with new Promise(res => setTimeout(res, 5000)) it all works fine => it seems like there is a problem with the xhr / fetch.
I've implemented the same using XHR, promises and an onProgress callback to make sure the problem is not fetch.
the implementation looks very close to: https://gist.github.com/robinfehr/2f4018259bf026a468cc31100fed5c9f
Also with this implementation I've experienced the same issue - blocking until almost the end of the upload.
If I put log statements into the render function of the component to see if it's getting re rendered before the file is uploaded, I see (as soon as the block stops and the file is uploaded) that the log statements in the render function are actually correctly triggered with a timestamp before the file upload was done.
In this implementation I'm using the same reducer: optimistic event as well as the real event that reverts the optimistic event, they go trough the same reducer (named fileReducer here).
using a second reducer and concatination instead of the optimistic revert logic helps to displaying the spinner earlier but does not help with the blocking. It therefore seems like the middleware also gets blocked by the blocking call.
saga: (postData uses fetch)
/**
 * Saga: optimistically shows a spinner row for the uploading document, then
 * forks the real upload so this watcher-driven saga is not blocked by it.
 * FIX(review): dropped the unused local `hans` and the unused
 * `meta: metaFromFrontEnd` destructuring from the original.
 * @param {string} partnerId
 * @param {{payload: Object, meta: Object}} action - redux action (destructured)
 */
function* createDocument(partnerId, { payload, meta }) {
    const siteId = getSiteIdFromRoute();
    const {
        mediaGroupId,
        customArticleId,
        logicalComponentId,
        type,
        name,
        documentSrc
    } = payload;
    // Shared id lets the optimistic event and its later confirmation match up.
    const commonEventId = uuid();
    const payloadBasic = {
        id: commonEventId,
        version: 0,
        aggregate: {
            id: uuid(),
            name: 'document'
        },
        context: {
            name: 'contentManagement'
        },
        payload: {
            name,
            type,
            links: {
                partnerId,
                siteId,
                logicalComponentId,
                customArticleId,
                mediaGroupId
            }
        }
    };
    // creates the optimistic (fake) row with a spinner in the file list component - action marked as optimistic which will be reverted.
    yield put(actions.denormalizeEvent({
        ...payloadBasic,
        name: 'documentCreated',
        optimistic: true,
        payload: {
            ...payloadBasic.payload,
            uploading: true
        }
    }));
    // fork (not call) so the upload runs concurrently and this saga returns at once.
    yield fork(executeDocumentUpload, type, siteId, partnerId, documentSrc, payloadBasic);
}
/**
 * Saga: performs the actual upload, then replaces the optimistic spinner row
 * with the real row built from the server's response metadata.
 * FIX(review): dropped the unused destructured `id` from the original.
 */
function* executeDocumentUpload(type, siteId, partnerId, documentSrc, payloadBasic) {
    const req = yield call(uploadDocument, type, siteId, partnerId, documentSrc);
    const body = yield req.json();
    const { meta: metaFromFileUpload } = body.response;
    // removes the optimistic (fake) row from the file list component and adds the real row with more file information (optimistic event gets reverted in middleware)
    yield put(actions.sendCommandSuccess({
        ...payloadBasic,
        name: 'createDocument',
        payload: {
            ...payloadBasic.payload,
            meta: metaFromFileUpload
        }
    }));
}
/**
 * Builds the upload URL for the given document owner type and POSTs the file.
 * Site-scoped types share one endpoint; customArticle uses the partner endpoint.
 * @throws {Error} for unknown `type` values — the original silently built a
 *                 request with `url === undefined`.
 */
function uploadDocument(type, siteId, partnerId, documentSrc) {
    const siteScopedTypes = ['site', 'mediaGroup', 'logicalComponent'];
    let url;
    if (siteScopedTypes.includes(type)) {
        url = `/file/site/${siteId}/document`;
    } else if (type === 'customArticle') {
        url = `/file/partner/${partnerId}/document`;
    } else {
        throw new Error(`uploadDocument: unknown document type "${type}"`);
    }
    return postData(url, documentSrc);
}
The problem was that I sent the file as a base64-encoded string and set up the request with the wrong content-type:
'Content-Type': 'text/plain;charset=UTF-8'
Putting the file into a FormData object and sending the request without the mentioned content-type led to a non-blocking request.

Categories

Resources