PDF.js get images of one page and display them as HTML

PDF.js get images of one page and display them as HTML - javascript

According to this question — "Extract images from PDF file with JavaScript" —
I tried to filter the JPEG images from PDF. It works in the way, that I get the name of the images of one page in an array.
What I want to do: display the images next to the PDF as HTML. The solution mentioned above doesn't work; I do not know how to get the image itself and not just its name.
<script type="text/javascript" src="pdf.js"></script>
<script type="text/javascript">
PDFJS.workerSrc = "/path/to/pdf.worker.js";
</script>
<div>
<button id="prev">Previous</button>
<button id="next">Next</button>
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
</div>
<canvas id="the-canvas" style="width: 500px;"></canvas>
<div id="images"></div>
<script type="text/javascript">
PDFJS.workerSrc = "pdf.worker.js";
</script>
<script src="js.js"></script>
js.js:
// URL of PDF document
var url = "test/pdf_one.pdf";
var pageNum = 11;
var pageCount = 0;
loadPage(pageNum);
/**
 * Downloads the PDF at `url`, renders page `number` into canvas#the-canvas
 * and then hands the page to getImages().
 *
 * Side effects: stores the document's page count in the global `pageCount`
 * and refreshes the "#page_num" / "#page_count" labels.
 *
 * @param {number} number - 1-based number of the page to show.
 */
function loadPage(number) {
  // Asynchronous download of the PDF
  PDFJS.getDocument(url)
    .then(function (pdf) {
      pageCount = pdf.numPages;
      // Show the page that is actually being loaded rather than whatever the
      // global counter holds by the time the download has finished.
      document.getElementById("page_num").innerHTML = "" + number;
      document.getElementById("page_count").innerHTML = "" + pageCount;
      return pdf.getPage(number);
    })
    .then(function (page) {
      // Set scale (zoom) level
      var scale = 1.5;
      // Get viewport (dimensions)
      var viewport = page.getViewport(scale);
      // Get canvas#the-canvas and its 2d context
      var canvas = document.getElementById('the-canvas');
      var context = canvas.getContext('2d');
      // Size the canvas to the page
      canvas.height = viewport.height;
      canvas.width = viewport.width;
      // Render PDF page
      page.render({
        canvasContext: context,
        viewport: viewport
      });
      getImages(page);
    })
    .catch(function (error) {
      // A failed download or an invalid page number would otherwise vanish
      // as an unhandled promise rejection.
      console.error("Could not load page " + number + ":", error);
    });
}
/**
 * Finds the JPEG images painted on `page` and appends each one as an <img>
 * to div#images (whose previous content is removed first).
 *
 * The operator list is fetched asynchronously, so all work that depends on
 * it happens inside the promise callback; reading the collected names
 * right after calling getOperatorList() would always see an empty array.
 *
 * @param {Object} page - PDF.js page proxy.
 * @returns {Promise<Array>} resolves with the internal names of the JPEG
 *     objects found on the page, in paint order.
 */
function getImages(page) {
  var container = document.getElementById("images");
  // Drop the images of the previously displayed page.
  container.innerHTML = "";

  return page.getOperatorList().then(function (ops) {
    var names = [];
    for (var i = 0; i < ops.fnArray.length; i++) {
      if (ops.fnArray[i] !== PDFJS.OPS.paintJpegXObject) {
        continue;
      }
      var name = ops.argsArray[i][0];
      names.push(name);
      // The name is only an internal object id, not a file that can be
      // requested; the decoded image has to be taken from the page's object
      // store. The callback form waits until the object is resolved.
      // NOTE(review): assumes the resolved JPEG object exposes `.src` (an
      // image element, as in PDF.js 1.x) - confirm for the version in use.
      page.objs.get(name, function (image) {
        var img = document.createElement("img");
        img.src = image.src;
        container.appendChild(img);
      });
    }
    console.log(names);
    return names;
  });
}
document.getElementById('prev').addEventListener('click', onPrevPage);
document.getElementById('next').addEventListener('click', onNextPage);
/**
 * "Next" button handler: moves to the following page unless the last page
 * (per the global `pageCount`) is already displayed.
 */
function onNextPage() {
  if (pageNum < pageCount) {
    pageNum += 1;
    var label = document.getElementById("page_num");
    label.innerHTML = String(pageNum);
    loadPage(pageNum);
  }
}
/**
 * "Previous" button handler: moves to the preceding page unless the first
 * page is already displayed.
 */
function onPrevPage() {
  if (pageNum > 1) {
    pageNum -= 1;
    var label = document.getElementById("page_num");
    label.innerHTML = String(pageNum);
    loadPage(pageNum);
  }
}

Related

How to fetch Google Drive PDF url using pdf.js script on new web page?

I have an example of two websites that use Google Drive: whenever they have a new document they just change the unique ID (of the document uploaded to Drive) in the URL (after ?id=) and then the document is displayed on a canvas. It's also SEO-friendly and indexed by Google. So what should its raw code be?
Demo - Click Here | I want this One {Check Code by Inspect Tool}
Demo Website Image/Inspect code
I just want to use google drive for storage and call document on my website's pdf.js canvas for increase page view, user time, and display ads on PDF web pages like the demo that above.

Here is the demo for PDF.js showing a PDF in the webpage
var url = 'https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf'; //Loaading the PDF from URL
// Loaded via <script> tag, create shortcut to access PDF.js exports.
var pdfjsLib = window['pdfjs-dist/build/pdf'];
// The workerSrc property shall be specified.
pdfjsLib.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';
var pdfDoc = null,
pageNum = 1,
pageRendering = false,
pageNumPending = null,
scale = 0.8,
canvas = document.getElementById('canvas'),
ctx = canvas.getContext('2d');
/**
 * Fetches page `num` from the loaded document, sizes the shared canvas to
 * the page viewport and renders the page into it. When rendering finishes,
 * a page queued in `pageNumPending` (if any) is rendered next.
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;

  // Runs once the page has been painted.
  function onRenderDone() {
    pageRendering = false;
    if (pageNumPending === null) {
      return;
    }
    // A newer page was requested while this one was rendering
    renderPage(pageNumPending);
    pageNumPending = null;
  }

  // Runs once the page proxy is available.
  function drawPage(page) {
    var pageViewport = page.getViewport({scale: scale});
    canvas.height = pageViewport.height;
    canvas.width = pageViewport.width;
    // Render PDF page into the canvas context, then wait for it to finish
    page.render({
      canvasContext: ctx,
      viewport: pageViewport
    }).promise.then(onRenderDone);
  }

  pdfDoc.getPage(num).then(drawPage);
  // Update page counter
  document.getElementById('page_num').textContent = num;
}
/**
 * Renders page `num` right away when no rendering is in progress;
 * otherwise remembers it in `pageNumPending` so it is rendered once the
 * current rendering has finished.
 */
function queueRenderPage(num) {
  if (!pageRendering) {
    renderPage(num);
    return;
  }
  pageNumPending = num;
}
/**
 * Steps back one page, unless the first page is already displayed.
 */
function onPrevPage() {
  if (pageNum > 1) {
    pageNum -= 1;
    queueRenderPage(pageNum);
  }
}
document.getElementById('prev').addEventListener('click', onPrevPage);
/**
 * Steps forward one page, unless the last page of the loaded document is
 * already displayed.
 */
function onNextPage() {
  if (pageNum < pdfDoc.numPages) {
    pageNum += 1;
    queueRenderPage(pageNum);
  }
}
document.getElementById('next').addEventListener('click', onNextPage);
/**
* Asynchronously downloads PDF.
*/
pdfjsLib.getDocument(url).promise.then(function(pdfDoc_) {
pdfDoc = pdfDoc_;
document.getElementById('page_count').textContent = pdfDoc.numPages;
// Initial/first page rendering
renderPage(pageNum);
});
#canvas {
border: 1px solid black;
}
button{
border: none;
background: #000;
color: #fff;
margin: 10px;
padding: 10px;
}
<script src="//mozilla.github.io/pdf.js/build/pdf.js"></script>
<h1>PDF.js Example</h1>
<div>
<button id="prev">Previous</button>
<button id="next">Next</button>
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
</div>
<canvas id="canvas"></canvas>
For more details and documentation visit:
https://mozilla.github.io/pdf.js/getting_started/

Changed pdf URLs to see the result
Tested here:
https://codepen.io/max-makhrov/pen/JjpLxXP
Here're failed URLs:
https://drive.google.com/open?id=1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O
https://drive.google.com/1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O/export?exportFormat=pdf&format=pdf
https://drive.google.com/export?id=1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O
https://drive.google.com/uc?export=download&id=1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O&exportFormat=pdf&format=pdf
https://drive.google.com/uc?export=download&format=pdf&id=1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O
https://drive.google.com/uc?export?format=pdf&id=1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O
https://docs.google.com/viewer?srcid=1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O&pid=explorer&efh=false&a=v&chrome=false&embedded=true
https://drive.google.com/file/d/1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O/
https://drive.google.com/file/export?format=pdf&id=1MUgtKfMqpyTtTFt2hEff23Emcg14Cj4O
Here's a surprise: you may use a range from existing Google Sheet if it has an Access. This URL works:
https://docs.google.com/spreadsheets/d/1qNQ6iCMoBCQgJzBB63ymtBQ6BedXZFhjgZZKGItjeVA/export?exportFormat=pdf&format=pdf&size=1.87x1.386&portrait=true&scale=2&top_margin=0&bottom_margin=0&left_margin=0&right_margin=0&sheetnames=false&printtitle=false&horizontal_alignment=LEFT&gridlines=false&fmcmd=12&gid=0&r1=1&r2=7&c1=1&c2=4

I added a demo to load pdf with lazy load. We need to implement drive api to fetch the pdf from drive or it will give cross-origin error.
<!DOCTYPE html>
<html>
<head>
<title>PDF Demo for Qdev</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@2.2.228/build/pdf.min.js"></script>
<style>
body {
margin: 0;
background-color: #dfdfdf; /* Grey background */
}
.center {
text-align: center; /* center pages */
}
</style>
</head>
<body>
<div id="pages" class="center"></div>
<script>
// download the pdf
var pdfTask = pdfjsLib.getDocument("https://atikur-rabbi.github.io/data/sample.pdf");
// parse the params stored in the URL hash (e.g. "#pg=3")
var queryString = window.location.hash.split("#")[1];
// default params
var params = {pg: 1};
// number of pages requested so far
var pagen = 0;
// if params are present, they override the defaults
if (queryString) {
  params = Object.assign(params, parseParams(queryString));
}
// First page to display; fall back to 1 when the hash holds no usable number
var firstPage = parseInt(params.pg, 10);
if (isNaN(firstPage) || firstPage < 1) {
  firstPage = 1;
}
// Load the pages div from the DOM
var pages = document.getElementById("pages");
// handle the pdf once loaded (asynchronous)
pdfTask.promise.then(function(pdf) {
  // globalize pdf
  window.pdf = pdf;
  // a start page past the end of the document cannot be fetched
  if (firstPage > pdf.numPages) {
    firstPage = pdf.numPages;
  }
  // load the first page
  loadPage(pdf, firstPage);
  pagen++;
}).catch(function(error) {
  console.error("Could not load the PDF:", error);
});
// stores the page number of the page currently in the viewport
var curpage = firstPage;
// every half a second, check for scroll updates
setInterval(function() {
  var loadedPdf = window.pdf;
  // nothing to scroll through until the document has arrived
  if (loadedPdf === undefined) {
    return;
  }
  // if we're within 200px of the bottom of the page
  if (document.body.scrollHeight - (window.scrollY + window.innerHeight) < 200) {
    // load up to 5 more pages, but never ask for a page past the last one:
    // getPage() rejects for page numbers outside the document
    for (var i = 0; i < 5 && firstPage + pagen <= loadedPdf.numPages; i++) {
      loadPage(loadedPdf, firstPage + pagen);
      pagen++;
    }
  }
  // estimate the page in the viewport from the average rendered page height
  var pageHeight = document.body.scrollHeight / pagen;
  var pagesScrolled = pageHeight > 0 ? Math.floor(window.scrollY / pageHeight) : 0;
  var visiblePage = firstPage + pagesScrolled;
  // if the page in our viewport has changed, update the url
  if (curpage !== visiblePage) {
    curpage = visiblePage;
    window.location.hash = "#pg=" + curpage;
  }
}, 500);
/**
 * Renders page `pg` of `pdf` into a new <canvas id="page-{pg}"> and appends
 * that canvas to the global `pages` container.
 *
 * @param {Object} pdf - loaded PDF.js document.
 * @param {number} pg - 1-based page number.
 * @returns {Promise} settles once the canvas has been appended and
 *     rendering has been started (or rejects if the page cannot be fetched).
 */
function loadPage(pdf, pg) {
  return pdf.getPage(pg).then(function(page) {
    var scale = 1.7;
    var viewport = page.getViewport({scale: scale});
    var canvas = document.createElement('canvas');
    var context = canvas.getContext('2d');
    canvas.id = "page-" + pg;
    // Assign, don't accumulate: a freshly created canvas already has a
    // default height, which `+=` would add on top of the page height.
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    pages.appendChild(canvas);
    page.render({
      canvasContext: context,
      viewport: viewport
    });
  });
}
/**
 * Parses a query string ("a=1&b=2", optionally starting with "?") into an
 * object of decoded key/value strings.
 *
 * Only the first "=" of each pair separates key from value, so values that
 * themselves contain "=" are kept intact. A key without "=" maps to "".
 * Empty segments (as in "a=1&&b=2" or a trailing "&") are ignored.
 *
 * @param {string} queryString - raw query string.
 * @returns {Object} map of parameter names to values.
 */
function parseParams(queryString) {
  var query = {};
  var raw = queryString[0] === '?' ? queryString.slice(1) : queryString;
  var pairs = raw.split('&');
  for (var i = 0; i < pairs.length; i++) {
    var pair = pairs[i];
    if (pair === '') {
      continue;
    }
    var separator = pair.indexOf('=');
    var key = separator === -1 ? pair : pair.slice(0, separator);
    var value = separator === -1 ? '' : pair.slice(separator + 1);
    query[decodeURIComponent(key)] = decodeURIComponent(value);
  }
  return query;
}
</script>
</body>
</html>

ReferenceError: PDFJS is not defined Asp.net

I'm trying to display a pdf using canvas and PDF.JS Stable v.2.2.228 but when I execute my webform it shows this error in the console: ReferenceError: PDFJS is not defined.
I read something about the global PDFJS object being removed but I can't find the correct syntax [Kinda new in JS]. Any suggestion is very appreciated
I was following this example in case is needed : https://usefulangle.com/post/20/pdfjs-tutorial-1-preview-pdf-during-upload-wih-next-prev-buttons
Js code:
// State shared by showPDF(), showPage() and ValidateFileUpload(). It has to
// live outside the functions: declared inside ValidateFileUpload() it would
// be invisible to the other two.
var __PDF_DOC,
    __CURRENT_PAGE,
    __TOTAL_PAGES,
    __PAGE_RENDERING_IN_PROGRESS = 0,
    __CANVAS,
    __CANVAS_CTX;

/**
 * Loads the PDF at `pdf_url` and shows its first page.
 *
 * PDF.js 2.x no longer defines the `PDFJS` global; the library object is
 * `pdfjsLib`, and getDocument() returns a loading task whose `promise`
 * resolves with the document.
 *
 * @param {string} pdf_url - URL (e.g. an object URL) of the PDF to show.
 */
function showPDF(pdf_url) {
    pdfjsLib.getDocument({ url: pdf_url }).promise.then(function (pdf_doc) {
        __PDF_DOC = pdf_doc;
        __TOTAL_PAGES = __PDF_DOC.numPages;
        // Show the first page
        showPage(1);
    }).catch(function (error) {
        // Report why the document could not be loaded
        alert(error.message);
    });
}

/**
 * Renders page `page_no` of the loaded document into the shared canvas,
 * scaled so that the page fills the canvas' fixed width.
 *
 * @param {number} page_no - 1-based page number.
 */
function showPage(page_no) {
    __PAGE_RENDERING_IN_PROGRESS = 1;
    __CURRENT_PAGE = page_no;
    __PDF_DOC.getPage(page_no).then(function (page) {
        // As the canvas is of a fixed width we need to set the scale of the viewport accordingly
        var scale_required = __CANVAS.width / page.getViewport({ scale: 1 }).width;
        // Get viewport of the page at required scale
        var viewport = page.getViewport({ scale: scale_required });
        // Set canvas height
        __CANVAS.height = viewport.height;
        var renderContext = {
            canvasContext: __CANVAS_CTX,
            viewport: viewport
        };
        // render() returns a task; its `promise` settles when painting is done
        return page.render(renderContext).promise;
    }).then(function () {
        __PAGE_RENDERING_IN_PROGRESS = 0;
        // Show the canvas
        $("#pdf-canvas").show();
    }).catch(function (error) {
        __PAGE_RENDERING_IN_PROGRESS = 0;
        alert(error.message);
    });
}

/**
 * onchange handler of the file input: previews an image in #ImgPreview or
 * a PDF in #pdf-canvas, and rejects any other file type.
 */
function ValidateFileUpload() {
    var fuData = document.getElementById('FileUpload1');
    var FileUploadPath = fuData.value;
    //To check if user upload any file
    if (FileUploadPath == '') {
        alert("Por favor subir un archivo");
        return;
    }
    var Extension = FileUploadPath.substring(
        FileUploadPath.lastIndexOf('.') + 1).toLowerCase();
    var selectedFile = fuData.files && fuData.files[0];
    //The file uploaded is an image
    if (Extension == "png" || Extension == "jpeg" || Extension == "jpg" || Extension == "gif" || Extension == "jfif") {
        // To Display
        if (selectedFile) {
            var reader = new FileReader();
            reader.onload = function (e) {
                $('#ImgPreview').attr('src', e.target.result);
                // The file list belongs to the file input, not to the <img> preview
                document.querySelector('#LblFileupload').innerText = selectedFile.name;
            };
            reader.readAsDataURL(selectedFile);
        }
    }
    else if (Extension == "pdf") {
        if (selectedFile) {
            // Initialise the shared canvas state before any page is rendered
            __CANVAS = $('#pdf-canvas').get(0);
            __CANVAS_CTX = __CANVAS.getContext('2d');
            showPDF(URL.createObjectURL(selectedFile));
        }
    }
    //The file upload is NOT an image
    else {
        alert("Solo se aceptan archivos en formato .JPG - .PNG - .JPEG - .GIF - .JFIF");
    }
}
HTML :
<asp:FileUpload ID="FileUpload1" runat="server" accept="image/*" onchange="return ValidateFileUpload()" Visible="true" />
<asp:Image ID="ImgPreview" runat="server" Height="600px" Width="500px" />
<canvas id="pdf-canvas" width="400"> </canvas>

You should change PDFJS to pdfjsLib. Also you should try adding the lines var __CANVAS = $('#pdf-canvas').get(0); and var __CANVAS_CTX = __CANVAS.getContext('2d'); under __CURRENT_PAGE = page_no; in that function because you're not initializing those variables before you try to call and use them. Also you need to add var in front of the other two items you're using in that function at the top.
So it should look like:
// PDF.js 2.x: getDocument() returns a loading task (use its `promise`),
// getViewport() takes an options object, and render() returns a task whose
// `promise` settles when painting has finished.
pdfjsLib.getDocument(URL.createObjectURL($("#FileUpload1").get(0).files[0])).promise.then(doc => {
    // numPages is the public API; _pdfInfo is a private field
    console.log("This file has " + doc.numPages + "pages");
    return doc.getPage(1);
}).then(page => {
    var myCanvas = document.getElementById('pdf-canvas');
    var context = myCanvas.getContext('2d');
    var viewport = page.getViewport({ scale: 1 });
    myCanvas.width = viewport.width;
    myCanvas.height = viewport.height;
    return page.render({
        canvasContext: context,
        viewport: viewport
    }).promise;
}).catch(error => {
    // Loading or rendering failed
    console.error(error);
});

capture whole div with svg in javascript

I want to capture a whole div as an image and save it locally as proof. I have searched and read many articles about converting SVG to an image or a div to an image.
I have tried some JS libraries for this. But when I try to capture an image from the div, some libraries capture only the div content and others capture only the SVG content.
s.jpg , a.jpg
html2canvas(contentDiv, {
onrendered: function(can) {
dirty.appendChild(can);
}
});
// first convert your svg to png
exportInlineSVG(svg, function(data, canvas) {
svg.parentNode.replaceChild(canvas, svg);
// then call html2canvas
html2canvas(contentDiv, {
onrendered: function(can) {
can.id = 'canvas';
clean.appendChild(can);
}
});
})
/**
 * Rasterizes an inline <svg> element through a canvas.
 *
 * Steps, as implemented below: every <image> of the svg whose href is not
 * already a data URL is re-encoded as a data URL (toDataURL); once all of
 * them are handled, the svg is cloned, the rules of the document's style
 * sheets are copied into the clone's <defs> (parseStyles), the clone is
 * serialized into a data URL, loaded into an Image and drawn on a canvas
 * (exportDoc). The result is then delivered to `receiver`.
 *
 * @param svg      the inline svg element to export; anything whose nodeName
 *                 is not 'svg' is rejected with a console error
 * @param receiver where the result goes: a <canvas> (drawn into directly),
 *                 an <img> (its src is set to the canvas data URL), another
 *                 node with appendChild (the canvas is appended to it), or
 *                 a function called as receiver(dataURL, canvas)
 * @param params   first argument forwarded to canvas.toDataURL()
 * @param quality  second argument forwarded to canvas.toDataURL()
 */
function exportInlineSVG(svg, receiver, params, quality) {
if (!svg || !svg.nodeName || svg.nodeName !== 'svg') {
console.error('Wrong arguments : should be \n exportSVG(SVGElement, function([dataURL],[canvasElement]) || IMGElement || CanvasElement [, String_toDataURL_Params, Float_Params_quality])')
return;
}
var xlinkNS = "http://www.w3.org/1999/xlink";
// clone of `svg` that exportDoc() creates and parseStyles() modifies
var clone;
// This will convert an external image to a dataURL
var toDataURL = function(image) {
var img = new Image();
// CORS workaround, this won't work in IE<11
// If you are sure you don't need it, remove the next line and the double onerror handler
// First try with crossorigin set, it should fire an error if not needed
img.crossOrigin = 'Anonymous';
img.onload = function() {
// we should now be able to draw it without tainting the canvas
var canvas = document.createElement('canvas');
var bbox = image.getBBox();
canvas.width = bbox.width;
canvas.height = bbox.height;
// draw the loaded image
canvas.getContext('2d').drawImage(this, 0, 0, bbox.width, bbox.height);
// set our original <image>'s href attribute to the dataURL of our canvas
image.setAttributeNS(xlinkNS, 'href', canvas.toDataURL());
// that was the last one
if (++encoded === total) exportDoc()
}
// No CORS set in the response
img.onerror = function() {
// save the src
var oldSrc = this.src;
// there is an other problem
this.onerror = function() {
console.warn('failed to load an image at : ', this.src);
// give up on this image: lower the expected total instead of counting it as encoded
if (--total === encoded && encoded > 0) exportDoc();
}
// remove the crossorigin attribute
this.removeAttribute('crossorigin');
// retry
this.src = '';
this.src = oldSrc;
}
// load our external image into our img
img.src = image.getAttributeNS(xlinkNS, 'href');
}
// The final function that will export our svgNode to our receiver
var exportDoc = function() {
// check if our svgNode has width and height properties set to absolute values
// otherwise, canvas won't be able to draw it
var bbox = svg.getBBox();
// avoid modifying the original one
clone = svg.cloneNode(true);
if (svg.width.baseVal.unitType !== 1) clone.setAttribute('width', bbox.width);
if (svg.height.baseVal.unitType !== 1) clone.setAttribute('height', bbox.height);
parseStyles();
// serialize our node
var svgData = (new XMLSerializer()).serializeToString(clone);
// remember to encode special chars
var svgURL = 'data:image/svg+xml; charset=utf8, ' + encodeURIComponent(svgData);
var svgImg = new Image();
svgImg.onload = function() {
// if we set a canvas as receiver, then use it
// otherwise create a new one
var canvas = (receiver && receiver.nodeName === 'CANVAS') ? receiver : document.createElement('canvas');
// IE11 doesn't set a width on svg images...
canvas.width = this.width || bbox.width;
canvas.height = this.height || bbox.height;
canvas.getContext('2d').drawImage(this, 0, 0, canvas.width, canvas.height);
// try to catch IE
// NOTE(review): `receiver` is dereferenced without a null check from here on;
// a missing receiver ends up in the catch branch below - confirm that is intended
try {
// if we set an <img> as receiver
if (receiver.nodeName === 'IMG') {
// make the img looks like the svg
receiver.setAttribute('style', getSVGStyles(receiver));
receiver.src = canvas.toDataURL(params, quality);
} else {
// make the canvas looks like the canvas
canvas.setAttribute('style', getSVGStyles(canvas));
// a container element
if (receiver.appendChild && receiver !== canvas)
receiver.appendChild(canvas);
// if we set a function
else if (typeof receiver === 'function')
receiver(canvas.toDataURL(params, quality), canvas);
}
} catch (ie) {
console.warn("Your ~browser~ has tainted the canvas.\n The canvas is returned");
if (receiver.nodeName === 'IMG') receiver.parentNode.replaceChild(canvas, receiver);
else receiver(null, canvas);
}
}
svgImg.onerror = function(e) {
// the flag set below makes sure the namespace clean-up is attempted only once
if (svg._cleanedNS) {
console.error("Couldn't export svg, please check that the svgElement passed is a valid svg document.");
return;
}
// Some non-standard NameSpaces can cause this issues
// This will remove them all
function cleanNS(el) {
var attr = el.attributes;
for (var i = 0; i < attr.length; i++) {
if (attr[i].name.indexOf(':') > -1) el.removeAttribute(attr[i].name)
}
}
cleanNS(svg);
for (var i = 0; i < svg.children.length; i++)
cleanNS(svg.children[i]);
svg._cleanedNS = true;
// retry the export
exportDoc();
}
svgImg.src = svgURL;
}
// ToDo : find a way to get only usefull rules
// Copies the rules of every document style sheet into <style> elements inside the clone's <defs>
var parseStyles = function() {
var styleS = [],i;
// transform the live StyleSheetList to an array to avoid endless loop
for (i = 0; i < document.styleSheets.length; i++)
styleS.push(document.styleSheets[i]);
// Do we have a `<defs>` element already ?
var defs = clone.querySelector('defs') || document.createElementNS('http://www.w3.org/2000/svg', 'defs');
if (!defs.parentNode)
clone.insertBefore(defs, clone.firstElementChild);
// iterate through all document's stylesheets
for (i = 0; i < styleS.length; i++) {
var style = document.createElement('style');
var rules = styleS[i].cssRules,
l = rules.length;
for (var j = 0; j < l; j++)
style.innerHTML += rules[j].cssText + '\n';
defs.appendChild(style);
}
// small hack to avoid border and margins being applied inside the <img>
var s = clone.style;
s.border = s.padding = s.margin = 0;
s.transform = 'initial';
}
// Returns, as a style string, the computed properties of `svg` that differ from
// those of a copy of `node` temporarily inserted right before the svg
var getSVGStyles = function(node) {
var dest = node.cloneNode(true);
svg.parentNode.insertBefore(dest, svg);
var dest_comp = getComputedStyle(dest);
var svg_comp = getComputedStyle(svg);
var mods = "";
for (var i = 0; i < svg_comp.length; i++) {
if (svg_comp[svg_comp[i]] !== dest_comp[svg_comp[i]])
mods += svg_comp[i] + ':' + svg_comp[svg_comp[i]] + ';';
}
svg.parentNode.removeChild(dest);
return mods;
}
// `total` / `encoded` are the counters the toDataURL() callbacks above use to
// detect that the last <image> has been handled
var images = svg.querySelectorAll('image'),
total = images.length,
encoded = 0;
// Loop through all our <images> elements
for (var i = 0; i < images.length; i++) {
// check if the image is external
if (images[i].getAttributeNS(xlinkNS, 'href').indexOf('data:image') < 0)
toDataURL(images[i]);
// else increment our counter
else if (++encoded === total) exportDoc()
}
// if there were no <image> element
if (total === 0) exportDoc();
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/0.4.1/html2canvas.min.js"></script>
<div id="contentDiv" style="width: 50%;">
<img class="" src="s.jpg" width="75%">
<svg xmlns="http://www.w3.org/2000/svg" id="svg" height="100%">
<defs>
<clipPath id="my-path">
<text id="texty" style="font-weight:bold;" x="60" y="300" font-size="60">test</text>
</clipPath>
</defs>
<image xlink:href="a.jpg" clip-path="url(#my-path)" width="100%" height="100%" id="filler" preserveAspectRatio="none"></image>
</svg>
</div>
<div id="clean">clean:<br></div>
<div id="dirty">dirty :<br></div>
<style type="text/css">
svg {
position: relative;
top: -531px;
left: 120px;
}
</style>
I have attached three images. s.jpg image is my main image which is inside in main div. It is main image where user can write their name with texture color. To write text i have used svg inside main div and for texture i have used a.jpg as hidden image.
I used html2canvas js library to convert div into image but i did not get my desired output. Please help me to find out solution for this problem. Thanx in advance

Try this, very easy to use and works everytime :
Dom-To-Image

you can use saveSvgAsPng.js where you have to pass the svg element id along with file name to the image
in my case
saveSvgAsPng(document.getElementById('canvassvg'), "structure.png");
which stores the image in your temp or browser downloads location
(OR)
if you want to store it on server
convert the innerHTML content of your main tag, i.e., everything from <svg> to </svg>, to base64 and use a base64-to-image converter in the backend
// Serialize the svg element and rasterize it through an <img> and a canvas.
var svgData = new XMLSerializer().serializeToString( svgobj );
var canvas = document.createElement( "canvas" );
var ctx = canvas.getContext( "2d" );
var img = document.createElement( "img" );
// btoa() only accepts characters up to 0xFF: percent-encode the markup,
// then turn every %XX escape back into the single character with that code
svgData = btoa(encodeURIComponent(svgData).replace(/%([0-9A-F]{2})/g,
function toSolidBytes(match, p1) {
return String.fromCharCode('0x' + p1);
}));
var canvasd='';
// attach the handler before assigning the source so the load event cannot be missed
img.onload = function() {
canvas.width = img.width;
canvas.height = img.height;
ctx.drawImage( img, 0, 0 );
// a canvas cannot be exported as SVG; browsers fall back to PNG for
// unsupported types, so request PNG explicitly
canvasd = canvas.toDataURL( "image/png" );
// base64 data is in canvasd
// ajax call to store to backend goes here
};
img.setAttribute( "src", "data:image/svg+xml;base64," + svgData );

For Loop - Changing src every loop

I'm trying to load pdfs and render every single page into a canvas.
It works when I'm only loading a one paged .pdf but not when I'm using a multiple page .pdf
The for-loop is supposed to run as often as there are pages in the pdf,
var total =
Every loop I'm creating a
<canvas class=''></canvas>
which should have the matching page number as class.
can.className = 'pdfpage' + i
But for some reason it always gets the total page count as class. E.g. when loading a five paged .pdf it creates...
<canvas class='pdfpage5'></canvas>
...5 times and not 'pdfpage0', 'pdfpage1', etc.
I'm pretty new to JS so I'm not exactly sure if I'm thinking in the right direction with 'i'. I added
console.log('i is: ' + i);
to confirm that i really adds +1 every turn.
<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script>
<script src="http://html2canvas.hertzen.com/build/html2canvas.js"></script>
<script type="text/javascript" src="https://rawgithub.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
<script type="text/javascript" src='./jsPDF/jspdf.js'></script>
<script src="./jsPDF/plugins/addimage.js"></script>
<script src="./jsPDF/plugins/png_support.js"></script>
<script src="./jsPDF/plugins/canvas.js"></script>
<script type="text/javascript" src='./FileSaver.js-master/FileSaver.js'></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script type="text/javascript" src="https://rawgithub.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
<div id='1'>
<input id='pdf' type='file'/>
<script type="text/javascript">
// Reads the PDF chosen in input#pdf and renders every page into its own
// <canvas>, appended to div#1.
PDFJS.disableWorker = true;
var pdf = document.getElementById('pdf');
pdf.onchange = function(ev) {
// NOTE: `file` and `fileReader` are assigned without a declaration, so they
// become implicit globals
if (file = document.getElementById('pdf').files[0]) {
fileReader = new FileReader();
fileReader.onload = function(ev) {
console.log(ev);
PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdf) {
var total = pdf.numPages;
// NOTE: `i` is declared with `var`, so a single binding is shared by every
// getPage() callback below. Those callbacks run asynchronously, after the
// loop has finished, when i === total - which is why every canvas ends up
// with the class 'pdfpage' + total.
for (var i = 0; i < total; i++) {
console.log('i is: ' + i);
console.log('total pages: ' + total);
//
// Fetch page i + 1 (page numbers are 1-based)
//
console.log(pdf)
pdf.getPage(i + 1).then(function getPageHelloWorld(page) {
var scale = 1.0;
var viewport = page.getViewport(scale);
//
// Prepare canvas using PDF page dimensions
//
var div = document.getElementById('1');
var can = document.createElement('canvas');
can.className = 'pdfpage' +i;
div.appendChild(can);
// NOTE: `canvas` is another implicit global; it holds the live collection of
// all elements with that class, and canvas[0] is the first of them, not
// necessarily the canvas created just above
canvas = document.getElementsByClassName('pdfpage' +i);
var context = canvas[0].getContext('2d');
canvas[0].height = viewport.height;
canvas[0].width = viewport.width;
//
// Render PDF page into canvas context
//
var task = page.render({canvasContext: context, viewport: viewport})
task.promise.then(function(){
console.log(canvas[0].toDataURL('image/jpeg'));
var dataURL = canvas[0].toDataURL('image/jpeg');
});
});
}
}, function(error){
console.log(error);
});
};
fileReader.readAsArrayBuffer(file);
}
}
</script>
</div>

Try this, it should add the required the classnames
<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script>
<script src="http://html2canvas.hertzen.com/build/html2canvas.js"></script>
<script type="text/javascript" src="https://rawgithub.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
<script type="text/javascript" src='./jsPDF/jspdf.js'></script>
<script src="./jsPDF/plugins/addimage.js"></script>
<script src="./jsPDF/plugins/png_support.js"></script>
<script src="./jsPDF/plugins/canvas.js"></script>
<script type="text/javascript" src='./FileSaver.js-master/FileSaver.js'></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script type="text/javascript" src="https://rawgithub.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
<div id='1'>
<input id='pdf' type='file'/>
<script type="text/javascript">
// Reads the PDF chosen in input#pdf and renders every page into its own
// <canvas class="pdfpageN"> (N = 1-based page number), appended to div#1.
PDFJS.disableWorker = true;
var pdf = document.getElementById('pdf');
pdf.onchange = function(ev) {
  var file = document.getElementById('pdf').files[0];
  if (!file) {
    return;
  }
  var fileReader = new FileReader();
  fileReader.onload = function(ev) {
    console.log(ev);
    PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdf) {
      var total = pdf.numPages;
      for (var i = 0; i < total; i++) {
        console.log('i is: ' + i);
        console.log('total pages: ' + total);
        //
        // Fetch page i + 1 (page numbers are 1-based)
        //
        console.log(pdf)
        pdf.getPage(i + 1).then(function getPageHelloWorld(page) {
          var scale = 1.0;
          var viewport = page.getViewport(scale);
          //
          // Prepare canvas using PDF page dimensions
          //
          var div = document.getElementById('1');
          var can = document.createElement('canvas');
          // The loop counter cannot be used here: this callback runs after
          // the loop has ended. The page knows its own (0-based) index.
          can.className = 'pdfpage' + (page.pageIndex + 1);
          can.height = viewport.height;
          can.width = viewport.width;
          div.appendChild(can);
          // Keep working with the element created above. Looking it up again
          // by class name through a shared global can return another canvas:
          // one from a previously chosen file, or the one of a later page by
          // the time rendering has finished.
          var context = can.getContext('2d');
          //
          // Render PDF page into canvas context
          //
          var task = page.render({canvasContext: context, viewport: viewport});
          task.promise.then(function() {
            var dataURL = can.toDataURL('image/jpeg');
            console.log(dataURL);
          });
        });
      }
    }, function(error) {
      console.log(error);
    });
  };
  fileReader.readAsArrayBuffer(file);
};
</script>
</div>
Basically, all I have done is replace i with page.pageIndex +1 in the callback function

Is it possible to cache canvas pages?

I am using the code in http://jsfiddle.net/epistemex/LUNaJ/
PDFJS.disableWorker = true; // due to CORS
var canvas = document.createElement('canvas'), // single off-screen canvas
ctx = canvas.getContext('2d'), // to render to
pages = [],
currentPage = 1,
url = 'http://www.corsproxy.com/www.ohio.edu/technology/training/upload/Java-Script-Reference-Guide.pdf';
PDFJS.getDocument(url).then(function (pdf) {
PROGRESS.max = pdf.numPages; // just for demo
PROGRESS.value = 1; // just for demo
// init parsing of first page
if (currentPage <= pdf.numPages) getPage();
// main entry point/function for loop
function getPage() {
// when promise is returned do as usual
pdf.getPage(currentPage).then(function(page) {
var scale = 1.5;
var viewport = page.getViewport(scale);
canvas.height = viewport.height;
canvas.width = viewport.width;
var renderContext = {
canvasContext: ctx,
viewport: viewport
};
// now, tap into the returned promise from render:
page.render(renderContext).then(function() {
// store compressed image data in array
pages.push(canvas.toDataURL());
if (currentPage < pdf.numPages) {
currentPage++;
PROGRESS.value = currentPage; // just for demo
getPage(); // get next page
}
else {
done(); // call done() when all pages are parsed
}
});
});
}
});
/**
 * Called once all pages have been parsed: starts drawing every stored page
 * and adds each one to the document through addPage().
 */
function done() {
  // NOTE: Just for demo - correct order is not guaranteed here because
  // drawPage is async. To guarantee the order, chain the calls through the
  // callback (as done for parsing) instead of iterating. Presenting a single
  // page is not affected (just call drawPage() directly).
  pages.forEach(function (unused, pageIndex) {
    drawPage(pageIndex, addPage);
  });
}
/**
 * Shows `img` as a 100x120 px thumbnail at the end of the document body.
 */
function addPage(img) {
  var thumbStyle = img.style;
  thumbStyle.width = '100px';
  thumbStyle.height = '120px';
  document.body.appendChild(img);
}
/**
 * Loads the stored data-URI of page `index` into a new image; once loaded,
 * the image is drawn over the whole shared canvas and passed to `callback`.
 */
function drawPage(index, callback) {
  var thumbnail = new Image();
  thumbnail.onload = function handleLoad() {
    var target = ctx.canvas;
    ctx.drawImage(thumbnail, 0, 0, target.width, target.height);
    // invoke callback when we're done
    callback(thumbnail);
  };
  // start loading the data-uri as source
  thumbnail.src = pages[index];
}
to render PDF pages to canvas. The problem with this is that it takes a long time if the PDF file has a large number of pages. Is it possible to cache/save these generated images on the user's computer/browser so that if he runs the code a second time, they don't have to be generated again and can instead be displayed immediately?

No, data URIs are not "cached" by the browser since there is no download involved.
What you can do however, is to store all your pages into a globally accessible array, and check if you already have it before you call PDFJS.getDocument(url) :
PDFJS.disableWorker = true; // due to CORS
var canvas = document.createElement('canvas'), // single off-screen canvas
ctx = canvas.getContext('2d'), // to render to
docs = {}, // an object that will store our pdf documents
urls = ["https://www.ohio.edu/technology/training/upload/html-tag-reference-guide.pdf", "https://www.ohio.edu/technology/training/upload/Java-Script-Reference-Guide.pdf"];
btn0.onclick = getDoc;
btn1.onclick = getDoc;
/**
 * Click handler of the "btnN" buttons: shows the pages of document N.
 *
 * `docs[url]` caches the *promise* of the rendered pages rather than an
 * array that is still being filled. A second click while the document is
 * loading therefore waits for the complete result instead of drawing a
 * partial (or empty) list, and a failed download is forgotten so that the
 * next click retries it.
 *
 * NOTE(review): all documents are rendered through the one shared
 * off-screen canvas, so loading two different documents at the same time
 * can still interfere - confirm whether that needs to be serialized.
 */
function getDoc() {
  // get the doc's url
  var url = urls[+this.id.split('btn')[1]];
  // remember which document was requested last
  getDoc.currentUrl = url;
  // clear the result div
  result.innerHTML = '';
  if (!docs[url]) {
    // download and parse the doc (only once per url)
    docs[url] = renderDocPages(url);
    docs[url].then(null, function () {
      // do not keep a failed attempt in the cache
      delete docs[url];
    });
  }
  docs[url].then(function (pages) {
    // another document has been requested in the meantime
    if (getDoc.currentUrl !== url) {
      return;
    }
    result.innerHTML = '';
    done(pages);
  }, function (error) {
    console.log(error);
  });
}

// Renders every page of the PDF at `url`, one after the other, and resolves
// with the array of page images (data-URIs) in page order.
function renderDocPages(url) {
  return Promise.resolve(PDFJS.getDocument(url)).then(function (pdf) {
    PROGRESS.max = pdf.numPages; // just for demo
    PROGRESS.value = 1; // just for demo
    var pages = [];

    // renders page `pageNumber`, then continues with the next one
    function renderFrom(pageNumber) {
      if (pageNumber > pdf.numPages) {
        return pages;
      }
      return pdf.getPage(pageNumber).then(function (page) {
        var scale = 1.5;
        var viewport = page.getViewport(scale);
        canvas.height = viewport.height;
        canvas.width = viewport.width;
        var renderContext = {
          canvasContext: ctx,
          viewport: viewport
        };
        // wait for the render to finish before reading the canvas
        return page.render(renderContext).then(function () {
          // store compressed image data in array
          pages.push(canvas.toDataURL());
          if (pageNumber < pdf.numPages) {
            PROGRESS.value = pageNumber + 1; // just for demo
          }
          return renderFrom(pageNumber + 1);
        });
      });
    }

    return renderFrom(1);
  });
}
/**
 * Starts drawing every stored page image of a document; each loaded image
 * is handed to addPage().
 *
 * @param {Array} pages - data-URIs of the rendered pages.
 */
function done(pages) {
  pages.forEach(function (dataURI) {
    drawPage(dataURI, addPage);
  });
}
/**
 * Shows `img` as a 100x120 px thumbnail at the end of the result container.
 */
function addPage(img) {
  var thumbStyle = img.style;
  thumbStyle.width = '100px';
  thumbStyle.height = '120px';
  result.appendChild(img);
}
/**
 * Loads `dataURI` into a new image; once loaded, the image is drawn over
 * the whole shared canvas and passed to `callback`.
 */
function drawPage(dataURI, callback) {
  var thumbnail = new Image();
  thumbnail.onload = function handleLoad() {
    var target = ctx.canvas;
    ctx.drawImage(thumbnail, 0, 0, target.width, target.height);
    // invoke callback when we're done
    callback(thumbnail);
  };
  // start loading the data-uri as source
  thumbnail.src = dataURI;
}
#PROGRESS {
width: 100%
}
<script src="https://rawgit.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
<button id="btn0">1st Doc</button>
<button id="btn1">2nd Doc</button>
<progress id="PROGRESS" value=0></progress>
<div id="result"></div>

Develop Reference

JavaScript is the programming language of the Web.

PDF.js get images of one page and display them as HTML - javascript

Related

How to fetch Google Drive PDF url using pdf.js script on new web page?

ReferenceError: PDFJS is not defined Asp.net

capture whole div with svg in javascript

For Loop - Changing src every loop

Is it possible to cache canvas pages?

Categories

Resources