I am trying to develop a Video Calling/Conferencing application using WebRTC and node.js.
Right now there is no facility to control bandwidth during during video call. Is there any way to control/reduce bandwidth.
(like I want make whole my web application to work on 150 kbps while video conferencing).
Any suggestions are highly appreciated.
Thanks in advance.
Try this demo. You can inject bandwidth attributes (b=AS) in the session descriptions:
audioBandwidth = 50;
videoBandwidth = 256;
function setBandwidth(sdp) {
sdp = sdp.replace(/a=mid:audio\r\n/g, 'a=mid:audio\r\nb=AS:' + audioBandwidth + '\r\n');
sdp = sdp.replace(/a=mid:video\r\n/g, 'a=mid:video\r\nb=AS:' + videoBandwidth + '\r\n');
return sdp;
}
// ----------------------------------------------------------
peer.createOffer(function (sessionDescription) {
sessionDescription.sdp = setBandwidth(sessionDescription.sdp);
peer.setLocalDescription(sessionDescription);
}, null, constraints);
peer.createAnswer(function (sessionDescription) {
sessionDescription.sdp = setBandwidth(sessionDescription.sdp);
peer.setLocalDescription(sessionDescription);
}, null, constraints);
b=AS is already present in sdp for data m-line; its default value is 50.
Updated at Sept 23, 2015
Here is a library that provides full control over both audio/video tracks' bitrates:
// here is how to use it
var bandwidth = {
screen: 300, // 300kbits minimum
audio: 50, // 50kbits minimum
video: 256 // 256kbits (both min-max)
};
var isScreenSharing = false;
sdp = BandwidthHandler.setApplicationSpecificBandwidth(sdp, bandwidth, isScreenSharing);
sdp = BandwidthHandler.setVideoBitrates(sdp, {
min: bandwidth.video,
max: bandwidth.video
});
sdp = BandwidthHandler.setOpusAttributes(sdp);
Here is the library code. Its quite big but it works!
// BandwidthHandler.js
var BandwidthHandler = (function() {
function setBAS(sdp, bandwidth, isScreen) {
if (!!navigator.mozGetUserMedia || !bandwidth) {
return sdp;
}
if (isScreen) {
if (!bandwidth.screen) {
console.warn('It seems that you are not using bandwidth for screen. Screen sharing is expected to fail.');
} else if (bandwidth.screen < 300) {
console.warn('It seems that you are using wrong bandwidth value for screen. Screen sharing is expected to fail.');
}
}
// if screen; must use at least 300kbs
if (bandwidth.screen && isScreen) {
sdp = sdp.replace(/b=AS([^\r\n]+\r\n)/g, '');
sdp = sdp.replace(/a=mid:video\r\n/g, 'a=mid:video\r\nb=AS:' + bandwidth.screen + '\r\n');
}
// remove existing bandwidth lines
if (bandwidth.audio || bandwidth.video || bandwidth.data) {
sdp = sdp.replace(/b=AS([^\r\n]+\r\n)/g, '');
}
if (bandwidth.audio) {
sdp = sdp.replace(/a=mid:audio\r\n/g, 'a=mid:audio\r\nb=AS:' + bandwidth.audio + '\r\n');
}
if (bandwidth.video) {
sdp = sdp.replace(/a=mid:video\r\n/g, 'a=mid:video\r\nb=AS:' + (isScreen ? bandwidth.screen : bandwidth.video) + '\r\n');
}
return sdp;
}
// Find the line in sdpLines that starts with |prefix|, and, if specified,
// contains |substr| (case-insensitive search).
function findLine(sdpLines, prefix, substr) {
return findLineInRange(sdpLines, 0, -1, prefix, substr);
}
// Find the line in sdpLines[startLine...endLine - 1] that starts with |prefix|
// and, if specified, contains |substr| (case-insensitive search).
function findLineInRange(sdpLines, startLine, endLine, prefix, substr) {
var realEndLine = endLine !== -1 ? endLine : sdpLines.length;
for (var i = startLine; i < realEndLine; ++i) {
if (sdpLines[i].indexOf(prefix) === 0) {
if (!substr ||
sdpLines[i].toLowerCase().indexOf(substr.toLowerCase()) !== -1) {
return i;
}
}
}
return null;
}
// Gets the codec payload type from an a=rtpmap:X line.
function getCodecPayloadType(sdpLine) {
var pattern = new RegExp('a=rtpmap:(\\d+) \\w+\\/\\d+');
var result = sdpLine.match(pattern);
return (result && result.length === 2) ? result[1] : null;
}
function setVideoBitrates(sdp, params) {
params = params || {};
var xgoogle_min_bitrate = params.min;
var xgoogle_max_bitrate = params.max;
var sdpLines = sdp.split('\r\n');
// VP8
var vp8Index = findLine(sdpLines, 'a=rtpmap', 'VP8/90000');
var vp8Payload;
if (vp8Index) {
vp8Payload = getCodecPayloadType(sdpLines[vp8Index]);
}
if (!vp8Payload) {
return sdp;
}
var rtxIndex = findLine(sdpLines, 'a=rtpmap', 'rtx/90000');
var rtxPayload;
if (rtxIndex) {
rtxPayload = getCodecPayloadType(sdpLines[rtxIndex]);
}
if (!rtxIndex) {
return sdp;
}
var rtxFmtpLineIndex = findLine(sdpLines, 'a=fmtp:' + rtxPayload.toString());
if (rtxFmtpLineIndex !== null) {
var appendrtxNext = '\r\n';
appendrtxNext += 'a=fmtp:' + vp8Payload + ' x-google-min-bitrate=' + (xgoogle_min_bitrate || '228') + '; x-google-max-bitrate=' + (xgoogle_max_bitrate || '228');
sdpLines[rtxFmtpLineIndex] = sdpLines[rtxFmtpLineIndex].concat(appendrtxNext);
sdp = sdpLines.join('\r\n');
}
return sdp;
}
function setOpusAttributes(sdp, params) {
params = params || {};
var sdpLines = sdp.split('\r\n');
// Opus
var opusIndex = findLine(sdpLines, 'a=rtpmap', 'opus/48000');
var opusPayload;
if (opusIndex) {
opusPayload = getCodecPayloadType(sdpLines[opusIndex]);
}
if (!opusPayload) {
return sdp;
}
var opusFmtpLineIndex = findLine(sdpLines, 'a=fmtp:' + opusPayload.toString());
if (opusFmtpLineIndex === null) {
return sdp;
}
var appendOpusNext = '';
appendOpusNext += '; stereo=' + (typeof params.stereo != 'undefined' ? params.stereo : '1');
appendOpusNext += '; sprop-stereo=' + (typeof params['sprop-stereo'] != 'undefined' ? params['sprop-stereo'] : '1');
if (typeof params.maxaveragebitrate != 'undefined') {
appendOpusNext += '; maxaveragebitrate=' + (params.maxaveragebitrate || 128 * 1024 * 8);
}
if (typeof params.maxplaybackrate != 'undefined') {
appendOpusNext += '; maxplaybackrate=' + (params.maxplaybackrate || 128 * 1024 * 8);
}
if (typeof params.cbr != 'undefined') {
appendOpusNext += '; cbr=' + (typeof params.cbr != 'undefined' ? params.cbr : '1');
}
if (typeof params.useinbandfec != 'undefined') {
appendOpusNext += '; useinbandfec=' + params.useinbandfec;
}
if (typeof params.usedtx != 'undefined') {
appendOpusNext += '; usedtx=' + params.usedtx;
}
if (typeof params.maxptime != 'undefined') {
appendOpusNext += '\r\na=maxptime:' + params.maxptime;
}
sdpLines[opusFmtpLineIndex] = sdpLines[opusFmtpLineIndex].concat(appendOpusNext);
sdp = sdpLines.join('\r\n');
return sdp;
}
return {
setApplicationSpecificBandwidth: function(sdp, bandwidth, isScreen) {
return setBAS(sdp, bandwidth, isScreen);
},
setVideoBitrates: function(sdp, params) {
return setVideoBitrates(sdp, params);
},
setOpusAttributes: function(sdp, params) {
return setOpusAttributes(sdp, params);
}
};
})();
Here is how to set advance opus bitrate parameters:
sdp = BandwidthHandler.setOpusAttributes(sdp, {
'stereo': 0, // to disable stereo (to force mono audio)
'sprop-stereo': 1,
'maxaveragebitrate': 500 * 1024 * 8, // 500 kbits
'maxplaybackrate': 500 * 1024 * 8, // 500 kbits
'cbr': 0, // disable cbr
'useinbandfec': 1, // use inband fec
'usedtx': 1, // use dtx
'maxptime': 3
});
A more up-to-date answer
const videobitrate = 20000;
var offer = pc.localDescription;
// Set bandwidth for video
offer.sdp = offer.sdp.replace(/(m=video.*\r\n)/g, `$1b=AS:${videobitrate}\r\n`);
pc.setLocalDescription(offer);
Explanation: a=mid:video is not a guaranteed tag. For receive only video, you might not see it or see a=mid:0. Generally it's a better bet to look for the m=video xxxx xxxx (or similar audio) tag and append the bandwidth parameters underneath
Not sure if this helps, but you can limit the video resolution from getUserMedia with constraints: see demo at simpl.info/getusermedia/constraints/.
My answer is not for node.js, but maybe someone stuck with controlling webrtc bandwidth while developing a native phone app (iOS, android).
So, at least in version GoogleWebRTC (1.1.31999) for iOS and org.webrtc:google-webrtc:1.0.22672 for android there is method in PeerConnection instance.
For iOS:
let updateBitrateSuccessful = pc.setBweMinBitrateBps(300000, currentBitrateBps: 1000000, maxBitrateBps: 3000000)
print("Update rtc connection bitrate " + (updateBitrateSuccessful ? "successful" : "failed"))
Respectively, for Android:
boolean updateBitrateSuccessful = pc.setBitrate(300000, 1000000, 3000000);
Log.d("AppLog", "Update rtc connection bitrate " + (updateBitrateSuccessful ? "successful" : "failed"));
It depends on what SFU media server you're using. But in short, your media server needs to tell the client browser what maximum bitrate it should send, by setting the bandwidth attribute in the answer SDP, as well as in the REMB message it periodically sends.
The REMB (receiver estimated maximum bitrate) applies separately to audio and video streams (at least on desktop Chrome and Firefox that I tested). So if REMB is set to 75kps and you have one audio and one video stream, then each will confine to 75kps for a total transport bitrate of 150kps. You should use chrome://webrtc-internals to test and verify this.
If you are using OPUS as the audio codec, you can control the audio bandwidth separately by setting the maxaveragebitrate attribute in the answer SDP. Setting this attribute will override the REMB value (verified on Chrome). So you can set audio bitrate to 16kps and the video bitrate (via REMB) to 134kps for a combined transport bitrate of 150.
Note that the REMB is sent by your server, so your server needs to support this. The other SDP attributes can be manipulated on the client side by modifying the answer SDP that you receive, right before passing it to setRemoteDescription().
This is my limited understanding based on online research. It's not based on deep knowledge of the technology stack, so please take it with a grain of salt.
You should also be able to use bandwidth constraints on the stream (see this demo), but it doesn't appear to be working, even in the latest canary (29.0.1529.3).
There's some discussion of the SDP-based approach on the discuss-webrtc mailing list, which links to WebRTC bug 1846.
I did it Yesterday and it works like a charm! in my case, it was needed to prevent slow and old phones get freeze during a videocall! have a look
function handle_offer_sdp(offer) {
let sdp = offer.sdp.split('\r\n');//convert to an concatenable array
let new_sdp = '';
let position = null;
sdp = sdp.slice(0, -1); //remove the last comma ','
for(let i = 0; i < sdp.length; i++) {//look if exists already a b=AS:XXX line
if(sdp[i].match(/b=AS:/)) {
position = i; //mark the position
}
}
if(position) {
sdp.splice(position, 1);//remove if exists
}
for(let i = 0; i < sdp.length; i++) {
if(sdp[i].match(/m=video/)) {//modify and add the new lines for video
new_sdp += sdp[i] + '\r\n' + 'b=AS:' + '128' + '\r\n';
}
else {
if(sdp[i].match(/m=audio/)) { //modify and add the new lines for audio
new_sdp += sdp[i] + '\r\n' + 'b=AS:' + 64 + '\r\n';
}
else {
new_sdp += sdp[i] + '\r\n';
}
}
}
return new_sdp; //return the new sdp
}
pc.createOffer(function(offer) {
offer.sdp = handle_offer_sdp(offer); //invoke function saving the new sdp
pc.setLocalDescription(offer);
}, function(error) {
console.log('error -> ' + error);
});
I recommend to change value of maxBitrate property as described here https://stackoverflow.com/a/71223675/1199820
Check this, this works for me.
Control your bitrate via getSenders(), after peer is connected then you can set your maximum bitrate.
This method allow you to control bitrate without renegotiation. so,
you can change the streaming quality during a call
//bandwidth => "unlimited", 75 kbps, 250 kbps, 1000 kbps, 2000 kbps
var bandwidth = 75;
const sender = pc1.getSenders()[0];
const parameters = sender.getParameters();
if (!parameters.encodings) {
parameters.encodings = [{}];
}
if (bandwidth === 'unlimited') {
delete parameters.encodings[0].maxBitrate;
} else {
parameters.encodings[0].maxBitrate = bandwidth * 1000;
}
sender.setParameters(parameters)
.then(() => {
// on success
})
.catch(e => console.error(e));
Reference code & demo
Related
I wonder if is possible to get the text inside of a PDF file by using only Javascript?
If yes, can anyone show me how?
I know there are some server-side java, c#, etc libraries but I would prefer not using a server.
thanks
Because pdf.js has been developing over the years, I would like to give a new answer. That is, it can be done locally without involving any server or external service. The new pdf.js has a function: page.getTextContent(). You can get the text content from that. I've done it successfully with the following code.
What you get in each step is a promise. You need to code this way: .then( function(){...}) to proceed to the next step.
PDFJS.getDocument( data ).then( function(pdf) {
pdf.getPage(i).then( function(page){
page.getTextContent().then( function(textContent){
What you finally get is an string array textContent.bidiTexts[]. You concatenate them to get the text of 1 page. Text blocks' coordinates are used to judge whether newline or space need to be inserted. (This may not be totally robust, but from my test it seems ok.)
The input parameter data needs to be either a URL or ArrayBuffer type data. I used the ReadAsArrayBuffer(file) function in FileReader API to get the data.
Note: According to some other user, the library has updated and caused the code to break. According to the comment by async5 below, you need to replace textContent.bidiTexts with textContent.items.
function Pdf2TextClass(){
var self = this;
this.complete = 0;
/**
*
* #param data ArrayBuffer of the pdf file content
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done;
* 2) total number of pages in file.
* #param callbackAllDone The input parameter of callback function is
* the result of extracted text from pdf file.
*
*/
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
PDFJS.getDocument( data ).then( function(pdf) {
var div = document.getElementById('viewer');
var total = pdf.numPages;
callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
if( null != textContent.bidiTexts ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.bidiTexts.length; k++ ){
var block = textContent.bidiTexts[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
I couldn't get gm2008's example to work (the internal data structure on pdf.js has changed apparently), so I wrote my own fully promise-based solution that doesn't use any DOM elements, queryselectors or canvas, using the updated pdf.js from the example at mozilla
It eats a file path for the upload since i'm using it with node-webkit.
You need to make sure you have the cmaps downloaded and pointed somewhere and you nee pdf.js and pdf.worker.js to get this working.
/**
* Extract text from PDFs with PDF.js
* Uses the demo pdf.js from https://mozilla.github.io/pdf.js/getting_started/
*/
this.pdfToText = function(data) {
PDFJS.workerSrc = 'js/vendor/pdf.worker.js';
PDFJS.cMapUrl = 'js/vendor/pdfjs/cmaps/';
PDFJS.cMapPacked = true;
return PDFJS.getDocument(data).then(function(pdf) {
var pages = [];
for (var i = 0; i < pdf.numPages; i++) {
pages.push(i);
}
return Promise.all(pages.map(function(pageNumber) {
return pdf.getPage(pageNumber + 1).then(function(page) {
return page.getTextContent().then(function(textContent) {
return textContent.items.map(function(item) {
return item.str;
}).join(' ');
});
});
})).then(function(pages) {
return pages.join("\r\n");
});
});
}
usage:
self.pdfToText(files[0].path).then(function(result) {
console.log("PDF done!", result);
})
Just leaving here a full working sample.
<html>
<head>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
</head>
<body>
<input id="pdffile" name="pdffile" type="file" />
<button id="btn" onclick="convert()">Process</button>
<div id="result"></div>
</body>
</html>
<script>
function convert() {
var fr=new FileReader();
var pdff = new Pdf2TextClass();
fr.onload=function(){
pdff.pdfToText(fr.result, null, (text) => { document.getElementById('result').innerText += text; });
}
fr.readAsDataURL(document.getElementById('pdffile').files[0])
}
function Pdf2TextClass() {
var self = this;
this.complete = 0;
this.pdfToText = function (data, callbackPageDone, callbackAllDone) {
console.assert(data instanceof ArrayBuffer || typeof data == 'string');
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function (pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++) {
pdf.getPage(i).then(function (page) {
var n = page.pageNumber;
page.getTextContent().then(function (textContent) {
//console.log(textContent.items[0]);0
if (null != textContent.items) {
var page_text = "";
var last_block = null;
for (var k = 0; k < textContent.items.length; k++) {
var block = textContent.items[k];
if (last_block != null && last_block.str[last_block.str.length - 1] != ' ') {
if (block.x < last_block.x)
page_text += "\r\n";
else if (last_block.y != block.y && (last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total) {
window.setTimeout(function () {
var full_text = "";
var num_pages = Object.keys(layers).length;
for (var j = 1; j <= num_pages; j++)
full_text += layers[j];
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
</script>
Here's some JavaScript code that does what you want using Pdf.js from http://hublog.hubmed.org/archives/001948.html:
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");
// listen for messages from the processor
window.addEventListener("message", function(event){
if (event.source != processor.contentWindow) return;
switch (event.data){
// "ready" = the processor is ready, so fetch the PDF file
case "ready":
var xhr = new XMLHttpRequest;
xhr.open('GET', input.getAttribute("src"), true);
xhr.responseType = "arraybuffer";
xhr.onload = function(event) {
processor.contentWindow.postMessage(this.response, "*");
};
xhr.send();
break;
// anything else = the processor has returned the text of the PDF
default:
output.textContent = event.data.replace(/\s+/g, " ");
break;
}
}, true);
...and here's an example:
http://git.macropus.org/2011/11/pdftotext/example/
Note: This code assumes you're using nodejs. That means you're parsing a local file instead of one from a web page since the original question doesn't explicitly ask about parsing pdfs on a web page.
#gm2008's answer was a great starting point (please read it and its comments for more info), but needed some updates (08/19) and had some unused code. I also like examples that are more full. There's more refactoring and tweaking that could be done (e.g. with await), but for now it's as close to that original answer as it could be.
As before, this uses Mozilla's PDFjs library. The npmjs package is at https://www.npmjs.com/package/pdfjs-dist.
In my experience, this doesn't do well in finding where to put spaces, but that's a problem for another time.
[Edit: I believe the update to the use of .transform has restored the whitespace as it originally behaved.]
// This file is called myPDFfileToText.js and is in the root folder
let PDFJS = require('pdfjs-dist');
let pathToPDF = 'path/to/myPDFfileToText.pdf';
let toText = Pdf2TextObj();
let onPageDone = function() {}; // don't want to do anything between pages
let onFinish = function(fullText) { console.log(fullText) };
toText.pdfToText(pathToPDF, onPageDone, onFinish);
function Pdf2TextObj() {
let self = this;
this.complete = 0;
/**
*
* #param path Path to the pdf file.
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done.
* 2) total number of pages in file.
* 3) the `page` object itself or null.
* #param callbackAllDone Called after all text has been collected. Input parameters:
* 1) full text of parsed pdf.
*
*/
this.pdfToText = function(path, callbackPageDone, callbackAllDone) {
// console.assert(typeof path == 'string');
PDFJS.getDocument(path).promise.then(function(pdf) {
let total = pdf.numPages;
callbackPageDone(0, total, null);
let pages = {};
// For some (pdf?) reason these don't all come in consecutive
// order. That's why they're stored as an object and then
// processed one final time at the end.
for (let pagei = 1; pagei <= total; pagei++) {
pdf.getPage(pagei).then(function(page) {
let pageNumber = page.pageNumber;
page.getTextContent().then(function(textContent) {
if (null != textContent.items) {
let page_text = "";
let last_item = null;
for (let itemsi = 0; itemsi < textContent.items.length; itemsi++) {
let item = textContent.items[itemsi];
// I think to add whitespace properly would be more complex and
// would require two loops.
if (last_item != null && last_item.str[last_item.str.length - 1] != ' ') {
let itemX = item.transform[5]
let lastItemX = last_item.transform[5]
let itemY = item.transform[4]
let lastItemY = last_item.transform[4]
if (itemX < lastItemX)
page_text += "\r\n";
else if (itemY != lastItemY && (last_item.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
} // ends if may need to add whitespace
page_text += item.str;
last_item = item;
} // ends for every item of text
textContent != null && console.log("page " + pageNumber + " finished.") // " content: \n" + page_text);
pages[pageNumber] = page_text + "\n\n";
} // ends if has items
++self.complete;
callbackPageDone(self.complete, total, page);
// If all done, put pages in order and combine all
// text, then pass that to the callback
if (self.complete == total) {
// Using `setTimeout()` isn't a stable way of making sure
// the process has finished. Watch out for missed pages.
// A future version might do this with promises.
setTimeout(function() {
let full_text = "";
let num_pages = Object.keys(pages).length;
for (let pageNum = 1; pageNum <= num_pages; pageNum++)
full_text += pages[pageNum];
callbackAllDone(full_text);
}, 1000);
}
}); // ends page.getTextContent().then
}); // ends page.then
} // ends for every page
});
}; // Ends pdfToText()
return self;
}; // Ends object factory
Run in the terminal:
node myPDFfileToText.js
Updated 02/2021
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script>
function Pdf2TextClass(){
var self = this;
this.complete = 0;
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function(pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
//console.log(textContent.items[0]);0
if( null != textContent.items ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.items.length; k++ ){
var block = textContent.items[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
console.log(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
var pdff = new Pdf2TextClass();
pdff.pdfToText('PDF_URL');
</script>
For all the people who actually want to use it on a node server:
/**
* Created by velten on 25.04.16.
*/
"use strict";
let pdfUrl = "http://example.com/example.pdf";
let request = require('request');
var pdfParser = require('pdf2json');
let pdfPipe = request({url: pdfUrl, encoding:null}).pipe(pdfParser);
pdfPipe.on("pdfParser_dataError", err => console.error(err) );
pdfPipe.on("pdfParser_dataReady", pdf => {
//optionally:
//let pdf = pdfParser.getMergedTextBlocksIfNeeded();
let count1 = 0;
//get text on a particular page
for (let page of pdf.formImage.Pages) {
count1 += page.Texts.length;
}
console.log(count1);
pdfParser.destroy();
});
It is possible but:
you would have to use the server anyway, there's no way you can get content of a file on user computer without transferring it to server and back
I don't thing anyone has written such library yet
So if you have some free time you can learn pdf format and write such a library yourself, or you can just use server side library of course.
I'm trying to change the Bandwidth On The Fly for WebRTC P2P Video Call by using this sample combining to my existing code which are multi-participants video call:
Sample: https://webrtc.github.io/samples/src/content/peerconnection/bandwidth/
When I look into WebRTC Internals via Chrome,
bitsReceivedPerSecond for send (ssrc) (video) has been dropped to the selected bandwidth. However, bitsReceivedPerSecond for recv (ssrc) (video) still remain unchanged. May I know how to make the bandwidth changes applies on both send and receive?
Below are my codes, it would be great if you could help to point out my mistakes, thanks in advance.
Update 14/12/2018: Added 1st option for receiver to the codes
Problem: Uncaught TypeError: receiver.getParameters is not a function
const bandwidthSelector = document.querySelector('select#bandwidth');
bandwidthSelector.disabled = false;
// renegotiate bandwidth on the fly.
bandwidthSelector.onchange = () => {
bandwidthSelector.disabled = true;
const bandwidth = bandwidthSelector.options[bandwidthSelector.selectedIndex].value;
// In Chrome, use RTCRtpSender.setParameters to change bandwidth without
// (local) renegotiation. Note that this will be within the envelope of
// the initial maximum bandwidth negotiated via SDP.
if ((adapter.browserDetails.browser === 'chrome' ||
(adapter.browserDetails.browser === 'firefox' &&
adapter.browserDetails.version >= 64)) &&
'RTCRtpSender' in window &&
'setParameters' in window.RTCRtpSender.prototype) {
$.each(peers, function( index, value ) {
const sender = value.getSenders()[0];
const parameters = sender.getParameters();
if (!parameters.encodings) {
parameters.encodings = [{}];
}
if (bandwidth === 'unlimited') {
delete parameters.encodings[0].maxBitrate;
} else {
parameters.encodings[0].maxBitrate = bandwidth * 1000;
}
sender.setParameters(parameters)
.then(() => {
bandwidthSelector.disabled = false;
})
.catch(e => console.error(e));
/* 1st Option - Start */
const receiver = value.getReceivers()[0];
const recParameters = receiver.getParameters();
if (!recParameters.encodings) {
recParameters.encodings = [{}];
}
if (bandwidth === 'unlimited') {
delete recParameters.encodings[0].maxBitrate;
} else {
recParameters.encodings[0].maxBitrate = bandwidth * 1000;
}
receiver.setParameters(recParameters)
.then(() => {
bandwidthSelector.disabled = false;
})
.catch(e => console.error(e));
/* 1st Option - End */
return;
});
}
// Fallback to the SDP munging with local renegotiation way of limiting
// the bandwidth.
function onSetSessionDescriptionError(error) {
console.log('Failed to set session description: ' + error.toString());
}
};
function updateBandwidthRestriction(sdp, bandwidth) {
let modifier = 'AS';
if (adapter.browserDetails.browser === 'firefox') {
bandwidth = (bandwidth >>> 0) * 1000;
modifier = 'TIAS';
}
if (sdp.indexOf('b=' + modifier + ':') === -1) {
// insert b= after c= line.
sdp = sdp.replace(/c=IN (.*)\r\n/, 'c=IN $1\r\nb=' + modifier + ':' + bandwidth + '\r\n');
} else {
sdp = sdp.replace(new RegExp('b=' + modifier + ':.*\r\n'), 'b=' + modifier + ':' + bandwidth + '\r\n');
}
return sdp;
}
function removeBandwidthRestriction(sdp) {
return sdp.replace(/b=AS:.*\r\n/, '').replace(/b=TIAS:.*\r\n/, '');
}
Update 14/12/2018: 2nd option createOffer
Problem: Failed to set session description: InvalidStateError: Failed to execute 'setRemoteDescription' on 'RTCPeerConnection': Failed to set remote answer sdp: Called in wrong state: kStable
const bandwidthSelector = document.querySelector('select#bandwidth');
bandwidthSelector.disabled = false;
// renegotiate bandwidth on the fly.
bandwidthSelector.onchange = () => {
bandwidthSelector.disabled = true;
const bandwidth = bandwidthSelector.options[bandwidthSelector.selectedIndex].value;
// In Chrome, use RTCRtpSender.setParameters to change bandwidth without
// (local) renegotiation. Note that this will be within the envelope of
// the initial maximum bandwidth negotiated via SDP.
if ((adapter.browserDetails.browser === 'chrome' ||
(adapter.browserDetails.browser === 'firefox' &&
adapter.browserDetails.version >= 64)) &&
'RTCRtpSender' in window &&
'setParameters' in window.RTCRtpSender.prototype) {
$.each(peers, function( index, value ) {
const sender = value.getSenders()[0];
const parameters = sender.getParameters();
if (!parameters.encodings) {
parameters.encodings = [{}];
}
if (bandwidth === 'unlimited') {
delete parameters.encodings[0].maxBitrate;
} else {
parameters.encodings[0].maxBitrate = bandwidth * 1000;
}
sender.setParameters(parameters)
.then(() => {
bandwidthSelector.disabled = false;
})
.catch(e => console.error(e));
/* 2nd option - Start */
value.createOffer(
function (local_description) {
console.log("Local offer description is: ", local_description);
value.setLocalDescription(local_description,
function () {
signaling_socket.emit('relaySessionDescription', {
'peer_id': index,
'session_description': local_description
});
console.log("Offer setLocalDescription succeeded");
},
function () {
Alert("Offer setLocalDescription failed!");
}
);
},
function (error) {
console.log("Error sending offer: ", error);
}).then(() => {
const desc = {
type: value.remoteDescription.type,
sdp: bandwidth === 'unlimited'
? removeBandwidthRestriction(value.remoteDescription.sdp)
: updateBandwidthRestriction(value.remoteDescription.sdp, bandwidth)
};
console.log('Applying bandwidth restriction to setRemoteDescription:\n' +
desc.sdp);
return value.setRemoteDescription(desc);
})
.then(() => {
bandwidthSelector.disabled = false;
})
.catch(onSetSessionDescriptionError);
/* 2nd option - End */
return;
});
}
// Fallback to the SDP munging with local renegotiation way of limiting
// the bandwidth.
function onSetSessionDescriptionError(error) {
console.log('Failed to set session description: ' + error.toString());
}
};
function updateBandwidthRestriction(sdp, bandwidth) {
let modifier = 'AS';
if (adapter.browserDetails.browser === 'firefox') {
bandwidth = (bandwidth >>> 0) * 1000;
modifier = 'TIAS';
}
if (sdp.indexOf('b=' + modifier + ':') === -1) {
// insert b= after c= line.
sdp = sdp.replace(/c=IN (.*)\r\n/, 'c=IN $1\r\nb=' + modifier + ':' + bandwidth + '\r\n');
} else {
sdp = sdp.replace(new RegExp('b=' + modifier + ':.*\r\n'), 'b=' + modifier + ':' + bandwidth + '\r\n');
}
return sdp;
}
function removeBandwidthRestriction(sdp) {
return sdp.replace(/b=AS:.*\r\n/, '').replace(/b=TIAS:.*\r\n/, '');
}
RTCRtpSender only controls the sending bandwidth. If you want to limit the receiving bandwidth, you need to use either the b=AS / b=TIAS way or make the receiver use setParameters.
Using HTML5 I am trying to get the attribute (ie rotation), located in the header of a mp4 (I play it using a video tag), to do this I am trying to get the bytes that make up the header, and knowing the structure, find this atom.
Does anyone know how to do this in javascript?
You can use mediainfo.js,
It's a porting of mediainfo (cpp) in javascript compiled with emsciptem.
Here is a working example: https://mediainfo.js.org/
You will need to include the js/mediainfo.js file and put mediainfo.js.mem file in the same folder.
You need to check the sources on this file to see how it works:
https://mediainfo.js.org/js/mediainfopage.js
[...]
function parseFile(file) {
if (processing) {
return;
}
processing = true;
[...]
var fileSize = file.size, offset = 0, state = 0, seekTo = -1, seek = null;
mi.open_buffer_init(fileSize, offset);
var processChunk = function(e) {
var l;
if (e.target.error === null) {
var chunk = new Uint8Array(e.target.result);
l = chunk.length;
state = mi.open_buffer_continue(chunk, l);
var seekTo = -1;
var seekToLow = mi.open_buffer_continue_goto_get_lower();
var seekToHigh = mi.open_buffer_continue_goto_get_upper();
if (seekToLow == -1 && seekToHigh == -1) {
seekTo = -1;
} else if (seekToLow < 0) {
seekTo = seekToLow + 4294967296 + (seekToHigh * 4294967296);
} else {
seekTo = seekToLow + (seekToHigh * 4294967296);
}
if(seekTo === -1){
offset += l;
}else{
offset = seekTo;
mi.open_buffer_init(fileSize, seekTo);
}
chunk = null;
} else {
var msg = 'An error happened reading your file!';
console.err(msg, e.target.error);
processingDone();
alert(msg);
return;
}
// bit 4 set means finalized
if (state&0x08) {
var result = mi.inform();
mi.close();
addResult(file.name, result);
processingDone();
return;
}
seek(l);
};
function processingDone() {
processing = false;
$status.hide();
$cancel.hide();
$dropcontrols.fadeIn();
$fileinput.val('');
}
seek = function(length) {
if (processing) {
var r = new FileReader();
var blob = file.slice(offset, length + offset);
r.onload = processChunk;
r.readAsArrayBuffer(blob);
}
else {
mi.close();
processingDone();
}
};
// start
seek(CHUNK_SIZE);
}
[...]
// init mediainfo
miLib = MediaInfo(function() {
console.debug('MediaInfo ready');
$loader.fadeOut(function() {
$dropcontrols.fadeIn();
window['miLib'] = miLib; // debug
mi = new miLib.MediaInfo();
$fileinput.on('change', function(e) {
var el = $fileinput.get(0);
if (el.files.length > 0) {
parseFile(el.files[0]);
}
});
});
Here is the Github address with the sources of the project: https://github.com/buzz/mediainfo.js
I do not think you can extract such detailed metadata from a video, using HTML5 and its video-tag. The only things you can extract (video length, video tracks, etc.) are listed here:
http://www.w3schools.com/tags/ref_av_dom.asp
Of course, there might be special additional methods available in some browsers, but there is no "general" approach - you would need more than the existing methods of HTML5.
My goal is to get all images from document, then download all images bigger than 150x150px to local.
I'm stucked on retrieving files from URL i got on previous steps. Here is the buggy code line (full code - at the end):
...
var copyResult = fs.copy(imagesURLs[i], destFile);
...
When i run from console it just hangs up on fs.copy(), without any errors.
As i can understand, fs.copy() doesn't work with remote URLs, even if you set all proper args (--load-images=yes, --local-to-remote-url-access=yes). Am i right or there's something i did wrong with copy()? And are there any methods to get files directly from webkit's cache?
Got latest phantomjs version and ubuntu server.
I would be appreciate for any kind of help.
Full script code:
if (phantom.args.length < 1 || phantom.args.length > 2)
{
console.log('Usage: phantomjs ' + phantom.scriptName + ' <URL>');
phantom.exit();
}
else
{
var page = new WebPage(),
address = phantom.args[0];
page.viewportSize = { width: 1200, height: 4000 };
page.open(address, function (status)
{
if (status === 'success')
{
var imagesURLs = page.evaluate(function ()
{
var documentImages = [], imagesCount = document.images.length, index = 0;
while (index < imagesCount)
{
if ((document.images[index].width >= 150) && (document.images[index].height >= 150))
{
documentImages.push(document.images[index].src);
}
index++;
}
return documentImages;
});
var fs = require('fs');
for (var i in imagesURLs)
{
var fileName = imagesURLs[i].replace(/^.*[\\\/]/, '');
var destFile = '' + fs.workingDirectory + '/www/images/' + fileName;
console.log(destFile);
var copyResult = fs.copy(imagesURLs[i], destFile);
console.log(copyResult);
}
}
else
{
console.log('status: ' + status);
}
phantom.exit();
});
}
man try this.
function SaveAs(imgURL)
{
var oPop = window.open(imgURL,"","width=1, height=1, top=5000, left=5000");
for(;oPop.document.readyState != "complete"; )
{
if (oPop.document.readyState == "complete")break;
}
oPop.document.execCommand("SaveAs");
oPop.close();
}
I'm working on a script to get the Geolocations (Lat, lon) that I can use to center my instance of google maps. For now i work with 2 possible technologies. One is the google.loader.ClientLocation object. I haven't tested this one yet because it returns null for me. I think because I'm not living on a regular location (Willemstad, Curacao using a wireless internet connection. So my modem is wireless.).
Therefore I made a backup plan using navigator.geolocation. This works great in Chrome, but firefox gives a timeout and it doesn't work at all in IE.
Does anyone know a good alternative method to get the users geolocation, or does anyone have recommendation on my code how it can become more stable.
I set a timeout for navigator.geolocation because I don't want my users to wait for more as 5 seconds. Increasing the timeout does not improve the reliability of firefox.
function init_tracker_map() {
var latitude;
var longitude;
if(google.loader.ClientLocation) {
latitude = (google.loader.ClientLocation.latitude);
longitude = (google.loader.ClientLocation.longitude);
buildMap(latitude, longitude);
}
else if (navigator.geolocation) {
navigator.geolocation.getCurrentPosition(
function(position) {
latitude = (position.coords.latitude);
longitude = (position.coords.longitude);
buildMap(latitude, longitude);
},
function errorCallback(error) {
useDefaultLatLon();
},
{
enableHighAccuracy:false,
maximumAge:Infinity,
timeout:5000
}
);
}
else {
useDefaultLatLon();
}
}
function useDefaultLatLon() {
latitude = (51.81540697949437);
longitude = (5.72113037109375);
buildMap(latitude, longitude);
}
ps. I'm aware there are more questions like this on SO but couldn't find a clear answer. I'm hoping that people made some new discovery's.
Update:
Tried google gears aswell. Succesfull in chrome again. Fails in FF and IE.
var geo = google.gears.factory.create('beta.geolocation');
if(geo) {
function updatePosition(position) {
alert('Current lat/lon is: ' + position.latitude + ',' + position.longitude);
}
function handleError(positionError) {
alert('Attempt to get location failed: ' + positionError.message);
}
geo.getCurrentPosition(updatePosition, handleError);
}
Update 2: navigator.geolocation works fine in FF from my work location.
Final Result
This works great. Get an api key from ipinfodb.org
var Geolocation = new geolocate(false, true);
Geolocation.checkcookie(function() {
alert('Visitor latitude code : ' + Geolocation.getField('Latitude'));
alert('Visitor Longitude code : ' + Geolocation.getField('Longitude'));
});
function geolocate(timezone, cityPrecision) {
alert("Using IPInfoDB");
var key = 'your api code';
var api = (cityPrecision) ? "ip_query.php" : "ip_query_country.php";
var domain = 'api.ipinfodb.com';
var version = 'v2';
var url = "http://" + domain + "/" + version + "/" + api + "?key=" + key + "&output=json" + ((timezone) ? "&timezone=true" : "&timezone=false" ) + "&callback=?";
var geodata;
var JSON = JSON || {};
var callback = function() {
alert("lol");
}
// implement JSON.stringify serialization
JSON.stringify = JSON.stringify || function (obj) {
var t = typeof (obj);
if (t != "object" || obj === null) {
// simple data type
if (t == "string") obj = '"'+obj+'"';
return String(obj);
}
else {
// recurse array or object
var n, v, json = [], arr = (obj && obj.constructor == Array);
for (n in obj) {
v = obj[n]; t = typeof(v);
if (t == "string") v = '"'+v+'"';
else if (t == "object" && v !== null) v = JSON.stringify(v);
json.push((arr ? "" : '"' + n + '":') + String(v));
}
return (arr ? "[" : "{") + String(json) + (arr ? "]" : "}");
}
};
// implement JSON.parse de-serialization
JSON.parse = JSON.parse || function (str) {
if (str === "") str = '""';
eval("var p=" + str + ";");
return p;
};
// Check if cookie already exist. If not, query IPInfoDB
this.checkcookie = function(callback) {
geolocationCookie = getCookie('geolocation');
if (!geolocationCookie) {
getGeolocation(callback);
}
else {
geodata = JSON.parse(geolocationCookie);
callback();
}
}
// Return a geolocation field
this.getField = function(field) {
try {
return geodata[field];
} catch(err) {}
}
// Request to IPInfoDB
function getGeolocation(callback) {
try {
$.getJSON(url,
function(data){
if (data['Status'] == 'OK') {
geodata = data;
JSONString = JSON.stringify(geodata);
setCookie('geolocation', JSONString, 365);
callback();
}
});
} catch(err) {}
}
// Set the cookie
function setCookie(c_name, value, expire) {
var exdate=new Date();
exdate.setDate(exdate.getDate()+expire);
document.cookie = c_name+ "=" +escape(value) + ((expire==null) ? "" : ";expires="+exdate.toGMTString());
}
// Get the cookie content
function getCookie(c_name) {
if (document.cookie.length > 0 ) {
c_start=document.cookie.indexOf(c_name + "=");
if (c_start != -1){
c_start=c_start + c_name.length+1;
c_end=document.cookie.indexOf(";",c_start);
if (c_end == -1) {
c_end=document.cookie.length;
}
return unescape(document.cookie.substring(c_start,c_end));
}
}
return '';
}
}
geolocation using javascript will work with HTML 5 compliant browsers, so that leaves out IE completely.
Your alternative is to use the IP address to ascertain the approximate lat/long.
Using this alternative method and assuming you find a provider with an accurate and complete lat/long-to-IP mapping, you will only get the lat/long of the ISP (or the nearest point where the ISP connects to the internet).
Hope this resets your expectation (about location-accuracy)
you should read this Which Devices Support Javascript Geolocation via navigator.geolocation?
I had a similar problem and, for browsers without geolocation, went with server-side location based on the user's IP.
Two free geolocation services are:
http://www.maxmind.com/app/geolitecity
http://www.hostip.info/
I found maxmind to me much more accurate.
If it's possible within your project you could query the location before rendering the page, or perform an ajax call to query the location.