Fast JS Pagination for long texts - javascript

I'm trying to create a pagination system with JavaScript.
Basic situation: I have a database, which holds fairly long texts (story chapters, 5000 words+). I want to display these chapters on a website...however not the entire text at once, because that would pretty much kill the readability, but in pages.
I have no problem displaying the text, but rather with getting the pages right.
I've been looking around, and came across a JQuery code, which does about what I want it to do...however there's a major caveat for this method. It takes about 10 seconds to finish paginating the text, which is far too long a wait.
What the code basically does:
It splits the text into words (separated by spaces).
It then tries adding one word after the other to a innerHTML, checking back if the text is now bigger than the container it's supposed to fit in.
Each time it breaks the boundary, it reverts back to the previous string and creates a new page. (By encapsulating the text into a span, which can then be hidden/shown at a moments notice) This works, however it is too slow, because it has to run these checks 5000+ times.
I have tried creating an approximation system, which basically takes the number of words, multiplies it by the factor 0.5 (i.e. halves it), checks if the buffer is larger than the required size, and repeats this process until the buffer is 'smaller' than the required size for the first time; from that position on, it fills the buffer until it's full.
However it just doesn't seem to work right (double words, lines, which aren't completely full, and it's still too slow.)
This is the code I'm currently using, I'd be grateful for any fixes and suggestions how to make it easier, and especially: Faster.
Oh and: No, paging it serverside is not an option, since it's supposed to fit into variable browser formats...in a fullscreen browser at 1280x768 resolution it will be less pages, than in a small browser at a 1024x768 resolution.
/**
 * Appends one finished page to the content box and drops its words from the queue.
 *
 * @param {Node} contentBox - Container that receives the new <span> page.
 * @param {number} Len - Number of leading words used by this page; a value of 0 is bumped to 1.
 * @param {string} pageText - Markup placed into the new <span>.
 * @param {string[]} words - Remaining words; the consumed ones are removed in place.
 * @returns {number} How many words are left after the removal.
 */
function CreateChild(contentBox, Len, pageText, words) {
  const page = document.createElement("span");
  page.innerHTML = pageText;
  contentBox.appendChild(page);

  // Always consume at least one word so the caller's loop keeps advancing.
  let consumed = Len;
  if (consumed == 0) {
    ++consumed;
  }
  words.splice(0, consumed);

  return words.length;
}
// On DOM ready: splits the markup of #Source into word-like pieces and tries to group them
// into pages that fit the height of #content. Each page is appended to #content as a <span>
// through CreateChild. #inner is the scratch element whose offsetHeight is measured for
// every candidate text; it and #Source are removed at the end.
$(document).ready(function(){
var src = document.getElementById('Source');
var contentBox = document.getElementById('content');
var inner = document.getElementById('inner');
//get the text as an array of word-like things
// Runs of spaces are collapsed first; only the space character separates words.
var words = src.innerHTML.replace(/ +/g, " ").split(' '), wCount = words.length;
//start off with no page text
var pageText = null, cHeight = contentBox.offsetHeight;
// Each pass halves wCount, builds a candidate from the first wCount words and measures it.
while(words.length > 0) {
var Found = false;
pageText = words[0]; //Prevents constant checking for empty
// wCount becomes fractional here; it is only floored inside the Distance < 40 branch below.
wCount *= 0.5; //Searches, until the words fit in.
for(var i = 1; i < wCount; ++i) pageText += ' ' + words[i];
inner.innerHTML = pageText;
// NOTE(review): Distance is assigned without a declaration in this block, so it is an
// implicit global unless it is declared somewhere outside the visible code.
Distance = inner.offsetHeight - cHeight;
// When Distance >= 40 no page is emitted in this pass; the loop repeats with wCount halved again.
if(Distance < 40) { //Less than two lines
wCount = Math.floor(wCount);
if(Distance < 0) { //Already shorter than required. Fill.
// NOTE(review): if wCount was fractional, the build loop above already appended
// words[Math.floor(wCount)], so starting here at the floored index appears to add that
// word a second time - confirm.
for(var i = wCount; i < words.length; ++i) {
//add the next word to the pageText
var betterPageText = pageText + ' ' + words[i];
inner.innerHTML = betterPageText;
//Checks, whether the new words makes the buffer too big.
if(inner.offsetHeight > cHeight) {
// The last text that still fit becomes the page; i words are removed from the queue.
wCount = CreateChild(contentBox, i, pageText, words);
Found = true;
break;
} else {
//this longer text still fits
pageText = betterPageText;
}
}
} else {
// NOTE(review): pageText was built from words[0] .. words[i < wCount], yet the first
// word length subtracted here is words[wCount].length - this looks off by one; confirm.
for(var i = wCount; i >= 0; --i) {
//Removes the last word from the text
var betterPageText = pageText.slice(0, pageText.length - words[i].length - 1);
inner.innerHTML = betterPageText;
//Is the text now short enough?
if(inner.offsetHeight <= cHeight) {
// NOTE(review): the page is created from pageText (the text before this removal),
// not from betterPageText, which is the text that was just measured to fit - confirm.
wCount = CreateChild(contentBox, i, pageText, words);
Found = true;
break;
} else {
pageText = betterPageText;
}
}
}
// Neither loop hit its break: emit whatever text was accumulated.
if(!Found) CreateChild(contentBox, i, pageText, words);
}
}
//Creates the final block with the remaining text.
// NOTE(review): the loop above only ends once CreateChild has emptied `words`, and that
// call already appended the current pageText, so this appears to append the last page a
// second time - confirm. `Child` is also assigned without a declaration in this block.
Child = document.createElement("span");
Child.innerHTML = pageText;
contentBox.appendChild(Child);
//Removes the source and the temporary buffer, only the result remains.
contentBox.removeChild(inner);
src.parentNode.removeChild(src);
//The rest is the actual pagination code, but not the issue
});

I managed to solve my problem, also thanks to Rich's suggestion.
What I'm doing: First off, I'm getting the text from the 'Source' (alternatively, I could write the entire text straight into the JS, the effect is the same).
Next I'm getting references to my target and my temporary buffer; the temporary buffer is located inside the target buffer, so it will retain the width information.
After that, I split the entire text into words (standard RegEx, after replacing multiple spaces with a single one). After this, I create some variables, which are meant to buffer function results, so the function calls won't have to be repeated unnecessarily.
Now the main difference: I take chunks of 20 words, checking whether the current chunk exceeds the boundary (again, buffering the results in variables, so they don't get called multiple times, function calls equal valuable microseconds).
Once the boundary is crossed (or the total number of words is reached), the loop is stopped, and (assuming the boundary caused the 'stop') the text is shortened by one word per run, until the text fits in again.
Finally, the new text gets added to a new span-element, which is added to the content box (but made invisible, I'll explain why in a bit), the words I just 'used' get removed from the word array and the wCount variable gets decremented by the number of words.
Rinse and repeat, until all pages are rendered.
You can exchange the '20' with any other value, the script will work with any arbitrary number, however please remember, that a too low number will cause a lot of runs in the 'adding segment', and a too big number will cause a lot of runs in the 'backtracking segment'.
As for the invisible: If the span is left visible, sooner or later it WILL cause scrollbars to appear, effectively narrowing the width of the browser window.
In turn, this will allow less words to fit in, and all following pages will be distorted (because they will be matched to the window with scrollbars, while the 'paged result' will not have scrollbars).
Below is the code I used, I hope it will help someone in the future.
// Splits the markup of #Source into pages that fit the height of #content.
// Every page is appended to #content as a hidden <span>; #inner is the scratch element
// whose offsetHeight is measured while a page is being assembled.
var src = document.getElementById('Source');
var contentBox = document.getElementById('content');
var inner = document.getElementById('inner');
// Get the text as an array of word-like things (runs of spaces are collapsed first).
var words = src.innerHTML.replace(/ +/g, " ").split(' ');
// Cache the target height and the number of words that still have to be placed.
var cHeight = contentBox.offsetHeight, wCount = words.length;
while (wCount > 0) {
  // Len counts the words currently contained in pageText.
  var Len = 1, Overflow = false;
  var pageText = words[0]; // Start with one word so no "is pageText set" check is needed.
  // Growing phase: add words in chunks until the boundary is breached or the words run out.
  while (!Overflow && Len < wCount) {
    // 20 words per measurement, but never more than the remaining number of words.
    for (var j = 0; j < 20 && Len < wCount; ++Len, ++j) pageText += ' ' + words[Len];
    inner.innerHTML = pageText;
    Overflow = (inner.offsetHeight > cHeight);
  }
  if (Overflow) {
    // Backtracking phase: drop the last word until the text fits again.
    // Stop at one word: if even a single word is taller than the container, the original
    // loop ran Len down to 0 or -1, after which splice()/wCount made no progress and the
    // outer loop never terminated. A page now always consumes at least one word.
    while (Len > 1) {
      --Len;
      pageText = pageText.slice(0, -(words[Len].length + 1)); // Also removes the joining space.
      inner.innerHTML = pageText;
      if (inner.offsetHeight <= cHeight) break;
    }
  }
  var Child = document.createElement("span");
  // Hidden so the growing content cannot trigger scrollbars and distort later measurements.
  Child.style.display = "none";
  Child.innerHTML = pageText;
  contentBox.appendChild(Child);
  // Remove the words that went onto this page.
  words.splice(0, Len);
  wCount -= Len;
}

Create an absolutely-positioned container that is the width of a single page. Give it height of 'auto'. Position the container somewhere off screen, like left: -10000px so users can't see it. Split the original text into 20-word chunks. (Look up the regex that accomplishes this.) Append one chunk at a time to the string in the container until the height of the container reaches the max height of a single page. Once it reaches the max height, the string in the container is basically one page of text. Push the string in the container onto an array called 'pages'. Empty the container and start creating page 2 by appending the 20-word chunks again, continuing to iterate through the array from where you left off on the previous page. Continue this process until you reach the end of the 20-word array, pushing each new page onto the array of pages whenever the container's string reaches the max height. You should now have an array of pages, each item of which contains the text of each page.

Having not searched in advance, I worked out an alternative solution with getClientRects (https://developer.mozilla.org/en-US/docs/Web/API/Element/getClientRects). If someone's interested in the details, I'll post more.

Related

"innerText" sometimes contains more newlines than the user has pressed enters

I'm trying to build a PicoBlaze simulator in JavaScript into which the user enters the assembly code. I've tried to make an interface which displays the line numbers in the assembly code that the user enters and highlights the assembly code syntactically. However, the line numbering doesn't work correctly in some cases. If you click into the div with contenteditable that the user is supposed to type the assembly code in (called assemblyCode), and you press enter two times, it will claim there are four lines of code, even though there are just three of them. The JavaScript which controls the div left to it that displays the lines of code (called lineNumbers) is this:
/**
 * Rebuilds the #lineNumbers gutter from the text of #assemblyCode.
 * The number of entries equals the number of "\n" characters in the editor's innerText,
 * with a minimum of one entry.
 */
function setUpLineNumbers() {
  const editorText = document.getElementById("assemblyCode").innerText;
  // Splitting on "\n" yields one more piece than there are newline characters.
  const newlineCount = editorText.split("\n").length - 1;
  const lineCount = newlineCount > 1 ? newlineCount : 1;
  const gutterEntries = [];
  for (let lineNo = 1; lineNo <= lineCount; lineNo += 1) {
    gutterEntries.push(lineNo + ".<br/>");
  }
  document.getElementById("lineNumbers").innerHTML = gutterEntries.join("");
}
Can you figure out what is going on there?
It seems that the structure of the code panel is one <div> per line and some of the <div> contents can end with <br>. .innerText converts this to a string such that each <div> generates a \n and each <br> also generates a \n.
Rather than trying to count the number of \n, count the number of <div>.
Change your function to:
/**
 * Rebuilds the #lineNumbers gutter with one entry per child node of #assemblyCode.
 * Counting child nodes avoids the double "\n" that innerText reports for a <div>
 * that ends in <br>.
 */
function setUpLineNumbers() {
  const assemblyCode = document.getElementById("assemblyCode");
  // Fixed: the original read `assembly.childNodes`, an undefined identifier.
  // An editor without child nodes still shows line 1, matching the Math.max(..., 1)
  // floor used by the newline-counting version of this function.
  const numberOfLines = Math.max(assemblyCode.childNodes.length, 1);
  let lineNumbersHTML = "";
  for (let i = 1; i <= numberOfLines; i++) {
    lineNumbersHTML += i + ".<br/>";
  }
  document.getElementById("lineNumbers").innerHTML = lineNumbersHTML;
}

How do I make the font size in the table adaptive?

I have a form now, and the number of words in the form is different. Now assume that the table size does not change and I want the font size in the table to change with the number of words. That is to say, the more words, the smaller the font, and the fewer words, the bigger the font. Could you please tell me how to achieve this?
There may be a better way to do this, but you could use javascript. Example below:
// Picks a font size for every <td> from the number of characters in its text:
// up to 100 -> large, up to 200 -> medium, up to 400 -> small, above that -> x-small.
var allTDs = document.querySelectorAll('td');
// `i` is declared here; the original assigned it without a declaration, which creates an
// implicit global and throws a ReferenceError in strict mode.
for (let i = 0; i < allTDs.length; i++) {
  const tdLength = allTDs[i].innerText.length;
  // Each branch is only reached when the previous thresholds were exceeded,
  // so the lower bounds do not need to be re-checked.
  if (tdLength <= 100) {
    allTDs[i].style.fontSize = "large";
  } else if (tdLength <= 200) {
    allTDs[i].style.fontSize = "medium";
  } else if (tdLength <= 400) {
    allTDs[i].style.fontSize = "small";
  } else {
    allTDs[i].style.fontSize = "x-small";
  }
}
Note: This example counts the number of characters in the innerText instead of the number of words. You could change it to count words instead - one way to get a rough count of words is to actually count the spaces " " in the innerText:
// Rough word count: split on runs of whitespace and ignore empty pieces, so repeated
// spaces, line breaks and empty cells are not counted as words.
var tdLength = allTDs[i].innerText.trim().split(/\s+/).filter(Boolean).length;

How to cut string after a specific line number?

I have a string - project description ( as part of an object ) coming from a user form submission that is shown on a page of a report. If the line numbers exceed 24 I want to show the rest of the string on a new page. My initial idea was to cut it based on characters but this can't be done precisely as if line breaks are made when submitting the form, the characters can't be calculated as we don´t know if the line break was made in the middle of a line or the end or wherever. I don't know what could be the solution?
How can I cut a string based on number of lines?
This is what I have done so far:
// Estimates how many rendered lines the element with the given id occupies and, when that
// estimate exceeds 24, flags that a second description page is needed.
// NOTE(review): splitting the text into a first and second part is not implemented yet;
// the assignment near the end has no right-hand side, so this snippet does not parse as-is.
// `el` is the id of the element to measure; it is overwritten with the element itself below.
function countLines (el) {
// Full description text; read here but not used further in the visible code.
let projectDetails = $rootScope.report.description;
var el = document.getElementById(el);
var divHeight = el.offsetHeight
// 17 is presumably the rendered line height in pixels - TODO confirm against the CSS.
var lines = divHeight / 17;
//console.log("Lines counted: " + lines);
if(lines > 24) {
$scope.secondDescriptionPage = true;
$scope.projectDetailsTextFirstPart = // this should be calculated
//$scope.projectDetailsTextSecondPart = // this should be calculated )
}
}
With the -webkit-line-clamp CSS property you can cut text by a certain number of lines. See MDN for details. It will not work in IE11 however.

to apply sequential line numbering to lines in a paragraph: is it possible?

Is it possible to have jquery/javascript insert sequential line number at the start of all lines in a paragraph and, better still, to follow the sequence through to subsequent paragraphs?
I want to be able to refer students quickly to particular lines of an article (in a classroom setting). I have lots of articles to which I would like to apply this functionality, each of which has varying numbers of paragraphs.
I was hoping this might be possible, even in a responsive page, where the width of the paragraphs changes, depending on the display device, and the consequent number of lines in each paragraph becomes greater or fewer.
Thanks in advance to anyone who can help.
Here is one approach that may suit your purposes.
Get the height of a one-line paragraph, for reference.
For each paragraph, get the actual height, and infer the number of lines.
Loop through the lines and add the numbering at absolute positions.
// The first paragraph serves as the one-line height reference and is then discarded.
var refHeight = $("p").eq(0).height();
$("p").eq(0).remove();
// Running line number shared by all paragraphs.
var cnt = 1;
$("p").each(function () {
  var $paragraph = $(this);
  var offset = $paragraph.position();
  var paragraphHeight = $paragraph.height();
  // Infer the line count from the reference height, then the height of one line.
  var lineTotal = paragraphHeight / refHeight;
  var step = paragraphHeight / lineTotal;
  var bottom = offset.top + paragraphHeight;
  var y = offset.top;
  // Emit one numbered marker per inferred line, positioned at that line's top offset.
  while (y < bottom) {
    var $marker = $("<p>", { class: "number" });
    $marker.text(cnt++);
    $marker.css("top", y);
    $paragraph.before($marker);
    console.log(y);
    y += step;
  }
});
(Fiddle)
Edit
If you wanted to use a fixed line length (so that everyone is seeing the same numbers), you could combine the above with the following:
Break the paragraphs into lines.
Wrap each line in a span/div, and re-append.
Block the browser from text wrapping.
// Rewrites every paragraph as a sequence of <span class="line"> elements separated by <br/>.
// Each line holds at least `charWidth` characters and is extended to the next space so
// that words are not cut.
// NOTE(review): `charWidth` is not defined in this snippet; it is expected to be provided
// by the surrounding page and is assumed to be a positive number.
$("p").each(function () {
  var $paragraph = $(this);
  var text = $paragraph.text();
  $paragraph.html("");
  var i = 0;
  while (i < text.length) {
    // Declared locally: the original assigned `lineCharWidth` without `var`, which
    // creates an implicit global and throws in strict mode.
    var lineCharWidth = charWidth;
    // Grow the line until it ends right before a space or reaches the end of the text.
    while (i + lineCharWidth < text.length && text[i + lineCharWidth] !== ' ') {
      lineCharWidth++;
    }
    // slice() replaces the deprecated substr(); both select lineCharWidth characters from i.
    var line = $("<span>", { class: "line" }).text(text.slice(i, i + lineCharWidth));
    $paragraph.append(line).append("<br/>");
    i += lineCharWidth;
  }
});
(Fiddle)
Here's a solution that uses a function to split the paragraph text on space characters based on a pre-determined line length and then replaces the text with an <ol> comprised of <li> elements each containing one line of text:
// Running count of emitted lines plus one; advanced by splitLines on every call.
var lineNum = 1;

/**
 * Breaks text into lines of whole words.
 *
 * Words are the pieces between single whitespace characters. A line is closed as soon as
 * its length (including the trailing space added after each word) exceeds lineLen, or
 * when the last word has been added. Every closed line increments the shared lineNum.
 *
 * @param {string} text - Text to split.
 * @param {number} lineLen - Length a line must exceed before it is closed.
 * @returns {string[]} The lines, each ending with a space.
 */
function splitLines(text, lineLen) {
  var tokens = text.split(/\s/g);
  var finished = [];
  var current = '';
  for (var pos = 0; pos < tokens.length; pos++) {
    current += tokens[pos] + ' ';
    var isLastToken = pos == tokens.length - 1;
    if (current.length > lineLen || isLastToken) {
      finished.push(current);
      current = '';
      lineNum += 1;
    }
  }
  return finished;
}
// Replaces the text of every paragraph with an <ol> that holds one <li> per line.
// The list's start attribute continues the numbering from the previous paragraphs.
$('p').each(function () {
  var $p = $(this);
  var lineLen = 50;
  // Read lineNum before splitLines runs, because splitLines advances it for each line.
  var $ol = $('<ol>').attr('start', lineNum);
  $.each(splitLines($p.text(), lineLen), function (idx, line) {
    // Insert the line as text. The original concatenated it into an HTML string, so
    // characters such as "<" or "&" in the paragraph were parsed as markup.
    $ol.append($('<li>').text(line));
  });
  $p.text('').append($ol);
});
I'm not sure about the support for the start attribute of the <ol>. It does work in Chrome. Even still, I like using the list element because it's a little more semantically meaningful, in my opinion.
Sure. Just make sure you're encoding your line returns and use it to split up the text with a simple replace.
Sample text:
The quick
brown fox
jumped over
the lazy dog
for this, the actual string would be the following:
The quick\r\nbrown fox\r\njumped over\r\nthe lazy dog
I think something like this would work (without the document.write, and there could be performance improvements):
// Demo: put an index placeholder in front of every line of the text, then number them.
var input = '\r\nThe quick\r\nbrown fox\r\njumped over\r\nthe lazy dog';
// Every CRLF becomes an empty placeholder <div> that will receive the line's index.
input = input.split('\r\n').join('<div class=\'index\'></div>');
document.write(input);
var idx = 0;
// Fill the placeholders in document order, starting at 0.
$('.index').each(function () {
  var current = idx;
  idx += 1;
  $(this).text(current);
});
If I'm not mistaken, this should write out an index number on each line. Could use some testing/debugging, though :)
For an example of how this is done, check out Github's diff pages.

Prefer new lines to long lines

I would like to control the way a text is displayed in box, preferring new lines to long lines, but still allowing long lines.
Here's some examples: http://codepen.io/anon/pen/jiCxo
In #1, there's a "long" text and long lines. That is how I want it to behave.
In #2, there's a short text and one long line. I don't like it.
I would like:
#2 to be like #3 without having to add that <br> manually.
to use the same HTML & CSS for both "long" and short texts.
Also, I would like the first line to be the shortest, not the last one: http://codepen.io/anon/pen/irFcK.
Any ideas?
(if, as I fear, it is not possible using only CSS, I am open to a nice JavaScript solution)
I made a quick function that should do what you want. I commented it so you know what's going on.
// For every heading that fits on one line but spans at least half of its parent,
// shrink the heading to half the parent's width so the text wraps onto two lines.
$(".box h1").each(function () {
  var $heading = $(this);

  // Taller than one line-height means the heading already wraps: leave it alone.
  var fitsOneLine = $heading.height() <= parseInt($heading.css('line-height'));
  if (!fitsOneLine) {
    return;
  }

  var halfParentWidth = $heading.parent().width() / 2;
  if ($heading.width() >= halfParentWidth) {
    $heading.width(halfParentWidth);
  }
});
EDIT:
To have the first line shorter than the second line, you have to do something a bit more complex. I used cut from this answer. This should be pretty close to what you want.
// For every single-line heading that spans at least half of its parent, insert a <br>
// near the middle of the text, at a word boundary, so that the first line is the
// shorter one. Relies on the cut(n) helper, which truncates a text to at most n
// characters without ending inside a word when there is an earlier word boundary.
//
// Fixes over the original: the unquoted <br> was a syntax error, .val() and .substr()
// were called on a jQuery heading object (neither works there), and every local was an
// implicit global.
$(".box h1").each(function () {
  var $heading = $(this);

  // Only single-line headings are considered.
  if ($heading.height() > parseInt($heading.css('line-height'), 10)) {
    return;
  }
  // Only headings at least half as wide as their parent are considered.
  if ($heading.width() < $heading.parent().width() / 2) {
    return;
  }

  var fullText = $heading.text();
  // First find approximately where the break should go.
  var place = Math.round(fullText.length / 2);
  // cut() returns a (index, text) callback; call it directly to get the first line.
  var first = cut(place)(0, fullText);
  var remainder = fullText.slice(first.length);

  // No usable word boundary (for example one long word): leave the heading untouched.
  if (first.length === 0 || !/^\s/.test(remainder)) {
    return;
  }
  var second = remainder.replace(/^\s+/, '');
  if (second.length === 0) {
    return;
  }

  // Rebuild the heading from text nodes so the text is never parsed as markup.
  $heading
    .empty()
    .append(document.createTextNode(first))
    .append('<br>')
    .append(document.createTextNode(second));
});
Here's cut
/**
 * Builds a callback that truncates a text to at most n characters.
 *
 * If the character right after the cut is not whitespace, the cut landed inside a word;
 * in that case the trailing whitespace and partial word are removed from the result
 * (when there is no whitespace in the truncated text, it is returned unchanged).
 *
 * @param {number} n - Maximum number of characters to keep.
 * @returns {function(number, string): string} Callback taking (index, text); the index
 *   is ignored.
 */
function cut(n) {
  return function textCutter(i, text) {
    var head = text.substring(0, n);
    var tail = text.slice(n);
    var cutsInsideWord = /^\S/.test(tail);
    return cutsInsideWord ? head.replace(/\s+\S*$/, "") : head;
  };
}
This code uses a <br> to break up two lines (with the second longer)
EDIT2:
It's impossible to have the line lengths be different without a <br> or some other way of adding a new line. If you like it better, I can change it so it uses multiple <h1> tags, which I think will automatically kick each additional tag to a new line.
I have come up with a solution which utilizes a still fairly unknown JS library named MediaClass which enables the use of media queries with specific elements on the page.
I think it looks pretty good the way I've set the values but you might want to fine-tune it a little by changing widths in the JS or the CSS. Here's a jsFiddle for your tinkering pleasure.
The way it works:
JS:
// Adds the class "large" to an h1 whose own width is at least 300px.
MediaClass("large", "h1:media(this-min-width: 300px)");
// Adds the class "small" to an h1 whose own width is between 200px and 300px.
MediaClass("small", "h1:media(this-min-width: 200px and this-max-width: 300px)");
These lines ensure that a small class is added to h1 if h1's width is between 200px and 300px and a large class if h1 is wider than 300px.
CSS:
/* Headings with class "large": an inline-block pseudo-element, half as wide as the
   heading, is placed before the text so the first line starts further to the right.
   Its content is the escaped line-feed character (\A). */
.large:before {
content:"\A";
width: 50%;
display: inline-block;
}
/* Headings with class "small": same technique with a narrower 30% spacer. */
.small:before {
content:"\A";
width: 30%;
display: inline-block;
}
This bit adds a :before pseudo-element of a width depending on the width of the h1 inside the h1, before the text, this moves the first line inside the h1 over, which changes the flow of the text.
Edit: I fixed up the post and the fiddle to better demonstrate how this solution answers the question asked.
You need JavaScript to do this. CSS cannot format text based on a complex set of rules.
I don't think you can do that with only CSS. Here's a JavaScript snippet that you can try implementing to parse your strings. It's clunky and untested but allows you to have a preset both for the char length you want, as well as a word length. I'm sure you could clean it up and make it fit your needs.
// Requires the h1 to have an explicit line-height set.
// Narrows single-line headings that are at least `preset` percent as wide as their
// parent, and indents their first line so that it ends up shorter.
$(".box h1").each(function () {
  var $heading = $(this);

  // Threshold, as a percentage of the parent's width.
  var preset = 50;
  // First-line indent in pixels, using the same number as the preset.
  var indent = String(preset) + 'px';
  // Height of a single line of text.
  var lh = parseInt($heading.css('line-height'));
  // Width the heading is limited to: preset percent of the parent's width.
  var w = $heading.parent().width() * (preset / 100);

  // Headings taller than one line are left as they are.
  if ($heading.height() > lh) {
    return;
  }
  // So are headings narrower than the limit.
  if ($heading.width() < w) {
    return;
  }

  $heading.width(w);
  $heading.css('text-indent', indent);
});
Here's a link: http://codepen.io/jnickg/pen/szIpd
Inspired by TMP's answer and this answer to another question, I came up with this script that works great in most situations but still has some quirks…
// Re-breaks the text of each ".box h1" so that a short line ends up first rather than last.
// The script measures the natural line widths with a temporary <span>, derives an ideal
// line width, then rebuilds the heading with explicit <br> elements.
//
// Changes over the original: `ideal_line_width` is now a local (it was an implicit
// global), `i` is declared once, and words are inserted as text nodes instead of HTML
// strings so characters such as "<" or "&" are never parsed as markup.
var minimum_gap = 5; // Minimum gap on the last line to try to put it on the first.
$(".box h1").each(function () {
  var h1 = $(this),
    text = h1.text(),
    words = text.split(' '),
    lines = [], // measured width of every natural line
    first_word = [], // first_word[n] = index of the first word of line n
    l = 0,
    i = 0,
    ideal_line_width = 0; // 0 means "leave the heading as it is"

  // Adding a span to measure the width
  h1.wrapInner('<span>');
  var span = h1.find('span');

  // Put the first word in the span and save the height (the height of one line)
  span.text(words[0]);
  var height = h1.height();

  // Measure width of each line
  for (i = 1; i < words.length; i++) {
    span.append(document.createTextNode(' ' + words[i]));
    // If there's a new line
    if (h1.height() > height) {
      lines[l] = span.width();
      // Restart the measurement with the word that wrapped
      span.text(words[i]);
      l++;
    }
  }
  // Last line
  lines[l] = span.width();

  var nb_lines = lines.length;
  if (nb_lines === 1) {
    // If the line is "long", make it two
    if (span.width() >= h1.parent().width() / 2) {
      ideal_line_width = h1.width() / 2;
      nb_lines = 2;
    }
  } else {
    // Compute the average lines width (except for the last one)
    var sum = 0;
    for (l = 0; l < (nb_lines - 1); l++) {
      sum += lines[l];
    }
    var avg_width = sum / (nb_lines - 1);
    // If last line is shorter than the average
    if (lines[nb_lines - 1] < avg_width) {
      var gap = avg_width - lines[nb_lines - 1];
      // Spread the gap among the lines
      gap /= nb_lines;
      if (gap > minimum_gap) {
        ideal_line_width = avg_width - gap;
      }
    }
  }

  // Let's make the wanted adjustments
  if (ideal_line_width !== 0) {
    // Determining which is the first word of each line, beginning at the end
    // in order to get the shortest one first
    l = nb_lines - 1;
    span.empty();
    for (i = words.length - 1; i >= 0; i--) {
      span.prepend(document.createTextNode(words[i] + ' '));
      if (span.width() > ideal_line_width) {
        // If there's a new line, we cancel the last word
        if (h1.height() > height) {
          i++;
        }
        span.empty();
        first_word[l] = i;
        l--;
      }
    }

    // Applying the results: rebuild the heading word by word and insert a <br>
    // right before the first word of each following line
    span.remove();
    l = 1;
    for (i = 0; i < words.length; i++) {
      h1.append(document.createTextNode(' ' + words[i]));
      if (first_word[l] === i + 1) {
        h1.append('<br>');
        l++;
      }
    }
  }
  // Or just display the text
  else {
    span.remove();
    h1.text(text);
  }
});
You can see it in action here. Unfortunately, I still don't like #8 and #13. Tips and improvements are welcome.

Categories

Resources