I have a string that (potentially) contains HTML tags.
I want to split it into smaller valid HTML strings based on (text) character length. The use case is essentially pagination. I know the length of text that can fit on a single page. So I want to divide the target string into "chunks" or pages based on that character length. But I need each of the resulting pages to contain valid HTML without unclosed tags, etc.
So for example:
const pageCharacterSize = 10
const testString = 'some <strong>text with HTML</strong> tags
function paginate(string, pageSize) { //#TODO }
const pages = paginate(testString, pageCharacterSize)
console.log(pages)
// ['some <strong>text </strong>', '<strong>with HTML</strong> ', 'tags']
I think this is possible to do with a DocumentFragment or Range, but I can't figure out how to slice the pages based on character offsets.
This MDN page has a demo that does something close to what I need. But it uses caretPositionFromPoint() which takes X, Y coordinates as arguments.
Update
For the purposes of clarity, here are the tests I'm working with:
import { expect, test } from 'vitest'
import paginate from './paginate'
// 1: plain text is split at spaces into pages of at most `pageSize` characters,
// and the separating spaces at page edges are not part of the expected pages.
test('it should chunk plain text', () => {
// a: each two-character word exactly fills a page of size 2
const testString = 'aa bb cc dd ee';
const expected = ['aa', 'bb', 'cc', 'dd', 'ee']
expect(paginate(testString, 2)).toStrictEqual(expected)
// b: two one-letter words plus the space between them fill a page of size 3
const testString2 = 'a a b b c c';
const expected2 = ['a a', 'b b', 'c c']
expect(paginate(testString2, 3)).toStrictEqual(expected2)
// c: two two-letter words plus the space between them fill a page of size 5
const testString3 = 'aa aa bb bb cc cc';
const expected3 = ['aa aa', 'bb bb', 'cc cc']
expect(paginate(testString3, 5)).toStrictEqual(expected3)
// d: size 4 cannot hold 'aa bb' (5 characters), so each word gets its own page
const testString4 = 'aa bb cc';
const expected4 = ['aa', 'bb', 'cc']
expect(paginate(testString4, 4)).toStrictEqual(expected4)
// e: the last page may be shorter than the page size
const testString5 = 'a b c d e f g';
const expected5 = ['a b c', 'd e f', 'g']
expect(paginate(testString5, 5)).toStrictEqual(expected5)
// f: size 7 holds 'aa bb' (5) but not 'aa bb cc' (8); words are never split
const testString6 = 'aa bb cc';
const expected6 = ['aa bb', 'cc']
expect(paginate(testString6, 7)).toStrictEqual(expected6)
})
// 2: tags do not count toward the page size ('<strong>bb</strong>' fits a page
// of size 3), and an element spanning two pages (<em>) is closed at the end of
// the first page and reopened at the start of the next one.
test('it should chunk an HTML string without stranding tags', () => {
const testString = 'aa <strong>bb</strong> <em>cc dd</em>';
const expected = ['aa', '<strong>bb</strong>', '<em>cc</em>', '<em>dd</em>']
expect(paginate(testString, 3)).toStrictEqual(expected)
})
// 3: a single element that covers every page is repeated on each page
test('it should handle tags that straddle pages', () => {
const testString = '<strong>aa bb cc</strong>';
const expected = ['<strong>aa</strong>', '<strong>bb</strong>', '<strong>cc</strong>']
expect(paginate(testString, 2)).toStrictEqual(expected)
})
Here is a solution that assumes and supports the following:
tags without attributes (you could tweak the regex to support that)
well formed tags assumed, e.g. not: <b><i>wrong nesting</b></i>, missing <b>end tag, missing start</b> tag
tags may be nested
tags are removed & later restored for proper characters per page count
page split is done by looking backwards for first space
/**
 * Splits an HTML string into pages that each hold at most `pageSize` text
 * characters (tags are not counted) while keeping every page well-formed.
 *
 * Tags are stripped and remembered by their offset in the plain text, the
 * text is cut at whitespace, and the tags are then restored page by page.
 * Tags still open at a page break are closed at the end of that page and
 * reopened at the start of the next one.
 *
 * Limitations: only attribute-less tags are recognised, the input is assumed
 * to be properly nested, and void elements such as <br> are not supported.
 *
 * @param {string} html - Source markup.
 * @param {number} pageSize - Maximum text characters per page (positive integer).
 * @returns {string[]} The pages in order; pages without any content are omitted.
 * @throws {RangeError} If `pageSize` is not a positive integer.
 */
function paginate(html, pageSize) {
  const size = Number(pageSize);
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError('pageSize must be a positive integer');
  }

  // A page is optional leading whitespace followed by up to `size` characters
  // that end at a word boundary. The second alternative hard-splits a word
  // longer than `size`; without it such text would be skipped and every later
  // tag offset would be misaligned.
  const splitRegex = new RegExp(
    '\\s*(?:[\\s\\S]{1,' + size + '}(?!\\S)|\\S{' + size + '})',
    'g'
  );

  // Strip the tags, remembering each one with its offset in the plain text.
  const tagsInfo = [];
  let tagOffset = 0; // total length of the tags removed so far
  const text = html.replace(/<\/?[a-z][a-z0-9]*>/gi, (tag, pos) => {
    tagsInfo.push({ tag: tag, pos: pos - tagOffset });
    tagOffset += tag.length;
    return '';
  });

  const openTags = []; // opening tags carried over to the next page
  const pages = [];
  let pageOffset = 0; // offset of the current page in the plain text
  let tagCursor = 0; // index of the first tag not yet assigned to a page

  // `match` returns null when there is no text at all.
  (text.match(splitRegex) || []).forEach(rawPage => {
    const nextOffset = pageOffset + rawPage.length;
    const prefix = openTags.join('');

    // Tags are sorted by offset and pages are contiguous, so the tags that
    // belong to this page are simply the next ones below `nextOffset`.
    const pageTags = [];
    while (tagCursor < tagsInfo.length && tagsInfo[tagCursor].pos < nextOffset) {
      pageTags.push(tagsInfo[tagCursor]);
      tagCursor += 1;
    }

    // Trim the text BEFORE restoring tags and clamp tag positions into the
    // trimmed text, so edge whitespace never survives next to or inside a tag.
    const firstText = rawPage.search(/\S/);
    const lead = firstText < 0 ? rawPage.length : firstText;
    const trimmed = rawPage.trim();
    let page = trimmed;
    // Restore in reverse order so earlier insert positions stay valid.
    for (let i = pageTags.length - 1; i >= 0; i -= 1) {
      const at = Math.min(Math.max(pageTags[i].pos - pageOffset - lead, 0), trimmed.length);
      page = page.substring(0, at) + pageTags[i].tag + page.substring(at);
    }

    // Update the list of tags that are still open after this page.
    pageTags.forEach(({ tag }) => {
      if (tag.charAt(1) !== '/') {
        openTags.push(tag);
        return;
      }
      // Tag names are matched case-insensitively, like the tag regex above.
      const opener = ('<' + tag.slice(2)).toLowerCase();
      for (let i = openTags.length - 1; i >= 0; i -= 1) {
        if (openTags[i].toLowerCase() === opener) {
          openTags.splice(i, 1);
          break;
        }
      }
    });
    pageOffset = nextOffset;

    const postfix = openTags.slice().reverse().map(tag => tag.replace(/</, '</')).join('');
    let result = prefix + page + postfix;

    // Remove empty elements repeatedly: removing an inner empty pair can
    // leave its parent empty as well (e.g. <i><b></b></i>).
    let previous;
    do {
      previous = result;
      result = result.replace(/<(\w+)><\/\1>/gi, '');
    } while (result !== previous);

    if (result !== '') {
      pages.push(result);
    }
  });

  return pages;
}
// Demo: paginate each sample with its page size and log the resulting pages.
const samples = [
  { str: 'some <strong>text <i>with</i> HTML</strong> tags, and <i>some <b>nested tags</b> sould be <b>supported</b> as well</i>.', size: 16 },
  { str: 'a a b b c c', size: 3 },
  { str: 'aa aa bb bb cc cc', size: 5 },
  { str: 'aa bb cc', size: 4 },
  { str: 'aa <strong>bb</strong> <em>cc dd</em>', size: 3 },
  { str: '<strong>aa bb cc</strong>', size: 2 }
];
for (const { str, size } of samples) {
  console.log(paginate(str, size));
}
Output:
[
"some <strong>text <i>with</i></strong>",
"<strong> HTML</strong> tags, and",
"<i>some <b>nested tags</b></i>",
"<i> sould be</i>",
"<i><b>supported</b> as</i>",
"<i>well</i>."
]
[
"a a",
"b b",
"c c"
]
[
"aa aa",
"bb bb",
"cc cc"
]
[
"aa",
"bb",
"cc"
]
[
"aa",
"<strong>bb</strong>",
" <em>cc</em>",
"<em>dd</em>"
]
[
"<strong>aa</strong>",
"<strong>bb</strong>",
"<strong>cc</strong>"
]
Update
Based on new request in comment I fixed the split regex from '[\\s\\S]{1,' + pageSize + '}(?!\\S)' to '\\s*[\\s\\S]{1,' + pageSize + '}(?!\\S)', e.g. added \\s* to catch leading spaces. I also added a page.trim() to remove leading spaces. Finally I added a few of the OP examples.
Related
I have this script that generates a random order for a group of numbers when the page is refreshed, and I would like to randomize the colors of the numbers (each number a different color) as well. It also could be that each number has a fixed color, and they just appear random by virtue of the numbers getting randomized. I can't figure out how to do that either. Any help is greatly appreciated.
// Writes the numbers 0..20 to the page in random order, one per line.
var contents = Array.from({ length: 21 }, function (_, i) { return String(i); });
var spacing = "<br />";
var the_one;
var z = 0;
// Keep drawing random indices; a slot that was already written is marked
// with a sentinel and skipped, so every number is emitted exactly once.
while (z < contents.length) {
  the_one = Math.floor(Math.random() * contents.length);
  if (contents[the_one] === "_selected!") {
    continue;
  }
  document.write(contents[the_one] + spacing);
  contents[the_one] = "_selected!";
  z++;
}
Adjust with the following:
// Three random channel values in the range 0..215 form the rgb() colour.
const c = [1,2,3].map(ele => Math.floor(Math.random() * 216));
// The span must be closed with </span>; a second <span> would leave every element open.
document.write(`<span style="color: rgb(${c.join(",")});">${contents[the_one]+spacing}</span>`);
See example:
// Writes the numbers 0..20 to the page in random order, each in a random colour.
var contents = Array.from({ length: 21 }, function (_, i) { return String(i); });
var spacing = "<br />";
var the_one;
var z = 0;
// Keep drawing random indices; a slot that was already written is marked
// with a sentinel and skipped, so every number is emitted exactly once.
while (z < contents.length) {
  the_one = Math.floor(Math.random() * contents.length);
  if (contents[the_one] != "_selected!") {
    // Three random channel values in the range 0..215 form the rgb() colour.
    const c = [1, 2, 3].map(ele => Math.floor(Math.random() * 216));
    // Close the span with </span>; the original wrote <span>, leaving every element open.
    document.write(`<span style="color: rgb(${c.join(",")});">${contents[the_one] + spacing}</span>`);
    contents[the_one] = "_selected!";
    z++;
  }
}
Edit: I re-read the question and it had different needs. But I'll leave this answer here since it explains how to get randomized colors (the actual topic) in JavaScript.
Here are the functions you'll need to create random hex color values:
/**
 * Returns one random colour channel (0-255) as a two-digit lowercase hex string.
 * @returns {string}
 */
const getRandomHex = () => {
  const channel = Math.floor(Math.random() * 256);
  return channel.toString(16).padStart(2, '0');
};

/**
 * Builds a '#RRGGBB' (or '#RRGGBBAA') colour string.
 * Any channel that is not supplied is randomised; the alpha part is only
 * appended when `A` is given.
 * @param {{R?: string, G?: string, B?: string, A?: string}} [channels]
 * @returns {string}
 */
const getRandomRGB = ({ R = getRandomHex(), G = getRandomHex(), B = getRandomHex(), A } = {}) => {
  const parts = ['#', R, G, B, A];
  return parts.join('');
};

console.log(getRandomRGB());
console.log(getRandomRGB({ A: '00' }));
The first function, getRandomHex, converts a random numeric value (0-255) to a Base16 (hexadecimal) string and, if necessary, pads it with a leading zero.
The main function getRandomRGB will append three (red, green, blue) values to an array and then join the array as single string.
In addition, you can override R/G/B (and alpha channel) values as passed properties.
The reason I selected this approach is simply that it allows manipulating the generated R/G/B values if needed (like in use case: "keep the 'red value' static - while green and blue values are randomised").
I have an array of people
const family = [{name: 'Mike', age: 1}, {name: 'Monique', age: 99}]
family.map(member => ??)
the desired output is
Mike ......... 1
Monique ..... 99
The number of dots differs between line 1 and line 2. Can you help me think about this? Thanks!
// Total width of each printed line: name + dots + age.
const maxLength = 100;
// forEach, not map: the callback only logs and returns nothing.
family.forEach(member => {
  const label = `${member.name}${member.age}`;
  // Clamp at zero so a name longer than maxLength prints without dots
  // instead of throwing a RangeError.
  const dots = '.'.repeat(Math.max(0, maxLength - label.length));
  console.log(member.name + dots + member.age);
});
goodluck
#params: objArray [Array of Objects]
lineSize [Number] length of each output line
.map() each Object
get the length of name value
get the length of score after its converted to String
subtract the total of both lengths and two spaces from the given lineSize
fill an Array of dots equal to the difference from previous step and then .join('') them into a String.
return each line as: obj.name .... obj.score into an Array
Create a documentFragment and a <ul> and ensure that a monospaced font is applied to <ul>. Monospaced fonts will make the list perfectly even.
.forEach() String of the previous Array:
create a <li>
add a line to <li>
append <li> to <ul>
Append <ul> to documentFragment then the documentFragment to <body>
Demo
const scores = [{
name: 'Mike',
score: 1
}, {
name: 'Monique',
score: 99
}, {
name: 'Matt',
score: 5150
}, {
name: 'Lynda',
score: 2112
}];
/* Step 1 */
/**
 * Renders one "name ..... score" line per object into a monospaced <ul> that
 * is appended to document.body.
 *
 * @param {Array<{name: string, score: number}>} objArray - Entries to list.
 * @param {number} lineSize - Width of each line in characters, including the
 *   two spaces around the dots.
 */
const memberList = (objArray, lineSize) => {
  /* Step 2 */
  const scoreArray = objArray.map(({ name, score }) => {
    const scoreText = String(score);
    // Reserve two columns for the spaces around the dots so that the line is
    // exactly lineSize wide; clamp at zero so an over-long entry is printed
    // without dots instead of throwing a RangeError.
    const dotCount = Math.max(0, lineSize - name.length - scoreText.length - 2);
    return `${name} ${'.'.repeat(dotCount)} ${scoreText}`;
  });
  /* Step 3 */
  const docFrag = document.createDocumentFragment();
  const list = document.createElement('ul');
  // A monospaced font keeps the dotted columns aligned.
  list.style.cssText = `font: 400 3vw/1.5 Consolas;list-style: none;`;
  /* Step 4 */
  scoreArray.forEach(line => {
    const item = document.createElement('li');
    item.textContent = line;
    list.appendChild(item);
  });
  /* Step 5 */
  docFrag.appendChild(list);
  document.body.appendChild(docFrag);
};
memberList(scores, 20);
based on #tdjprog approach I wrote this
/**
 * Joins two strings with a run of dots between them, e.g. "Mike ..... 1".
 *
 * @param {string} string1 - Left-hand text.
 * @param {string} string2 - Right-hand text.
 * @param {number} [length=40] - Combined width of both strings plus the dots
 *   (the two separating spaces are added on top of this).
 * @returns {string} The dotted line.
 */
const separateWords = (string1, string2, length = 40) => {
  // Clamp at zero: String.prototype.repeat throws a RangeError for negative
  // counts, which happened whenever the two strings were longer than `length`.
  const times = Math.max(0, length - (string1 + string2).length);
  return string1 + " " + ".".repeat(times) + " " + string2;
};
hope it helps someone. cheers
I have a list of products that contain the UOM (unit of measure) in the product title. I need to automatically detect the UOM in the title using a regex.
Expectations
Banana Yogurt 70ml returns ml
Fish Nuggets 200G returns g
Potato Wedges 200 G returns g
I have this function below
// Attempts to detect the unit of measure in a product title.
// Returns 'ml', 'l', 'kg' or 'g', or undefined when nothing is detected.
// NOTE: the assignment below targets an undeclared identifier, which creates
// an implicit global in sloppy mode and throws in strict mode / ES modules.
detectMetricUnit = (title) => {
let unit,
// Matches single digits only, so `measurement` below is just the FIRST DIGIT
// of the title (e.g. '3' for '330ML'), not the whole number.
regex = new RegExp(/(?:\d)/mg),
measurement = title.match(regex) && title.match(regex)[0],
// Splitting on that digit and taking element [1] yields the text between the
// first and second occurrence of the digit. For '330ML' that text is the
// empty string (between the two 3s), so `matches` is falsy and no unit is found.
matches = measurement && title.split(measurement)[1];
if(matches) {
// These tests are unanchored substring tests: e.g. /litre|liter|l/ succeeds
// on any 'l' anywhere in the remaining text.
if(/millilitre|milliliter|ml/.test(matches.toLowerCase())){
unit = 'ml';
} else if(/litre|liter|l/.test(matches.toLowerCase())){
unit = 'l';
} else if (/kilogram|kg/.test(matches.toLowerCase())) {
unit = 'kg';
} else if (/gram|g/.test(matches.toLowerCase())) {
unit = 'g';
}
}
return unit;
}
However I have some problematic strings such as
Chocolate Drink 330ML X 24 matches 3 and return null UOM
which I am expecting to get ml.
Appreciate if someone could point out my mistake in my regex. How do I actually get the full integers and find the UOM attached next to it even with a space?
You may define a dictionary of possible UOMs you want to detect and then build a regex similar to
/(\d+(?:\.\d+)?)\s?(millilitre|milliliter|ml|litre|liter|l|kilogram|kg|gram|g)\b/i
See the regex demo. The (\d+(?:\.\d+)?) part will capture an integer or float value into Group 1, then \s? match an optional whitespace (change to \s* to match 0 or more whitespaces), and then (millilitre|milliliter|ml|litre|liter|l|kilogram|kg|gram|g)\b will capture UOM unit into Group 2 as a whole word (due to \b word boundary).
Here is the JS implementation to get the first UOM from string:
const strs = ['Banana Yogurt 70ml', 'Fish Nuggets 200G', 'Potato Wedges 200 G', 'Chocolate Drink 330ML X 24'];
// Maps every recognised spelling (lower case) to its canonical unit symbol.
const dct = {millilitre: 'ml', milliliter: 'ml', ml: 'ml', litre:'l', liter: 'l', l: 'l', kilogram: 'kg', kg: 'kg', gram: 'g', g: 'g'};

/**
 * Finds the first "<number><optional space><unit>" occurrence in a title.
 *
 * @param {string} title - Product title to scan.
 * @returns {[string|undefined, string|undefined]} The matched number (as
 *   text) and the canonical unit, or two undefined values when no unit is found.
 */
// Declared with const: assigning to an undeclared identifier creates an
// implicit global and throws in strict mode / ES modules.
const detectMetricUnit = (title) => {
  // Group 1: integer or decimal value; group 2: unit as a whole word (\b).
  const regex = new RegExp("(\\d+(?:\\.\\d+)?)\\s?(" + Object.keys(dct).join("|") + ")\\b", "i");
  const match = title.match(regex);
  if (!match) {
    return [undefined, undefined];
  }
  return [match[1], dct[match[2].toLowerCase()]];
};
strs.forEach(x => console.log(detectMetricUnit(x)));
To get all of them, multiple occurrences:
const strs = ['Banana Yogurt 70ml and Fish Nuggets 200G', 'Potato Wedges 200 G and Chocolate Drink 330ML X 24'];
// Maps every recognised spelling (lower case) to its canonical unit symbol.
const dct = {millilitre: 'ml', milliliter: 'ml', ml: 'ml', litre:'l', liter: 'l', l: 'l', kilogram: 'kg', kg: 'kg', gram: 'g', g: 'g'};

/**
 * Finds every "<number><optional space><unit>" occurrence in a title.
 *
 * @param {string} title - Product title to scan.
 * @returns {Array<[string, string]>} One [number-as-text, canonical unit]
 *   pair per occurrence, in order of appearance; empty when none is found.
 */
// Declared with const: assigning to an undeclared identifier creates an
// implicit global and throws in strict mode / ES modules.
const detectMetricUnit = (title) => {
  const results = [];
  // A fresh regex per call: a global regex keeps its lastIndex between exec calls.
  const regex = new RegExp("(\\d+(?:\\.\\d+)?)\\s?(" + Object.keys(dct).join("|") + ")\\b", "ig");
  let match = regex.exec(title);
  while (match !== null) {
    results.push([match[1], dct[match[2].toLowerCase()]]);
    match = regex.exec(title);
  }
  return results;
};
strs.forEach(x => console.log(x, detectMetricUnit(x)));
I'm working on generating playoff brackets, and have gotten so far but am having trouble concatenating arrays. I can console.log the first array, console.log the second array, and console.log the concatenated array and it APPEARS to have the right number of items, but the first two are not in it when I look in the console.
My code (edited to add more functions):
/**
 * Builds the first round of the bracket and then fills the later rounds.
 *
 * Seeds are taken two at a time from this.BracketOrder.order; each pair
 * becomes one match, which goes to the left side for the first half of the
 * teams and to the right side for the second half.
 *
 * @param event Passed through to fillBracket; not otherwise used here.
 * @param sortedTeams Teams indexed by seed: seed N is read from sortedTeams[N - 1].
 */
startBracket(event: String, sortedTeams: Playoff[]) {
console.log('start bracket by filling teams...', this.BracketOrder.order);
console.log('contestants: ', sortedTeams);
// empty arrays
const leftMatches = [];
const rightMatches = [];
this.resetBracketData();
// Only the left bracket is emptied here.
// NOTE(review): presumably resetBracketData() clears the right bracket - confirm.
this.leftBracket.length = 0;
console.log('left bracket ', this.leftBracket);
console.log('right bracket ', this.rightBracket);
console.log('this.teams: ', this.teams);
console.log('this.BracketTeams: ', this.BracketOrder.teams);
// add teams to brackets
// i advances by 2 because every iteration consumes two seeds (one match).
for (let i = 1; i <= this.BracketOrder.teams; i += 2) {
this.resetMatch();
// console.log(`Create matches: i=${i}`);
// Add team 1 to match
// shift() removes the seed from this.BracketOrder.order, so the order
// array is consumed by this loop.
const place1 = this.BracketOrder.order.shift();
// console.log(place1);
const team1: String = `${place1} ${sortedTeams[place1 - 1].players}`;
this.Match.push(team1);
// Add team 2 to match
const place2 = this.BracketOrder.order.shift();
// console.log(place2);
// console.log('this Bracketorder.order ', this.BracketOrder.order);
const team2: String = `${place2} ${sortedTeams[place2 - 1].players}`;
this.Match.push(team2);
// console.log(this.Match);
// Add match to left or right side
// NOTE(review): this pushes a reference to this.Match, not a copy. If
// resetMatch() empties the same array in place instead of assigning a new
// one, every stored match aliases one array - confirm.
if (i <= this.BracketOrder.teams / 2) {
leftMatches.push(this.Match);
} else {
rightMatches.push(this.Match);
}
}
// Add round to left and right brackets
// console.log(leftMatches);
// console.log(rightMatches);
this.pushRoundintoBrackets(leftMatches, rightMatches);
// fill remaining bracket rounds
this.fillBracket(event, sortedTeams);
}
/**
 * Appends one round of matches to each side of the bracket and logs the
 * resulting bracket sizes.
 *
 * @param leftMatches Matches of the round for the left side.
 * @param rightMatches Matches of the round for the right side.
 * @returns Always true.
 */
pushRoundintoBrackets(leftMatches: String[][], rightMatches: String[][]) {
  const { leftBracket, rightBracket } = this;
  leftBracket.push(leftMatches);
  rightBracket.push(rightMatches);
  console.log('left bracket length ', leftBracket.length, leftBracket);
  console.log('right bracket length ', rightBracket.length, rightBracket);
  return true;
}
/**
 * Derives the later rounds from the rounds already stored in
 * this.leftBracket / this.rightBracket and appends each new round via
 * pushRoundintoBrackets.
 *
 * In the visible body `sortedTeams` is never read and `event` only appears in
 * the commented-out saveBracket call.
 */
fillBracket(event: String, sortedTeams: Playoff[]) {
console.log('fillBracket begin...');
const nextRound = this.rounds - 1;
// for each round
// `round` indexes the previously stored round; x counts down to 0.
let round = 0;
for (let x = nextRound; x >= 0; x--) {
// roundMatches is never used below.
const roundMatches = [];
const leftside: String[][] = [];
const rightside: String[][] = [];
// NOTE(review): Math.pow(x, 2) is x squared. If the intent is "2 to the
// power x" matches per round, the arguments are swapped - confirm.
const matches = Math.pow(x, 2);
let thisround = [];
thisround.length = 0;
console.log('starting round x: ', x, ' matches: ', matches);
console.log(`round: ${round}`);
// previous round (combined left and right bracket rounds)
console.log(this.leftBracket[round]);
console.log(this.rightBracket[round]);
thisround = [...this.leftBracket[round], ...this.rightBracket[round]];
// NOTE(review): this logs a live reference to thisround, which the shift()
// calls below empty. Consoles that expand arrays lazily may therefore show
// it without its first entries; log a copy to see the state at this point.
console.log(`ROUND${round + 1}`, thisround);
// // add winners from previous round to next round
// Runs `matches` times; each iteration consumes two entries of thisround.
for (let y = 1; y <= matches * 2; y += 2) {
console.log(`MATCHES ${y} AND ${y + 1}`);
this.resetMatch();
// shift() returns undefined once thisround is empty.
// NOTE(review): match1.findIndex below would then throw - confirm that
// thisround always holds at least matches * 2 entries.
const match1 = thisround.shift();
const match2 = thisround.shift();
// if bye, grab winner OR use placeholder
if (x > 0) {
console.log('X>0', x);
// If the entry containing 'BYE' is the second one, the first entry advances:
// its second space-separated token is used. Otherwise a placeholder is used.
const winner1 =
match1.findIndex(e => e.includes('BYE')) === 1 ? match1[0].split(' ')[1] : `Winner of Match ${y}`;
const winner2 =
match2.findIndex(e => e.includes('BYE')) === 1 ? match2[0].split(' ')[1] : `Winner of Match ${y + 1}`;
// console.log(`winner1 ${winner1}`);
this.Match.push(winner1);
// console.log(`winner2 ${winner2}`);
this.Match.push(winner2);
console.log(`NEW MATCH ${this.Match}`);
// push match to correct side
// First half of the new matches goes left, the rest goes right.
// NOTE(review): this.Match is pushed by reference - see resetMatch().
if (y <= matches) {
leftside.push(this.Match);
console.log('left matches ', leftside);
} else {
rightside.push(this.Match);
console.log('right matches ', rightside);
}
} else {
// NOTE(review): when x is 0, `matches` is 0 and this inner loop does not
// run at all, so this branch looks unreachable as written - confirm.
console.log('SEMI-FINAL');
const winner1 = `Winner of Semi-Final`;
this.Match.push(winner1);
console.log(`NEW MATCH ${this.Match}`);
leftside.push(this.Match);
rightside.push(this.Match);
}
console.log(this.Match);
}
// // add the completed next round to the bracket
this.pushRoundintoBrackets(leftside, rightside);
round++;
}
// this.saveBracket(event);
}
Here is the console.log of that code:
What I SHOULD see is an array with the following (playoffs.page.ts: 775):
0: (2) ["Teisher / Tolentino", "Winner of Match 2"],
1: (2) ["Oh / Collazos", "Winner of Match 4"],
3: (2) ["Petrillo / Cheney", "Winner of Match 6"],
4: (2) ["Tavernia / Schneider", "Winner of Match 8"]
But I'm only getting the last two and the first 2 matches don't show up.
Can someone please help me understand what I'm doing wrong? It is inside a loop, but that shouldn't matter since the logged parts are correct, right?
I'm trying to make a tool for a game (Victoria II). The part of the save game that I intend to use has the following format (the rest of the save file uses a different format and is not important for now):
AAA = {
B = {
random_text = 1000.00
another_one = 400.00
}
C= {
no_importance = 222
}
D = {
random_text = 5.00
another_one = 10.00
}
}
How the tool will work? The tool will calculate the GDP of the country AAA (there are 100 countries, firstly i want to calculate the GDP of AAA, but i will want to calculate of every one. The code of every country is three letters in uppercase), the B parameter is the domestic production of some goods (random_text and another_one), the C parameter has no importance, so, the tool will ignore it. The D parameter is the price of some goods (in this case, random_text and another_one, if not mistaken there are 20 goods). So, the tool (in JavaScript) must multiply the production of goods in the country with the price that each goods has, and then make a table with the GDP of every country. The question is: How i can do this with JavaScript? I'm trying to use regexp, but I'm always failing, the code captures the parameter C and makes the tool fails. For every country, i want to insert its name in the table and its respective GDP.
Note: In the link above, there are more comments about the working of the tool.
Assuming the format you show above is reliable, you could do a quick-and-dirty conversion to JSON via a few calls to .replace(), then parse that JSON and process the resulting object as required.
Not sure I understood what you were saying about the GDP calculation, but I think you mean that the GDP for AAA would be the sum of each B value multiplied by its corresponding D value, i.e., 1000.00 * 5.00 + 400.00 * 10.00 = 9000.
// Template: replace the placeholder with the save-game text. As written, the
// placeholder is not valid input and JSON.parse below will throw.
var input = ' /* your input here */ ';
// Quick-and-dirty conversion of the `key = value` / `key = { ... }` text to
// JSON. The three replacements depend on running in this order:
//   1. quote every run of non-whitespace characters that precedes '=' and
//      turn the '=' into ':'
//   2. add a comma at the end of a line that does not end in '{' when the
//      next non-blank text is not a brace
//   3. drop any comma that ends up directly before a closing brace
// The result is wrapped in one outer object.
var json = '{'
+ input.replace(/([^\s]+)\s*=/g,'"$1":')
.replace(/([^{\s])\n(\s*[^{}])/g,'$1,\n$2')
.replace(/,(\s*})/g,'$1')
+ '}';
var obj = JSON.parse(json);
// One { country, gdp } entry per top-level key. gdp is the sum, over every
// key of B, of the B value multiplied by the D value stored under the same key.
var output = Object.keys(obj).map(function(v) {
return {
country: v,
gdp: Object.keys(obj[v].B).reduce(function(p, c) {
return p + obj[v].B[c] * obj[v].D[c];
}, 0)
};
});
After running the above, the output variable will be an array of objects with details in this format:
[ { "country": "AAA", "gdp": 9000 }, //etc. ]
Expand the following and run it to see it working with three countries:
var input = `AAA = {
B = {
random_text = 1000.00
another_one = 400.00
}
C= {
no_importance = 222
}
D = {
random_text = 5.00
another_one = 10.00
}
}
BBB = {
B = {
random_text = 111.00
another_one = 222.00
}
C= {
no_importance = 222
}
D = {
random_text = 3.00
another_one = 4.00
}
}
CCC = {
B = {
x = 10.0
y = 20.0
z = 30.0
}
C= {
no_importance = 222
}
D = {
x = 1.00
y = 2.00
z = 3.00
}
}`;

/**
 * Converts `key = value` / `key = { ... }` save-game text to a JSON string.
 * The replacements depend on running in this order.
 *
 * @param {string} text - Save-game text in the format shown above.
 * @returns {string} JSON text describing one object with a key per top-level entry.
 */
function saveTextToJson(text) {
  return '{'
    + text
      // Normalise CRLF / CR line endings first: the comma-insertion step below
      // only recognises '\n', so Windows line endings produced invalid JSON.
      .replace(/\r\n?/g, '\n')
      // Quote every key and turn '=' into ':'.
      .replace(/([^\s]+)\s*=/g, '"$1":')
      // Add a comma after a line unless it opens a block or the next
      // non-blank text is a brace.
      .replace(/([^{\s])\n(\s*[^{}])/g, '$1,\n$2')
      // Drop any comma directly before a closing brace.
      .replace(/,(\s*})/g, '$1')
    + '}';
}

/**
 * Computes the GDP of every country: the sum, over each good listed in B, of
 * the B value multiplied by the D value stored under the same key.
 *
 * @param {Object} countries - Parsed save data keyed by country code.
 * @returns {Array<{country: string, gdp: number}>} One entry per country.
 */
function gdpByCountry(countries) {
  return Object.keys(countries).map(function (code) {
    var production = countries[code].B;
    var prices = countries[code].D;
    return {
      country: code,
      gdp: Object.keys(production).reduce(function (total, good) {
        return total + production[good] * prices[good];
      }, 0)
    };
  });
}

var json = saveTextToJson(input);
var obj = JSON.parse(json);
var output = gdpByCountry(obj);
console.log(output);