How to fetch urls in array from paragraphs in javascript - javascript

I want to fetch all urls available in paragraph or sentence in javascript in array. For example check paragraph below:
Please checkout http://stackoverflow.com. It has very cool logo https://d13yacurqjgara.cloudfront.net/users/1249/screenshots/2247671/stackoverflow.png.
From above string, We have to get array of these two url.Solution 1: Solution 1, I know is to split paragraph with space, iterate over array and check for url one by one and push into url's array. But, It's a time taking solution.Are there better solution for finding it or above solution is fastest and good to go?Thank you.

Is this what you are looking for?
var list = [];
var sentence = "Please checkout http://stackoverflow.com. It has very cool logo https://d13yacurqjgara.cloudfront.net/users/1249/screenshots/2247671/stackoverflow.png.";
var result = checkForURL(sentence);
function checkForURL(text) {
var urlRegex = /(https?:\/\/[^\s]+)/g;
return text.replace(urlRegex, function (url) {
return '<a>' + url + '</a>';
})
}
var number = result.split('<a>');
for (var i = 1; i < number.length; i++) {
list.push(number[i].split(".</a>")[0]);
}
alert(list);

You might want to split on :// to get a smaller array to iterate over.
Example:
Demo
JSFiddle
HTML
<p id='p'>
Please checkout http://stackoverflow.com. It has very cool logo https://d13yacurqjgara.cloudfront.net/users/1249/screenshots/2247671/stackoverflow.png.
</p>
<h4>
URLs
</h4>
<ol id='results'>
</ol>
Javascript
findUrls();
function findUrls(){
var p = document.getElementById('p');
var res = document.getElementById('results');
var pStr = p.innerText;
var parts = pStr.split(/:\/\//);
if (parts.length < 2)
return;
for (var i = 1 ; i < parts.length ; i++){
var part = parts[i];
var lastPart = parts[i-1];
if (lastPart.length < 4 )
continue;
if (lastPart.length >= 4 && lastPart.substr(-4) == 'http')
part = 'http://' + part;
else if (lastPart.length >= 5 && lastPart.substr(-5) == 'https')
part = 'https://' + part;
var firstSpace = part.indexOf(' ');
if (firstSpace > -1)
part = part.substring(0, firstSpace);
var lastChar = part.charAt(part.length - 1);
if (lastChar == ',' || lastChar == '.' /* || ... */)
part = part.substring(0,part.length - 1);
res.innerHTML += '<li>' + part + '</li>'; // or push part to some result array
}
}

Try this approach. It might need some fine tuning..
var paragraphs = document.getElementsByTagName('p')
var regex = /(https?:\/\/.*?)(\s|$)/g;
var urls = [];
var badLastChars = [".", ","];
for (var i = 0; i < paragraphs.length; i++) {
var p = paragraphs[i].innerText;
var match;
while (match = regex.exec(p)) {
var url = match[1];
var lastChar = url[url.length-1];
if (badLastChars.indexOf(lastChar) > -1 ) {
url = url.slice(0,url.length-1);
}
console.log(url);
urls.push(url);
}
}
<p> Please checkout http://stackoverflow.com. It has very cool logo https://d13yacurqjgara.cloudfront.net/users/1249/screenshots/2247671/stackoverflow.png.</p>
<p> Another paragraph https://stackexchange.com. and here is another url I am making up: https://mycoolurlexample.com/this/is/an/example</p>

Related

URL parameters are reordered when using anchor links with Inheriting UTMS

I am using a javascript on my site, which always inherits the UTM parameters to the links on the site.
However, this is not working, when the links are anchor links to a section of the site and the link the visitor used to visit the page contains the "gclid" parameter from google.
For example:
A visitor uses this link to visit a site:
domain.com?utm_source=test&utm_medium=test&utm_campaign=test&gclid=12345
The button link on the site with the anchor link will look like the following:
domain.com&gclid=12345?utm_source=test&utm_medium=test&utm_campaign=test#anchor
For some reason the "&gclid" part changes its position.
I've tested it with a link without an anchor and in this case the "gclid" parameter doesn't get inherited and the link works.
Of course, the second domain isn't working anymore and leads to a 404 error.
Does someone have an idea what could be the cause for this?
This is the javascript I am using to inherit the UTMs:
(function() {
var utmInheritingDomain = "grundl-institut.de"
utmRegExp = /(\&|\?)utm_[A-Za-z]+=[A-Za-z0-9]+/gi,
links = document.getElementsByTagName("a"),
utms = [
"utm_medium={{URL - utm_medium}}",
"utm_source={{URL - utm_source}}",
"utm_campaign={{URL - utm_campaign}}"
];
for (var index = 0; index < links.length; index += 1) {
var tempLink = links[index].href,
tempParts;
if (tempLink.indexOf(utmInheritingDomain) > 0) {
tempLink = tempLink.replace(utmRegExp, "");
tempParts = tempLink.split("#");
if (tempParts[0].indexOf("?") < 0) {
tempParts[0] += "?" + utms.join("&");
} else {
tempParts[0] += "&" + utms.join("&");
}
tempLink = tempParts.join("#");
}
links[index].href = tempLink;
}
}());
EDIT: It seems like the following script don`t causes this problem:
<script>
(function() {
var domainsToDecorate = [
'domain.com',
],
queryParams = [
'utm_medium',
'utm_source',
'utm_campaign',
]
var links = document.querySelectorAll('a');
for (var linkIndex = 0; linkIndex < links.length; linkIndex++) {
for (var domainIndex = 0; domainIndex < domainsToDecorate.length; domainIndex++) {
if (links[linkIndex].href.indexOf(domainsToDecorate[domainIndex]) > -1 && links[linkIndex].href.indexOf("#") === -1) {
links[linkIndex].href = decorateUrl(links[linkIndex].href);
}
}
}
function decorateUrl(urlToDecorate) {
urlToDecorate = (urlToDecorate.indexOf('?') === -1) ? urlToDecorate + '?' : urlToDecorate + '&';
var collectedQueryParams = [];
for (var queryIndex = 0; queryIndex < queryParams.length; queryIndex++) {
if (getQueryParam(queryParams[queryIndex])) {
collectedQueryParams.push(queryParams[queryIndex] + '=' + getQueryParam(queryParams[queryIndex]))
}
}
return urlToDecorate + collectedQueryParams.join('&');
}
// borrowed from https://stackoverflow.com/questions/831030/
// a function that retrieves the value of a query parameter
function getQueryParam(name) {
if (name = (new RegExp('[?&]' + encodeURIComponent(name) + '=([^&]*)')).exec(window.location.search))
return decodeURIComponent(name[1]);
}
})();
</script>
You really should not change URLs with regexp and string manipulation.
Here is the recommended way
const url = new URL(location.href); // change to tempLink
utms = [
"utm_medium=med",
"utm_source=src",
"utm_campaign=camp"
];
utms.forEach(utm => url.searchParams.set(...utm.split("=")))
console.log(url.toString())

How to dynamically add <a> tags given an index of HTML page's string?

I'm making a search function for my website. So far, I've found the string the user searches for in the whole website, and I'm able to print the string and the context of the string. I have achieved this by using $.get on my HTML pages, then stripping the HTML to leave the pure text I want to search in. I then find the index of the string I'm looking for, then use substr to find the context of the input string (a few indexes ahead and behind).
Now, I need to link to the original page when a user clicks on a search result. My research says to use <a> tags, but how do I dynamically insert those into the HTML page with the index I have? And the index I have isn't even the complete page; it's stripped of tags.
These are the relevant parts of my code:
JavaScript:
function getIndicesOf(searchStr, str) { //get the indices of searchStr inside of str
var searchStrLen = searchStr.length;
if (searchStrLen == 0) {
return [];
}
var startIndex = 0, index, indices = [];
str = str.toLowerCase();
searchStr = searchStr.toLowerCase();
while ((index = str.indexOf(searchStr, startIndex)) > -1) {
indices.push(index);
startIndex = index + searchStrLen;
}
return indices;
}
function search() {
obj=document.getElementById("searchButton");
obj.onclick = function() {
var searchInput = document.getElementById('searchBox').value;
var allPageContent = ['chap/telem.php', 'chap/nestor.php', 'chap/aeolus.php', 'chap/calypso.php', 'chap/circe.php', 'chap/cyclops.php', 'chap/eumaeus.php', 'chap/hades.php','chap/ithaca.php', 'chap/lestry.php', 'chap/lotus.php', 'chap/nausicaa.php', 'chap/oxen.php', 'chap/penelope.php', 'chap/proteus.php', 'chap/scylla.php', 'chap/sirens.php', 'chap/wrocks.php']; //contains all text
var allText = '';
for (var i = 0; i < allPageContent.length; i++){
$.get(allPageContent[i], function(data){
var div = document.createElement("div");
div.innerHTML = data;
//allText = div.textContent || div.innerText || ""; //gets the text to search in, stripped of html
alltext = data;
allText = allText.replace(/(\r\n\t|\n|\r\t)/gm," ");
console.log(data);
var indices = getIndicesOf(searchInput, allText); //the variable indices is the array that contains the indices of the searched text in the main text
indices.forEach(findContext);
})
}
localStorage.output = '';
function findContext(currentValue, index) {
if (currentValue <= 16) {
searchContext = "..." + allText.substr(currentValue, 100) + "...";
} else {
searchContext = "..." + allText.substr(currentValue-15, 100) + "...";
}
localStorage.output = localStorage.output + searchContext + "<br /><br />";
}
console.log(localStorage.output);
};
};
HTML:
<script>document.getElementById("output").innerHTML = localStorage.output;</script>
It's a bit confusing what you're trying to achieve, considering your HTML, but replying to this
My research says to use <a> tags, but how do I dynamically insert
those into the HTML page with the index I have?
this would do the trick
var output = document.getElementById("output");
var a = document.createElement("a");
var linkText = document.createTextNode("my linked text");
a.appendChild(linkText);
a.href = "http://example.com";
output.appendChild(a);

How can I add minus sign in jquery?

My html code :-
<input type="text" id="test">
<span class="display"></span>
My jquery code :
$("#test").keyup(function(e){
$('span.display').text(formatCurrency($(this).val()));
this.value = this.value.replace(/[^0-9\.]/g,'');
});
Demo and full code is like this : https://jsfiddle.net/oscar11/nbLbb037/
I want the input text can enter this : -10000000
And the result who displayed in class display is : -10.000.000
How can I add minus (-) sign?
Need to change code like below:-
output = output.reverse();
if(output[1] == '.'){
output.splice(1, 1);
formatted = output.join("");
}else{
formatted = output.join("");
}
And every thing will be fine.
Example:-
$("#test").keyup(function(e){
$('span.display').text(formatCurrency($(this).val()));
this.value = this.value.replace(/[^0-9\.-]/g,'');
});
// format currency on pagu and revisi
var formatCurrency = function(num){
var str = num.toString().replace("$", ""), parts = false, output = [], i = 1, formatted = null;
if(str.indexOf(",") > 0) {
parts = str.split(",");
str = parts[0];
}
str = str.split("").reverse();
for(var j = 0, len = str.length; j < len; j++) {
if(str[j] != ".") {
output.push(str[j]);
if(i%3 == 0 && j < (len - 1)) {
output.push(".");
}
i++;
}
}
output = output.reverse();
if(output[1] == '.'){
output.splice(1, 1);
formatted = output.join("");
}else{
formatted = output.join("");
}
return(formatted + ((parts) ? "," + parts[1].substr(0, 1) : ""));
};
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<input type="text" id="test">
<span class="display"></span>
adding a minus sign is very easy, just replace
this.value = this.value.replace(/[^0-9\.]/g,'');
with following
this.value = this.value.replace(/[^0-9\.-]/g,'');
updated js fiddle is https://jsfiddle.net/nbLbb037/3/
But. You need to use regular expression test to verify exact currency test. Try exploring js regular expression test for your "formatCurrency" function. Of course thats purly upto you. Current solution will let you move further.
I have updated the regular expression js code on fiddle. Refer to https://jsfiddle.net/nbLbb037/5/

Regular expression to remove all the that is coming before and after <div> tag

I have seen references in this site. But I have this problem that is particular to my code.
I have some variable like
viewSourceText = "koushik ↵<div id="okokoko">some value </div> "
now i want to remove "&nbsp" appearing before and after the tag.So that output would be like this:
viewSourceText = "koushik<div id="okokoko">some value </div>"
now my code sample is:
viewSourceText.replace(/ \n<div/g, "<div>");
viewSourceText.replace(/</div> /g, "</div>");
But not working properly.
Here the proper way to do it in the DOM without regular expressions:
function removeNbspAroundDivs (start) {
var divs = start.getElementsByTagName("div");
for (var i = 0, len = divs.length; i < len; i++) {
var div = divs[i];
var element = div;
while ((element = element.previousSibling) && element.nodeType == 3) {
element.nodeValue = element.nodeValue.replace(/[\u00A0\n]+$/, "");
if (element.nodeValue.length > 0) break;
}
var element = div;
while ((element = element.nextSibling) && element.nodeType == 3) {
element.nodeValue = element.nodeValue.replace(/^[\u00A0\n]+/, "");
if (element.nodeValue.length > 0) break;
}
}
};
Fiddle

Long string pagination done right

I want to split a long text into smaller chunks, that will act as pages.
var longText = document.getElementById('content').innerHTML;
for (i=0; i<10; i++) {
var page = longText.substring(i*100,(i+1)*100);
document.write(page + "<br /><hr />");
}
See it here on jsfiddle.
This code splits the text, but in a stupid way, cutting also words in half.
It would be far better, for example, creating substrings ending at the last space in a certain number of characters (count 100 characters, then go back to the last space).
How would you achieve it?
Second shot
Third shot
I would use:
function paginate(longText, pageSize) {
var parts = longText.split(/[ \n\t]/g);
if (parts.length == 0)
return [];
var pages = [parts.unshift()];
for (var i = 0; i < parts.length; i += 1) {
var part = parts[i];
if (part.length + pages[pages.length - 1].length < pageSize) {
pages[pages.length - 1] += " " + part;
} else {
pages.push(part);
}
}
return parts;
}
For those looking for a working answer:
<div id="long-text">Lorem ipsum [...]</div>
<script>
var splitter = function(id) {
var longText = document.getElementById(id).innerHTML;
var pageLenght = 200;
var charsDone = 0;
var htmlBefore = "<p>";
var htmlAfter = "</p>";
while (charsDone <= longText.length && (pageLenght+charsDone)<longText.length) {
var pageBox = longText.substr(lastSpace,pageLenght);
var lastSpace = charsDone + pageBox.lastIndexOf(" ");
var page = longText.substring(charsDone,lastSpace);
document.write(htmlBefore + page + htmlAfter);
charsDone = lastSpace;
}
document.write(longText.substr(lastSpace,pageLenght));
}
splitter("#long-text");
You can easily use arrays instead of writing to document.
You will also want to set your html to your needs, do it in htmlBefore and htmlAfter.
See it in action here.

Categories

Resources