I have a Dynamic Breadcrumb set up with JavaScript. All I want is to do Initial Caps for each word.
Example:
Home > Some > Page
Currently I have them all converted to lowercase and have removed all - from the strings in pages that have multiple words. I just need to convert the string to Initial Caps. Here is my code that I have working so far:
// Builds a breadcrumb trail from the current page URL and writes it into the
// element with id "breadcrumb1".
var path = "",
// Full URL of the current page.
href = document.location.href,
// Text between "://" and the next "/".
domain = href.match(/:\/\/(.[^/]+)/)[1],
// Not referenced again within this snippet.
replacedomain = 'http://' + domain + '/',
// Hyphens become spaces, then the URL is split on "/". For a URL of the form
// "scheme://host/a/b" this gives s[0] = "scheme:", s[1] = "" and s[2] = host.
s = href.replace(/-/gi, " ").split("/"),
// One-element array holding the last "/"-separated segment; not referenced
// again within this snippet.
lastElement = document.location.href.split('/').splice(-1,1);
// Every segment from index 2 up to (but excluding) the last one becomes a link.
for (var i = 2; i < (s.length - 1); i++) {
// NOTE(review): the indexOf() lookup uses s[i], in which hyphens were already
// replaced by spaces, while href still contains the hyphens - confirm the
// generated href is right for multi-word segments.
path += "<a class='bc' href=\"" + href.substring(0, href.indexOf("/" + s[i]) + s[i].length + 1) + "/\">" + s[i] + "</a> > ";
// Always true here because i starts at 2. `breadcrumb` is assigned without a
// declaration; when the loop body never runs it is still unassigned at the
// `+=` below.
if (i > 0) {
breadcrumb = path;
}
}
// The last segment is appended as plain text in a <span>, not as a link.
i = s.length - 1;
breadcrumb += "<span>" + s[i] + "</span>";
// Lower-cases the whole markup string, href attributes included.
var breadcrumbl = breadcrumb.toLowerCase(),
// Re-declares `domain`, this time taken from the lower-cased markup.
domain = breadcrumbl.match(/:\/\/(.[^/]+)/)[1],
// replace() with a string pattern removes only the first occurrence, so the
// two calls remove the first two occurrences of the domain.
breadcrumb2 = breadcrumbl.replace(domain, "").replace(domain, ""),
breadcrumbs = breadcrumb2,
url = 'Home' + breadcrumbs;
document.getElementById('breadcrumb1').innerHTML=url;
I think the solution is a regular expression, but I'm not good at writing them and I'm having a hard time with the concept. Also, if anyone thinks this script can be optimized further, your feedback is welcome. I will make the variable names more semantic for production.
You could use css:
/* Capitalize the first letter of each word in the breadcrumb. The script
   emits <a class='bc'> links and a class-less <span> inside the element with
   id "breadcrumb1", so the rule targets those elements by that id. */
#breadcrumb1 a,
#breadcrumb1 span {
text-transform: capitalize;
}
I recently wrote this helper method to do this for me:
/**
 * Capitalizes the first letter of every run of three or more ASCII letters
 * and lower-cases the rest of that run. Shorter runs are left as they are.
 * A run wrapped in "&" and ";" (an HTML entity such as "&trade;") is
 * returned untouched.
 *
 * @param {string} text - Plain text; HTML markup is not supported.
 * @returns {string} The re-cased text.
 */
function autocase ( text ) {
    return text.replace(/(&)?([a-z])([a-z]{2,})(;)?/ig, function ( all, prefix, letter, word, suffix ) {
        if (prefix && suffix) {
            return all;
        }
        // The optional groups may have consumed a lone "&" or ";"; put it back
        // so that "rock&roll" becomes "Rock&Roll" instead of "RockRoll".
        return (prefix || "") + letter.toUpperCase() + word.toLowerCase() + (suffix || "");
    });
}
It takes into account HTML entities such as `&trade;`, which are left untouched.
Edit: To use this method, simply pass it a string, and it will return the string auto cased. It does not work on html strings.
//...
document.getElementById('breadcrumb1').innerHTML=url;
/**
 * Capitalizes the first letter of every run of three or more ASCII letters
 * and lower-cases the rest of that run. Shorter runs are left as they are.
 * A run wrapped in "&" and ";" (an HTML entity such as "&trade;") is
 * returned untouched.
 *
 * @param {string} text - Plain text; HTML markup is not supported.
 * @returns {string} The re-cased text.
 */
function autocase ( text ) {
    return text.replace(/(&)?([a-z])([a-z]{2,})(;)?/ig, function ( all, prefix, letter, word, suffix ) {
        if (prefix && suffix) {
            return all;
        }
        // The optional groups may have consumed a lone "&" or ";"; put it back
        // so that "rock&roll" becomes "Rock&Roll" instead of "RockRoll".
        return (prefix || "") + letter.toUpperCase() + word.toLowerCase() + (suffix || "");
    });
}
// Re-case the text of every link inside #breadcrumb1. The helper is declared
// as `autocase` (all lower-case); identifiers are case-sensitive, so calling
// `autoCase` would throw a ReferenceError.
$("#breadcrumb1 a").text(function(i,text){
return autocase(text);
});
Try using css property text-transform:capitalize; for the breadcrumb.
Most likely, in your case it should be:
/* The breadcrumb container is looked up with getElementById('breadcrumb1'),
   so it has to be selected by id (#), not by class (.). */
#breadcrumb1 a {
text-transform: capitalize;
}
My first thought is:
breadcrumb += "<span>" + s[i].substring(0,1).toUpperCase() + s[i].substring(1) + "</span>";
But @Esailija's answer is much easier.
Reference:
toUpperCase().
not to be a punk but why not just use css?
text-transform: capitalize;
Sorry if you really need JS but in CSS you can easily use text-transform:Capitalize;
Since the accepted answer didn't actually answer the question as it was asked, I figured this might help; it's a solution using Regex with JavaScript, originally found here: Regex capitalize first letter every word
Here's the snippet I found useful:
// Matches a lower-case ASCII letter at the start of a word, unless that
// letter is directly followed by whitespace (so the "s" in "harper's " stays
// lower-case).
var re = /(\b[a-z](?!\s))/g;
var s = "fort collins, croton-on-hudson, harper's ferry, coeur d'alene, o'fallon";
s = s.replace(re, (initial) => initial.toUpperCase());
console.log(s); // "Fort Collins, Croton-On-Hudson, Harper's Ferry, Coeur D'Alene, O'Fallon"
Here's one:
// Upper-cases only the first character of s, and only when that character is
// an ASCII letter. replace() returns a new string; this snippet does not
// store the result.
s.replace(/(^[a-zA-Z]{1})/,function (s){
return s.toUpperCase();
});
Related
Not certain if this can be done in regexp under javascript, but thought it would be interesting to see if it is possible.
So thought I would clean up a piece of html to remove most tags, literally just dropping them, so <H1><img><a href ....>. And that would be relatively simple (well, stole the basis from another post, thanks karim79 Remove HTML Tags in Javascript with Regex).
// Removes everything that looks like an HTML tag from inString. When the
// stripped text is shorter than maxlength it is passed to callback; otherwise
// only a message is logged and callback is not invoked.
function(inString, maxlength, callback){
console.log("Sting is " + inString)
console.log("Its " + inString.length)
// Matches "<", then one or more characters other than ">", then ">".
var regex = /(<([^>]+)>)/ig
var outString = inString.replace(regex, "");
console.log("No HTML sting " + outString);
if ( outString.length < maxlength){
callback(outString)
} else {
// No truncation is performed in this branch.
console.log("Lets cut first bit")
}
}
But then I started thinking: is there a way I can control regex execution? Let's say that I want to keep certain tags, like b, br, i, and maybe change H1-6 to b. So in pseudo code, something like:
for ( var i in inString.regex.hits ) {
if ( hits[i] == H1 ) {
hits[i] = b;
}
}
The issue is that I want the text thats not HTML tags to stay as it is, and I want it to just cut out by default. One option would of course be to change the ones I want to keep. Say change <b> to [[b]], once that is done to all the ones of interest. Then put them back to <b> once all unknown have been removed. So like this (only for b, and not certain the code below would work):
function(inString, maxlength, callback){
console.log("Sting is " + inString)
console.log("Its " + inString.length)
var regex-remHTML = /(<([^>]+)>)/ig
var regex-hideB = /(<b>)/ig
var regex-showB = /([b])/ig
var outString = inString.replace(regex-hideB, "[b]");
outString = outString.replace(regex-remHTML, "");
outString = outString.replace(regex-showB, "<b>");
console.log("No HTML sting " + outString);
if ( outString.length < maxlength){
callback(outString)
} else {
console.log("Lets cut first bit")
}
}
But would it be possible to be smarter, writing code that says: here is a piece of HTML tag, run this code against the match?
As Tim Biegeleisen said in his comment, a better solution might be to use a parser instead of a regex.
By the way, if you want to control what is going to be changed by the regex you can pass a callback to the String.prototype.replace:
// Every tag is handed to the callback: tags whose text contains "div" are
// dropped, all other tags are returned unchanged.
var input = "<div><h1>CIAO Bello</h1></div>";
var output = input.replace(/(<([^>]+)>)/gi, function (tag) {
    return tag.indexOf("div") > -1 ? "" : tag;
});
console.log("output", output);
else if( (m = url.match(/^(https?:\/\/ww[\d]+\.sinaimg\.cn\/)([^\/]+)(\/.+)$/i)) ) {
if(m[2] != "large") {
document.location = m[1] + "large" + m[3];
}
}
So the above is a working piece of JavaScript, and I am trying to write a regex for another URL that fits into the code, but I can't quite work out how it works.
What is m[1] and m[3] BTW?
This is the URL I intend to change about:
a.up.w.com/i/EF8W-P67s6dqRPIj7cLlMQ!/fw/300/format/jpg/quality/90
How can I make a Regex that replaces 300 to 3000, jpg to png (or not) and 90 to 100?
I made this one but nothing seemed to work...
else if( (m = url.match(/^(https?:\/\/a\+\.up\.w\.com\/i\/)([^\/]+)(\/fw\/)([^\/]+)(\/format\/)([^\/+])(\/quality\/+)([^\/]+)$/i)) ) {
Thank you very much!!!
/^(https?:\/\/ww[\d]+\.sinaimg\.cn\/)([^\/]+)(\/.+)$/i
To read regular expressions most people need some automated tool as a helper.
I created your regex with an example: https://regex101.com/r/mP5zI9/3
Example caught link: https://ww1.sinaimg.cn/aaa/bbb.jpg
1. https://ww1.sinaimg.cn/
2. aaa
3. /bbb.jpg
the m array contains the different parts of the match.
For full javascript details see: MDN documentation for String.prototype.match()
You might be going about this the wrong way. Sometimes it's easier to break a task down.
// Rewrites the width, format and quality path segments one at a time instead
// of matching the whole URL with a single expression. Both variables are
// declared: assigning to an undeclared name creates an implicit global and
// throws a ReferenceError in strict mode and in ES modules.
var url = "http://a.up.w.com/i/EF8W-P67s6dqRPIj7cLlMQ!/fw/300/format/jpg/quality/90";
var m = url
    .replace(/\/fw\/\d+/, "/fw/3000")
    .replace(/\/format\/jpg/, "/format/png")
    .replace(/\/quality\/\d+/, "/quality/100");
May be you only need to care about this part /300/format/jpg/quality/90,
Then use replace like this.
// Replaces the "<width>/format/jpg/quality/<quality>" tail in one pass. The
// captured numbers ($1, $2) are ignored and fixed values are returned.
// replace() returns the new string; assign it if the result is needed.
url.replace(/(\d+)\/format\/jpg\/quality\/(\d+)/, function($0, $1, $2) {
return 3000 + '/format/png/quality/' + 100;
});
else if( (m = url.match(/^(https?:\/\/a+\.up+\.w+\.com\/i\/+)([^\/]+)(\/fw\/+)([^\/]+)(\/format\/+)([^\/+]+)(\/quality\/+)([^\/]+)$/i)) ) {
if(m[4] != "3000" || m[8] !=100) {
document.location = m[1] + m[2] + m[3] + "3000" + m[5] + m[6] + m[7] + "100";
}
}
Thanks everyone! I've got it! :P
Special thanks to @Lavi Avigdor for the inspiration (again)
What I am doing:
In NodeJS I am creating an email template by using MustacheJS, using data from an array of JSON objects.
The text/message that goes in the template can contain text along with basic html tags (such as b p & a).
Due to limitation of space I need to only show an excerpt of the message. For that I do a word count, and after lets say 20 words (checked by spaces) I truncate the string and append View more anchor tag. This links it to the website's post page, that contains the complete post. Something like:
Hey this is a sample post text <b>message</b>. Lorem ipsum dolor sit
amit... View more
The problem:
During word count and truncation, it is possible that I truncate the string in between an html tag as I am simply calculating words on basis of space. Something like:
I am sharing a link with you. <a style="color:... View more
Now this will break the html.
Possible solution:
Before truncating string, run a regex on it to find all the html tags in it.
Use indexOf() (or some other method) to find starting and ending indices of each tag.
After word count, get the index where I need to truncate it.
Now see that if the index intersects with any of the tags region.
If it does intersect, simply move the truncate index to the start or end of the html tag.
Question:
Is there a better way to do this. I don't know what search terms I should be searching on google, to get help with this.
P.S. The code is flexible and I can change the flow if there is a significantly better solution. Also, I am not good with post titles. If you can, please modify it to something that reflects the question.
EDIT:
This is what I came up with after Alex's answer. Hope it helps someone else:
/**
 * Counter: takes a string and returns word and character counts.
 *
 * @param {string} value - Text to measure.
 * @returns {{wordCount: number, totalChars: number, charCount: number, charCountNoSpace: number}}
 *   wordCount: number of whitespace-separated words (0 for an empty or
 *   whitespace-only string);
 *   totalChars: length of value exactly as given;
 *   charCount: length of value after trimming;
 *   charCountNoSpace: length of value with all whitespace removed.
 */
var counter = function(value){
    var trimmed = value.trim();
    return {
        // Splitting an empty string yields [""], which would report one word
        // for blank input, so that case is answered explicitly.
        wordCount: trimmed === '' ? 0 : trimmed.split(/\s+/).length,
        totalChars: value.length,
        charCount: trimmed.length,
        charCountNoSpace: value.replace(/\s+/g, '').length
    };
};
/**
 * htmlSubString - Creates an excerpt from markup (or plain text) without
 * producing malformed HTML tags.
 *
 * @param {string} markup - Markup/text to take the excerpt from.
 * @param {number} limit - Total word count of the excerpt. Only text counts
 *   as words; HTML tags do not.
 * @returns {string} The excerpt.
 */
var htmlSubString = function(markup, limit){
    var htmlParser = require("htmlparser2");
    // Incremented for every opening tag handled while under the limit and
    // decremented for every closing tag that is handled.
    var tagCount = 0;
    var wordCount = 0;
    var excerpt = '';

    // Appends one parser event (text, opening tag or closing tag) to the
    // excerpt. Returns false when the event was rejected outright.
    function addToExcerpt(type, text, attribs) {
        if ((wordCount >= limit && tagCount == 0) || (tagCount === 1 && type === 'tagOpen' && wordCount >= limit)) {
            return false;
        }
        else if (wordCount < limit || tagCount) {
            if (type === 'text') {
                // `counter` is the word-counting helper defined in this file;
                // there is no `$scope` object in this code.
                var wordCountSubString = counter(text).wordCount;
                if (wordCountSubString + wordCount > limit && tagCount === 0) {
                    // Outside of any tag the text can be cut: take only as
                    // many words as still fit.
                    var length = limit - wordCount;
                    // Split on whitespace runs so consecutive spaces do not
                    // produce empty "words".
                    var wordList = text.trim().split(/\s+/);
                    for (var i = 0; i < length; i++) {
                        excerpt += ' ' + wordList[i];
                        wordCount++;
                    }
                } else {
                    wordCount += wordCountSubString;
                    excerpt += text;
                }
            } else if (type === 'tagOpen') {
                excerpt += '<' + text;
                for (var prop in attribs) {
                    excerpt += ' ' + prop + '="' + attribs[prop] + '"';
                }
                excerpt += '>';
            } else if (type === 'tagClose') {
                excerpt += '</' + text + '>';
            }
        }
        return true;
    }

    var parser = new htmlParser.Parser({
        onopentag: function (name, attribs) {
            if(wordCount < limit){
                ++tagCount;
                addToExcerpt('tagOpen', name, attribs);
            }
        },
        ontext: function (text) {
            if(wordCount < limit){
                addToExcerpt('text', text);
            }
        },
        onclosetag: function (tagName) {
            // Closing tags are still emitted after the limit is reached while
            // tags remain open, so the excerpt stays well formed.
            if(wordCount < limit || tagCount > 0){
                addToExcerpt('tagClose', tagName);
                --tagCount;
            }
        }
    });
    parser.write(markup);
    parser.end();
    return excerpt;
}
Usage:
// Maximum number of text words passed as the excerpt limit.
var wordCountLimit = 20;
// Placeholder value: substitute the real markup or plain text here.
var markup = "/* some markup/text */";
var excerpt = htmlSubString(markup, wordCountLimit);
Now, you'll definitely be able to find some HTML tag matching regular expressions. That said, I don't recommend it. At first you'll be all happy and everything will work just fine. Then tomorrow you'll find a small edge-case. "No worries!" You'll say, as you modify the expression to account for the discrepancy. Then the next day, a new tweak, and a new one, and yet another, etc etc until you can't take it anymore.
I highly recommend you find an already established HTML parsing library. There appears to be quite a few on npm. This one seems to be fairly popular.
PS - You did fine with your question. I wish more questions took as much time and provided as much detail :)
The problem I need to solve is to shorten file paths given by the user. If you didn't know, sometimes it's not possible to enter in paths with spaces in the command prompt. You are required to either put the path in quotes or rename the paths with spaces to "abcdef~1".
Example: "C:\Some Folder\Some File.exe" should become "C:\SomeFo~1\SomeFi~1.exe" (case insensitive).
I'm making a function in JavaScript to attempt to shorten file paths using this idea.
/**
 * Shortens every "\"-separated section of a path that is longer than six
 * characters and contains a space: all spaces are removed, the section is
 * cut to six characters and "~1" is appended. Each shortened section is
 * shown with alert() before and after, as is the final path.
 *
 * Known limitation: when the file name itself is shortened, its extension is
 * cut off together with everything else past the sixth character.
 *
 * @param {string} FilePath - Path using "\" as separator.
 * @returns {string} The shortened path.
 */
function ShortenFilePath(FilePath){
    var Sections = FilePath.split("\\");
    // The counter is declared locally; an undeclared counter becomes an
    // implicit global and throws in strict mode and in ES modules.
    for (var Index = 0; Index < Sections.length; Index++){
        var Section = Sections[Index];
        if (Section.length > 6 && Section.indexOf(" ") !== -1 && Section.indexOf("~1") === -1){
            alert(Section);
            // A string pattern would remove only the first space, leaving
            // later spaces inside the six kept characters.
            Section = Section.replace(/ /g, "").substring(0, 6) + "~1";
            Sections[Index] = Section;
            alert(Section);
        }
    }
    // A separate name is used so the parameter is not re-declared.
    var ShortPath = Sections.join("\\");
    alert(ShortPath);
    return ShortPath;
}
The problem is, it will leave out the file extension and spit out "C:\SomeFo~1\SomeFi~1". I need help obtaining that file extension (probably through regular expression). If you feel that this function can be optimized, please do share your thoughts.
UPDATE: I believe the problem has been resolved.
UPDATE 2: There were some problems with the previous code, so I revised it a little.
UPDATE 3: Fresh new problem. Yikes. If the name of the file itself without the extension is under 7 letters, then it will turn up as "name.e~1.exe".
UPDATE 4: I think I've finally fixed the problem. I THINK.
/**
 * Shortens every section after the first one that contains a space: all
 * spaces are removed, the section is cut to six characters and "~1" is
 * appended. The file extension (text from the last "." of the final section)
 * is set aside first and re-attached at the end.
 *
 * @param {string} FilePath - Path using "\" as separator.
 * @returns {string} The shortened path.
 */
function ShortenFilePath(FilePath){
    var Sections = FilePath.split("\\");
    var LastIndex = Sections.length - 1;
    var FileName = Sections[LastIndex];
    // Only a dot inside the final section marks an extension. Without this
    // check a name with no "." was erased (substring(0, -1) is "") and the
    // whole input path was appended to the result.
    var DotIndex = FileName.lastIndexOf(".");
    var Extension = "";
    if (DotIndex > 0){
        Extension = FileName.substring(DotIndex);
        Sections[LastIndex] = FileName.substring(0, DotIndex);
    }
    // Starts at 1: the first section (e.g. the drive) is never shortened.
    for (var Index = 1; Index < Sections.length; Index++){
        if (Sections[Index].indexOf(" ") !== -1 && Sections[Index].indexOf("~1") === -1){
            Sections[Index] = Sections[Index].replace(/ /g, "").substring(0, 6) + "~1";
        }
    }
    return Sections.join("\\") + Extension;
}
I would use this to get the extension:
someString.substring(someString.lastIndexOf("."))
you also asked for some code review, so:
1 - Your JS conventions are a little off, it looks more like C# :) Why the capital letter in variable and method names?
2 - You said you can go with the quotes option instead of using ~1, seems easier, why did you decide not to?
3 - Why do you need something like this in your JS?
How about this:
/**
 * Shortens every "\"-separated section that is longer than six characters
 * and contains a space: all spaces are removed, the section is cut to six
 * characters and "~1" is appended. The extension of the file name is set
 * aside before shortening and re-attached afterwards. Each shortened section
 * is shown with alert() before and after, as is the final path.
 *
 * @param {string} FilePath - Path using "\" as separator.
 * @returns {string} The shortened path.
 */
function ShortenFilePath(FilePath){
    var Sections = FilePath.split("\\");
    var LastIndex = Sections.length - 1;
    // The extension is taken from the last "." of the final section only and
    // removed from that section. Appending a suffix matched on the whole
    // path duplicated it whenever the file name was not shortened
    // ("file.exe.exe") and picked up dots that belong to directory names.
    var DotIndex = Sections[LastIndex].lastIndexOf(".");
    var suffix = "";
    if (DotIndex > 0){
        suffix = Sections[LastIndex].substring(DotIndex);
        Sections[LastIndex] = Sections[LastIndex].substring(0, DotIndex);
    }
    // Declared locally so no implicit global is created.
    for (var Index = 0; Index < Sections.length; Index++){
        var Section = Sections[Index];
        if (Section.length > 6 && Section.indexOf(" ") !== -1 && Section.indexOf("~1") === -1){
            alert(Section);
            Section = Section.replace(/ /g, "").substring(0, 6) + "~1";
            Sections[Index] = Section;
            alert(Section);
        }
    }
    var ShortPath = Sections.join("\\") + suffix;
    alert(ShortPath);
    return ShortPath;
}
You could use String.prototype.replace with a callback:
/**
 * Shortens path sections with a single replace() pass. Each match is a run
 * of characters other than ":" and "\" (match1) followed by a separator run
 * (match2). The first space is removed from match1; when match1 is longer
 * than eight characters it is also cut to six characters and "~1" is added.
 *
 * @param {string} FilePath - Path using "\" as separator.
 * @returns {string} The path with the matched sections rewritten.
 */
function ShortenFilePath(FilePath){
    var sectionPattern = /([^:\\]+)([\\|\.[^\\]+)/g;
    return FilePath.replace(sectionPattern, function(text, match1, match2){
        var compact = match1.replace(' ', '');
        if (match1.length > 8) {
            return compact.substr(0, 6) + '~1' + match2;
        }
        return compact + match2;
    });
}
I am not 100% sure that this will output exactly what you need, but you probably get the idea :)
jsFiddle
Fix for Update #3 problem:
if (FilePath.lastIndexOf(".") > 6){
Sections[Index] = Sections[Index].substring(0,6) + "~1"
} else {
Sections[Index] = Sections[Index].substring(0, FilePath.lastIndexOf(".")) + "~1"
}
And btw, this:
while (Sections[Index].match(" ")){
Sections[Index] = Sections[Index].replace(" ","")
}
Should probably look like this:
Sections[Index] = Sections[Index].replace(/ /gm, "");
How to replace all HTML tags from <anything> to \n<anything> and </anything> to </anything>\n
var text = "<anything>welcome</anything><anything>Hello</anything>";
result
var text = "\n<anything>welcome</anything>\n\n<anything>Hello</anything>\n";
This code will help you (match all tags)
</?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>
You can prettify xml without regex:
// Wraps the fragments in a single <root> element and prints the children
// through toXMLString().
// NOTE(review): `XML` and `toXMLString()` look like E4X (ECMAScript for XML),
// which is not part of standard JavaScript - confirm the target runtime
// provides it.
var text = "<anything>welcome</anything><anything>Hello</anything>";
var xml = new XML("<root>" + text + "</root>");
console.log(xml.children().toXMLString());
output:
<anything>welcome</anything>
<anything>Hello</anything>
Just don't parse HTML using regex. Read this: http://www.codinghorror.com/blog/2009/11/parsing-html-the-cthulhu-way.html
In JavaScript, you can turn HTML into DOM using the .innerHTML property, and after that you can use other DOM methods to traverse it.
Simple example (needs Firebug):
var div = document.createElement('div');
var html = '<p>foo <span>bar</span><br /></p>';
div.innerHTML = html;
/**
 * Walks a DOM subtree depth-first and logs it. Element nodes are logged as an
 * opening tag, followed by their children and a closing tag, each together
 * with the nesting depth. An element without children is logged once as
 * "<name />". Any other node logs its `data`.
 *
 * @param {Node} node - Node to start from.
 * @param {number} [depth] - Nesting level of node; treated as 0 when omitted.
 */
function scan(node, depth)
{
    var level = depth || 0;
    var children = node.childNodes;
    var count = children.length;
    var isElement = node.nodeType == 1;
    var isEmptyElement = isElement && count == 0;
    var name;

    if (!isElement) {
        console.log(node.data);
    } else {
        name = node.tagName.toLowerCase();
        console.log('<' + name + (isEmptyElement ? ' /' : '') + '>', level);
    }

    for (var idx = 0; idx < count; idx += 1) {
        scan(children[idx], level + 1);
    }

    if (isElement && !isEmptyElement) {
        console.log('</' + name + '>', level);
    }
}
scan(div);
Output:
<div> 0
<p> 1
foo
<span> 2
bar
</span> 2
<br /> 2
</p> 1
</div> 0
You could also modify this to output attributes and use the depth argument for indentation.
Try this:
// Puts a newline before every opening tag and after every closing tag.
// $1 captures the optional "/" that marks a closing tag; the alternation lets
// quoted attribute values contain ">" without ending the match early.
// replace() returns the new string; this snippet does not store the result.
str.replace(/<(\/?)[a-zA-Z]+(?:[^>"']+|"[^"]*"|'[^']*')*>/g, function($0, $1) {
return $1 === "/" ? $0+"\n" : "\n"+$0;
})
Expanding on @Amarghosh's answer:
Assuming the HTML you are trying to parse is more complicated than your example (which I would guess it is) you may want to convert your HTML page into XHTML. This will allow you to use treat it as XML and do a number of things including:
Use an XSL to transform the data
Use .NET's extensive set of XML
libraries to extract and manipulate the data.
I have done this in the past with a free .NET library called SGML.
text = text.replace(/<(?!\/)/g, "\n<"); // replace every < (which are not followed by /) by \n<