UIWebView JavaScript Highlight - how to Highlight special characters? (utf8 encoding?) - javascript

I was trying to wrap my head around this but I kind of gave up searching. I don't know much about Javascript but I have a .js highlight function for my UIWebView.
My problem is that it does not Highlight text if it includes any special characters like: "',à etc."
I am passing an NSString to the .js function, which is probably what causes the trouble, but I can't figure out whether I have to pass a UTF-8 encoded string or convert the string to UTF-8 inside my .js.
Here is my .js code
/**
 * Recursively highlights every occurrence of `keyword` below `element`.
 *
 * Text nodes are searched case-insensitively: the node text is lower-cased
 * before the search, so `keyword` must already be lower case. Each hit is
 * wrapped in a <span class="MyAppHighlight">, scrolled into view and counted
 * in the global MyApp_SearchResultCount. Element nodes are descended into
 * unless they carry an inline `display: none` or are a <select>.
 *
 * @param {Node} element - DOM node to search; falsy values are ignored.
 * @param {string} keyword - Lower-cased search term; an empty term is ignored.
 */
function MyApp_HighlightAllOccurencesOfStringForElement(element,keyword) {
  // An empty keyword "matches" at index 0 of every string, which would make
  // the text-node loop below spin forever, so bail out early.
  if (!element || !keyword) {
    return;
  }
  if (element.nodeType == 3) { // Text node
    while (true) {
      var value = element.nodeValue; // Search for keyword in text node
      // NOTE: offsets come from the lower-cased copy; this assumes lower-casing
      // does not change the length of the text.
      var idx = value.toLowerCase().indexOf(keyword);
      if (idx < 0) break; // not found, abort

      // Wrap the matched text in a highlight span.
      var span = document.createElement("span");
      span.appendChild(document.createTextNode(value.substr(idx,keyword.length)));
      span.setAttribute("class","MyAppHighlight");
      span.style.backgroundColor="#C4B695";
      span.style.color="black";

      // Move everything after the match into a new text node, cut it (and the
      // match) off the current node, then insert span + remainder behind it.
      var rest = document.createTextNode(value.substr(idx+keyword.length));
      element.deleteData(idx, value.length - idx);
      var next = element.nextSibling;
      element.parentNode.insertBefore(span, next);
      element.parentNode.insertBefore(rest, next);

      // Keep searching in the remainder only.
      element = rest;
      span.scrollIntoView();
      MyApp_SearchResultCount++; // update the counter
    }
  } else if (element.nodeType == 1) { // Element node
    if (element.style.display != "none" && element.nodeName.toLowerCase() != 'select') {
      // Walk backwards: highlighting inserts new siblings after the current child.
      for (var i=element.childNodes.length-1; i>=0; i--) {
        MyApp_HighlightAllOccurencesOfStringForElement(element.childNodes[i],keyword);
      }
    }
  }
}
/**
 * Main entry point of the search: highlights every occurrence of `keyword`
 * in the document body. The keyword is lower-cased here because the helper
 * compares it against lower-cased node text.
 * Previous highlights are not cleared (the MyApp_RemoveAllHighlights() call
 * is disabled).
 */
function MyApp_HighlightAllOccurencesOfString(keyword) {
  var root = document.body;
  var needle = keyword.toLowerCase();
  MyApp_HighlightAllOccurencesOfStringForElement(root, needle);
}
Please let me know what other information I can provide.

in case anyone else was wondering about that, the solution was a simple escape character. Thanks everyone.
// Prefix "á" with a backslash in the search string (NSString literals use the @"..." prefix).
editedSearchString = [editedSearchString stringByReplacingOccurrencesOfString:@"á" withString:@"\\á"];

Related

How do I break up a string without creating malformed HTML tags?

What I am doing:
In NodeJS I am creating an email template by using MustacheJS, using data from an array of JSON objects.
The text/message that goes in the template can contain text along with basic html tags (such as b p & a).
Due to limitation of space I need to only show an excerpt of the message. For that I do a word count, and after lets say 20 words (checked by spaces) I truncate the string and append View more anchor tag. This links it to the website's post page, that contains the complete post. Something like:
Hey this is a sample post text <b>message</b>. Lorem ipsum dolor sit
amit... View more
The problem:
During word count and truncation, it is possible that I truncate the string in between an html tag as I am simply calculating words on basis of space. Something like:
I am sharing a link with you. <a style="color:... View more
Now this will break the html.
Possible solution:
Before truncating string, run a regex on it to find all the html tags in it.
Use indexOf() (or some other method) to find starting and ending indices of each tag.
After word count, get the index where I need to truncate it.
Now see that if the index intersects with any of the tags region.
If it does intersect, simply move the truncate index to the start or end of the html tag.
Question:
Is there a better way to do this? I don't know what search terms I should use on Google to get help with this.
P.S. The code is flexible and I can change the flow if there is a significantly better solution. Also, I am not good with post titles. If you can, please modify it to something that reflects the question.
EDIT:
This is what I came up with after Alex's answer. Hope it helps someone else:
/**
 * Counter: takes a string and returns its word and character counts.
 *
 * @param {string} value - Text to analyse. null, undefined and empty input
 *   yield all-zero counts.
 * @returns {{wordCount: number, totalChars: number, charCount: number, charCountNoSpace: number}}
 *   wordCount        - number of whitespace-separated words
 *   totalChars       - raw length of the input
 *   charCount        - length after trimming leading/trailing whitespace
 *   charCountNoSpace - length with all whitespace removed
 */
var counter = function(value){
  if (value == null || !value.length) {
    return {
      wordCount: 0,
      totalChars: 0,
      charCount: 0,
      charCountNoSpace: 0
    };
  }
  var trimmed = value.trim();
  return {
    // ''.split(...) yields [''] (length 1), so whitespace-only input needs an
    // explicit zero instead of being counted as one word.
    wordCount: trimmed === '' ? 0 : trimmed.split(/\s+/).length,
    totalChars: value.length,
    charCount: trimmed.length,
    charCountNoSpace: value.replace(/\s+/g, '').length
  };
};
/**
 * htmlSubString - Creates an excerpt from markup (or even plain text) without
 * creating malformed HTML tags. The markup is streamed through htmlparser2 and
 * re-serialised piece by piece until the word limit is reached.
 *
 * @param {string} markup - Markup/text to take the excerpt out of
 * @param {number} limit - Total word count of the excerpt. Note that only text
 *   (not the html tags) counts as words.
 * @returns {string} Excerpt
 */
var htmlSubString = function(markup, limit){
  var htmlParser = require("htmlparser2");
  var tagCount = 0;  // incremented in onopentag, decremented in onclosetag
  var wordCount = 0; // words appended to the excerpt so far
  var excerpt = '';

  /**
   * Appends one parser event to the excerpt.
   * @param {string} type - 'tagOpen', 'text' or 'tagClose'
   * @param {string} text - tag name, or the text content for 'text'
   * @param {Object} [attribs] - attribute map, only for 'tagOpen'
   * @returns {boolean} false when the event was rejected because the limit is reached
   */
  function addToExcerpt(type, text, attribs) {
    if ((wordCount >= limit && tagCount == 0) || (tagCount === 1 && type === 'tagOpen' && wordCount >= limit)) {
      return false;
    }
    else if (wordCount < limit || tagCount) {
      if (type === 'text') {
        // `counter` is the helper declared in this file; the snippet has no `$scope`.
        var wordCountSubString = counter(text).wordCount;
        if (wordCountSubString + wordCount > limit && tagCount === 0) {
          // Outside of any tag: copy only as many words as still fit.
          var length = limit - wordCount;
          // Split on the same whitespace rule the counter uses, so that
          // `length` can never run past the end of wordList.
          var wordList = text.trim().split(/\s+/);
          for (var i = 0; i < length; i++) {
            excerpt += ' ' + wordList[i];
            wordCount++;
          }
        } else {
          // Fits entirely, or we are inside a tag: keep the text whole.
          wordCount += wordCountSubString;
          excerpt += text;
        }
      } else if (type === 'tagOpen') {
        excerpt += '<' + text;
        for (var prop in attribs) {
          excerpt += ' ' + prop + '="' + attribs[prop] + '"';
        }
        excerpt += '>';
      } else if (type === 'tagClose') {
        excerpt += '</' + text + '>';
      }
    }
    return true;
  }

  var parser = new htmlParser.Parser({
    onopentag: function (name, attribs) {
      if(wordCount < limit){
        ++tagCount;
        addToExcerpt('tagOpen', name, attribs);
      }
    },
    ontext: function (text) {
      if(wordCount < limit){
        addToExcerpt('text', text);
      }
    },
    onclosetag: function (tagName) {
      // Still emit closing tags after the limit while a tag is open.
      if(wordCount < limit || tagCount > 0){
        addToExcerpt('tagClose', tagName);
        --tagCount;
      }
    }
  });
  parser.write(markup);
  parser.end();
  return excerpt;
};
Usage:
// Maximum number of words to keep in the excerpt.
var wordCountLimit = 20;
// Placeholder input: replace with the real markup/text.
var markup = "/* some markup/text */";
// Build the excerpt by passing the markup and the word limit to htmlSubString.
var excerpt = htmlSubString(markup, wordCountLimit);
Now, you'll definitely be able to find some HTML tag matching regular expressions. That said, I don't recommend it. At first you'll be all happy and everything will work just fine. Then tomorrow you'll find a small edge-case. "No worries!" You'll say, as you modify the expression to account for the discrepancy. Then the next day, a new tweak, and a new one, and yet another, etc etc until you can't take it anymore.
I highly recommend you find an already established HTML parsing library. There appears to be quite a few on npm. This one seems to be fairly popular.
PS - You did fine with your question. I wish more questions took as much time and provided as much detail :)

jQuery write each character with formatting

let´s say I have a string in JavaScript:
// A CSS declaration belongs in the `style` attribute; inside `class` it would
// be treated as a class name and no colour would be applied.
var str = '<span style="color:red;">Hello</span> my name is <span style="color:red;">Julian</span>';
So I would like to print one character every 300ms so that it looks as if it is being typed. Sure, I can make a for-loop over the characters and print each one inside an element, but the problem is the formatting. If I use the for-loop it will even print the span tag character by character, and that will cause problems.
How to print every character after a while with formatting?
This is quite an evil trick, but you can put a white div on top of your string and move it step by step every 300ms. In this way a letter appears every 300ms. The only problem is to determine how big each step needs to be, since the width of each character will vary.
A way to determine the width is to load each character separately into a div and measure its width. Of course you first need to strip the HTML. In order to do so you could use How to strip HTML tags with jQuery?
You could split all characters into an array and then loop like this:
// Builds one "frame" per visible character: AllChars[n] is the markup prefix
// that shows the first n+1 visible characters. Characters that belong to a tag
// (<...>) or an entity (&...;) are buffered and never become a frame on their
// own, so the browser never receives a half-written tag.
var str = '<span class="red">Hello</span> my name is <span class="red">Julian</span>',
AllChars = [],
SetTxt = true,
newstr = '';
for (var i = 0; i < str.length; i++) {
  var ch = str.charAt(i);
  newstr += ch;
  if (ch === '<' || ch === '&') {
    // Start of a tag or entity: stop emitting frames until it is closed.
    SetTxt = false;
  } else if (SetTxt) {
    AllChars.push(newstr);
  } else if (ch === '>' || ch === ';') {
    // A tag/entity that ends the string still needs a final frame.
    if (str.length === (i + 1)) {
      AllChars.push(newstr);
    }
    SetTxt = true;
  }
}
// Indexed loop instead of for...in: array indices stay numbers and inherited
// enumerable properties are not visited.
for (i = 0; i < AllChars.length; i++) {
  // The frame index is passed as an extra setTimeout argument so each callback
  // sees its own value.
  setTimeout(function (frameIndex) {
    $('#text').html(AllChars[frameIndex]);
  }, 300 * i, i);
}
Check the jsfiddle for a working example: http://jsfiddle.net/2R9Dk/1/
You need to parse html tags and text separately. Something like:
var str = '<span class="colored">Hello</span> my name is <span class="colored bold">Julian</span>';
/**
 * Types `text` into the element matched by `selector`, one character per
 * `speed` milliseconds. Tags are never printed character by character: each
 * opening/closing tag pair is appended at once and the characters between
 * them are then typed into the freshly created child element.
 *
 * NOTE(review): the pairing logic consumes tags two at a time (html[0],
 * html[1]), so it presumably expects flat, non-nested tag pairs; confirm
 * before feeding it nested markup.
 *
 * @param {string} text - Markup to print.
 * @param {string} [selector] - jQuery selector of the target; defaults to 'body'.
 * @param {number} speed - Delay between characters in milliseconds.
 */
function printTextByLetter(text, selector, speed) {
  // String#match returns null when nothing matches (empty or tag-less input);
  // fall back to empty arrays so the timer callback never dereferences null.
  var html = text.match(/(<[^<>]*>)/gi) || [],
      sel = selector || 'body',
      // Every tag becomes a single '{!!}' token, every other character its own token.
      arr = text.replace(/(<[^<>]*>)/gi, '{!!}').match(/(\{!!\}|.)/gi) || [],
      counter = 0, cursor = jQuery(sel), insideTag,
      interval = setInterval(printChar, speed);

  // Consumes one token per tick; stops the interval once the queue is empty.
  function printChar() {
    if (!arr[0]) {
      clearInterval(interval);
      return;
    }
    if (arr[0] !== '{!!}') {
      cursor.append(arr[0]);
    } else if (!insideTag) {
      // Opening tag: append the open/close pair and continue typing inside it.
      insideTag = true;
      cursor.append(html[0], html[1]);
      html.shift();
      html.shift();
      cursor = cursor.children().eq(counter);
    } else {
      // Closing tag: step back out to the container.
      insideTag = false;
      cursor = cursor.parent();
      counter++;
    }
    arr.shift();
  }
}
// Start the typing animation once the DOM is ready.
jQuery(function ($) {
  var targetSelector = '#target';
  var delayMs = 300;
  printTextByLetter(str, targetSelector, delayMs);
});
And don't forget to clear intervals - it does affect performance.
Example on JSFiddle: http://jsfiddle.net/36kLf/7/

Search for Special Characters in Text

I have this Javascript code below that searches for any word entered into a textfield. Now, the text that needs to be searched through contains special characters like the apostrophe and dot in this sample text: "And the tribe of Zeb′u·lun."
Now, how can I adopt my JS code to include those special characters? If I type Zebulun with no special characters in my textfield the search function cannot find it.
// Shared search state used by the highlight helpers below.
var SearchResultCount = 0; // number of matches highlighted so far
var a = [];                // highlight spans, in the order they were created
var oneTime = false;
/**
 * Helper: recursively searches `element` and its child nodes and highlights
 * every occurrence of `keyword`.
 *
 * Text nodes are searched case-insensitively: the node text is lower-cased
 * before the search, so `keyword` must already be lower case. Each hit is
 * wrapped in a styled <span class="MyAppHighlight">, scrolled into view,
 * pushed onto the global array `a` and counted in the global
 * SearchResultCount. Element nodes are descended into unless they carry an
 * inline `display: none` or are a <select>.
 *
 * @param {Node} element - DOM node to search; falsy values are ignored.
 * @param {string} keyword - Lower-cased search term; an empty term is ignored.
 */
function HighlightAllOccurencesOfStringForElement(element,keyword) {
  // An empty keyword "matches" at index 0 of every string, which would make
  // the text-node loop below spin forever, so bail out early.
  if (!element || !keyword) {
    return;
  }
  if (element.nodeType == 3) { // Text node
    while (true) {
      var value = element.nodeValue; // Search for keyword in text node
      var idx = value.toLowerCase().indexOf(keyword);
      if (idx < 0) break; // not found, abort

      // Wrap the matched text in a highlight span.
      var span = document.createElement("span");
      span.appendChild(document.createTextNode(value.substr(idx,keyword.length)));
      span.setAttribute("class","MyAppHighlight");

      // Move everything after the match into a new text node, cut it (and the
      // match) off the current node, then insert span + remainder behind it.
      var rest = document.createTextNode(value.substr(idx+keyword.length));
      element.deleteData(idx, value.length - idx);
      var next = element.nextSibling;
      element.parentNode.insertBefore(span, next);
      element.parentNode.insertBefore(rest, next);

      // Keep searching in the remainder only.
      element = rest;
      span.scrollIntoView();
      span.style.background= "-webkit-linear-gradient(top, #FAE309, #FFF7AA)";
      span.style.fontWeight = "bold";
      span.style.padding = "2px";
      span.style.borderRadius = "5px";
      span.style.boxShadow = "0px 0px 2px black";
      a.push(span); // remember the span created for this match
      SearchResultCount++; // update the counter
    }
  } else if (element.nodeType == 1) { // Element node
    if (element.style.display != "none" && element.nodeName.toLowerCase() != 'select') {
      // Walk backwards: highlighting inserts new siblings after the current child.
      for (var i=element.childNodes.length-1; i>=0; i--) {
        HighlightAllOccurencesOfStringForElement(element.childNodes[i],keyword);
      }
    }
  }
}
/**
 * Main entry point of the search: clears the existing highlights, then
 * highlights every occurrence of `keyword` in the document body. The keyword
 * is lower-cased here because the helper compares it against lower-cased
 * node text.
 */
function HighlightAllOccurencesOfString(keyword) {
  RemoveAllHighlights();
  var root = document.body;
  var needle = keyword.toLowerCase();
  HighlightAllOccurencesOfStringForElement(root, needle);
}
First off, there's a closing parenthesis missing in the following line:
var idx = value.toLowerCase().indexOf(keyword;
So I wouldn't be surprised if the function didn't work at all.
To answer your question, one way to do this is to wash out every character except alphabetic characters using the String variable's native replace() function. You'd have to do this with both the search term and the text you're searching, so you'll have to pass both your value and your keyword variables through the function. Something like this:
keyword = cleanUp(keyword);
var value = cleanUp(element.nodeValue);
...
/**
 * Removes every character that is not an ASCII letter, so that a search term
 * and the searched text can be compared without punctuation or marks.
 *
 * @param {string} toClean - Text to normalise.
 * @returns {string} `toClean` with all characters outside a-z / A-Z removed.
 */
function cleanUp(toClean) {
  // Deletes non-alphabetic characters (including spaces) by replacing them
  // with nothing. If you want to leave spaces intact, use /[^a-zA-Z ]/g instead.
  // The /g flag is required; without it only the first match is removed.
  return toClean.replace(/[^a-zA-Z]/g, "");
}
Once this is done, use the same function you've got going to compare the two strings.

JS Highlighting Of Text Strings

Using a small plugin to highlight text strings from an input field within a form.
JavaScript text highlighting jQuery plugin
I have modified the code slightly to allow the user to add multiple strings into the input field by separating them with a comma, which will highlight multiple keywords.
This works great in this instance: stack,overflow,js
However if I was to type in stack,overflow,js, (note the , after the last string) it hangs the browser and becomes unresponsive.
The code I am using is:
// On DOM ready: highlight every comma-separated keyword from the search box.
$(function() {
  if ( $("input#searchterm").val().length > 0 ) {
    $("input#searchterm").addClass('marked-purple');
    var arrayOfKeyWords= $("input#searchterm").val().split(',');
    for (var i=0;i<arrayOfKeyWords.length;i++) {
      // A trailing or doubled comma produces an empty entry; passing it to
      // highlight() is what makes the browser hang, so skip blank entries.
      if (arrayOfKeyWords[i].trim() === '') {
        continue;
      }
      $('.message p.messagecontent').highlight(arrayOfKeyWords[i]);
    }
  }
});
Does anyone have an idea of how to ignore the last comma if the user has added it?
Thanks in advance
You could do an empty value check before calling highlight(), like this:
// Highlight each comma-separated keyword from the search box; empty entries
// (for example from a trailing comma) are skipped.
if ($("#searchterm").val().length > 0) {
  $("#searchterm").addClass('marked-purple');
  var arrayOfKeyWords = $("#searchterm").val().split(',');
  for (var i = 0; i < arrayOfKeyWords.length; i++) {
    // Nothing to highlight for an empty entry.
    if (arrayOfKeyWords[i] === "") {
      continue;
    }
    $('.message p.messagecontent').highlight(arrayOfKeyWords[i]);
  }
}
Alternatively you could strip the trailing commas if there are any.
// Highlight each comma-separated keyword from the search box.
if ($("#searchterm").val().length > 0) {
  $("#searchterm").addClass('marked-purple');
  // Stripping trailing commas alone is not enough: "," becomes "" and still
  // splits into [""], and "a,,b" keeps an empty entry in the middle. Drop every
  // empty entry so highlight() never receives an empty keyword.
  var arrayOfKeyWords = $("#searchterm").val().replace(/,+$/, '').split(',').filter(function (keyword) {
    return keyword !== '';
  });
  for (var i = 0; i < arrayOfKeyWords.length; i++) {
    $('.message p.messagecontent').highlight(arrayOfKeyWords[i]);
  }
}
Thats how you can remove last comma of a string:
// Drop a single trailing comma from the keyword list, if there is one.
var str = "stack,overflow,js,";
var lastIndex = str.length - 1;
if (str.charAt(lastIndex) === ",") {
  str = str.substring(0, lastIndex);
}

Split a CSV string by line skipping newlines contained between quotes

If the following regex can split a csv string by line.
// Try \r\n first: with /\r|\r?\n/ the lone-\r alternative wins on a CRLF and
// the remaining \n is matched separately, producing a spurious empty line.
var lines = csv.split(/\r\n|\r|\n/g);
How could this be adapted to skip newline chars that are contained within a CSV value (Ie between quotes/double-quotes)?
Example:
2,"Evans & Sutherland","230-132-111AA",,"Visual","P
CB",,1,"Offsite",
If you don't see it, here's a version with the newlines visible:
2,"Evans & Sutherland","230-132-111AA",,"Visual","P\r\nCB",,1,"Offsite",\r\n
The part I'm trying to skip over is the newline contained in the middle of the "PCB" entry.
Update:
I probably should've mentioned this before but this is a part of a dedicated CSV parsing library called jquery-csv. To provide a better context I have added the current parser implementation below.
Here's the code for validating and parsing an entry (ie one line):
/**
 * Validates and parses a single CSV entry (one line) into an array of values.
 *
 * @param {string} csv - One line of CSV text.
 * @param {Object} [meta] - Optional settings.
 * @param {string} [meta.separator] - Value separator; defaults to $.csvDefaults.separator.
 * @param {string} [meta.delimiter] - Text delimiter (quote); defaults to $.csvDefaults.delimiter.
 * @returns {?string[]} The parsed values, or null when the line is not well-formed CSV.
 */
$.csvEntry2Array = function(csv, meta) {
  meta = (meta !== undefined ? meta : {});
  var separator = 'separator' in meta ? meta.separator : $.csvDefaults.separator;
  var delimiter = 'delimiter' in meta ? meta.delimiter : $.csvDefaults.delimiter;

  // Escapes regex metacharacters so characters such as "|" or "." can be used
  // as separator/delimiter without changing the meaning of the patterns.
  function escapeRegExp(text) {
    return String(text).replace(/[\\^$.*+?()[\]{}|-]/g, '\\$&');
  }
  var separatorSource = escapeRegExp(separator);
  var delimiterSource = escapeRegExp(delimiter);

  // Instantiates a template regex: a bare S stands for the separator and a
  // bare D for the delimiter. Escape sequences are matched first and copied
  // verbatim, so \S / \s inside the templates are NOT mistaken for
  // placeholders (a plain replace(/S/g, ...) would rewrite [\S\s]).
  function buildRegExp(template, flags) {
    var source = template.source.replace(/\\[\s\S]|[SD]/g, function (token) {
      if (token === 'S') { return separatorSource; }
      if (token === 'D') { return delimiterSource; }
      return token;
    });
    return new RegExp(source, flags);
  }

  // build the CSV validator regex
  var reValid = buildRegExp(/^\s*(?:D[^D\\]*(?:\\[\S\s][^D\\]*)*D|[^SD\s\\]*(?:\s+[^SD\s\\]+)*)\s*(?:S\s*(?:D[^D\\]*(?:\\[\S\s][^D\\]*)*D|[^SD\s\\]*(?:\s+[^SD\s\\]+)*)\s*)*$/);
  // build the CSV line parser regex
  var reValue = buildRegExp(/(?!\s*$)\s*(?:D([^D\\]*(?:\\[\S\s][^D\\]*)*)D|([^SD\s\\]*(?:\s+[^SD\s\\]+)*))\s*(?:S|$)/g, 'g');
  // matches a backslash-escaped delimiter inside a delimited value
  var reDelimiterUnescape = buildRegExp(/\\D/g, 'g');

  // Return NULL if input string is not well formed CSV string.
  if (!reValid.test(csv)) {
    return null;
  }

  // "Walk" the string using replace with callback.
  var output = [];
  csv.replace(reValue, function(m0, m1, m2) {
    if (m1 !== undefined) {
      // Delimited value: remove the backslash from any escaped delimiters.
      output.push(m1.replace(reDelimiterUnescape, delimiter));
    } else if (m2 !== undefined) {
      // Undelimited value.
      output.push(m2);
    }
    return '';
  });

  // Handle special case of empty last value.
  var reEmptyLast = buildRegExp(/S\s*$/);
  if (reEmptyLast.test(csv)) {
    output.push('');
  }
  return output;
};
Note: I haven't tested yet but I think I could probably incorporate the last match into the main split/callback.
This is the code that does the split-by-line part:
/**
 * Splits CSV text into lines and parses each line with $.csvEntry2Array.
 *
 * Lines are split on every \r\n, \r or \n, including ones that sit inside a
 * delimited value, so values containing line breaks are not supported here.
 *
 * @param {string} csv - Complete CSV text.
 * @param {Object} [meta] - Optional settings.
 * @param {string} [meta.separator] - Value separator; defaults to $.csvDefaults.separator.
 * @param {string} [meta.delimiter] - Text delimiter; defaults to $.csvDefaults.delimiter.
 * @param {number} [meta.skip] - Number of leading lines to skip; defaults to $.csvDefaults.skip.
 * @returns {Array} One entry per processed line, as returned by $.csvEntry2Array.
 */
$.csv2Array = function(csv, meta) {
  meta = (meta !== undefined ? meta : {});
  var separator = 'separator' in meta ? meta.separator : $.csvDefaults.separator;
  var delimiter = 'delimiter' in meta ? meta.delimiter : $.csvDefaults.delimiter;
  var skip = 'skip' in meta ? meta.skip : $.csvDefaults.skip;

  // process by line
  var lines = csv.split(/\r\n|\r|\n/g);
  var output = [];
  // Indexed loop instead of for...in: indices stay numeric and inherited
  // enumerable properties of Array.prototype are never treated as lines.
  for (var i = 0; i < lines.length; i++) {
    if (i < skip) {
      continue;
    }
    // process each value
    var line = $.csvEntry2Array(lines[i], {
      delimiter: delimiter,
      separator: separator
    });
    output.push(line);
  }
  return output;
};
For a breakdown of how that regex works, take a look at this answer. Mine is a slightly adapted version. I consolidated the single and double quote matching to match just one text delimiter and made the delimiter/separators dynamic. It does a great job of validating entries, but the line-splitting solution I added on top is pretty frail and breaks on the edge case I described above.
I'm just looking for a solution that walks the string extracting valid entries (to pass on to the entry parser) or fails on bad data returning an error indicating the line the parsing failed on.
Update:
splitLines: function(csv, delimiter) {
var state = 0;
var value = "";
var line = "";
var lines = [];
function endOfRow() {
lines.push(value);
value = "";
state = 0;
};
csv.replace(/(\"|,|\n|\r|[^\",\r\n]+)/gm, function (m0){
switch (state) {
// the start of an entry
case 0:
if (m0 === "\"") {
state = 1;
} else if (m0 === "\n") {
endOfRow();
} else if (/^\r$/.test(m0)) {
// carriage returns are ignored
} else {
value += m0;
state = 3;
}
break;
// delimited input
case 1:
if (m0 === "\"") {
state = 2;
} else {
value += m0;
state = 1;
}
break;
// delimiter found in delimited input
case 2:
// is the delimiter escaped?
if (m0 === "\"" && value.substr(value.length - 1) === "\"") {
value += m0;
state = 1;
} else if (m0 === ",") {
value += m0;
state = 0;
} else if (m0 === "\n") {
endOfRow();
} else if (m0 === "\r") {
// Ignore
} else {
throw new Error("Illegal state");
}
break;
// un-delimited input
case 3:
if (m0 === ",") {
value += m0;
state = 0;
} else if (m0 === "\"") {
throw new Error("Unquoted delimiter found");
} else if (m0 === "\n") {
endOfRow();
} else if (m0 === "\r") {
// Ignore
} else {
throw new Error("Illegal data");
}
break;
default:
throw new Error("Unknown state");
}
return "";
});
if (state != 0) {
endOfRow();
}
return lines;
}
All it took is 4 states for a line splitter:
0: the start of an entry
1: the following is quoted
2: a second quote has been encountered
3: the following isn't quoted
It's almost a complete parser. For my use case, I just wanted a line splitter so I could provide a more granular approach to processing CSV data.
Note: Credit for this approach goes to another dev whom I won't name publicly without his permission. All I did was adapt it from a complete parser to a line-splitter.
Update:
Discovered a few broken edge cases in the previous lineSplitter implementation. The one provided should be fully RFC 4180 compliant.
As I have noted in a comment there is no complete solution just using single regex.
A novel method using several regexps by splitting on comma and joining back strings with embedded commas is described here:-
Personally I would use a simple finite state machine as described here
The state machine has more code, but the code is cleaner and its clear what each piece of code is doing. Longer term this will be much more reliable and maintainable.
It's not a good idea to use regex's to parse. Better to use it to detect the "bad" splits and then merge them back:
// Split on every line feed first (this also cuts quoted values that contain a
// line break), then detect those "bad" splits and glue the halves back together.
var lines = csv.split(/\r?\n/g);
// Indices of lines that are the second half of a value cut in two.
var bad = [];
// Walk from the last line upwards; index 0 is never tested itself, it can only
// be the first half of a merge.
for(var i=lines.length-1; i> 0; i--) {
// count the quotes on the line
// NOTE(review): the leading [^\\] is optional, so a quote preceded by a
// backslash still appears to match; confirm before relying on escape handling.
var m = lines[i].match(/[^\\]?\"/g);
// if there is an odd number of them, this line and the line before it belong
// together; the extra i-- skips that preceding line, so a value is only ever
// re-joined from two physical lines
if((m ? m.length : 0) % 2 == 1) { bad.push(i--); }
}
// `bad` is in descending order, so merging starts at the bottom of the list and
// earlier indices stay valid; each pair is re-joined with \r\n
for(var b=0,len=bad.length; b < len; b++) {
lines.splice(bad[b]-1, 2, lines[bad[b]-1]+"\r\n"+lines[bad[b]]);
}
(This answer is licensed under both CC0 and WTFPL.)
Be careful- That newline is PART of that value. It's not PCB, it's P\nCB.
However, why can't you just use string.split(',')? If need be, you can run through the list and cast to ints or remove the padded quotation marks.

Categories

Resources