Search for Special Characters in Text

Search for Special Characters in Text - javascript

I have this Javascript code below that searches for any word entered into a textfield. Now, the text that needs to be searched through contains special characters like the apostrophe and dot in this sample text: "And the tribe of Zeb′u·lun."
Now, how can I adopt my JS code to include those special characters? If I type Zebulun with no special characters in my textfield the search function cannot find it.
var SearchResultCount = 0;
var a = new Array();
var oneTime = false;
// helper function, recursively searches in elements and their child nodes
function HighlightAllOccurencesOfStringForElement(element,keyword) {
if (element) {
if (element.nodeType == 3) { // Text node
while (true) {
var value = element.nodeValue; // Search for keyword in text node
var idx = value.toLowerCase().indexOf(keyword;
if (idx < 0) break; // not found, abort
var span = document.createElement("span");
var text = document.createTextNode(value.substr(idx,keyword.length));
span.appendChild(text);
span.setAttribute("class","MyAppHighlight");
text = document.createTextNode(value.substr(idx+keyword.length));
element.deleteData(idx, value.length - idx);
var next = element.nextSibling;
element.parentNode.insertBefore(span, next);
element.parentNode.insertBefore(text, next);
element = text;
span.scrollIntoView();
span.style.background= "-webkit-linear-gradient(top, #FAE309, #FFF7AA)";
span.style.fontWeight = "bold";
span.style.padding = "2px";
span.style.borderRadius = "5px";
span.style.boxShadow = "0px 0px 2px black";
a.push(span); // SET THIS CODE HERE
SearchResultCount++; // update the counter
}
} else if (element.nodeType == 1) { // Element node
if (element.style.display != "none" && element.nodeName.toLowerCase() != 'select') {
for (var i=element.childNodes.length-1; i>=0; i--) {
HighlightAllOccurencesOfStringForElement(element.childNodes[i],keyword);
}
}
}
}
}
// the main entry point to start the search
function HighlightAllOccurencesOfString(keyword) {
RemoveAllHighlights();
HighlightAllOccurencesOfStringForElement(document.body, keyword.toLowerCase());
}

First off, there's a closing parenthesis missing in the following line:
var idx = value.toLowerCase().indexOf(keyword;
So I wouldn't be surprised if the function didn't work at all.
To answer your question, one way to do this is to wash out every character except alphabetic characters using the String variable's native replace() function. You'd have to do this with both the search term and the text you're searching, so you'll have to pass both your value and your keyword variables through the function. Something like this:
keyword = cleanUp(keyword);
var value = cleanUp(element.nodeValue);
...
function cleanUp(toClean) {
cleaned = toClean.replace([^a-zA-Z],""); //Deletes non-alphabetic characters (including spaces) by replacing them with nothing. If you want to leave spaces intact, use [^a-zA-Z ] instead.
return cleaned;
}
Once this is done, use the same function you've got going to compare the two strings.

Related

UIWebView JavaScript Highlight - how to Highlight special characters? (utf8 encoding?)

I was trying to wrap my head around this but I kind of gave up searching. I don't know much about Javascript but I have a .js highlight function for my UIWebView.
My problem is that it does not Highlight text if it includes any special characters like: "',à etc."
I am parsing a NSString onto the .js function which probably is causing the trouble but I can't figure out if I have to parse a utf8 char or if I have to convert the string to utf8 inside my .js.
Here is my .js code
function MyApp_HighlightAllOccurencesOfStringForElement(element,keyword) {
if (element) {
if (element.nodeType == 3) { // Text node
while (true) {
var value = element.nodeValue; // Search for keyword in text node
var idx = value.toLowerCase().indexOf(keyword);
if (idx < 0) break; // not found, abort
var span = document.createElement("span");
var text = document.createTextNode(value.substr(idx,keyword.length));
span.appendChild(text);
span.setAttribute("class","MyAppHighlight");
span.style.backgroundColor="#C4B695";
span.style.color="black";
text = document.createTextNode(value.substr(idx+keyword.length));
element.deleteData(idx, value.length - idx);
var next = element.nextSibling;
element.parentNode.insertBefore(span, next);
element.parentNode.insertBefore(text, next);
element = text;
span.scrollIntoView();
MyApp_SearchResultCount++; // update the counter
}
} else if (element.nodeType == 1) { // Element node
if (element.style.display != "none" && element.nodeName.toLowerCase() != 'select') {
for (var i=element.childNodes.length-1; i>=0; i--) {
MyApp_HighlightAllOccurencesOfStringForElement(element.childNodes[i],keyword);
}
}
}
}
}
// the main entry point to start the search
function MyApp_HighlightAllOccurencesOfString(keyword) {
// MyApp_RemoveAllHighlights();
MyApp_HighlightAllOccurencesOfStringForElement(document.body, keyword.toLowerCase());
}
Please let me know what other information I can provide.

in case anyone else was wondering about that, the solution was a simple escape character. Thanks everyone.
editedSearchString = [editedSearchString stringByReplacingOccurrencesOfString:#"á" withString:#"\\á"];

jQuery write each character with formatting

let´s say I have a string in JavaScript:
var str = '<span class="color:red;">Hello</span> my name is <span class="color:red;">Julian</span>';
So I would like to print each 300ms one character so that it looks as if it is being entered. Sure I can make a for-loop for each character and print it inside an element, but the problem is the formatting. If I use the for-loop it will even print the span-tag separately, but that will causing problems.
How to print every character after a while with formatting?

This quite an evil trick but you can use a white div on top of your string and move it step by step every 300ms. In this way a letter appears every 300ms. The only problem is to determine how big each step needs to be since the width of each character will vary.
A way to determine the width is to load all the characters separate in a div and measure the width. Of course you first need to strip the html. In order to so you could use How to strip HTML tags with jQuery?

You could split all characters into an array and then loop like this:
var str = '<span class="red">Hello</span> my name is <span class="red">Julian</span>',
AllChars = [],
SetTxt = true,
newstr = '';
for (var i = 0; i < str.length; i++) {
newstr += str.substr(i,1);
if((str.substr(i,1) == '<') || (str.substr(i,1) == '&')){
SetTxt = false;
}else if(SetTxt){
AllChars.push(newstr);
}else if((str.substr(i,1) == '>') || (str.substr(i,1) == ';')){
if(str.length == (i+1)){
AllChars.push(newstr);
}
SetTxt = true;
}
}
for (var i in AllChars){
setTimeout(function(i){
$('#text').html(AllChars[i]);
},300 * i,i);
}
Check the jsfiddle for a working example: http://jsfiddle.net/2R9Dk/1/

You need to parse html tags and text separately. Something like:
var str = '<span class="colored">Hello</span> my name is <span class="colored bold">Julian</span>';
function printTextByLetter(text, selector, speed) {
var html = text.match(/(<[^<>]*>)/gi),
sel = selector || 'body',
arr = text.replace(/(<[^<>]*>)/gi, '{!!}').match(/(\{!!\}|.)/gi),
counter = 0, cursor = jQuery(sel), insideTag,
interval = setInterval(printChar, speed);
function printChar() {
if(arr[0]){
if(arr[0] === '{!!}') {
if(!insideTag) {
insideTag = true;
cursor.append(html[0], html[1]);
html.shift();
html.shift();
cursor = cursor.children().eq(counter);
} else {
insideTag = false;
cursor = cursor.parent();
counter++;
}
} else {
cursor.append(arr[0]);
}
arr.shift();
} else {
clearInterval(interval);
}
}
}
// DOM ready
jQuery(function($){
printTextByLetter(str, '#target', 300);
});
And don't forget to clear intervals - it does affect performance.
Example on JSFiddle: http://jsfiddle.net/36kLf/7/

Custom Mask Function does not work when inserting character in the middle Javascript/JQuery

I have a function:
function maskInput(input, location, delimiter, length) {
//Get the delimiter positons
var locs = location.split(',');
//Iterate until all the delimiters are placed in the textbox
for (var delimCount = 0; delimCount <= locs.length; delimCount++) {
for (var inputCharCount = 0; inputCharCount <= input.length; inputCharCount++) {
//Check for the actual position of the delimiter
if (inputCharCount == locs[delimCount]) {
//Confirm that the delimiter is not already present in that position
if (input.substring(inputCharCount, inputCharCount + 1) != delimiter) {
input = input.substring(0, inputCharCount) + delimiter + input.substring(inputCharCount, input.length);
}
}
}
}
input = input.length > length ? input.substring(0, length) : input;
return input;
}
I use this in:
$(document).on('keypress paste drop blur', '#my_phone_number', function() {
//remove any nondigit characters
var myVal = $(this).val().toString().replace(/\D/g,'');
$(this).val(myVal);
var inVal = maskInput(myVal, '3,7', '-', 12);
$(this).val(inVal);
});
This works perfectly but when I try to remove a digit from the middle of string and then add it again it appends it to the end of string, does not stick to the current position.
Example:
Entered String: '1234567890'
After Mask Function Is Called: '123-456-7890'
Number 5 removed and entered Number 8 instead of 5: '123-467-8908'
Note that it appended number 8 at the end of string.
Any help is appreciated, thank you

You should use keyup instead of keypress, since on keypress the value of input is not yet changed and you apply your filter prior to new value is posted to input. That is the reason why new character is appended.
$(document).on('keyup paste drop blur', '#my_phone_number', function() {
...
});
Example of working code here http://jsbin.com/upinux/1/edit

How can I ignore leading and trailing linebreaks within an html "code" block using css, javascript or jquery?

In my HTML source code, I have a code block like the following (I use showdown and highlight.js on this page):
<pre><code class="cpp">
double myNumber = (double)4;
</code></pre>
My problem is that the first linebreak remains part of the "code" block. It's probably because of the enclosing "pre" block, but I need that there because highlight.js expects it (also apparently the HTML5 standard recommends it). The code is rendered like this (note the leading line break):
So my question is, using css, javascript or jquery, how can I remove leading or trailing linebreaks from "code" blocks like this one?

You could use this hack:
pre:first-line {
line-height: 0;
}

Here's another approach using Javascript that also solves the problem:
<script>
window.onload = function (){
// remove leading linebreaks from code blocks.
var pre = document.getElementsByTagName("code");
for (var i = 0, len = pre.length; i < len; i++) {
var text = pre[i].firstChild.nodeValue;
pre[i].firstChild.nodeValue = text.replace(/^\n+|\n+$/g, "");
}
}
</script>
Someone else posted this, then deleted their answer, but I thought it was worth preserving.

That's how pre works by default: it honors line breaks and whitespace. If you don't want the newline to render, then you have to remove it. Either outright remove it from the source or comment it out if you care how the source looks.
http://jsfiddle.net/VL8tG/
<pre><code class="cpp"><!--
-->double myNumber = (double)4;<!--
--></code></pre>

I just remove them manually before the highlighter starts.
const trimLine = node =>
node.firstChild.nodeValue = node.firstChild.nodeValue.replace(/^\n+|\n+$/g, "");
window.onload = function () {
Array.prototype.slice
.call(document.getElementsByTagName("code"), 0)
.map(trimLine);
hljs.initHighlightingOnLoad();
}

Demo: http://jsfiddle.net/WjVVs/4/.
Tested with Chrome and FF on the PC. It does not work in IE 9 when the plugin is applied to a code element (it appears to work fine when applied to a pre element). I can't find a suitable workaround, but feel free to comment/update.
This is a modified version from another answer. This plugin attempts to remove extra indentation caused by the natural flow of the document. I've modified it to be smarter about leading whitespace.
If it works correctly, you should see something like:
Usage
$("code").prettyPre(); // any selector here
HTML with leading whitespace and extra indentation
<div>
<pre><code class="cpp">
double myNumber = (double)4;
// another line
// another line
// this is purposely indented further
for( var i = 0; i < 100; i++ ){
}
</code></pre>
</div>
Plugin
(function( $ ) {
$.fn.prettyPre = function( method ) {
var defaults = {
ignoreExpression: /\s/ // what should be ignored?
};
var methods = {
init: function( options ) {
this.each( function() {
var context = $.extend( {}, defaults, options );
var $obj = $( this );
var usingInnerText = true;
var text = $obj.get( 0 ).innerText;
// some browsers support innerText...some don't...some ONLY work with innerText.
if ( typeof text == "undefined" ) {
text = $obj.html();
usingInnerText = false;
}
// use the first line as a baseline for how many unwanted leading whitespace characters are present
var superfluousSpaceCount = 0;
var pos = 0;
var currentChar = text.substring( 0, 1 );
while ( context.ignoreExpression.test( currentChar ) ) {
if(currentChar !== "\n"){
superfluousSpaceCount++;
}else{
superfluousSpaceCount = 0;
}
currentChar = text.substring( ++pos, pos + 1 );
}
// split
var parts = text.split( "\n" );
var reformattedText = "";
// reconstruct
var length = parts.length;
for ( var i = 0; i < length; i++ ) {
// remove leading whitespace (represented by an empty string)
if(i === 0 && parts[0]=== ""){
continue;
}
// cleanup, and don't append a trailing newline if we are on the last line
reformattedText += parts[i].substring( superfluousSpaceCount ) + ( i == length - 1 ? "" : "\n" );
}
// modify original
if ( usingInnerText ) {
$obj.get( 0 ).innerText = reformattedText;
}
else {
// This does not appear to execute code in any browser but the onus is on the developer to not
// put raw input from a user anywhere on a page, even if it doesn't execute!
$obj.html( reformattedText );
}
} );
}
}
if ( methods[method] ) {
return methods[method].apply( this, Array.prototype.slice.call( arguments, 1 ) );
}
else if ( typeof method === "object" || !method ) {
return methods.init.apply( this, arguments );
}
else {
$.error( "Method " + method + " does not exist on jQuery.prettyPre." );
}
}
} )( jQuery );

These functions use a class (added to the pre) to remove leading and trailing whitespace
function removeWhitespace(indent) {
// Get a list of all elements that need whitespace removed by indent value (will have class `indent-X`)
// List may be 0 long - loop simply doesn't run
var preElements = document.getElementsByClassName('indent-'+indent);
for (i = 0; i < preElements.length; i++) {
preElements[i].innerHTML = preElements[i].innerHTML.split('\n'+' '.repeat(indent)).join('\n').split('\n'+' '.repeat(indent-2)+'</code>').join('</code>').split("\n").slice(1,-1).join("\n");
//split('\n'+' '.repeat(indent)).join('\n') -- Split at every newline followed by X spaces. Then join together with the newlines.
// .split('\n'+' '.repeat(indent-2)+'</code>').join('</code>') -- The lastline will have 2 less spaces, so remove those, and the newline at the end. Add the tag back in.
//.split("\n").slice(1,-2).join("\n"); -- Remove the first and last lines.
}
}
function removeWhitespaces() {
// Loop over all indents, 2 to 40
for (indent = 2; indent <= 40; indent+=2) {
removeWhitespace(indent);
}
}
Simply add the class indent-X where X is the amount of whitespace you want to remove to the pre.
JSFiddle
function removeWhitespace(indent) {
// Get a list of all elements that need indent removed by indent value (will have class `indent-X`)
// List may be 0 long - loop simply doesn't run
var preElements = document.getElementsByClassName('indent-' + indent);
for (i = 0; i < preElements.length; i++) {
preElements[i].innerHTML = preElements[i].innerHTML.split('\n' + ' '.repeat(indent)).join('\n').split('\n' + ' '.repeat(indent - 2) + '</code>').join('</code>').split("\n").slice(1, -2).join("\n");
//split('\n'+' '.repeat(indent)).join('\n') -- Split at every newline followed by X spaces. Then join together with the newlines.
// .split('\n'+' '.repeat(indent-2)+'</code>').join('</code>') -- The lastline will have 2 less spaces, so remove those, and the newline at the end. Add the tag back in.
//.split("\n").slice(1,-1).join("\n"); -- Remove the first and last lines.
// Remove the clickme element.
document.getElementById('clickme').innerHTML = '';
}
}
function removeWhitespaces() {
// Loop over all indents, 2 to 40
for (indent = 2; indent <= 40; indent += 2) {
removeWhitespace(indent);
}
}
.indent-14 {
background-color: #ccc;
}
<body>
<div id="clickme" onclick="removeWhitespaces()">
Click Me
</div>
<pre class="indent-14">
<code>
function createCORSRequest(method, url) {
var request = new XMLHttpRequest();
if ('withCredentials' in request) {
request.open(method, url, true);
} else if (typeof XDomainRequest != 'undefined') {
request = new XDomainRequest();
request.open(method, url);
} else {
request = null;
}
return request;
}
</code>
</pre>
</body>

Split a CSV string by line skipping newlines contained between quotes

If the following regex can split a csv string by line.
var lines = csv.split(/\r|\r?\n/g);
How could this be adapted to skip newline chars that are contained within a CSV value (Ie between quotes/double-quotes)?
Example:
2,"Evans & Sutherland","230-132-111AA",,"Visual","P
CB",,1,"Offsite",
If you don't see it, here's a version with the newlines visible:
2,"Evans & Sutherland","230-132-111AA",,"Visual","P\r\nCB",,1,"Offsite",\r\n
The part I'm trying to skip over is the newline contained in the middle of the "PCB" entry.
Update:
I probably should've mentioned this before but this is a part of a dedicated CSV parsing library called jquery-csv. To provide a better context I have added the current parser implementation below.
Here's the code for validating and parsing an entry (ie one line):
$.csvEntry2Array = function(csv, meta) {
var meta = (meta !== undefined ? meta : {});
var separator = 'separator' in meta ? meta.separator : $.csvDefaults.separator;
var delimiter = 'delimiter' in meta ? meta.delimiter : $.csvDefaults.delimiter;
// build the CSV validator regex
var reValid = /^\s*(?:D[^D\\]*(?:\\[\S\s][^D\\]*)*D|[^SD\s\\]*(?:\s+[^SD\s\\]+)*)\s*(?:S\s*(?:D[^D\\]*(?:\\[\S\s][^D\\]*)*D|[^SD\s\\]*(?:\s+[^SD\s\\]+)*)\s*)*$/;
reValid = RegExp(reValid.source.replace(/S/g, separator));
reValid = RegExp(reValid.source.replace(/D/g, delimiter));
// build the CSV line parser regex
var reValue = /(?!\s*$)\s*(?:D([^D\\]*(?:\\[\S\s][^D\\]*)*)D|([^SD\s\\]*(?:\s+[^SD\s\\]+)*))\s*(?:S|$)/g;
reValue = RegExp(reValue.source.replace(/S/g, separator), 'g');
reValue = RegExp(reValue.source.replace(/D/g, delimiter), 'g');
// Return NULL if input string is not well formed CSV string.
if (!reValid.test(csv)) {
return null;
}
// "Walk" the string using replace with callback.
var output = [];
csv.replace(reValue, function(m0, m1, m2) {
// Remove backslash from any delimiters in the value
if (m1 !== undefined) {
var reDelimiterUnescape = /\\D/g;
reDelimiterUnescape = RegExp(reDelimiterUnescape.source.replace(/D/, delimiter), 'g');
output.push(m1.replace(reDelimiterUnescape, delimiter));
} else if (m2 !== undefined) {
output.push(m2);
}
return '';
});
// Handle special case of empty last value.
var reEmptyLast = /S\s*$/;
reEmptyLast = RegExp(reEmptyLast.source.replace(/S/, separator));
if (reEmptyLast.test(csv)) {
output.push('');
}
return output;
};
Note: I haven't tested yet but I think I could probably incorporate the last match into the main split/callback.
This is the code that does the split-by-line part:
$.csv2Array = function(csv, meta) {
var meta = (meta !== undefined ? meta : {});
var separator = 'separator' in meta ? meta.separator : $.csvDefaults.separator;
var delimiter = 'delimiter' in meta ? meta.delimiter : $.csvDefaults.delimiter;
var skip = 'skip' in meta ? meta.skip : $.csvDefaults.skip;
// process by line
var lines = csv.split(/\r\n|\r|\n/g);
var output = [];
for(var i in lines) {
if(i < skip) {
continue;
}
// process each value
var line = $.csvEntry2Array(lines[i], {
delimiter: delimiter,
separator: separator
});
output.push(line);
}
return output;
};
For a breakdown on how that reges works take a look at this answer. Mine is a slightly adapted version. I consolidated the single and double quote matching to match just one text delimiter and made the delimiter/separators dynamic. It does a great job of validating entiries but the line-splitting solution I added on top is pretty frail and breaks on the edge case I described above.
I'm just looking for a solution that walks the string extracting valid entries (to pass on to the entry parser) or fails on bad data returning an error indicating the line the parsing failed on.
Update:
splitLines: function(csv, delimiter) {
var state = 0;
var value = "";
var line = "";
var lines = [];
function endOfRow() {
lines.push(value);
value = "";
state = 0;
};
csv.replace(/(\"|,|\n|\r|[^\",\r\n]+)/gm, function (m0){
switch (state) {
// the start of an entry
case 0:
if (m0 === "\"") {
state = 1;
} else if (m0 === "\n") {
endOfRow();
} else if (/^\r$/.test(m0)) {
// carriage returns are ignored
} else {
value += m0;
state = 3;
}
break;
// delimited input
case 1:
if (m0 === "\"") {
state = 2;
} else {
value += m0;
state = 1;
}
break;
// delimiter found in delimited input
case 2:
// is the delimiter escaped?
if (m0 === "\"" && value.substr(value.length - 1) === "\"") {
value += m0;
state = 1;
} else if (m0 === ",") {
value += m0;
state = 0;
} else if (m0 === "\n") {
endOfRow();
} else if (m0 === "\r") {
// Ignore
} else {
throw new Error("Illegal state");
}
break;
// un-delimited input
case 3:
if (m0 === ",") {
value += m0;
state = 0;
} else if (m0 === "\"") {
throw new Error("Unquoted delimiter found");
} else if (m0 === "\n") {
endOfRow();
} else if (m0 === "\r") {
// Ignore
} else {
throw new Error("Illegal data");
}
break;
default:
throw new Error("Unknown state");
}
return "";
});
if (state != 0) {
endOfRow();
}
return lines;
}
All it took is 4 states for a line splitter:
0: the start of an entry
1: the following is quoted
2: a second quote has been encountered
3: the following isn't quoted
It's almost a complete parser. For my use case, I just wanted a line splitter so I could provide a more granual approach to processing CSV data.
Note: Credit for this approach goes to another dev whom I won't name publicly without his permission. All I did was adapt it from a complete parser to a line-splitter.
Update:
Discovered a few broken edge cases in the previous lineSplitter implementation. The one provided should be fully RFC 4180 compliant.

As I have noted in a comment there is no complete solution just using single regex.
A novel method using several regexps by splitting on comma and joining back strings with embedded commas is described here:-
Personally I would use a simple finite state machine as described here
The state machine has more code, but the code is cleaner and its clear what each piece of code is doing. Longer term this will be much more reliable and maintainable.

It's not a good idea to use regex's to parse. Better to use it to detect the "bad" splits and then merge them back:
var lines = csv.split(/\r?\n/g);
var bad = [];
for(var i=lines.length-1; i> 0; i--) {
// find all the unescaped quotes on the line:
var m = lines[i].match(/[^\\]?\"/g);
// if there are an odd number of them, this line, and the line after it is bad:
if((m ? m.length : 0) % 2 == 1) { bad.push(i--); }
}
// starting at the bottom of the list, merge lines back, using \r\n
for(var b=0,len=bad.length; b < len; b++) {
lines.splice(bad[b]-1, 2, lines[bad[b]-1]+"\r\n"+lines[bad[b]]);
}
(This answer is licensed under both CC0 and WTFPL.)

Be careful- That newline is PART of that value. It's not PCB, it's P\nCB.
However, why can't you just use string.split(',')? If need be, you can run through the list and cast to ints or remove the padded quotation marks.

Develop Reference

JavaScript is the programming language of the Web.

Search for Special Characters in Text - javascript

Related

UIWebView JavaScript Highlight - how to Highlight special characters? (utf8 encoding?)

jQuery write each character with formatting

Custom Mask Function does not work when inserting character in the middle Javascript/JQuery

How can I ignore leading and trailing linebreaks within an html "code" block using css, javascript or jquery?

Split a CSV string by line skipping newlines contained between quotes

Categories

Resources