Can't manage to remove diacritics from a filename - javascript

I stumble on a strange behaviour while trying to replace diacritics and special character of a filename.
The function works correctly with a string, but when I try this exact same sequence with a filename, it doesn't
.
$(document).on('change', 'input[type=file]', function() {
var files = this.files;
for (var i = 0; i < files.length; i++) {
(function(file) {
// Assuming the file name is áñǽŦõş
var _string = 'áñǽŦõş.jpg',
_filename = file.name;
$('.string .result').html(convertAscii(_string));
$('.filename .result').html(convertAscii(_filename));
})(files[i]);
}
});
function convertAscii(str) {
str = str.replace(/Ä/g, 'Ae');
str = str.replace(/æ|ǽ|ä/g, 'ae');
str = str.replace(/À|Á|Â|Ã|Å|Ǻ|Ā|Ă|Ą|Ǎ|А/g, 'A');
str = str.replace(/à|á|â|ã|å|ǻ|ā|ă|ą|ǎ|ª|а/g, 'a');
str = str.replace(/Б/g, 'B');
str = str.replace(/б/g, 'b');
str = str.replace(/Ç|Ć|Ĉ|Ċ|Č|Ц/g, 'C');
str = str.replace(/ç|ć|ĉ|ċ|č|ц/g, 'c');
str = str.replace(/Ð|Ď|Đ/g, 'Dj');
str = str.replace(/ð|ď|đ/g, 'dj');
str = str.replace(/Д/g, 'D');
str = str.replace(/д/g, 'd');
str = str.replace(/È|É|Ê|Ë|Ē|Ĕ|Ė|Ę|Ě|Е|Ё|Э/g, 'E');
str = str.replace(/è|é|ê|ë|ē|ĕ|ė|ę|ě|е|ё|э/g, 'e');
str = str.replace(/Ф/g, 'F');
str = str.replace(/ƒ|ф/g, 'f');
str = str.replace(/Ĝ|Ğ|Ġ|Ģ|Г/g, 'G');
str = str.replace(/ĝ|ğ|ġ|ģ|г/g, 'g');
str = str.replace(/Ĥ|Ħ|Х/g, 'H');
str = str.replace(/ĥ|ħ|х/g, 'h');
str = str.replace(/Ì|Í|Î|Ï|Ĩ|Ī|Ĭ|Ǐ|Į|İ|И/g, 'I');
str = str.replace(/ì|í|î|ï|ĩ|ī|ĭ|ǐ|į|ı|и/g, 'i');
str = str.replace(/Ĵ|Й/g, 'J');
str = str.replace(/ĵ|й/g, 'j');
str = str.replace(/Ķ|К/g, 'K');
str = str.replace(/ķ|к/g, 'k');
str = str.replace(/Ĺ|Ļ|Ľ|Ŀ|Ł|Л/g, 'L');
str = str.replace(/ĺ|ļ|ľ|ŀ|ł|л/g, 'l');
str = str.replace(/М/g, 'M');
str = str.replace(/м/g, 'm');
str = str.replace(/Ñ|Ń|Ņ|Ň|Н/g, 'N');
str = str.replace(/ñ|ń|ņ|ň|ʼn|н/g, 'n');
str = str.replace(/Ö/g, 'Oe');
str = str.replace(/œ|ö/g, 'oe');
str = str.replace(/Ò|Ó|Ô|Õ|Ō|Ŏ|Ǒ|Ő|Ơ|Ø|Ǿ|О/g, 'O');
str = str.replace(/ò|ó|ô|õ|ō|ŏ|ǒ|ő|ơ|ø|ǿ|º|о/g, 'o');
str = str.replace(/П/g, 'P');
str = str.replace(/п/g, 'p');
str = str.replace(/Ŕ|Ŗ|Ř|Р/g, 'R');
str = str.replace(/ŕ|ŗ|ř|р/g, 'r');
str = str.replace(/Ś|Ŝ|Ş|Ș|Š|С/g, 'S');
str = str.replace(/ś|ŝ|ş|ș|š|ſ|с/g, 's');
str = str.replace(/Ţ|Ț|Ť|Ŧ|Т/g, 'T');
str = str.replace(/ţ|ț|ť|ŧ|т/g, 't');
str = str.replace(/Ü/g, 'Ue');
str = str.replace(/ü/g, 'ue');
str = str.replace(/Ù|Ú|Û|Ũ|Ū|Ŭ|Ů|Ű|Ų|Ư|Ǔ|Ǖ|Ǘ|Ǚ|Ǜ|У/g, 'U');
str = str.replace(/ù|ú|û|ũ|ū|ŭ|ů|ű|ų|ư|ǔ|ǖ|ǘ|ǚ|ǜ|у/g, 'u');
str = str.replace(/В/g, 'V');
str = str.replace(/в/g, 'v');
str = str.replace(/Ý|Ÿ|Ŷ|Ы/g, 'Y');
str = str.replace(/ý|ÿ|ŷ|ы/g, 'y');
str = str.replace(/Ŵ/g, 'W');
str = str.replace(/ŵ/g, 'w');
str = str.replace(/Ź|Ż|Ž|З/g, 'Z');
str = str.replace(/ź|ż|ž|з/g, 'z');
str = str.replace(/Æ|Ǽ/g, 'AE');
str = str.replace(/ß/g, 'ss');
str = str.replace(/IJ/g, 'IJ');
str = str.replace(/ij/g, 'ij');
str = str.replace(/Œ/g, 'OE');
str = str.replace(/Ч/g, 'Ch');
str = str.replace(/ч/g, 'ch');
str = str.replace(/Ю/g, 'Ju');
str = str.replace(/ю/g, 'ju');
str = str.replace(/Я/g, 'Ja');
str = str.replace(/я/g, 'ja');
str = str.replace(/Ш/g, 'Sh');
str = str.replace(/ш/g, 'sh');
str = str.replace(/Щ/g, 'Shch');
str = str.replace(/щ/g, 'shch');
str = str.replace(/Ж/g, 'Zh');
str = str.replace(/ж/g, 'zh');
return str;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<input type="file" name="pic" accept="image/*">
<div>
<div class="string">Converted string : <span class="result"></span></div>
<div class="filename">Converted filename : <span class="result"></span></div>
</div>
I also made a fiddle showing the issue, you'll see what I mean when uploading a jpg named áñaéTõş.
I'm getting the file name from an input. The weird behaviour is this :
console.log(convertAscii(_string)); // Correct => anaeTos
console.log(convertAscii(_filename)); // Wrong => áñaéTõş
I believe it's because diacritics are interpreted as independent characters, but does someone have an idea of how to fix it?

I am convinced your code does not work because of codeppoint problems. The characters in question may look like the specials chars you expect, but are ultimately not equal. Hence, the string replacement does not work
To solve this issue, as suggested by #ClasG, you can perform a Unicode normalization with an external library, since JavaScript doesn't have this functionality built in.
unorm is low-level lib that can do the normalization for you.
There are also more high-level libs like iconv lite, which have more dependencies.
It is also required to use Unicode characters in your code instead to make the comparisons with the normalized text work.
Here is my modified fiddle. I hope this solves the issue for you.
$(document).on('change', 'input[type=file]', function() {
var files = this.files;
for (var i = 0; i < files.length; i++) {
(function(file) {
// Assuming the file name is áñǽŦõş
var _string = 'äöüß', // 'áñǽŦõş.jpg',
_filename = file.name;
$('.string .result').html(convertAscii(_string.normalize('NFC')));
$('.filename .result').html(convertAscii(_filename.normalize('NFC')));
})(files[i]);
}
});
function convertAscii(str) {
//convert German umlauts (normalized using nfc: Canonical Decomposition, followed by Canonical Composition) to Ascii
tr = {
"\u00e4": "ae",
"\u00fc": "ue",
"\u00f6": "oe",
"\u00df": "ss"
}
str = str.replace(/[\u00e4|\u00fc|\u00f6|\u00df]/g, function($0) {
return tr[$0]
})
//... add more..
return str;
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src='https://raw.githubusercontent.com/walling/unorm/master/lib/unorm.js'></script>
<input type="file" name="pic" accept="image/*">
</script>
<div>
<div class="string">Converted string : <span class="result"></span></div>
<div class="filename">Converted filename : <span class="result"></span></div>
</div>

Related

JavaScript regex.exec() should intended to use an assembled regEx

if I use ( https://jsfiddle.net/fgsvzn4a/ ) :
var text = "ui1pu";
var regExParameter = '\d+';
var regEx = '/(.*)' + regExParameter + '(.*)/gi';
var matches = regEx.exec(text);
if(matches && matches[1]) {
var str1 = matches[1];
var str2 = matches[2];
var newStr = str1 + str2
console.log(newStr);
}
i get this error:
Paused on exception
TypeError: regEx.exec is not a function
this prototype is working (inspired by https://stackoverflow.com/a/15845184/2891692 ):
var text = "my1bla";
var matches = /(my)\d+(.*)/gi.exec(text);
if(matches && matches[1]) {
var str1 = matches[1];
var str2 = matches[2];
var newStr = str1 + str2
alert(newStr);
}
but i want to use input parameters to build the regex (first example).
i get ReferenceError: Regex is not defined if i try this:
var text = "ui1pu";
var regExParameter = '\d+';
var regExString = '/(.*)' + regExParameter + '(.*)/gi';
var regEx = new Regex(regExString);
var matches = regEx.exec(text);
if(matches && matches[1]) {
var str1 = matches[1];
var str2 = matches[2];
var newStr = str1 + str2
console.log(newStr);
}
any idea?
Use the RegExp constructor. Note that the slashes should be omitted from the string and the flags should be passed as the second argument.
var text = "ui1pu";
var regExParameter = '\\d+';
var regExString = '(.*)' + regExParameter + '(.*)';
var regEx = new RegExp(regExString, 'gi');
var matches = regEx.exec(text);
if(matches && matches[1]) {
var str1 = matches[1];
var str2 = matches[2];
var newStr = str1 + str2
console.log(newStr);
}
Based on the previous correct answers, I was able to come up with this more comprehensive solution. Its a little modification of this correct answer by Unmitigated and all answers :
// window.replaceDOM = function (regExParameter, replaceParameter) {
function replaceDOM(regExParameter, replaceParameter) {
// let reg = '/' + search + '/';
// regExParameter = '\\d+';
console.log('regExParameter=' + regExParameter);
const regExString = '(\\w+)' + regExParameter + '(\\w+)'; // ugly but needet. that escape, that double backslash
console.log('regExString=' + regExString);
const regEx = new RegExp(regExString, 'gi');
let elems = document.body.getElementsByTagName("*");
for (i in elems) {
let ele = elems[i];
if(ele.classList){
const val = ele.classList.value;
if(!val)
continue;
const matches = regEx.exec(val);
if(matches && matches[1]) {
console.log('val=' + val);
const str1 = matches[1];
const str2 = matches[2];
const valNew = str1 + replaceParameter + str2
// alert(valNew);
console.log('valNew=' + valNew);
ele.classList.value = ele.classList.value.replace(val, valNew);
}
}
};
}
.my1bla {
background-color: black;
}
.mybla {
background-color: blue;
}
<button onclick="replaceDOM('y\\d+','y')">
change the style class from DIV using regEx</button>
<div class="my1bla">
I am a DIV element
</div>

node.js replace() - Invalid string length error

I've just coded a little script to replace all variables from a .txt file to their values in a JS file
Example:
Txt file example (values):
Hi = "HELLO WORLD",
Hey = /someregex/g,
Hh = 'haha';
Script example:
window[Hi] = true;
"someregex hi".replace(Hey, "")
window[Hh] = 1;
Here's my script:
var fs = require("fs")
var script = fs.readFileSync("./script.js", "utf8");
var vars = fs.readFileSync("./vars.txt", "utf8");
var replace = {}
var spl = vars.replace(/\r\n/g, "").replace(/ /g, "").split(",");
console.log("caching variables")
for(var dt of spl) {
var splt = dt.split(" = ");
var name = splt[0];
var val = splt[1];
if(!name || !val) {
continue;
}
if(val.endsWith(";")) {
val = val.slice(0, -1);
}
replace[name] = val;
}
console.log("Variables are in cache!")
console.log("Replacing variables in script")
var i = 1;
var t = Object.keys(replace).length;
for(var var_name in replace) {
var var_val = replace[var_name];
var regex = new RegExp(var_name, "g");
console.log(i, "/", t, "Replacing", var_name, "with", var_val, "regex", regex)
script = script.replace(regex, var_val);
i++;
}
console.log("DONE!")
fs.writeFileSync("./dec.js", script, "utf8")
However, when i ~= 100, I have this error:
RangeError: Invalid string length
at RegExp.[Symbol.replace] (native)
at String.replace (native)
EDIT: also, I can see that node.js process is using ~400MB of RAM and I have the error when it reaches 900MB
What's wrong?

Regex exact text match is not working

I am trying to perform exact match of the text keyed in a textbox but, somehow it is working as partial match. I tried different options but could not figure out the cause.
RegExp.escape = function (text) {
//escape the +,[,?... characters
return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
}
var resultLbl = $('#ResultLbl');
$('#SearchTxtBox').bind('change keyup', function () {
resultLbl.text('');
var options = [];
options.push('[1]My]');
options.push('[2]My Name]');
options.push('[3]Name]');
options.push('[2]My Name]');
var searchStr = RegExp.escape($.trim($(this).val()));
var searchArr = [];
if (searchStr != '' && searchStr != null) {
searchStr = searchStr.replace(/\,/g, '\\ ')
searchArr = searchStr.split('\\ ');
}
var search = searchArr[0];
search = search.replace(/[.?*+^$[\]\\(){}|-]/g, '');
var regex = new RegExp($.trim(search), 'gi');
$.each(options, function (i, option) {
if (option.match(regex) !== null) {
resultLbl.append(option + ' ');
}
});
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
Search:
<input type="text" id="SearchTxtBox"/>
<br/>
<label id='ResultLbl'></label>
Expectation:
If you key in the text 'Name' in textbox, only '[3]Name' should be
matched.
If you key in the text 'My Name' in textbox, only '[2]My
Name' should be matched.
Any suggestions are appreciated.
Instead of complex pre-processing, you can just test if the string matches this pattern:
^\[\d+\]<searchStr>\]$
with
var regex = new RegExp("^\\[\\d+\\]" + $.trim(searchStr) + "\\]$", 'gi');
Here is an updated snippet:
RegExp.escape = function (text) {
//escape the +,[,?... characters
return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
}
var resultLbl = $('#ResultLbl');
$('#SearchTxtBox').bind('change keyup', function () {
resultLbl.text('');
var options = [];
options.push('[1]My]');
options.push('[2]My Name]');
options.push('[3]Name]');
options.push('[2]My Name]');
var searchStr = RegExp.escape($.trim($(this).val()));
var regex = new RegExp("^\\[\\d+\\]" + $.trim(searchStr) + "\\]$", 'gi');
$.each(options, function (i, option) {
if (option.match(regex) !== null) {
resultLbl.append(option + ' ');
}
});
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
Search:
<input type="text" id="SearchTxtBox"/>
<br/>
<label id='ResultLbl'></label>

javascript, exception for string/object manipulation

So, I have to functions to turn a string to an object and an object to a string, however I need to account for an except and I am not sure how. Let me show you what I have
parseObjectToUrl: function (obj){
var myStr = "";
var first_iteration = true;
for (var p in obj) {
if(first_iteration){
myStr += p + "=";
first_iteration = false;
}else{
myStr += "&" + p + "=";
}
tObj = obj[p];
var first_inner = true;
for(t in tObj){
if(first_inner){
myStr += t;
first_inner = false;
}else{
myStr += "," + t;
}
yObj = tObj[t];
for( y in yObj){
myStr += "/" + yObj[y];
}
}
}
return myStr;
},
parseObjectFromUrl : function(url){
var builtObj = {};
//remove first slash
url = url.slice(0, 0) + url.slice(1);
var ch = url.split('&');
var tempParent = {};
for (var p in ch) {
var tempSub = {};
var arr = ch[p].split('=');
var keyParent = arr[0];
var splitInside = arr[1].split(",");
for (var i in splitInside) {
var sub = splitInside[i].split('/');
var subKey = sub[0];
tempSub[subKey] = sub.slice(1);
}
tempParent[keyParent] = tempSub;
}
return tempParent
}
So these the string looks like
/module1=mod1/2/3/4,mod2/2/3/4&module2=mod2/3/4/5
and the object looks like
myObj =
{
module1 : { mod1 : [2,3,4] , mod2 [2,3,4]} ,
module2 : { mod2 : [3,4,5]}
}
So these functions work fine for me however I (unfortunately) need to be able to handle the case when the user adds an "/" into the options like -
myObj =
{
module1 : { mod1 : [2/,3/,4/] , mod2 [2,3,4]} ,
module2 : { mod2 : [3,4,5]}
}
I'm sure it's going to throw a wrench in my function because i'm splitting by the "/", so I'm not sure how to get around this. Would i escape the slash? How would that fit into the functions if so? Looking for any advice on this issue. Thanks!
Edit:
I was able to encode the escaped url like :
obj.replace(/([/-])/g, "%2F");
to an escaped url, hoever I am having trouble doing the reverse of this. here is my attempt.
obj.replace(/(%2F)/g, "/");
in my opinion it would be better to use url arrays, but keep in mind the characters for your url could be limited:
maximum length of HTTP GET request?
having said that one could do something like this:
module1[]=1&module1[]=2&module2[]=4&module2[]=3
this is equal to the following pseudo code:
$_GET["module1"] = array(1,2);
$_GET["module2"] = array(4,3);
and use encodeURIComponent & decodeURIComponent for your values
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent

javascript: how to modify the query string?

Like
?a=1&b=2&c=3
I only want to change b=6 while keep the other things the same, how to do it?
Following function if you have to replace any b=XXX with b=newBValue
function ReplaceB(strQuery,newBValue)
{
var idxStart= strQuery.indexOf("b=")
if(idxStart<0)
return; // b= not found, nothing to change
var idxFin=strQuery.substr(0,idxStart).indexOf("&");
var newQuery;
if(idxFin<0)
newQuery = strQuery.substr(0,idxStart) + "b="+newBValue
else
newQuery = strQuery.substr(0,idxStart) + "b="+newBValue+strQuery.substr(idxStart+idxFin)
return newQuery;
}
here a small function to do this:
jQuery.replaceURLParam = function (oldURL, replaceParam, newVal) {
var iStart = oldURL .indexOf(replaceParam + '=');
var iEnd = oldURL .substring(iStart + 1).indexOf('&');
var sEnd = oldURL .substring(iStart + iEnd + 1);
var sStart = oldURL .substring(0, iStart);
var newURl = sStart + replaceParam + '=' + newVal;
if (iEnd > 0) {
newURl += sEnd;
}
return newURl;
}
document.body.innerHTML = jQuery.replaceURLParam('www.foo.com?a=1&b=2&c=3', 'b', 6) ;
demo: http://jsfiddle.net/3rkrn/
var queryStr = "?a=1&b=2&c=3";
var startTag = "b=";
var endTag = "&";
var index1 = queryStr.indexOf(startTag) + startTag.length;
var index2 = queryStr.indexOf(endTag,index1);
var newValue = 23;
queryStr = queryStr.substr(0, index1) + newValue + queryStr.substr(index2);
alert(queryStr);
See it here : http://jsfiddle.net/aQL8p/
var test = "?a=1&b=2&c=3".replace(/b=2/gi,"b=6");
alert(test);
Example
var test = "?a=1&b=2&c=3".replace("b=2","b=6");
alert(test);
Example
var test = "?a=1&b=2&c=3".split("b=2").join("b=6");
alert(test);
Example
Regardless of Number
Want to change to value of b regardless of number it is already? Use:
var test = "?a=1&b=2&c=3".replace(/b=\d/gi, "b=6");
alert(test);
Example
var queryString = '?a=1&b=2&c=3'.replace(/([?&;])b=2([$&;])/g, '$1b=6$2');
jsFiddle.
(JavaScript regex does not support lookbehinds).
See if this works for you - a more or less universal urlFor:
https://gist.github.com/4108452
You would simply do
newUrl = urlFor(oldUrl, {b:6});

Categories

Resources