Detect difference between & and %26 in location.hash - javascript

Analyzing the location.hash with this simple javascript code:
<script type="text/javascript">alert(location.hash);</script>
I have a difficult time separating out GET variables that contain a & (encoded as %26) and a & used to separate variables.
Example one:
code=php&age=15d
Example two:
code=php%20%26%20code&age=15d
As you can see, example 1 has no problems, but getting javascript to know that "code=php & code" in example two is beyond my abilities:
(Note: I'm not really using these variable names, and changing them to something else will only work so long as a search term does not match a search key, so I wouldn't consider that a valid solution.)

There is no difference between %26 and & in a fragment identifier (‘hash’). ‘&’ is only a reserved character with special meaning in a query (‘search’) segment of a URI. Escaping ‘&’ to ‘%26’ need be given no more application-level visibility than escaping ‘a’ to ‘%61’.
Since there is no standard encoding scheme for hiding structured data within a fragment identifier, you could make your own. For example, use ‘+XX’ hex-encoding to encode a character in a component:
hxxp://www.example.com/page#code=php+20+2B+20php&age=15d
function encodeHashComponent(x) {
return encodeURIComponent(x).split('%').join('+');
}
function decodeHashComponent(x) {
return decodeURIComponent(x.split('+').join('%'));
}
function getHashParameters() {
var parts= location.hash.substring(1).split('&');
var pars= {};
for (var i= parts.length; i-->0;) {
var kv= parts[i].split('=');
var k= kv[0];
var v= kv.slice(1).join('=');
pars[decodeHashComponent(k)]= decodeHashComponent(v);
}
return pars;
}

Testing on Firefox 3.1, it looks as if the browser converts hex codes to the appropriate characters when populating the location.hash variable, so there is no way JavaScript can know how the original was a single character or a hex code.
If you're trying to encode a character like & inside of your hash variables, I would suggest replacing it with another string.
You can also parse the string in weird ways, like (JS 1.6 here):
function pairs(xs) {
return xs.length > 1 ? [[xs[0], xs[1]]].concat(pairs(xs.slice(2))) : []
}
function union(xss) {
return xss.length == 0 ? [] : xss[0].concat(union(xss.slice(1)));
}
function splitOnLast(s, sub) {
return s.indexOf(sub) == -1 ? [s] :
[s.substr(0, s.lastIndexOf(sub)),
s.substr(s.lastIndexOf(sub) + sub.length)];
}
function objFromPairs(ps) {
var o = {};
for (var i = 0; i < ps.length; i++) {
o[ps[i][0]] = ps[i][1];
}
return o;
}
function parseHash(hash) {
return objFromPairs(
pairs(
union(
location.hash
.substr(1)
.split("=")
.map(
function (s) splitOnLast(s, '&')))))
}
>>> location.hash
"#code=php & code&age=15d"
>>> parseHash(location.hash)
{ "code": "php & code", "age": "15d" }

Just do the same as you do with the first example, but after you have split on the & then call unescape() to convert the %26 to & and the %20 to a space.
Edit:
Looks like I'm a bit out of date and you should be using decodeURIComponent() now, though I don't see any clear explanation on what it does differently to unescape(), apart from a suggestion that it doesn't handle Unicode properly.

This worked fine for me:
var hash = [];
if (location.hash) {
hash = location.href.split('#')[1].split('&');
}

Related

Where and why would you use tagged template literals? [duplicate]

I understand the syntax of ES6 tagged templates. What I don't see is the practical usability. When is it better than passing an object parameter, like the settings in jQuery's AJAX? $.ajax('url', { /*this guy here*/ })
Right now I only see the tricky syntax but I don't see why I would need/use it. I also found that the TypeScript team chose to implement it (in 1.5) before other important features. What is the concept behind tagged string templates?
You can use tagged templates to build APIs that are more expressive than regular function calls.
For example, I'm working on a proof-of-concept library for SQL queries on JS arrays:
let admins = sql`SELECT name, id FROM ${users}
WHERE ${user => user.roles.indexOf('admin') >= 0}`
Notice it has nothing to do with String interpolation; it uses tagged templates for readability. It would be hard to construct something that reads as intuitively with plain function calls - I guess you'd have something like this:
let admins = sql("SELECT name, id FROM $users WHERE $filter",
{ $users: users, $filter: (user) => user.roles.contains('admin') })
This example is just a fun side project, but I think it shows some of the benefits of tagged templates.
Another example, maybe more obvious, is i18n - a tagged template could insert locale-sensitive versions of your input.
See Sitepoint's explanation:
The final stage of template strings specification is about adding a custom function before the string itself to create a tagged template string.
...
For instance, here is a piece of code to block strings that try to inject custom DOM elements:
var items = [];
items.push("banana");
items.push("tomato");
items.push("light saber");
var total = "Trying to hijack your site <BR>";
var myTagFunction = function (strings,...values) {
var output = "";
for (var index = 0; index < values.length; index++) {
var valueString = values[index].toString();
if (valueString.indexOf(">") !== -1) {
// Far more complex tests can be implemented here :)
return "String analyzed and refused!";
}
output += strings[index] + values[index];
}
output += strings[index]
return output;
}
result.innerHTML = myTagFunction `You have ${items.length} item(s) in your basket for a total of $${total}`;
Tagged template strings can used for a lot of things like security, localization, creating your own domain specific language, etc.
They're useful because the function can (almost) completely define the meaning of the text inside it (almost = other than placeholders). I like to use the example of Steven Levithan's XRegExp library. It's awkward to use regular expressions defined as strings, because you have to double-escape things: Once for the string literal, and once for regex. This is one of the reasons we have regular expression literals in JavaScript.
For instance, suppose I'm doing maintenance on a site and I find this:
var isSingleUnicodeWord = /^\w+$/;
...which is meant to check if a string contains only "letters." Two problems: A) There are thousands of "word" characters across the realm of human language that \w doesn't recognize, because its definition is English-centric; and B) It includes _, which many (including the Unicode consortium) would argue is not a "letter."
Suppose in my work I've introduced XRegExp to the codebase. Since I know it supports \pL (\p for Unicode categories, and L for "letter"), I might quickly swap this in:
var isSingleUnicodeWord = XRegExp("^\pL+$"); // WRONG
Then I wonder why it didn't work, *facepalm*, and go back and escape that backslash, since it's being consumed by the string literal:
var isSingleUnicodeWord = XRegExp("^\\pL+$");
// ---------------------------------^
What a pain. Suppose I could write the actual regular expression without worrying about double-escaping?
I can: With a tagged template function. I can put this in my standard lib:
function xrex(strings, ...values) {
const raw = strings.raw;
let result = "";
for (let i = 0; i < raw.length; ++i) {
result += raw[i];
if (i < values.length) { // `values` always has one fewer entry
result += values[i];
}
}
return XRegExp(result);
}
Or alternately, this is a valid use case for reduce, and we can use destructuring in the argument list:
function xrex({raw}, ...values) {
return XRegExp(
raw.reduce(
(acc, str, index) => acc + str + (index < values.length ? values[index] : ""),
""
)
);
}
And then I can happily write:
const isSingleUnicodeWord = xrex`^\pL+$`;
Example:
// My tag function (defined once, then reused)
function xrex({raw}, ...values) {
const result = raw.reduce(
(acc, str, index) => acc + str + (index < values.length ? values[index] : ""),
""
);
console.log("Creating with:", result);
return XRegExp(result);
}
// Using it, with a couple of substitutions to prove to myself they work
let category = "L"; // L: Letter
let maybeEol = "$";
let isSingleUnicodeWord = xrex`^\p${category}+${maybeEol}`;
function test(str) {
console.log(str + ": " + isSingleUnicodeWord.test(str));
}
test("Русский"); // true
test("日本語"); // true
test("العربية"); // true
test("foo bar"); // false
test("$£"); // false
<script src="https://cdnjs.cloudflare.com/ajax/libs/xregexp/3.2.0/xregexp-all.min.js"></script>
The only thing I have to remember now is that ${...} is special because it's a placeholder. In this specific case, it's not a problem, I'm unlikely to want to apply a quantifier to the end-of-input assertion, but that's a coincidence...

ES6 tagged templates practical usability

I understand the syntax of ES6 tagged templates. What I don't see is the practical usability. When is it better than passing an object parameter, like the settings in jQuery's AJAX? $.ajax('url', { /*this guy here*/ })
Right now I only see the tricky syntax but I don't see why I would need/use it. I also found that the TypeScript team chose to implement it (in 1.5) before other important features. What is the concept behind tagged string templates?
You can use tagged templates to build APIs that are more expressive than regular function calls.
For example, I'm working on a proof-of-concept library for SQL queries on JS arrays:
let admins = sql`SELECT name, id FROM ${users}
WHERE ${user => user.roles.indexOf('admin') >= 0}`
Notice it has nothing to do with String interpolation; it uses tagged templates for readability. It would be hard to construct something that reads as intuitively with plain function calls - I guess you'd have something like this:
let admins = sql("SELECT name, id FROM $users WHERE $filter",
{ $users: users, $filter: (user) => user.roles.contains('admin') })
This example is just a fun side project, but I think it shows some of the benefits of tagged templates.
Another example, maybe more obvious, is i18n - a tagged template could insert locale-sensitive versions of your input.
See Sitepoint's explanation:
The final stage of template strings specification is about adding a custom function before the string itself to create a tagged template string.
...
For instance, here is a piece of code to block strings that try to inject custom DOM elements:
var items = [];
items.push("banana");
items.push("tomato");
items.push("light saber");
var total = "Trying to hijack your site <BR>";
var myTagFunction = function (strings,...values) {
var output = "";
for (var index = 0; index < values.length; index++) {
var valueString = values[index].toString();
if (valueString.indexOf(">") !== -1) {
// Far more complex tests can be implemented here :)
return "String analyzed and refused!";
}
output += strings[index] + values[index];
}
output += strings[index]
return output;
}
result.innerHTML = myTagFunction `You have ${items.length} item(s) in your basket for a total of $${total}`;
Tagged template strings can used for a lot of things like security, localization, creating your own domain specific language, etc.
They're useful because the function can (almost) completely define the meaning of the text inside it (almost = other than placeholders). I like to use the example of Steven Levithan's XRegExp library. It's awkward to use regular expressions defined as strings, because you have to double-escape things: Once for the string literal, and once for regex. This is one of the reasons we have regular expression literals in JavaScript.
For instance, suppose I'm doing maintenance on a site and I find this:
var isSingleUnicodeWord = /^\w+$/;
...which is meant to check if a string contains only "letters." Two problems: A) There are thousands of "word" characters across the realm of human language that \w doesn't recognize, because its definition is English-centric; and B) It includes _, which many (including the Unicode consortium) would argue is not a "letter."
Suppose in my work I've introduced XRegExp to the codebase. Since I know it supports \pL (\p for Unicode categories, and L for "letter"), I might quickly swap this in:
var isSingleUnicodeWord = XRegExp("^\pL+$"); // WRONG
Then I wonder why it didn't work, *facepalm*, and go back and escape that backslash, since it's being consumed by the string literal:
var isSingleUnicodeWord = XRegExp("^\\pL+$");
// ---------------------------------^
What a pain. Suppose I could write the actual regular expression without worrying about double-escaping?
I can: With a tagged template function. I can put this in my standard lib:
function xrex(strings, ...values) {
const raw = strings.raw;
let result = "";
for (let i = 0; i < raw.length; ++i) {
result += raw[i];
if (i < values.length) { // `values` always has one fewer entry
result += values[i];
}
}
return XRegExp(result);
}
Or alternately, this is a valid use case for reduce, and we can use destructuring in the argument list:
function xrex({raw}, ...values) {
return XRegExp(
raw.reduce(
(acc, str, index) => acc + str + (index < values.length ? values[index] : ""),
""
)
);
}
And then I can happily write:
const isSingleUnicodeWord = xrex`^\pL+$`;
Example:
// My tag function (defined once, then reused)
function xrex({raw}, ...values) {
const result = raw.reduce(
(acc, str, index) => acc + str + (index < values.length ? values[index] : ""),
""
);
console.log("Creating with:", result);
return XRegExp(result);
}
// Using it, with a couple of substitutions to prove to myself they work
let category = "L"; // L: Letter
let maybeEol = "$";
let isSingleUnicodeWord = xrex`^\p${category}+${maybeEol}`;
function test(str) {
console.log(str + ": " + isSingleUnicodeWord.test(str));
}
test("Русский"); // true
test("日本語"); // true
test("العربية"); // true
test("foo bar"); // false
test("$£"); // false
<script src="https://cdnjs.cloudflare.com/ajax/libs/xregexp/3.2.0/xregexp-all.min.js"></script>
The only thing I have to remember now is that ${...} is special because it's a placeholder. In this specific case, it's not a problem, I'm unlikely to want to apply a quantifier to the end-of-input assertion, but that's a coincidence...

I need a JavaScript procedure that reverses the following procedure

I have the following function that encrypts a string and I was hoping for a function that reverses the process.
function encryptStr(thisString)
{
retString = "";
/* Make retString a string of the 8-bit representations of
the ASCII values of its thisCharacters in order.
EXAMPLE: "abc" --> "011000010110001001100011"
since the ASCII values for 'a', 'b' and 'c'
are 97=01100001, 98=01100010 and 99=01100011
respectively
*/
for (i = 0, j = thisString.length; i < j; i++)
{
bits = thisString.charCodeAt(i).toString(2);
retString += new Array(8-bits.length+1).join('0') + bits;
}
/* Compress retString by taking each substring of 3, 4, ..., 9
consecutive 1's or 0's and it by the number of such consecutive
thisCharacters followed by the thisCharacter.
EXAMPLES:
"10101000010111" --> "10101401031"
"001100011111111111111" --> "0011319151"
*/
retString = retString.replace(/([01])\1{2,8}/g, function($0, $1) { return ($0.length + $1);});
return retString;
}
I tried to make a function and I'm probably doing it wrong because it's 50 lines already. I'm realizing that there's tons of error checking that needs to go on. For instance, I just realized a potential problem because JavaScript characters don't span the entire 127 ASCII values. Should I just give up? Is this a futile problem?
First, find the numbers in the string which are not 0 or 1. Then, expand them in the opposite way that the original function collapsed them. You can again use String.prototype.replace() here with a replacement function...
str.replace(/([2-9])([01])/g,
function(all, replacementCount, bit) {
return Array(+replacementCount + 1).join(bit);
});
Then, simply decode the bit stream back into characters with String.fromCharCode(). You'd need to chunk the stream into 8 bit chunks, and then perform the conversion. I chose to use Array.prototype.reduce() as it's quite suited to this task. Alternatively, you could use String.fromCharCode.apply(String, chunks.map(function(byte) { return parseInt(byte, 2); })) to get the resulting string.
Something like...
str.split(/(.{8})/g).reduce(function(str, byte) {
return str + String.fromCharCode(parseInt(byte, 2));
}, "");
Put it together, and you get a function like...
function decryptStr(thisString) {
return thisString.replace(/([2-9])([01])/g,
function (all, replacementCount, bit) {
return Array(+replacementCount + 1).join(bit);
}).split(/(.{8})/g).reduce(function (str, byte) {
return str + String.fromCharCode(parseInt(byte, 2));
}, "");
}
jsFiddle.
Also, remember to place var in front of your variable declarations, otherwise those variable identifiers will leak to the containing scope until they're resolved (which is usually the global object).

Firefox pref is destroying JSON

I have the following JSON: http://pastebin.com/Sh20StJY
SO removed the chars on my post, so look at the link for the real JSON
which was generated using JSON.stringify and saved on Firefox prefs (pref.setCharPref(prefName, value);)
The problem is that when I save the value, Firefox does something that corrupts the JSON. If I try a JSON.parse retrieving the value from the config I get an error:
Error: JSON.parse: bad control character in string literal
If I try to validate the above JSON (which was retrieved from the settings) I get an error at line 20, the tokens value contains two invalid characters.
If I try a JSON.parse immediately after JSON.stringify the error doesn't occur.
Do I have to set something to save in a different encoding? How can I fix it?
nsIPrefBranch.getCharPref() only works for ASCII data, your JSON data contains some non-ASCII characters however. You can store Unicode data in preferences, it is merely a little bit more complicated:
var str = Components.classes["#mozilla.org/supports-string;1"]
.createInstance(Components.interfaces.nsISupportsString);
str.data = value;
pref.setComplexValue(prefName, Components.interfaces.nsISupportsString, str);
And to read that preference:
var str = pref.getComplexValue(prefName, Components.interfaces.nsISupportsString);
var value = str.data;
For reference: Documentation
Your JSON appears to contain non-ASCII characters such as ½. Can you check what encoding everything is being handled in?
nsIPrefBranch.setCharPref() assumes that its input is UTF-8 encoded, and the return value of nsIPrefBranch.getCharPref() is always an UTF-8 string. If your input is a bytestring or a character in some other encoding, you will either need to switch to UTF-8, or encode and decode it yourself when interacting with preferences.
I did this in one place to fix this issue:
(function overrideJsonParse() {
if (!window.JSON || !window.JSON.parse) {
window.setTimeout(overrideJsonParse, 1);
return; //this code has executed before JSON2.js, try again in a moment
}
var oldParse = window.JSON.parse;
window.JSON.parse = function (s) {
var b = "", i, l = s.length, c;
for (i = 0; i < l; ++i) {
c = s[i];
if (c.charCodeAt(0) >= 32) { b += c; }
}
return oldParse(b);
};
}());
This works in IE8 (using json2 or whatever), IE9, Firefox and Chrome.
The code seems correct. Try use single quotes '..': '...' instead of double quotes "..":"..." .
I still couldn't find the solution, but I found a workaround:
var b = "";
[].forEach.call("{ JSON STRING }", function(c, i) {
if (c.charCodeAt(0) >= 32)
b += c;
});
Now b is the new JSON, and might work...

Convert JavaScript String to be all lowercase

How can I convert a JavaScript string value to be in all lowercase letters?
Example: "Your Name" to "your name"
var lowerCaseName = "Your Name".toLowerCase();
Use either toLowerCase or toLocaleLowerCase methods of the String object. The difference is that toLocaleLowerCase will take current locale of the user/host into account. As per § 15.5.4.17 of the ECMAScript Language Specification (ECMA-262), toLocaleLowerCase…
…works exactly the same as toLowerCase
except that its result is intended to
yield the correct result for the host
environment’s current locale, rather
than a locale-independent result.
There will only be a difference in the
few cases (such as Turkish) where the
rules for that language conflict with
the regular Unicode case mappings.
Example:
var lower = 'Your Name'.toLowerCase();
Also note that the toLowerCase and toLocaleLowerCase functions are implemented to work generically on any value type. Therefore you can invoke these functions even on non-String objects. Doing so will imply automatic conversion to a string value prior to changing the case of each character in the resulting string value. For example, you can apply toLowerCase directly on a date like this:
var lower = String.prototype.toLowerCase.apply(new Date());
and which is effectively equivalent to:
var lower = new Date().toString().toLowerCase();
The second form is generally preferred for its simplicity and readability. On earlier versions of IE, the first had the benefit that it could work with a null value. The result of applying toLowerCase or toLocaleLowerCase on null would yield null (and not an error condition).
Yes, any string in JavaScript has a toLowerCase() method that will return a new string that is the old string in all lowercase. The old string will remain unchanged.
So, you can do something like:
"Foo".toLowerCase();
document.getElementById('myField').value.toLowerCase();
toLocaleUpperCase() or lower case functions don't behave like they should do. For example, on my system, with Safari 4, Chrome 4 Beta, and Firefox 3.5.x, it converts strings with Turkish characters incorrectly. The browsers respond to navigator.language as "en-US", "tr", "en-US" respectively.
But there isn't any way to get user's Accept-Lang setting in the browser as far as I could find.
Only Chrome gives me trouble although I have configured every browser as tr-TR locale preferred. I think these settings only affect the HTTP header, but we can't access to these settings via JavaScript.
In the Mozilla documentation it says "The characters within a string are converted to ... while respecting the current locale. For most languages, this will return the same as ...". I think it's valid for Turkish, and it doesn't differ if it's configured as en or tr.
In Turkish it should convert "DİNÇ" to "dinç" and "DINÇ" to "dınç" or vice-versa.
Just an example for toLowerCase(), toUpperCase() and a prototype for the not yet available toTitleCase() or toProperCase():
String.prototype.toTitleCase = function() {
return this.split(' ').map(i => i[0].toUpperCase() + i.substring(1).toLowerCase()).join(' ');
}
String.prototype.toPropperCase = function() {
return this.toTitleCase();
}
var OriginalCase = 'Your Name';
var lowercase = OriginalCase.toLowerCase();
var upperCase = lowercase.toUpperCase();
var titleCase = upperCase.toTitleCase();
console.log('Original: ' + OriginalCase);
console.log('toLowerCase(): ' + lowercase);
console.log('toUpperCase(): ' + upperCase);
console.log('toTitleCase(): ' + titleCase);
I paid attention that lots of people are looking for strtolower() in JavaScript. They are expecting the same function name as in other languages, and that's why this post is here.
I would recommend using a native JavaScript function:
"SomE StriNg".toLowerCase()
Here's the function that behaves exactly the same as PHP's one (for those who are porting PHP code into JavaScript)
function strToLower (str) {
return String(str).toLowerCase();
}
Methods or functions: toLowerCase() and toUpperCase()
Description: These methods are used to cover a string or alphabet from lowercase to uppercase or vice versa. E.g., "and" to "AND".
Converting to uppercase:
Example code:
<script language=javascript>
var ss = " testing case conversion method ";
var result = ss.toUpperCase();
document.write(result);
</script>
Result: TESTING CASE CONVERSION METHOD
Converting to lowercase:
Example Code:
<script language=javascript>
var ss = " TESTING LOWERCASE CONVERT FUNCTION ";
var result = ss.toLowerCase();
document.write(result);
</script>
Result: testing lowercase convert function
Explanation: In the above examples,
toUpperCase() method converts any string to "UPPER" case letters.
toLowerCase() method converts any string to "lower" case letters.
Note that the function will only work on string objects.
For instance, I was consuming a plugin, and was confused why I was getting a "extension.tolowercase is not a function" JavaScript error.
onChange: function(file, extension)
{
alert("extension.toLowerCase()=>" + extension.toLowerCase() + "<=");
Which produced the error "extension.toLowerCase is not a function". So I tried this piece of code, which revealed the problem!
alert("(typeof extension)=>" + (typeof extension) + "<=");;
The output was "(typeof extension)=>object<=" - so aha, I was not getting a string var for my input. The fix is straightforward though - just force the darn thing into a String!:
var extension = String(extension);
After the cast, the extension.toLowerCase() function worked fine.
Option 1: Using toLowerCase()
var x = 'ABC';
x = x.toLowerCase();
Option 2: Using your own function
function convertToLowerCase(str) {
var result = '';
for (var i = 0; i < str.length; i++) {
var code = str.charCodeAt(i);
if (code > 64 && code < 91) {
result += String.fromCharCode(code + 32);
} else {
result += str.charAt(i);
}
}
return result;
}
Call it as:
x = convertToLowerCase(x);
Simply use JS toLowerCase()
let v = "Your Name"
let u = v.toLowerCase(); or
let u = "Your Name".toLowerCase();
const str = 'Your Name';
// convert string to lowercase
const lowerStr = str.toLowerCase();
// print the new string
console.log(lowerStr);
In case you want to build it yourself:
function toLowerCase(string) {
let lowerCaseString = "";
for (let i = 0; i < string.length; i++) {
// Find ASCII charcode
let charcode = string.charCodeAt(i);
// If uppercase
if (charcode > 64 && charcode < 97) {
// Convert to lowercase
charcode = charcode + 32
}
// Back to char
let lowercase = String.fromCharCode(charcode);
// Append
lowerCaseString = lowerCaseString.concat(lowercase);
}
return lowerCaseString
}
You can use the in built .toLowerCase() method on JavaScript strings. Example:
var x = "Hello";
x.toLowerCase();
Try this short way:
var lower = (str+"").toLowerCase();
Try
<input type="text" style="text-transform: uppercase"> <!-- uppercase -->
<input type="text" style="text-transform: lowercase"> <!-- lowercase -->
Demo - JSFiddle

Categories

Resources