regular expression to extract all the attributes of a div - javascript

I have a requirement to extract the all the attributes of some tag. so i want to go for regex for this.for example <sometag attr1="val1" attr2="val2" ></sometag>. i want the attributes and values as name value pairs.
Any help appreciated
thanks

var s = '<sometag attr1="val1" attr2="val2" ></sometag>';
var reg = /\s(\w+?)="(.+?)"/g;
while( true ) {
var res = reg.exec( s );
if( res !== null ) {
alert( 'name = '+res[1] );
alert( 'value = '+res[2] );
} else {
break;
}
}

preg_match_all( '/\s(\w+?)="(.+?)"/', '<sometag attr1="val1" attr2="val2" ></sometag>', $matches );
for( $i = 0; $i < count( $matches[1] ); ++$i ) {
$name = $matches[1][$i];
$value = $matches[2][$i];
echo 'name'.$i.' = "'.$name.'", value'.$i.' = "'.$value.'", ';
}
result:
name0 = "attr1", value0 = "val1", name1 = "attr2", value1 = "val2",
of course you need to tweak this to fit your need and deal with bad html.

You could use [jquery][1] to get all attrubutes of an element
$('sometag').getAttributes();
http://plugins.jquery.com/project/getAttributes

A regex is not required. Much easier, use Element.attributes():
var attributes = element.attributes();
"Returns an array (NamedNodeMap) containing all the attributes defined for the element in question, including custom attributes." See the link for examples on how to access each attribute and it's value.

You can't do this in native JavaScript by using a regular expression. Using native JavaScript you have a couple of basic options. You can enumerate all of the node's properties and intelligently filter to get just the things you want, like:
window.extractAttributes = function(node) {
var attribString = "";
var template = document.createElement(node.tagName);
template.innerHTML = node.innerHTML;
for (var key in node) {
if (typeof node[key] == "string" && node[key] != "" && ! template[key]) {
if (attribString.length > 0) {
attribString += ", ";
}
attribString += key + "=" + node[key];
}
}
return attribString;
};
Or you can use Element.attributes to iterate the list of declared attributes (note that this may not detect non-standard attribute values that are added dynamically at runtime), like:
window.extractAttributesAlternate = function(node) {
var attribString = "";
for (var index = 0; index < node.attributes.length; index++) {
if (attribString.length > 0) {
attribString += ", ";
}
attribString += node.attributes[index].name+ "=" + node.attributes[index].nodeValue;
}
return attribString;
};
Note that the first approach may not pick up custom attributes that have been defined in the page markup, and that the second approach may not pick up custom attributes that have been defined dynamically by JavaScript on the page.
Which gives us option 3. You can enumerate the attributes both ways, and then merge the results. This has the benefit of being able to reliably pick up upon any custom attributes no matter when/how they were added to the element.
Here's an example of all 3 options: http://jsfiddle.net/cgj5G/3/

You can use XML parser, because provided input is well-formed XML.

Do not use Regex for this! Javacript's DOM already has all the information you need, easily accessible.
List all attributes of a DOM element:
var element = document.getElementById('myElementName');
var attributes = element.attributes;
for(var attr=0; attr<attributes.length; attr++) {
alert(attributes[attr].name+" = "+attributes[attr].nodeValue);
}
(tested above code in FF5, IE8, Opera11.5, Chrome12: Works in all of them, even with non-standard attributes)

Given the text of a single element which includes its start and end tags (equivalent of the element's outerHTML), the following function will return an object containing all of the attribute name=value pairs. Each attribute value can be single quoted, double quoted or un-quoted. Attribute values are optional and if not present will take on the attribute's name.
function getElemAttributes(elemText) {
// Regex to pick out start tag from start of element's HTML.
var re_start_tag = /^<\w+\b(?:\s+[\w\-.:]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[\w\-.:]+))?)*\s*\/?>/;
var start_tag = elemText.match(re_start_tag);
start_tag = start_tag ? start_tag[0] : '';
// Regex to pick out attribute name and (optional) value from start tag.
var re_attribs = /\s+([\w\-.:]+)(\s*=\s*(?:"([^"]*)"|'([^']*)'|([\w\-.:]+)))?/g;
var attribs = {}; // Store attribute name=value pairs in object.
var match = re_attribs.exec(start_tag);
while (match != null) {
var attrib = match[1]; // Attribute name in $1.
var value = match[1]; // Assume no value specified.
if (match[2]) { // If match[2] is set, then attribute has a value.
value = match[3] ? match[3] : // Attribute value is in $3, $4 or $5.
match[4] ? match[4] : match[5];
}
attribs[attrib] = value;
match = re_attribs.exec(start_tag);
}
return attribs;
}
Given this input:
<sometag attr1="val1" attr2='val2' attr3=val3 attr4 >TEST</sometag>
This is the output:
attrib = {
attr1: "val1",
attr2: "val2",
attr3: "val3",
attr4: "attr4"
};

Related

Converting a badly stringfied json to a json object

I have some data i am pulling from a web service. This is the string
(Body:'3886' MessageProperties [headers={}, timestamp=null,
messageId=null, userId=null, receivedUserId=null, appId=null,
clusterId=null, type=null, correlationId=null,
correlationIdString=null, replyTo=null,
contentType=application/x-java-serialized-object,
contentEncoding=null, contentLength=0, deliveryMode=null,
receivedDeliveryMode=PERSISTENT, expiration=null, priority=0,
redelivered=false, receivedExchange=,
receivedRoutingKey=bottomlesspit, receivedDelay=null, deliveryTag=62,
messageCount=0, consumerTag=amq.ctag-sCwfLaMEqWp2GkFwFrY1yg,
consumerQueue=bottomlesspit])
It looks like json but the key value pairs are almost fine but the most important key which is Body isn't like other keys as the string would tell.
I need to read the value of Body and be able to get the value like this
console.log(d.body);
//This above outputs the string as shown
obj = eval('{' + d.body + '}');
console.log(obj);
var match = "Body";
var val = obj.find( function(item) { return item.key == match } );
console.log(val);
How can i read the value of the key Body?.
Use this regular expression instead of a match Body:
\bBody:'(\d*)'
This will catch the Body number in group 1.
You can write a parser function get string and extract values. A very simple function is here. You can modify it also for all exceptions exist.
var str = `(Body:'3886' MessageProperties [headers={}, timestamp=null, messageId=null, userId=null, receivedUserId=null, appId=null, clusterId=null, type=null, correlationId=null, correlationIdString=null, replyTo=null, contentType=application/x-java-serialized-object, contentEncoding=null, contentLength=0, deliveryMode=null, receivedDeliveryMode=PERSISTENT, expiration=null, priority=0, redelivered=false, receivedExchange=, receivedRoutingKey=bottomlesspit, receivedDelay=null, deliveryTag=62, messageCount=0, consumerTag=amq.ctag-sCwfLaMEqWp2GkFwFrY1yg, consumerQueue=bottomlesspit])`;
function f(inp) {
var index = str.indexOf(inp),
endIndex;
for(var i = index; i < str.length; i ++) {
if(str[i] == ',') {
endIndex = i;
break;
}
}
var output = str.substr(index, endIndex).split('=');
return output;
}
console.log(f('consumerQueue'));
Why not use a regex to match and extract the Body.
Example:
const match = d.body.match(/Body:\'(.+)\'/)
if (match) {
const body = match[1] // This is the value of Body
} else {
// Unable to find Body, handle it here
}

How to remove duplicates from text using regex, match and exec in javascript

My javascript knowledge is limited.
I use this code :
var myString = $("#builder-area-center-frame-content").html();
var pattern = /{{(.*?)}}/g;
var match;
while ((match = pattern.exec(myString)) !== null ){
$("#sim-edit-variable-replace").show();
$("#sim-edit-variable-replace select[name=variable_element]").append('<option value="'+ match[0] +'">'+ match[1] +'</option>');
}
'myString' is the container of my div #builder-area-center-frame-content,
and in my div there are elements like that {{FOO}} or {{TOTO}}, etc...
I have a form with a select and in my loop i create option attribut for each {{XXX}} found.
But if there are, for example, 2 {{FOO}}, the option attribut will be repeat twice. Like this example :
<select class="form-control" name="variable_element"><option value="{{FOO}}">FOO</option><option value="{{FOO}}">FOO</option><option value="{{toto}}">toto</option></select>
How can i remove duplicate {{FOO}} to keep only one ?
Sorry for my bad english, i hope you will understand what i want to do ?
EDIT : I tried your code but i have a lot of error of rendering about 'let'. I'm using NetBeans.
function variable_replace() {
let str = $("#builder-area-center-frame-content").html();
let collection = str.match(/{{[aA-zZ]+}}/g).filter((item, index, self) => index === self.indexOf(item));
if (collection.length > 0) {
$('#sim-edit-variable-replace').show();
let elem = $('#sim-edit-variable-replace select[name=variable_element]');
for (let item of collection) {
elem.append('<option value="${item.replace(/[{}]/g, '')}">${item}</option>');
}
}
}
variable_replace();
If you can get the string value of the content of your element, you can use a regex to match for the input and use Array#filter to check if there are any duplicates.
let str = '{{TODO}}{{FOO}}{{FOO}} {{TODO}}';
let collection = str.match(/{{[aA-zZ]+}}/g).filter((item, index, self) => index === self.indexOf(item));
console.log(collection);
EDIT:
You can replace the while loop with an for loop and iterate over all items in the array to append new options to the selection element.
if (collection.length > 0) {
$('#sim-edit-variable-replace').show();
let elem = $('#sim-edit-variable-replace select[name=variable_element]');
for (let item of collection) {
elem.append(`<option value="${item.replace(/[{}]/g, '')}">${item}</option>`);
}
}

Get multiple values from hash in URL

www.domain.com/lookbook.html#look0&product1
On page load I would like to grab the whole hash ie. #look0&product1
then split it up and save the number of the look ie 0 in a variable called var look and the number of the product ie 1 in another variable called var product. Not sure how to achieve this.
Is this also the best way of passing and retrieving such parameters? Thanks
Use var myHash = location.hash to get hash part of URL. Than do var params = myHash.split('&') and after that for each part do part.split('=') to get key-value pairs.
Maybe it's better to pass these parameters via GET from PHP side and than post them inside page when page is processed via PHP?
<input type="hidden" name="look" value="<?php echo isset($_GET['look']) ? $_GET['look'] : '';?>"/>
Here's the pure Javascript method:
function parseHash(hash) {
// Remove the first character (i.e. the prepended "#").
hash = hash.substring(1, hash.length);
// This is where we will store our properties and values.
var hashObj = {};
// Split on the delimiter "&" and for each key/val pair...
hash.split('&').forEach(function(q) {
// Get the property by splitting on all numbers and taking the first entry.
var prop = q.split(/\d/)[0];
// Get the numerical value by splitting on all non-numbers and taking the last entry.
var val_raw = q.split(/[^\d]/);
var val = val_raw[val_raw.length - 1]
// If the property and key are defined, add the key/val pair to our final object.
if (typeof prop !== 'undefined' && typeof val !== 'undefined') {
hashObj[prop] = +val;
}
});
return hashObj;
}
Use like:
parseHash(window.location.hash /* #book10&id1483 */)
/* returns: Object {book: 10, id: 1483} */
I suggest using the norm for passing values through the location's hash: prop=value. Ex: #book=10&id=311. Then you can easily split on = for each property.
You can use .match(re) method with use of regular expression to extract the number from the given string.
You can try this:
var hashes = location.hash.split('&'); // get the hash and split it to make array
var values = hashes.map(function(hash){ // use .map to iterate and get a new array
return hash.match(/\d+/)[0]; // returns the numbers from the string.
});
var loc = "look0345345345&product1";
var hashes = loc.split('&');
var values = hashes.map(function(hash){ return hash.match(/\d+/)[0]; });
document.body.innerHTML = '<pre>'+ JSON.stringify(values) + '</pre>';
You could try this:
var url = 'www.domain.com/lookbook.html#look0&product1'
, result = {}
, expr = RegExp(/[#&]([a-zA-z]+)(\d+)/g);
var parts = expr.exec(url);
while(parts != null && parts.length == 3) {
result[parts[1]] = parts[2];
parts = expr.exec(url);
}
var look = result['look']
, product = result['product'];
document.getElementById('result').innerHTML = 'look = ' + look + '<br>' + 'product = ' + product;
<p id='result'></p>
We are basically using a regular expression to divide the parameter name and value into two groups that we can then get by calling expr.exec(url).
Each time we call expr.exec(url), we get the next set of name and value groups.
We set the value of the parameter to its name in the result object.
In the regular expression /[#&]([a-zA-z]+)(\d+)/g, the g after the /.../ means match each time find the two groups.
The two groups are prefaced by either & or # ([#&]). The first group is a String of letters ([a-zA-z]+), the name of the parameter. The second is a String of numbers (\d+), the value you are looking for.
The regex returns the String that matches the pattern as the first result in the parts array, followed by the groups matched, which which means that our two groups in each iteration will be parts[1] and parts[2].
you should use:
function parseHash(hash){
hash = hash.substring(1, hash.length); //remove first character (#)
var obj ={}; //create the output
var qa = hash.split('&'); //split all parameters in an array
for(var i = 0; i < qa.length; i++){
var fra1 = qa[i].split('='); //split every parameter into [parameter, value]
var prop = fra1[0];
var value = fra1[1];
if(/[0-9]/.test(value) && !isNaN(value)){ //check if is a number
value = parseInt(value);
}
obj[prop] = value; //add the parameter to the value
}
return obj;
}
document.querySelector("input.hash").onkeyup = function(){
console.log( parseHash(document.querySelector("input.hash").value));
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<input type="text" class="hash"/>
<p class="output"></p>
use as
parseHash(location.hash /* #look=0&product=1 );
/returns {look: 0, product: 1}/

Javascript : Select Element in URL with multiple instance of the same element

i need to retrieve a value from an URL in JS, my problem is in this url, the element is repeated at least twice with different value. What i need is the last one.
Example :
http://randomsite.com/index.php?element1=random1&element2=random2&element1=random3
and what i want is "random3" and NOT "random1"
I've tried url.match(/.(\&|\?)element1=(.?)\&/)[2];
But it always gives me the first one :(
I don't have the possibility to change how the url is written as this is for a browser extension.
var ws = "http://randomsite.com/index.php?element1=random1&element2=random2&element1=random3",
input = ws.split("?")[1].split("&"),
dataset = {},
val_to_find = "element1";
for ( var item in input){
var d = input[item].split("=");
if (!dataset[d[0]]){ dataset[d[0]] = new Array(); dataset[d[0]].push(d[1]); }
else{
dataset[d[0]].push(d[1]);
}
}
console.log("item: ", dataset[val_to_find][dataset[val_to_find].length -1]);
return dataset[val_to_find][dataset[val_to_find].length -1];
http://jsfiddle.net/wMuHW/
Take the minimum value (other than -1) from urlString.lastIndexOf("&element1=") and urlString.lastIndexOf("?element1="), then use urlString.substring.
Or alternately, split the string up:
var parts = urlString.split(/[?&]/);
...which will give you:
[
"http://randomsite.com/index.php",
"element1=random1",
"element2=random2",
"element1=random3"
]
...then start looping from the end of the array finding the first entry that starts with element= and grabbing the bit after the = (again with substring).
You could;
for (var result, match, re = /[&?]element1=(.+?)(\&|$)/g; match = re.exec(url);) {
result = match[1];
}
alert(result);
Id try keeping a nested array of duplicate elements
function parseQueryString() {
var elements = {},
query = window.location.search.substring(1),
vars = query.split('&');
for (var i = 0; i < vars.length; i++) {
var pair = vars[i].split('='),
key = decodeURIComponent(pair[0]),
value = decodeURIComponent(pair[1]);
if (elements.hasOwnProperty(key)) {
elements[key].push(value);
}
else {
elements[key] = [value];
}
}
}
Used on: www.example.com?element1=hello&element2=test&element1=more
Would give you the object:
{
element1: [
"hello",
"more"
],
element2:[
"test"
]
}

Getting Attributes from User submitted text! RegExp?

I am trying to pull the attributes out of piece of submitted text in Javascript and change it to an array.
So the user submits this:
<iframe src="http://www.stackoverflow.com/" width="123" height="123" frameborder="1"></iframe>
and I would get:
arr['src'] = http://www.stackoverflow.com/
arr['width'] = 123
arr['height'] = 123
arr['frameborder'] = 1
Just need a regexp I think but any help would be great!
I recommend to use a RegExp to parse user-inputed HTML, instead of creating a DOM object, because it's not desired to load external content (iframe, script, link, style, object, ...) when performing a "simple" task such as getting attribute values of a HTML string.
Using similar (although similarcontradiction?) methods as in my previous answer, I've created a function to match quoted attribute values. Both quoted, as non-quoted attributes are matched.
The code currently returns an object with attributes from the first tag, but it's easily extensible to retrieve all HTML elements (see bottom of answer).
Fiddle: http://jsfiddle.net/BP4nF/1/
// Example:
var htmlString = '<iframe src="http://www.stackoverflow.com/" width="123" height="123" frameborder="1" non-quoted=test></iframe>';
var arr = parseHTMLTag(htmlString);
//arr is the desired object. An easy method to verify:
alert(JSON.stringify(arr));
function parseHTMLTag(htmlString){
var tagPattern = /<[a-z]\S*(?:[^<>"']*(?:"[^"]*"|'[^']*'))*?[^<>]*(?:>|(?=<))/i;
var attPattern = /([-a-z0-9:._]+)\s*=(?:\s*(["'])((?:[^"']+|(?!\2).)*)\2|([^><\s]+))/ig;
// 1 = attribute, 2 = quote, 3 = value, 4=non-quoted value (either 3 or 4)
var tag = htmlString.match(tagPattern);
var attributes = {};
if(tag){ //If there's a tag match
tag = tag[0]; //Match the whole tag
var match;
while((match = attPattern.exec(tag)) !== null){
//match[1] = attribute, match[3] = value, match[4] = non-quoted value
attributes[match[1]] = match[3] || match[4];
}
}
return attributes;
}
The output of the example is equivalent to:
var arr = {
"src": "http://www.stackoverflow.com/",
"width": "123",
"height": "123",
"frameborder": "1",
"non-quoted": "test"
};
Extra: Modifying the function to get multiple matches (only showing code to update)
function parseHTMLTags(htmlString){
var tagPattern = /<([a-z]\S*)(?:[^<>"']*(?:"[^"]*"|'[^']*'))*?[^<>]*(?:>|(?=<))/ig;
// 1 = tag name
var attPattern = /([-a-z0-9:._]+)\s*=(?:\s*(["'])((?:[^"']+|(?!\2).)*)\2|([^><\s]+))/ig;
// 1 = attribute, 2 = quote, 3 = value, 4=non-quoted value (either 3 or 4)
var htmlObject = [];
var tag, match, attributes;
while(tag = tagPattern.exec(htmlString)){
attributes = {};
while(match = attPattern.exec(tag)){
attributes[match[1]] = match[3] || match[4];
}
htmlObject.push({
tagName: tag[1],
attributes: attributes
});
}
return htmlObject; //Array of all HTML elements
}
Assuming you're doing this client side, you're better off not using RegExp, but using the DOM:
var tmp = document.createElement("div");
tmp.innerHTML = userStr;
tmp = tmp.firstChild;
console.log(tmp.src);
console.log(tmp.width);
console.log(tmp.height);
console.log(tmp.frameBorder);
Just make sure you don't add the created element to the document without sanitizing it first. You might also need to loop over the created nodes until you get to an element node.
Assuming they will always enter an HTML element you could parse it and read the elements from the DOM, like so (untested):
var getAttributes = function(str) {
var a={}, div=document.createElement("div");
div.innerHTML = str;
var attrs=div.firstChild.attributes, len=attrs.length, i;
for (i=0; i<len; i++) {
a[attrs[i].nodeName] = attrs[i].nodeValue];
}
return a;
};
var x = getAttributes(inputStr);
x; // => {width:'123', height:123, src:'http://...', ...}
Instead of regexp, use pure JavaScript:
Grab iframe element:
var iframe = document.getElementsByTagName('iframe')[0];
and then access its properties using:
var arr = {
src : iframe.src,
width : iframe.width,
height : iframe.height,
frameborder : iframe.frameborder
};
I would personally do this with jQuery, if possible. With it, you can create a DOM element without actually injecting it into your page and creating a potential security hazard.
var userTxt = '<iframe src="http://www.stackoverflow.com/" width="123" height="123" frameborder="1"></iframe>';
var userInput = $(userTxt);
console.log(userInput.attr('src'));
console.log(userInput.attr('width'));
console.log(userInput.attr('height'));
console.log(userInput.attr('frameborder'));

Categories

Resources