convert css units - javascript

i'm trying to get back a style property in all valid 'length' and 'percent' units, converted from the original value set for that property.
e.g., if i have a div with style.width set to 20%, i'd want back an object with that value in percent (of course, 20%), pixels (whatever the actual pixel width is), em, pt, ex, etc.
i realize that 'percentage' is not a 'length' value, and that not all properties that accept length values accept percentage, but want to include that as well.
of course, some values will be dependent on the element specifically, and possibly it's position in the DOM (e.g., getting the em value will require that element's parent computed font size as well).
i can assume that the style is set explicitly for the element - i'm aware of how to retrieve the current computed style of an element - i'm just hoping to not repeat work someone else has probably already done. i'm also aware of http://www.galasoft.ch/myjavascript/WebControls/css-length.html, but it relies on style.pixelWidth or node.clientWidth, and fails in Chrome (I'd assume it fails in Safari as well... and probably others).
i've already got color values worked out (rgb, rgba, hex, name) - this is of course a lot more straightforward. i'm working with values that are mathematically mutable, so really only need 'length' and 'percent' values (if called on a property set with a non-length, non-percent value - like 'font-size: larger' - the function could fail, or throw an error).
if written procedurally, something like this would be ideal:
function getUnits(target, prop){
var value = // get target's computed style property value
// figure out what unit is being used natively, and it's values - for this e.g., 100px
var units = {};
units.pixel = 100;
units.percent = 50; // e.g., if the prop was height and the parent was 200px tall
units.inch = 1.39; // presumably units.pixel / 72 would work, but i'm not positive
units.point = units.inch / 72;
units.pica = units.point * 12;
// etc...
return units;
}
I'm not asking for someone to write code for me, but my hope is that someone has already done this before and it's available in some open-source library, framework, blog post, tut, whatever. failing that, if someone has a clever idea how to streamline the process, that'd be great as well (the author of the link above created a temporary div and computed a single value to determine the ratios for other units - a handy idea but not one i'm entirely sold on, and definitely one that'd need supplemental logic to handle everything i'm hoping accept).
thanks in advance for any insight or suggestions.

EDIT: updated to allow user to pick a single unit to be returned (e.g., exists as %, get back in px) - big improvement in performance for when that's enough - might end up changing it to just accept a single unit to convert, and get rid the loops. Thanks to eyelidlessness for his help. /EDIT
this is what i've come up with - after preliminary testing it appears to work. i borrowed the temporary div idea from the link mentioned in the original question, but that's about all that was taken from that other class.
if anyone has any input or improvements, i'd be happy to hear it.
(function(){
// pass to string.replace for camel to hyphen
var hyphenate = function(a, b, c){
return b + "-" + c.toLowerCase();
}
// get computed style property
var getStyle = function(target, prop){
if(prop in target.style){ // if it's explicitly assigned, just grab that
if(!!(target.style[prop]) || target.style[prop] === 0){
return target.style[prop];
}
}
if(window.getComputedStyle){ // gecko and webkit
prop = prop.replace(/([a-z])([A-Z])/, hyphenate); // requires hyphenated, not camel
return window.getComputedStyle(target, null).getPropertyValue(prop);
}
if(target.currentStyle){ // ie
return target.currentStyle[prop];
}
return null;
}
// get object with units
var getUnits = function(target, prop, returnUnit){
var baseline = 100; // any number serves
var item; // generic iterator
var map = { // list of all units and their identifying string
pixel : "px",
percent : "%",
inch : "in",
cm : "cm",
mm : "mm",
point : "pt",
pica : "pc",
em : "em",
ex : "ex"
};
var factors = {}; // holds ratios
var units = {}; // holds calculated values
var value = getStyle(target, prop); // get the computed style value
var numeric = value.match(/\d+/); // get the numeric component
if(numeric === null) { // if match returns null, throw error... use === so 0 values are accepted
throw "Invalid property value returned";
}
numeric = numeric[0]; // get the string
var unit = value.match(/\D+$/); // get the existing unit
unit = (unit == null) ? "px" : unit[0]; // if its not set, assume px - otherwise grab string
var activeMap; // a reference to the map key for the existing unit
for(item in map){
if(map[item] == unit){
activeMap = item;
break;
}
}
if(!activeMap) { // if existing unit isn't in the map, throw an error
throw "Unit not found in map";
}
var singleUnit = false; // return object (all units) or string (one unit)?
if(returnUnit && (typeof returnUnit == "string")) { // if user wants only one unit returned, delete other maps
for(item in map){
if(map[item] == returnUnit){
singleUnit = item;
continue;
}
delete map[item];
}
}
var temp = document.createElement("div"); // create temporary element
temp.style.overflow = "hidden"; // in case baseline is set too low
temp.style.visibility = "hidden"; // no need to show it
target.parentNode.appendChild(temp); // insert it into the parent for em and ex
for(item in map){ // set the style for each unit, then calculate it's relative value against the baseline
temp.style.width = baseline + map[item];
factors[item] = baseline / temp.offsetWidth;
}
for(item in map){ // use the ratios figured in the above loop to determine converted values
units[item] = (numeric * (factors[item] * factors[activeMap])) + map[item];
}
target.parentNode.removeChild(temp); // clean up
if(singleUnit !== false){ // if they just want one unit back
return units[singleUnit];
}
return units; // returns the object with converted unit values...
}
// expose
window.getUnits = this.getUnits = getUnits;
})();
tyia

Check out Units, a JavaScript library that does these conversions.
Here's a blog post by the author describing the code.

Late to the party and I don't think this necessarily answers the question fully because I haven't included conversion of percentages. However, I do think it's a good start that can be easily modified for your specific usage.
Javascript function
/**
* Convert absolute CSS numerical values to pixels.
*
* #link https://developer.mozilla.org/en-US/docs/Learn/CSS/Building_blocks/Values_and_units#numbers_lengths_and_percentages
*
* #param {string} cssValue
* #param {null|HTMLElement} target Used for relative units.
* #return {*}
*/
window.convertCssUnit = function( cssValue, target ) {
target = target || document.body;
const supportedUnits = {
// Absolute sizes
'px': value => value,
'cm': value => value * 38,
'mm': value => value * 3.8,
'q': value => value * 0.95,
'in': value => value * 96,
'pc': value => value * 16,
'pt': value => value * 1.333333,
// Relative sizes
'rem': value => value * parseFloat( getComputedStyle( document.documentElement ).fontSize ),
'em': value => value * parseFloat( getComputedStyle( target ).fontSize ),
'vw': value => value / 100 * window.innerWidth,
'vh': value => value / 100 * window.innerHeight,
// Times
'ms': value => value,
's': value => value * 1000,
// Angles
'deg': value => value,
'rad': value => value * ( 180 / Math.PI ),
'grad': value => value * ( 180 / 200 ),
'turn': value => value * 360
};
// Match positive and negative numbers including decimals with following unit
const pattern = new RegExp( `^([\-\+]?(?:\\d+(?:\\.\\d+)?))(${ Object.keys( supportedUnits ).join( '|' ) })$`, 'i' );
// If is a match, return example: [ "-2.75rem", "-2.75", "rem" ]
const matches = String.prototype.toString.apply( cssValue ).trim().match( pattern );
if ( matches ) {
const value = Number( matches[ 1 ] );
const unit = matches[ 2 ].toLocaleLowerCase();
// Sanity check, make sure unit conversion function exists
if ( unit in supportedUnits ) {
return supportedUnits[ unit ]( value );
}
}
return cssValue;
};
Example usage
// Convert rem value to pixels
const remExample = convertCssUnit( '2.5rem' );
// Convert time unit (seconds) to milliseconds
const speedExample = convertCssUnit( '2s' );
// Convert angle unit (grad) to degrees
const emExample = convertCssUnit( '200grad' );
// Convert vw value to pixels
const vwExample = convertCssUnit( '80vw' );
// Convert the css variable to pixels
const varExample = convertCssUnit( getComputedStyle( document.body ).getPropertyValue( '--container-width' ) );
// Convert `em` value relative to page element
const emExample = convertCssUnit( '2em', document.getElementById( '#my-element' ) );
Current supported formats
Any format with preceding plus (+) or minus (-) symbol is valid, along with any of the following units: px, cm, mm, q, in, pc, pt, rem, em, vw, vh, s, ms, deg, rad, grad, turn
For example:
10rem
10.2em
-0.34cm
+10.567s
You can see a full combination of formats here: https://jsfiddle.net/thelevicole/k7yt4naw/1/

Émile kind of does this, specifically in its parse function:
function parse(prop){
var p = parseFloat(prop), q = prop.replace(/^[\-\d\.]+/,'');
return isNaN(p) ? { v: q, f: color, u: ''} : { v: p, f: interpolate, u: q };
}
The prop argument is the computedStyle for some element. The object that's returned has a v property (the value), an f method that is only used later on for animation, and a u property (the unit of the value, if necessary).
This doesn't entirely answer the question, but it could be a start.

While digging through the SVG spec, I found that SVGLength provides an interesting DOM API for builtin unit conversion. Here's a function making use of it:
/** Convert a value to a different unit
* #param {number} val - value to convert
* #param {string} from - unit `val`; can be one of: %, em, ex, px, cm, mm, in, pt, pc
* #param {string} to - unit to convert to, same as `from`
* #returns {object} - {number, string} with the number/string forms for the converted value
*/
const convert_units = (() => {
const rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
const len = rect.width.baseVal;
const modes = {
"%": len.SVG_LENGTHTYPE_PERCENTAGE,
"em": len.SVG_LENGTHTYPE_EMS,
"ex": len.SVG_LENGTHTYPE_EXS,
"px": len.SVG_LENGTHTYPE_PX,
"cm": len.SVG_LENGTHTYPE_CM,
"mm": len.SVG_LENGTHTYPE_MM,
"in": len.SVG_LENGTHTYPE_IN,
"pt": len.SVG_LENGTHTYPE_PT,
"pc": len.SVG_LENGTHTYPE_PC,
};
return (val, from, to, context) => {
if (context)
context.appendChild(rect);
len.newValueSpecifiedUnits(modes[from], val);
len.convertToSpecifiedUnits(modes[to]);
const out = {
number: len.valueInSpecifiedUnits,
string: len.valueAsString
};
if (context)
context.removeChild(rect);
return out;
};
})();
Usage example:
convert_units(1, "in", "mm");
// output: {"number": 25.399999618530273, "string": "25.4mm"}
Some units are relative, so need to be placed in a parent DOM element temporarily to be able to resolve the unit's absolute value. In those cases provide a fourth argument with the parent element:
convert_units(1, "em", "px", document.body);
// output: {"number": 16, "string": "16px"}

Related

Crossfilter - Cannot get filtered records from other groups (NOT from associate groups)

I'm working with "airplane" data set from this reference http://square.github.io/crossfilter/
date,delay,distance,origin,destination
01010001,14,405,MCI,MDW
01010530,-11,370,LAX,PHX
...
// Create the crossfilter for the relevant dimensions and groups.
var flight = crossfilter(flights),
all = flight.groupAll(),
date = flight.dimension(function(d) { return d.date; }),
dates = date.group(d3.time.day),
hour = flight.dimension(function(d) { return d.date.getHours() + d.date.getMinutes() / 60; }),
hours = hour.group(Math.floor),
delay = flight.dimension(function(d) { return Math.max(-60, Math.min(149, d.delay)); }),
delays = delay.group(function(d) { return Math.floor(d / 10) * 10; }),
distance = flight.dimension(function(d) { return Math.min(1999, d.distance); }),
distances = distance.group(function(d) { return Math.floor(d / 50) * 50; });
Following document of Crossfilter, "groups don't observe the filters on their own dimension" => we can get filtered records from groups that theirs dimension are not filtered at this moment, can't we?
I have performed some test but this is not correct:
console.dir(date.group().all()); // 50895 records
console.dir(distance.group().all()); // 297 records
date.filter([new Date(2001, 1, 1), new Date(2001, 2, 1)]);
console.dir(date.group().all()); // 50895 records => this number still the same because we are filtering on its dimension
console.dir(distance.group().all()); // 297 records => but this number still the same too. I don't know why
Could you please explain for me why number of "distance.group().all()" still the same as before we perform the filter? Am I missing something here?
If we really cannot get "filtered records" from "distance dimension" by this way, how can I achive this?
Thanks.
So, yes, this is the expected behavior.
Crossfilter will create a "bin" in the group for every value it finds by applying the dimension key and group key functions. Then when a filter is applied, it will apply the reduce-remove function, which by default subtracts the count of rows removed.
The result is that empty bins still exist, but they have a value of 0.
EDIT: here is the Crossfilter Gotchas entry with further explanation.
If you want to remove the zeros, you can use a "fake group" to do that.
function remove_empty_bins(source_group) {
return {
all:function () {
return source_group.all().filter(function(d) {
//return Math.abs(d.value) > 0.00001; // if using floating-point numbers
return d.value !== 0; // if integers only
});
}
};
}
https://github.com/dc-js/dc.js/wiki/FAQ#remove-empty-bins
This function wraps the group in an object which implements .all() by calling source_group.all() and then filters the result. So if you're using dc.js you could supply this fake group to your chart like so:
chart.group(remove_empty_bins(yourGroup));

How can d3.transform be used in d3 v4?

In d3.js v4 the d3.transform method has been removed, without any hint about how to replace it.
Does anyone know how to replace the following d3.js v3 code?
d3.transform(String).translate;
Edit 2016-10-07: For a more general approach see addendum below.
According to the changelog it is gone. There is a function in transform/decompose.js, though, which does the calculations for internal use. Sadly, it is not exposed for external use.
That said, this is easily done even without putting any D3 to use:
function getTranslation(transform) {
// Create a dummy g for calculation purposes only. This will never
// be appended to the DOM and will be discarded once this function
// returns.
var g = document.createElementNS("http://www.w3.org/2000/svg", "g");
// Set the transform attribute to the provided string value.
g.setAttributeNS(null, "transform", transform);
// consolidate the SVGTransformList containing all transformations
// to a single SVGTransform of type SVG_TRANSFORM_MATRIX and get
// its SVGMatrix.
var matrix = g.transform.baseVal.consolidate().matrix;
// As per definition values e and f are the ones for the translation.
return [matrix.e, matrix.f];
}
console.log(getTranslation("translate(20,30)")) // simple case: should return [20,30]
console.log(getTranslation("rotate(45) skewX(20) translate(20,30) translate(-5,40)"))
This creates a dummy g element for calculation purposes using standard DOM methods and sets its transform attribute to the string containing your transformations. It then calls .consolidate() of the SVGTransformList interface to consolidate the possibly long list of transformation to a single SVGTransform of type SVG_TRANSFORM_MATRIX which contains the boiled down version of all transformations in its matrix property. This SVGMatrix per definition holds the values for the translation in its properties e and f.
Using this function getTranslation() you could rewrite your D3 v3 statement
d3.transform(transformString).translate;
as
getTranslation(transformString);
Addendum
Because this answer has gained some interest over time, I decided to put together a more general method capable of returning not only the translation but the values of all transformation definitions of a transform string. The basic approach is the same as laid out in my original post above plus the calculations taken from transform/decompose.js. This function will return an object having properties for all transformation definitions much like the former d3.transform() did.
function getTransformation(transform) {
// Create a dummy g for calculation purposes only. This will never
// be appended to the DOM and will be discarded once this function
// returns.
var g = document.createElementNS("http://www.w3.org/2000/svg", "g");
// Set the transform attribute to the provided string value.
g.setAttributeNS(null, "transform", transform);
// consolidate the SVGTransformList containing all transformations
// to a single SVGTransform of type SVG_TRANSFORM_MATRIX and get
// its SVGMatrix.
var matrix = g.transform.baseVal.consolidate().matrix;
// Below calculations are taken and adapted from the private function
// transform/decompose.js of D3's module d3-interpolate.
var {a, b, c, d, e, f} = matrix; // ES6, if this doesn't work, use below assignment
// var a=matrix.a, b=matrix.b, c=matrix.c, d=matrix.d, e=matrix.e, f=matrix.f; // ES5
var scaleX, scaleY, skewX;
if (scaleX = Math.sqrt(a * a + b * b)) a /= scaleX, b /= scaleX;
if (skewX = a * c + b * d) c -= a * skewX, d -= b * skewX;
if (scaleY = Math.sqrt(c * c + d * d)) c /= scaleY, d /= scaleY, skewX /= scaleY;
if (a * d < b * c) a = -a, b = -b, skewX = -skewX, scaleX = -scaleX;
return {
translateX: e,
translateY: f,
rotate: Math.atan2(b, a) * 180 / Math.PI,
skewX: Math.atan(skewX) * 180 / Math.PI,
scaleX: scaleX,
scaleY: scaleY
};
}
console.log(getTransformation("translate(20,30)"));
console.log(getTransformation("rotate(45) skewX(20) translate(20,30) translate(-5,40)"));
If you pull in d3 v4 through npm, you can import the src/transform/parse file directly and call parseSvg:
// using es2015 modules syntax
import { parseSvg } from "d3-interpolate/src/transform/parse";
parseSvg("translate(20, 20)");
On elements which have the d3.js zoom listener on them -- usually the <g> element appended to the svg element -- you can use this call to get the transformation attributes outside of the zoom function:
var self = this;
var t = d3.zoomTransform(self.svg.node());
// t = {k: 1, x: 0, y: 0} or your current transformation values
This returns the same values as when calling d3.event.transform within the zoom event function itself.
Calling d3.event.transform outside the zoom event function will error:
Uncaught TypeError: Cannot read property 'transform' of null
I have to use d3.zoomTransform to allow panning and zooming from buttons outside the graph.
I found a little bit simpler solution than that.
selection.node().transform.baseVal[0].matrix
In this matrix you have cordinates e and f witch are equivalent to x, y. (e === x, f === y). No need to implement your very own funtion for that.
baseVal is a list of transformations of the element. You can't use that for the object without previus transformation! (the list will be empty) Or if you done many tranformation to the object the last position will be under the last element of baseVal list.
I am a little late to the party, but I had some code that was beneficial to me, I hope it helps you out too.
The code above by #altocumulus is quite thorough and works like a charm. However it didn't quite meet my needs since I was doing the calculations by hand and needed to alter some transform properties as painlessly as possible.
This might not be the solution for everyone, but it was perfect for me.
function _getTokenizedTransformAttributeValue(transformStr) {
var cleanedUpTransformAttrArr = transformStr.split(')', ).slice(0,-1);
return cleanedUpTransformAttrArr.reduce(function(retObj, item) {
var transformPair = item.split('(');
retObj[transformPair[0]] = transformPair[1].split(',');
return retObj;
}, {});
}
function _getStringFromTokenizedTransformAttributeObj(transformAttributeObj) {
return Object.keys(transformAttributeObj).reduce(function(finalStr, key) {
// wrap the transformAttributeObj[key] in array brackets to ensure we have an array
// join will flatten the array first and then do the join so [[x,y]].join(',') -> "x,y"
return finalStr += key + "(" + [transformAttributeObj[key]].join(',') + ")";
}, '');
}
The really great thing with the first function is that I can manually alter a specific property (e.g. rotation), and not have to worry about how it affects translate or anything else (when rotating around a point), whereas when I rely on the built-in or even d3.transform methods they consolidate all the properties into one value.
Why is this cool?
Imagine a some HTML
<g class="tick-label tick-label--is-rotated" transform="translate(542.8228777985075,0) rotate(60, 50.324859619140625, 011.402383210764288)" style="visibility: inherit;"></g>
Using d3.transfrom I get:
In object form
jr {rotate: 59.99999999999999, translate: [577.8600589984691, -37.88141544673796], scale: [1, 1], skew: 0, toString: function}
In string form
"translate(577.8600589984691,-37.88141544673796)rotate(59.99999999999999)skewX(0)scale(1,1)"
Which is correct mathematically, but makes it hard for me to simply remove the angle of rotation and the translation that had to be introduced to rotate this element around a given point.
Using my _getTokenizedTransformAttributeValue function
In object form
{translate: ["542.8228777985075", "0"], rotate: ["60", " 50.324859619140625", " 011.402383210764288"]}
In string form using the function _getStringFromTokenizedTransformAttributeObj
"translate(542.8228777985075,0)rotate(60, 50.324859619140625, 011.402383210764288)"
Which is perfect because now when you remove the rotation, your element can go back to where it was
Granted, the code could be cleaner and the function names more concise, but I really wanted to get this out there so others could benefit from it.
I found a way do achieve something similar by using this:
d3.select(this).node().getBBox();
this will give you access to the x/y position and width/height
You can see an example here: https://bl.ocks.org/mbostock/1160929

pdf.js: Get the text colour

I have a simple pdf file, containing the words "Hello world", each in a different colour.
I'm loading the PDF, like this:
PDFJS.getDocument('test.pdf').then( onPDF );
function onPDF( pdf )
{
pdf.getPage( 1 ).then( onPage );
}
function onPage( page )
{
page.getTextContent().then( onText );
}
function onText( text )
{
console.log( JSON.stringify( text ) );
}
And I get a JSON output like this:
{
"items" : [{
"str" : "Hello ",
"dir" : "ltr",
"width" : 29.592,
"height" : 12,
"transform" : [12, 0, 0, 12, 56.8, 774.1],
"fontName" : "g_font_1"
}, {
"str" : "world",
"dir" : "ltr",
"width" : 27.983999999999998,
"height" : 12,
"transform" : [12, 0, 0, 12, 86.5, 774.1],
"fontName" : "g_font_1"
}
],
"styles" : {
"g_font_1" : {
"fontFamily" : "serif",
"ascent" : 0.891,
"descent" : 0.216
}
}
}
However, I've not been able to find a way to determine the colour of each word. When I render it, it renders properly, so I know the information is in there somewhere. Is there somewhere I can access this?
As Respawned alluded to, there is no easy answer that will work in all cases. That being said, here are two approaches which seem to work fairly well. Both having upsides and downsides.
Approach 1
Internally, the getTextContent method uses whats called an EvaluatorPreprocessor to parse the PDF operators, and maintain the graphic state. So what we can do is, implement a custom EvaluatorPreprocessor, overwrite the preprocessCommand method, and use it to add the current text color to the graphic state. Once this is in place, anytime a new text chunk is created, we can add a color attribute, and set it to the current color state.
The downsides to this approach are:
Requires modifying the PDFJS source code. It also depends heavily on
the current implementation of PDFJS, and could break if this is
changed.
It will fail in cases where the text is used as a path to be filled with an image. In some PDF creators (such as Photoshop), the way it creates colored text is, it first creates a clipping path from all the given text characters, and then paints a solid image over the path. So the only way to deduce the fill-color is by reading the pixel values from the image, which would require painting it to a canvas. Even hooking into paintChar wont be of much help here, since the fill color will only emerge at a later time.
The upside is, its fairly robust and works irrespective of the page background. It also does not require rendering anything to canvas, so it can be done entirely in the background thread.
Code
All the modifications are made in the core/evaluator.js file.
First you must define the custom evaluator, after the EvaluatorPreprocessor definition.
var CustomEvaluatorPreprocessor = (function() {
function CustomEvaluatorPreprocessor(stream, xref, stateManager, resources) {
EvaluatorPreprocessor.call(this, stream, xref, stateManager);
this.resources = resources;
this.xref = xref;
// set initial color state
var state = this.stateManager.state;
state.textRenderingMode = TextRenderingMode.FILL;
state.fillColorSpace = ColorSpace.singletons.gray;
state.fillColor = [0,0,0];
}
CustomEvaluatorPreprocessor.prototype = Object.create(EvaluatorPreprocessor.prototype);
CustomEvaluatorPreprocessor.prototype.preprocessCommand = function(fn, args) {
EvaluatorPreprocessor.prototype.preprocessCommand.call(this, fn, args);
var state = this.stateManager.state;
switch(fn) {
case OPS.setFillColorSpace:
state.fillColorSpace = ColorSpace.parse(args[0], this.xref, this.resources);
break;
case OPS.setFillColor:
var cs = state.fillColorSpace;
state.fillColor = cs.getRgb(args, 0);
break;
case OPS.setFillGray:
state.fillColorSpace = ColorSpace.singletons.gray;
state.fillColor = ColorSpace.singletons.gray.getRgb(args, 0);
break;
case OPS.setFillCMYKColor:
state.fillColorSpace = ColorSpace.singletons.cmyk;
state.fillColor = ColorSpace.singletons.cmyk.getRgb(args, 0);
break;
case OPS.setFillRGBColor:
state.fillColorSpace = ColorSpace.singletons.rgb;
state.fillColor = ColorSpace.singletons.rgb.getRgb(args, 0);
break;
}
};
return CustomEvaluatorPreprocessor;
})();
Next, you need to modify the getTextContent method to use the new evaluator:
var preprocessor = new CustomEvaluatorPreprocessor(stream, xref, stateManager, resources);
And lastly, in the newTextChunk method, add a color attribute:
color: stateManager.state.fillColor
Approach 2
Another approach would be to extract the text bounding boxes via getTextContent, render the page, and for each text, get the pixel values which reside within its bounds, and take that to be the fill color.
The downsides to this approach are:
The computed text bounding boxes are not always correct, and in some cases may even be off completely (eg: rotated text). If the bounding box does not cover at least partially the actual text on canvas, then this method will fail. We can recover from complete failures, by checking that the text pixels have a color variance greater than a threshold. The rationale being, if bounding box is completely background, it will have little variance, in which case we can fallback to a default text color (or maybe even the color of k nearest-neighbors).
The method assumes the text is darker than the background. Otherwise, the background could be mistaken as the fill color. This wont be a problem is most cases, as most docs have white backgrounds.
The upside is, its simple, and does not require messing with the PDFJS source-code. Also, it will work in cases where the text is used as a clipping path, and filled with an image. Though this can become hazy when you have complex image fills, in which case, the choice of text color becomes ambiguous.
Demo
http://jsfiddle.net/x2rajt5g/
Sample PDF's to test:
https://www.dropbox.com/s/0t5vtu6qqsdm1d4/color-test.pdf?dl=1
https://www.dropbox.com/s/cq0067u80o79o7x/testTextColour.pdf?dl=1
Code
function parseColors(canvasImgData, texts) {
var data = canvasImgData.data,
width = canvasImgData.width,
height = canvasImgData.height,
defaultColor = [0, 0, 0],
minVariance = 20;
texts.forEach(function (t) {
var left = Math.floor(t.transform[4]),
w = Math.round(t.width),
h = Math.round(t.height),
bottom = Math.round(height - t.transform[5]),
top = bottom - h,
start = (left + (top * width)) * 4,
color = [],
best = Infinity,
stat = new ImageStats();
for (var i, v, row = 0; row < h; row++) {
i = start + (row * width * 4);
for (var col = 0; col < w; col++) {
if ((v = data[i] + data[i + 1] + data[i + 2]) < best) { // the darker the "better"
best = v;
color[0] = data[i];
color[1] = data[i + 1];
color[2] = data[i + 2];
}
stat.addPixel(data[i], data[i+1], data[i+2]);
i += 4;
}
}
var stdDev = stat.getStdDev();
t.color = stdDev < minVariance ? defaultColor : color;
});
}
function ImageStats() {
this.pixelCount = 0;
this.pixels = [];
this.rgb = [];
this.mean = 0;
this.stdDev = 0;
}
ImageStats.prototype = {
addPixel: function (r, g, b) {
if (!this.rgb.length) {
this.rgb[0] = r;
this.rgb[1] = g;
this.rgb[2] = b;
} else {
this.rgb[0] += r;
this.rgb[1] += g;
this.rgb[2] += b;
}
this.pixelCount++;
this.pixels.push([r,g,b]);
},
getStdDev: function() {
var mean = [
this.rgb[0] / this.pixelCount,
this.rgb[1] / this.pixelCount,
this.rgb[2] / this.pixelCount
];
var diff = [0,0,0];
this.pixels.forEach(function(p) {
diff[0] += Math.pow(mean[0] - p[0], 2);
diff[1] += Math.pow(mean[1] - p[1], 2);
diff[2] += Math.pow(mean[2] - p[2], 2);
});
diff[0] = Math.sqrt(diff[0] / this.pixelCount);
diff[1] = Math.sqrt(diff[1] / this.pixelCount);
diff[2] = Math.sqrt(diff[2] / this.pixelCount);
return diff[0] + diff[1] + diff[2];
}
};
This question is actually extremely hard if you want to do it to perfection... or it can be relatively easy if you can live with solutions that work only some of the time.
First of all, realize that getTextContent is intended for searchable text extraction and that's all it's intended to do.
It's been suggested in the comments above that you use page.getOperatorList(), but that's basically re-implementing the whole PDF drawing model in your code... which is basically silly because the largest chunk of PDFJS does exactly that... except not for the purpose of text extraction but for the purpose of rendering to canvas. So what you want to do is to hack canvas.js so that instead of just setting its internal knobs it also does some callbacks to your code. Alas, if you go this way, you won't be able to use stock PDFJS, and I rather doubt that your goal of color extraction will be seen as very useful for PDFJS' main purpose, so your changes are likely not going to get accepted upstream, so you'll likely have to maintain your own fork of PDFJS.
After this dire warning, what you'd need to minimally change are the functions where PDFJS has parsed the PDF color operators and sets its own canvas painting color. That happens around line 1566 (of canvas.js) in function setFillColorN. You'll also need to hook the text render... which is rather a character renderer at canvas.js level, namely CanvasGraphics_paintChar around line 1270. With these two hooked, you'll get a stream of callbacks for color changes interspersed between character drawing sequences. So you can reconstruct the color of character sequences reasonably easy from this.. in the simple color cases.
And now I'm getting to the really ugly part: the fact that PDF has an extremely complex color model. First there are two colors for drawing anything, including text: a fill color and stroke (outline) color. So far not too scary, but the color is an index in a ColorSpace... of which there are several, RGB being only one possibility. Then there's also alpha and compositing modes, so the layers (of various alphas) can result in a different final color depending on the compositing mode. And the PDFJS has not a single place where it accumulates color from layers.. it simply [over]paints them as they come. So if you only extract the fill color changes and ignore alpha, compositing etc.. it will work but not for complex documents.
Hope this helps.
There's no need to patch pdfjs, the transform property gives the x and y, so you can go through the operator list and find the setFillColor op that precedes the text op at that point.

Canvas getImageData() For optimal performance. To pull out all data or one at a time?

I need to scan through every pixel in a canvas image and do some fiddling with the colors etc. For optimal performance, should I grab all the data in one go and work on it through the array? Or should I call each pixel as I work on it.
So basically...
data = context.getImageData(x, y, height, width);
VS
data = context.getImageData(x, y, 1, 1); //in a loop height*width times.
You'll get much higher performances by grabbing the image all at once since :
a) a (contiguous) acces to an array is way faster than a function call.
b) especially when this function isa method of a DOM object having some overhead.
c) and there might be buffer refresh issues that might delay response (if canvas is
on sight... or not depending on double buffering implementation).
So go for a one-time grab.
I'll suggest you look into Javascript Typed Arrays to get the most of the
imageData result.
If i may quote myself, look at how you can handle pixels fast in this old post of mine
(look after 2) ):
Nice ellipse on a canvas?
(i quoted the relevant part below : )
You can get a UInt32Array view on your ImageData with :
var myGetImageData = myTempCanvas.getImageData(0,0,sizeX, sizeY);
var sourceBuffer32 = new Uint32Array(myGetImageData.data.buffer);
then sourceBuffer32[i] contains Red, Green, Blue, and transparency packed into one unsigned 32 bit int. Compare it to 0 to know if pixel is non-black ( != (0,0,0,0) )
OR you can be more precise with a Uint8Array view :
var myGetImageData = myTempCanvas.getImageData(0,0,sizeX, sizeY);
var sourceBuffer8 = new Uint8Array(myGetImageData.data.buffer);
If you deal only with shades of grey, then R=G=B, so watch for
sourceBuffer8[4*i]>Threshold
and you can set the i-th pixel to black in one time using the UInt32Array view :
sourceBuffer32[i]=0xff000000;
set to any color/alpha with :
sourceBuffer32[i]= (A<<24) | (B<<16) | (G<<8) | R ;
or just to any color :
sourceBuffer32[i]= 0xff000000 | (B<<16) | (G<<8) | R ;
(be sure R is rounded).
Listening to #Ken's comment, yes endianness can be an issue when you start fighting with bits 32 at a time.
Most computer are using little-endian, so RGBA becomes ABGR when dealing with them 32bits a once.
Since it is the vast majority of systems, if dealing with 32bit integer assume this is the case,
and you can -for compatibility- reverse your computation before writing the 32 bits results on Big endian systems.
Let me share those two functions :
function isLittleEndian() {
// from TooTallNate / endianness.js. https://gist.github.com/TooTallNate/4750953
var b = new ArrayBuffer(4);
var a = new Uint32Array(b);
var c = new Uint8Array(b);
a[0] = 0xdeadbeef;
if (c[0] == 0xef) { isLittleEndian = function() {return true }; return true; }
if (c[0] == 0xde) { isLittleEndian = function() {return false }; return false; }
throw new Error('unknown endianness');
}
function reverseUint32 (uint32) {
var s32 = new Uint32Array(4);
var s8 = new Uint8Array(s32.buffer);
var t32 = new Uint32Array(4);
var t8 = new Uint8Array(t32.buffer);
reverseUint32 = function (x) {
s32[0] = x;
t8[0] = s8[3];
t8[1] = s8[2];
t8[2] = s8[1];
t8[3] = s8[0];
return t32[0];
}
return reverseUint32(uint32);
};
Additionally to what GameAlchemist said, if you want to get or set all the colors of a pixel simultaneously, but you don't want to check endianness, you can use a DataView:
var data = context.getImageData(0, 0, canvas.width, canvas.height);
var view = new DataView(data.data.buffer);
// Read or set pixel (x,y) as #RRGGBBAA (big endian)
view.getUint32(4 * (x + y*canvas.width));
view.setUint32(4 * (x + y*canvas.width), 0xRRGGBBAA);
// Read or set pixel (x,y) as #AABBGGRR (little endian)
view.getUint32(4 * (x + y*canvas.width), true);
view.setUint32(4 * (x + y*canvas.width), 0xAABBGGRR, true);
// Save changes
ctx.putImageData(data, 0, 0);
It depends on what exactly you're doing, but I'd suggest grabbing it all at once, and then looping through it.
Grabbing it all at once is faster than grabbing it pixel by pixel, since searching through an array is a lot faster than searching through a canvas, once for each pixel.
If you're really in need of speed, look into web workers. You can set each one to grab a specific section of the canvas, and since they can run simultaneously, they'll make much better use out of your CPU.
getImageData() isn't really slow enough for you to notice the difference if you were to grab it all at once or individually, in my experiences using the function.

YUI API Dual Slider control question

I'm using the YUI 2.7 library to handle a dual-slider (range slider) control in a webpage.
It works great-- however, I wanted to allow users to switch the range values by Ajax-- effectively changing the price range from "0-50,000" to a subset (eg. "50-250") without reloading the page.
The problem is that it appears the values from the existing slider do not get reset, even when I explicitly set them back to NULL inside the function to "rebuild" the slider.
The slider handles appear out of position after the ajax request, (way off the scale to the right) and the values of the slider apparently randomly fluctuate.
Is there a way to explicitly destroy the YUI slider object, beyond setting its reference to null? Or do I just need to redeclare the scale and min/max values somehow?
Thanks for any help (I'll try to post a link to an example asap)
here's the code:
function slider(bg,minthumb,maxthumb,minvalue,maxvalue,startmin,startmax,aSliderName,soptions) {
var scaleFactor = null;
var tickSize = null;
var range = null;
var dual_slider = null;
var initVals = null;
var Dom = null;
range = options.sliderLength;
if ((startmax - startmin) < soptions.sliderLength) {
tickSize = (soptions.sliderLength / (startmax - startmin));
}else{
tickSize = 1;
}
initVals = [ 0,soptions.sliderLength ], // Values assigned during instantiation
//Event = YAHOO.util.Event,
dual_slider,
scaleFactor = ((startmax - startmin) / soptions.sliderLength);
dual_slider = YAHOO.widget.Slider.getHorizDualSlider(
bg,minthumb,maxthumb,range, tickSize, initVals);
dual_slider.subscribe("change", function(instance) {
priceMin = (dual_slider.minVal * scaleFactor) + startmin;
priceMax = (dual_slider.maxVal * scaleFactor) + startmin;
});
dual_slider.subscribe("slideEnd", function(){ alert(priceMin + ' ' + priceMax); });
return dual_slider;
}
Store the startmin, startmax, and scaleFactor on the dual_slider object, then in your ajax callback, update those properties with new values. Change your change event subscriber to reference this.startmin, this.startmax, and this.scaleFactor.
Slider and DualSlider only really understand the pixel offsets of the thumbs, and report the values as such. As you've done (and per most Slider examples), you need to apply a conversion factor to translate a pixel offset to a "value". This common idiom has been rolled into the core logic of the YUI 3 Slider (though there isn't yet a DualSlider in the library).
Here's an example that illustrates dynamically updating value ranges:
http://yuiblog.com/sandbox/yui/v282/examples/slider/slider_factor_change.html

Categories

Resources