pdf.js: Get the text colour

pdf.js: Get the text colour - javascript

I have a simple pdf file, containing the words "Hello world", each in a different colour.
I'm loading the PDF, like this:
PDFJS.getDocument('test.pdf').then( onPDF );
function onPDF( pdf )
{
pdf.getPage( 1 ).then( onPage );
}
function onPage( page )
{
page.getTextContent().then( onText );
}
function onText( text )
{
console.log( JSON.stringify( text ) );
}
And I get a JSON output like this:
{
"items" : [{
"str" : "Hello ",
"dir" : "ltr",
"width" : 29.592,
"height" : 12,
"transform" : [12, 0, 0, 12, 56.8, 774.1],
"fontName" : "g_font_1"
}, {
"str" : "world",
"dir" : "ltr",
"width" : 27.983999999999998,
"height" : 12,
"transform" : [12, 0, 0, 12, 86.5, 774.1],
"fontName" : "g_font_1"
}
],
"styles" : {
"g_font_1" : {
"fontFamily" : "serif",
"ascent" : 0.891,
"descent" : 0.216
}
}
}
However, I've not been able to find a way to determine the colour of each word. When I render it, it renders properly, so I know the information is in there somewhere. Is there somewhere I can access this?

As Respawned alluded to, there is no easy answer that will work in all cases. That being said, here are two approaches which seem to work fairly well. Both having upsides and downsides.
Approach 1
Internally, the getTextContent method uses whats called an EvaluatorPreprocessor to parse the PDF operators, and maintain the graphic state. So what we can do is, implement a custom EvaluatorPreprocessor, overwrite the preprocessCommand method, and use it to add the current text color to the graphic state. Once this is in place, anytime a new text chunk is created, we can add a color attribute, and set it to the current color state.
The downsides to this approach are:
Requires modifying the PDFJS source code. It also depends heavily on
the current implementation of PDFJS, and could break if this is
changed.
It will fail in cases where the text is used as a path to be filled with an image. In some PDF creators (such as Photoshop), the way it creates colored text is, it first creates a clipping path from all the given text characters, and then paints a solid image over the path. So the only way to deduce the fill-color is by reading the pixel values from the image, which would require painting it to a canvas. Even hooking into paintChar wont be of much help here, since the fill color will only emerge at a later time.
The upside is, its fairly robust and works irrespective of the page background. It also does not require rendering anything to canvas, so it can be done entirely in the background thread.
Code
All the modifications are made in the core/evaluator.js file.
First you must define the custom evaluator, after the EvaluatorPreprocessor definition.
var CustomEvaluatorPreprocessor = (function() {
function CustomEvaluatorPreprocessor(stream, xref, stateManager, resources) {
EvaluatorPreprocessor.call(this, stream, xref, stateManager);
this.resources = resources;
this.xref = xref;
// set initial color state
var state = this.stateManager.state;
state.textRenderingMode = TextRenderingMode.FILL;
state.fillColorSpace = ColorSpace.singletons.gray;
state.fillColor = [0,0,0];
}
CustomEvaluatorPreprocessor.prototype = Object.create(EvaluatorPreprocessor.prototype);
CustomEvaluatorPreprocessor.prototype.preprocessCommand = function(fn, args) {
EvaluatorPreprocessor.prototype.preprocessCommand.call(this, fn, args);
var state = this.stateManager.state;
switch(fn) {
case OPS.setFillColorSpace:
state.fillColorSpace = ColorSpace.parse(args[0], this.xref, this.resources);
break;
case OPS.setFillColor:
var cs = state.fillColorSpace;
state.fillColor = cs.getRgb(args, 0);
break;
case OPS.setFillGray:
state.fillColorSpace = ColorSpace.singletons.gray;
state.fillColor = ColorSpace.singletons.gray.getRgb(args, 0);
break;
case OPS.setFillCMYKColor:
state.fillColorSpace = ColorSpace.singletons.cmyk;
state.fillColor = ColorSpace.singletons.cmyk.getRgb(args, 0);
break;
case OPS.setFillRGBColor:
state.fillColorSpace = ColorSpace.singletons.rgb;
state.fillColor = ColorSpace.singletons.rgb.getRgb(args, 0);
break;
}
};
return CustomEvaluatorPreprocessor;
})();
Next, you need to modify the getTextContent method to use the new evaluator:
var preprocessor = new CustomEvaluatorPreprocessor(stream, xref, stateManager, resources);
And lastly, in the newTextChunk method, add a color attribute:
color: stateManager.state.fillColor
Approach 2
Another approach would be to extract the text bounding boxes via getTextContent, render the page, and for each text, get the pixel values which reside within its bounds, and take that to be the fill color.
The downsides to this approach are:
The computed text bounding boxes are not always correct, and in some cases may even be off completely (eg: rotated text). If the bounding box does not cover at least partially the actual text on canvas, then this method will fail. We can recover from complete failures, by checking that the text pixels have a color variance greater than a threshold. The rationale being, if bounding box is completely background, it will have little variance, in which case we can fallback to a default text color (or maybe even the color of k nearest-neighbors).
The method assumes the text is darker than the background. Otherwise, the background could be mistaken as the fill color. This wont be a problem is most cases, as most docs have white backgrounds.
The upside is, its simple, and does not require messing with the PDFJS source-code. Also, it will work in cases where the text is used as a clipping path, and filled with an image. Though this can become hazy when you have complex image fills, in which case, the choice of text color becomes ambiguous.
Demo
http://jsfiddle.net/x2rajt5g/
Sample PDF's to test:
https://www.dropbox.com/s/0t5vtu6qqsdm1d4/color-test.pdf?dl=1
https://www.dropbox.com/s/cq0067u80o79o7x/testTextColour.pdf?dl=1
Code
function parseColors(canvasImgData, texts) {
var data = canvasImgData.data,
width = canvasImgData.width,
height = canvasImgData.height,
defaultColor = [0, 0, 0],
minVariance = 20;
texts.forEach(function (t) {
var left = Math.floor(t.transform[4]),
w = Math.round(t.width),
h = Math.round(t.height),
bottom = Math.round(height - t.transform[5]),
top = bottom - h,
start = (left + (top * width)) * 4,
color = [],
best = Infinity,
stat = new ImageStats();
for (var i, v, row = 0; row < h; row++) {
i = start + (row * width * 4);
for (var col = 0; col < w; col++) {
if ((v = data[i] + data[i + 1] + data[i + 2]) < best) { // the darker the "better"
best = v;
color[0] = data[i];
color[1] = data[i + 1];
color[2] = data[i + 2];
}
stat.addPixel(data[i], data[i+1], data[i+2]);
i += 4;
}
}
var stdDev = stat.getStdDev();
t.color = stdDev < minVariance ? defaultColor : color;
});
}
function ImageStats() {
this.pixelCount = 0;
this.pixels = [];
this.rgb = [];
this.mean = 0;
this.stdDev = 0;
}
ImageStats.prototype = {
addPixel: function (r, g, b) {
if (!this.rgb.length) {
this.rgb[0] = r;
this.rgb[1] = g;
this.rgb[2] = b;
} else {
this.rgb[0] += r;
this.rgb[1] += g;
this.rgb[2] += b;
}
this.pixelCount++;
this.pixels.push([r,g,b]);
},
getStdDev: function() {
var mean = [
this.rgb[0] / this.pixelCount,
this.rgb[1] / this.pixelCount,
this.rgb[2] / this.pixelCount
];
var diff = [0,0,0];
this.pixels.forEach(function(p) {
diff[0] += Math.pow(mean[0] - p[0], 2);
diff[1] += Math.pow(mean[1] - p[1], 2);
diff[2] += Math.pow(mean[2] - p[2], 2);
});
diff[0] = Math.sqrt(diff[0] / this.pixelCount);
diff[1] = Math.sqrt(diff[1] / this.pixelCount);
diff[2] = Math.sqrt(diff[2] / this.pixelCount);
return diff[0] + diff[1] + diff[2];
}
};

This question is actually extremely hard if you want to do it to perfection... or it can be relatively easy if you can live with solutions that work only some of the time.
First of all, realize that getTextContent is intended for searchable text extraction and that's all it's intended to do.
It's been suggested in the comments above that you use page.getOperatorList(), but that's basically re-implementing the whole PDF drawing model in your code... which is basically silly because the largest chunk of PDFJS does exactly that... except not for the purpose of text extraction but for the purpose of rendering to canvas. So what you want to do is to hack canvas.js so that instead of just setting its internal knobs it also does some callbacks to your code. Alas, if you go this way, you won't be able to use stock PDFJS, and I rather doubt that your goal of color extraction will be seen as very useful for PDFJS' main purpose, so your changes are likely not going to get accepted upstream, so you'll likely have to maintain your own fork of PDFJS.
After this dire warning, what you'd need to minimally change are the functions where PDFJS has parsed the PDF color operators and sets its own canvas painting color. That happens around line 1566 (of canvas.js) in function setFillColorN. You'll also need to hook the text render... which is rather a character renderer at canvas.js level, namely CanvasGraphics_paintChar around line 1270. With these two hooked, you'll get a stream of callbacks for color changes interspersed between character drawing sequences. So you can reconstruct the color of character sequences reasonably easy from this.. in the simple color cases.
And now I'm getting to the really ugly part: the fact that PDF has an extremely complex color model. First there are two colors for drawing anything, including text: a fill color and stroke (outline) color. So far not too scary, but the color is an index in a ColorSpace... of which there are several, RGB being only one possibility. Then there's also alpha and compositing modes, so the layers (of various alphas) can result in a different final color depending on the compositing mode. And the PDFJS has not a single place where it accumulates color from layers.. it simply [over]paints them as they come. So if you only extract the fill color changes and ignore alpha, compositing etc.. it will work but not for complex documents.
Hope this helps.

There's no need to patch pdfjs, the transform property gives the x and y, so you can go through the operator list and find the setFillColor op that precedes the text op at that point.

Related

How to find 5-10 colors which are shades away from a given color, which are all visually distinct (in JavaScript)?

I have a problem where I would like to generate a chart using 5 to 10 variations of 1 primary color, and everything else is a different shade of the main color. The main color is selected by a customer, so this color may be any possible color. It may, for example, be very light, in which case every one of the 5-10 variant colors will be darker than it, or it may be very dark in which case every other variant is lighter than it, or it might be in the middle, so some are darker and some are lighter. It shouldn't matter. Ideally I pass in a given selected color, and it outputs an array of colors which are visually distinct. Something like this:
function getColorPalette(selectedColor, numberOfColorsInOutput = 10) {
// ?
}
I'm just not sure the best approach of figuring out where in the spectrum the color is, and then how to generate the lighter/darker. colors relative to that. Some trickiness involved there.
const givenColor = '#e6ffe6'
getColorPalette(givenColor, 8).map(buildForDOM).forEach(child => {
document.querySelector('#generated').appendChild(child)
})
function getColorPalette(selectedColor, numberOfColorsInOutput = 10) {
const colorObj = tinycolor(selectedColor)
const n = numberOfColorsInOutput
const output = new Array(n)
const fraction = 100 / n
let i = 0
while (i < n) {
output[i] = colorObj.darken(fraction * i).toHex()
i++
}
return output
}
function buildForDOM(color) {
const div = document.createElement('div')
div.style.width = '100px'
div.style.height = '100px'
div.style.background = `#${color}`;
return div
}
<script src="https://unpkg.com/tinycolor2#1.4.1/tinycolor.js"></script>
<div>Given:</div>
<div style="background: #e6ffe6; width: 100px; height: 100px"></div>
<div id="generated">Generated:</div>
This example code generates a few darker ones, but is incorrect as most of the tail is black. It should fill it in so it fills the spectrum with items as far apart as possible (like CSS justify-content: space-between sort of thing, but for the color output). So it somehow needs to figure out what the hue is or something, and it then generates the shades from that.
It doesn't need to be perfect, anything which generates a spectrum of distinct variations on a given hue will do, that covers the full space of shades (from very light to almost black, but not white and black). It doesn't necessarily even need to contain the selected/given color, just use the hue information from it somehow.
For this implementation I used TinyColor (a JS lib), but you can use that or any other color functionality for darkening/lightening things, doesn't matter to me. Just looking for the hex array at the end.

There are many possible approaches, but here is a very basic solution that uses hsl color in calculations and create an array of different lightness in colors (the levels are based on numberOfColorsInOutput).
It uses the same library and only added some conversion to hsl format (the output is kept as hex, in case it is needed to display on page).
A min and max of lightness can be specified for generated colors, so some control can be set to limit the generation of many darker colors.
If needed, further variations in saturation can be added to the calculation.
Example:
const givenColor = "#FF7F50";
const minLight = 0.1;
const maxLight = 0.9;
getColorPalette(givenColor, 8)
.map(buildForDOM)
.forEach((child) => {
document.querySelector("#generated").appendChild(child);
});
function getColorPalette(selectedColor, numberOfColorsInOutput = 10) {
const colorObj = tinycolor(selectedColor);
const n = numberOfColorsInOutput;
const output = new Array(n);
const calcLight = (i) => (i * (maxLight - minLight)) / (n - 1) + minLight;
let i = 0;
while (i < n) {
const baseColor = colorObj.clone().toHsl();
const outputColor = { ...baseColor, l: calcLight(i) };
output[i] = tinycolor(outputColor).toHexString();
i++;
}
return output;
}
function buildForDOM(color) {
const div = document.createElement("div");
div.style.width = "100px";
div.style.height = "100px";
div.style.background = `${color}`;
return div;
}
<script src="https://unpkg.com/tinycolor2#1.4.1/tinycolor.js"></script>
<div>Given:</div>
<div style="background: #FF7F50; width: 100px; height: 100px"></div>
<div id="generated">Generated:</div>
Hope this will help.

Google Sheets. How to get the real range size in pixels

My script converts the selected range into an image, please see. It first creates a public PDF URL and then converts it to PNG.
It works well for small ranges (10-20 rows) and creates a shot including images, charts, sparklines, and formatting.
The problem is with big ranges (100-1000 rows). They contain a border of unknown size and I cannot calculate it.
Heavy borders make rows higher so the image does not fit.
If we have no borders or thin borders, the real image size appears a bit smaller than calculated. This creates an empty space below the image.
My code sample for getting the range size in pixels:
// get row height in pixels
var h = 0;
for (var i = rownum; i <= rownum2; i++) {
if (i <= options.measure_limit) {
size = sheet.getRowHeight(i);
}
h += size
/** manual correction */
if (size === 2) {
h-=1;
} else {
// h -= 0.42; /** TODO → test the range to make it fit any range */
}
if ((i % 50) === 0 && i <= options.measure_limit) {
file.toast(
'Done ' + i + ' rows of ' + rownum2,
'↕📐Measuring height...');
}
}
if (i > options.measure_limit) {
file.toast(
'Estimation: all other rows are the same size',
'↕📐Measuring height...');
}
As you see, I have to loop over all rows which is extremely inefficient. I'd be glad to hear your ideas for code optimization. Now it loops the first 150 rows and next it assumes all other rows have the same height.
Sample Situations
"Small" ranges are that you can see on screen. "Big" ranges have 100+ rows so they do not fit normal screen. As I create screenshots, I tested all possible range sizes.
Case1 - no borders or thin borders
If I select a big range I get the image, and see it has a white space at the bottom. This means the real size of image was slightly smaller than one I get from the Script by calling sheet.getRowHeight(i).
Case1 - heavy borders
If I select a big range I get the image, and see not all rows I've selected are on that image. Some rows at the bottom of the range are missing. This means when I add heavy borders, the real size of rows is bigger than one I get from the Script by calling sheet.getRowHeight(i).
Conclusion
I'd be glad to hear any ideas including JavaScript hacks to remove empty space below the image. If it is currently not possible, please also answer with links to docs.

I believe your goal is as follows.
You want to export the range as an image using Google Apps Script and Javascript.
In order to achieve this, in this question, you want to calculate the row height of the selected cell range.
Issue and workaround:
As our discussions in the comment, in the current stage, when the correct row height of the cell range is trying to be obtained, there are several problems as follows.
When the border is used for the cells, it seems that the row height + the border size is different from the exported result. Ref
Pixel size might not be changed linearly with the value of row height and border size. Ref
When I tested the cell size including the borders, I thought that the tendency of change of size might be different between height and width. Ref
When the row height is the default (21 from getRowHeight) and the text font size in the cell is increased, the value retrieved by getRowHeight is not changed from 21. Ref
There is also issue with wrapping text inside a cell which on my experience also causes errors in a pixel size of cell. Ref
From your question, when the selected cell range is large, the number of pages is more than 2. In this case, all pages cannot be correctly merged as an image.
From the above situation, I'm worried that obtaining the correct size of the selected cells might be difficult. So, I proposed to process this as image processing. Ref I thought that when this process is run with the image processing, the above issues might be able to be avoided.
But, unfortunately, in order to process this as image processing, there is no built-in method in Google Apps Script. But, fortunately, in your situation, it seems that Javascript can be used in a dialog. So, I created a Javascript library for achieving this process as the image processing. Ref
When this Javascript library is used, the sample demonstration is as follows.
Usage:
1. Prepare a Spreadsheet.
Please create a new Spreadsheet and put several values to the cells.
2. Sample script.
Please copy and paste the following script to the script editor of Spreadsheet.
Google Apps Script side: Code.gs
function getActiveRange_(ss, borderColor) {
const space = 5;
const sheet = ss.getActiveSheet();
const range = sheet.getActiveRange();
const obj = { startRow: range.getRow(), startCol: range.getColumn(), endRow: range.getLastRow(), endCol: range.getLastColumn() };
const temp = sheet.copyTo(ss);
const r = temp.getDataRange();
r.copyTo(r, { contentsOnly: true });
temp.insertRowAfter(obj.endRow).insertRowBefore(obj.startRow).insertColumnAfter(obj.endCol).insertColumnBefore(obj.startCol);
obj.startRow += 1;
obj.endRow += 1;
obj.startCol += 1;
obj.endCol += 1;
temp.setRowHeight(obj.startRow - 1, space).setColumnWidth(obj.startCol - 1, space).setRowHeight(obj.endRow + 1, space).setColumnWidth(obj.endCol + 1, space);
const maxRow = temp.getMaxRows();
const maxCol = temp.getMaxColumns();
if (obj.startRow + 1 < maxRow) {
temp.deleteRows(obj.endRow + 2, maxRow - (obj.endRow + 1));
}
if (obj.startCol + 1 < maxCol) {
temp.deleteColumns(obj.endCol + 2, maxCol - (obj.endCol + 1));
}
if (obj.startRow - 1 > 1) {
temp.deleteRows(1, obj.startRow - 2);
}
if (obj.startCol - 1 > 1) {
temp.deleteColumns(1, obj.startCol - 2);
}
const mRow = temp.getMaxRows();
const mCol = temp.getMaxColumns();
const clearRanges = [[1, 1, mRow], [1, obj.endCol, mRow], [1, 1, 1, mCol], [obj.endRow, 1, 1, mCol]];
temp.getRangeList(clearRanges.map(r => temp.getRange(...r).getA1Notation())).clear();
temp.getRange(1, 1, 1, mCol).setBorder(true, null, null, null, null, null, borderColor, SpreadsheetApp.BorderStyle.SOLID);
temp.getRange(mRow, 1, 1, mCol).setBorder(null, null, true, null, null, null, borderColor, SpreadsheetApp.BorderStyle.SOLID);
SpreadsheetApp.flush();
return temp;
}
function getPDF_(ss, temp) {
const url = ss.getUrl().replace(/\/edit.*$/, '')
+ '/export?exportFormat=pdf&format=pdf'
// + '&size=20x20' // If you want to increase the size of one page, please use this. But, when the page size is increased, the process time becomes long. Please be careful about this.
+ '&scale=2'
+ '&top_margin=0.05'
+ '&bottom_margin=0'
+ '&left_margin=0.05'
+ '&right_margin=0'
+ '&sheetnames=false'
+ '&printtitle=false'
+ '&pagenum=UNDEFINED'
+ 'horizontal_alignment=LEFT'
+ '&gridlines=false'
+ "&fmcmd=12"
+ '&fzr=FALSE'
+ '&gid=' + temp.getSheetId();
const res = UrlFetchApp.fetch(url, { headers: { authorization: "Bearer " + ScriptApp.getOAuthToken() } });
return "data:application/pdf;base64," + Utilities.base64Encode(res.getContent());
}
// Please run this function.
function main() {
const ss = SpreadsheetApp.getActiveSpreadsheet();
const temp = getActiveRange_(ss, "#000000");
const base64 = getPDF_(ss, temp);
const htmltext = HtmlService.createTemplateFromFile('index').evaluate().getContent();
htmltext = htmltext.replace(/IMPORT_PDF_URL/m, base64);
const html = HtmlService.createTemplate(htmltext).evaluate().setSandboxMode(HtmlService.SandboxMode.NATIVE);
SpreadsheetApp.getUi().showModalDialog(html, 'sample');
ss.deleteSheet(temp);
}
function saveFile(data) {
const blob = Utilities.newBlob(Utilities.base64Decode(data), MimeType.PNG, "sample.png");
return DriveApp.createFile(blob).getId();
}
HTML & Javascript side: index.gs
Here, I used a Javascript library of CropImageByBorder_js for processing this as the image processing.
<script src="//mozilla.github.io/pdf.js/build/pdf.js"></script>
<script src="https://cdn.jsdelivr.net/gh/tanaikech/CropImageByBorder_js#latest/cropImageByBorder_js.min.js"></script>
<canvas id="canvas"></canvas>
<script>
var pdfjsLib = window['pdfjs-dist/build/pdf'];
pdfjsLib.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';
const base64 = 'IMPORT_PDF_URL'; //Loaading the PDF from URL
const cvs = document.getElementById("canvas");
pdfjsLib.getDocument(base64).promise.then(pdf => {
const {numPages} = pdf;
if (numPages > 1) {
throw new Error("Sorry. In the current stage, this sample script can be used for one page of PDF data. So, please change the selected range to smaller.")
}
pdf.getPage(1).then(page => {
const viewport = page.getViewport({scale: 2});
cvs.height = viewport.height;
cvs.width = viewport.width;
const ctx = cvs.getContext('2d');
const renderContext = { canvasContext: ctx, viewport: viewport };
page.render(renderContext).promise.then(async function() {
const obj = { borderColor: "#000000", base64Data: cvs.toDataURL() };
const base64 = await CropImageByBorder.getInnerImage(obj).catch(err => console.log(err));
const img = new Image();
img.src = base64;
img.onload = function () {
cvs.width = img.naturalWidth;
cvs.height = img.naturalHeight;
ctx.drawImage(img, 0, 0);
}
google.script.run.withSuccessHandler(id => console.log(id)).saveFile(base64.split(",").pop());
});
});
});
</script>
3. Testing
When you test this script, please select the cells and run main(). By this, the selected cells are exported as an image (PNG) to the root folder as follows. In this case, you can see the above demonstration.
4. Flow.
In this sample script, the following flow is used.
Manually select the cells, and run the script of main().
At the script, the selected cells enclosed by the single row and column are created as a temporal sheet.
Export the temporal sheet as a PDF data as base64. Here, the PDF data is sent to Javascript side.
Convert 1st page of PDF data to an image using PDF.js.
Cropping the selected cells using CropImageByBorder_js, and return the result image to Google Apps Script side.
Save the image as a file to Google Drive.
LIMITATION:
In this sample script, it supposes that the selected range is put on one PDF page. So, when you select a large range, when the number of PDF pages is more than 2, unfortunately, this script cannot be used. So, please be careful about this.
And also, in this case, Javascript is used on a dialog. So, when you use this sample script, it is required to open the Spreadsheet and select the cells and run the script.
Note:
In your showing script, in order to use a created PDF data with PDF.js, the Spreadsheet is required to be publicly shared. But, in the case of PDF.js, it seems that the data URL can be directly used. So in this sample script, the created PDF is used as the data URL (base64). By this, it is not required to publicly share the Spreadsheet.
References:
PDF.js
CropImageByBorder_js

Rotating SVG path points for better morphing

I am using a couple of functions from Snap.SVG, mainly path2curve and the functions around it to build a SVG morph plugin.
I've setup a demo here on Codepen to better illustrate the issue. Basically morphing shapes simple to complex and the other way around is working properly as of Javascript functionality, however, the visual isn't very pleasing.
The first shape morph looks awful, the second looks a little better because I changed/rotated it's points a bit, but the last example is perfect.
So I need either a better path2curve or a function to prepare the path string before the other function builds the curves array. Snap.SVG has a function called getClosest that I think may be useful but it's not documented.
There isn't any documentation available on this topic so I would appreciate any suggestion/input from RaphaelJS / SnapSVG / d3.js / three/js developers.

I've provided a runnable code snippet below that uses Snap.svg and that I believe demonstrates one solution to your problem. With respect to trying to find the best way to morph a starting shape into an ending shape, this algorithm essentially rotates the points of the starting shape one position at a time, sums the squares of the distances between corresponding points on the (rotated) starting shape and the (unchanged) ending shape, and finds the minimum of all those sums. i.e. It's basically a least squares approach. The minimum value identifies the rotation that, as a first guess, will provide the "shortest" morph trajectories. In spite of these coordinate reassignments, however, all 'rotations' should result in visually identical starting shapes, as required.
This is, of course, a "blind" mathematical approach, but it might help provide you with a starting point before doing manual visual analysis. As a bonus, even if you don't like the rotation that the algorithm chose, it also provides the path 'd' attribute strings for all the other rotations, so some of that work has already been done for you.
You can modify the snippet to provide any starting and ending shapes you want. The limitations are as follows:
Each shape should have the same number of points (although the point types, e.g. 'lineto', 'cubic bezier curve', 'horizontal lineto', etc., can completely vary)
Each shape should be closed, i.e. end with "Z"
The morph desired should involve only translation. If scaling or rotation is desired, those should be applied after calculating the morph based only on translation.
By the way, in response to some of your comments, while I find Snap.svg intriguing, I also find its documentation to be somewhat lacking.
Update: The code snippet below works in Firefox (Mac or Windows) and Safari. However, Chrome seems to have trouble accessing the Snap.svg library from its external web site as written (<script...github...>). Opera and Internet Explorer also have problems. So, try the snippet in the working browsers, or try copying the snippet code as well as the Snap library code to your own computer. (Is this an issue of accessing third party libraries from within the code snippet? And why browser differences? Insightful comments would be appreciated.)
var
s = Snap(),
colors = ["red", "blue", "green", "orange"], // colour list can be any length
staPath = s.path("M25,35 l-15,-25 C35,20 25,0 40,0 L80,40Z"), // create the "start" shape
endPath = s.path("M10,110 h30 l30,20 C30,120 35,135 25,135Z"), // create the "end" shape
staSegs = getSegs(staPath), // convert the paths to absolute values, using only cubic bezier
endSegs = getSegs(endPath), // segments, & extract the pt coordinates & segment strings
numSegs = staSegs.length, // note: the # of pts is one less than the # of path segments
numPts = numSegs - 1, // b/c the path's initial 'moveto' pt is also the 'close' pt
linePaths = [],
minSumLensSqrd = Infinity,
rotNumOfMin,
rotNum = 0;
document.querySelector('button').addEventListener('click', function() {
if (rotNum < numPts) {
linePaths.forEach(function(linePath) {linePath.remove();}); // erase any previous coloured lines
var sumLensSqrd = 0;
for (var ptNum = 0; ptNum < numPts; ptNum += 1) { // draw new lines, point-to-point
var linePt1 = staSegs[(rotNum + ptNum) % numPts]; // the new line begins on the 'start' shape
var linePt2 = endSegs[ ptNum % numPts]; // and finished on the 'end' shape
var linePathStr = "M" + linePt1.x + "," + linePt1.y + "L" + linePt2.x + "," + linePt2.y;
var linePath = s.path(linePathStr).attr({stroke: colors[ptNum % colors.length]}); // draw it
var lineLen = Snap.path.getTotalLength(linePath); // calculate its length
sumLensSqrd += lineLen * lineLen; // square the length, and add it to the accumulating total
linePaths[ptNum] = linePath; // remember the path to facilitate erasing it later
}
if (sumLensSqrd < minSumLensSqrd) { // keep track of which rotation has the lowest value
minSumLensSqrd = sumLensSqrd; // of the sum of lengths squared (the 'lsq sum')
rotNumOfMin = rotNum; // as well as the corresponding rotation number
}
show("ROTATION OF POINTS #" + rotNum + ":"); // display info about this rotation
var rotInfo = getRotInfo(rotNum);
show(" point coordinates: " + rotInfo.ptsStr); // show point coordinates
show(" path 'd' string: " + rotInfo.dStr); // show 'd' string needed to draw it
show(" sum of (coloured line lengths squared) = " + sumLensSqrd); // the 'lsq sum'
rotNum += 1; // analyze the next rotation of points
} else { // once all the rotations have been analyzed individually...
linePaths.forEach(function(linePath) {linePath.remove();}); // erase any coloured lines
show(" ");
show("BEST ROTATION, i.e. rotation with lowest sum of (lengths squared): #" + rotNumOfMin);
// show which rotation to use
show("Use the shape based on this rotation of points for morphing");
$("button").off("click");
}
});
function getSegs(path) {
var absCubDStr = Snap.path.toCubic(Snap.path.toAbsolute(path.attr("d")));
return Snap.parsePathString(absCubDStr).map(function(seg, segNum) {
return {x: seg[segNum ? 5 : 1], y: seg[segNum ? 6 : 2], seg: seg.toString()};
});
}
function getRotInfo(rotNum) {
var ptsStr = "";
for (var segNum = 0; segNum < numSegs; segNum += 1) {
var oldSegNum = rotNum + segNum;
if (segNum === 0) {
var dStr = "M" + staSegs[oldSegNum].x + "," + staSegs[oldSegNum].y;
} else {
if (oldSegNum >= numSegs) oldSegNum -= numPts;
dStr += staSegs[oldSegNum].seg;
}
if (segNum !== (numSegs - 1)) {
ptsStr += "(" + staSegs[oldSegNum].x + "," + staSegs[oldSegNum].y + "), ";
}
}
ptsStr = ptsStr.slice(0, ptsStr.length - 2);
return {ptsStr: ptsStr, dStr: dStr};
}
function show(msg) {
var m = document.createElement('pre');
m.innerHTML = msg;
document.body.appendChild(m);
}
pre {
margin: 0;
padding: 0;
}
<script src="//cdn.jsdelivr.net/snap.svg/0.4.1/snap.svg-min.js"></script>
<p>Best viewed on full page</p>
<p>Coloured lines show morph trajectories for the points for that particular rotation of points. The algorithm seeks to optimize those trajectories, essentially trying to find the "shortest" cumulative routes.</p>
<p>The order of points can be seen by following the colour of the lines: red, blue, green, orange (at least when this was originally written), repeating if there are more than 4 points.</p>
<p><button>Click to show rotation of points on top shape</button></p>

Canvas getImageData() For optimal performance. To pull out all data or one at a time?

I need to scan through every pixel in a canvas image and do some fiddling with the colors etc. For optimal performance, should I grab all the data in one go and work on it through the array? Or should I call each pixel as I work on it.
So basically...
data = context.getImageData(x, y, height, width);
VS
data = context.getImageData(x, y, 1, 1); //in a loop height*width times.

You'll get much higher performances by grabbing the image all at once since :
a) a (contiguous) acces to an array is way faster than a function call.
b) especially when this function isa method of a DOM object having some overhead.
c) and there might be buffer refresh issues that might delay response (if canvas is
on sight... or not depending on double buffering implementation).
So go for a one-time grab.
I'll suggest you look into Javascript Typed Arrays to get the most of the
imageData result.
If i may quote myself, look at how you can handle pixels fast in this old post of mine
(look after 2) ):
Nice ellipse on a canvas?
(i quoted the relevant part below : )
You can get a UInt32Array view on your ImageData with :
var myGetImageData = myTempCanvas.getImageData(0,0,sizeX, sizeY);
var sourceBuffer32 = new Uint32Array(myGetImageData.data.buffer);
then sourceBuffer32[i] contains Red, Green, Blue, and transparency packed into one unsigned 32 bit int. Compare it to 0 to know if pixel is non-black ( != (0,0,0,0) )
OR you can be more precise with a Uint8Array view :
var myGetImageData = myTempCanvas.getImageData(0,0,sizeX, sizeY);
var sourceBuffer8 = new Uint8Array(myGetImageData.data.buffer);
If you deal only with shades of grey, then R=G=B, so watch for
sourceBuffer8[4*i]>Threshold
and you can set the i-th pixel to black in one time using the UInt32Array view :
sourceBuffer32[i]=0xff000000;
set to any color/alpha with :
sourceBuffer32[i]= (A<<24) | (B<<16) | (G<<8) | R ;
or just to any color :
sourceBuffer32[i]= 0xff000000 | (B<<16) | (G<<8) | R ;
(be sure R is rounded).
Listening to #Ken's comment, yes endianness can be an issue when you start fighting with bits 32 at a time.
Most computer are using little-endian, so RGBA becomes ABGR when dealing with them 32bits a once.
Since it is the vast majority of systems, if dealing with 32bit integer assume this is the case,
and you can -for compatibility- reverse your computation before writing the 32 bits results on Big endian systems.
Let me share those two functions :
function isLittleEndian() {
// from TooTallNate / endianness.js. https://gist.github.com/TooTallNate/4750953
var b = new ArrayBuffer(4);
var a = new Uint32Array(b);
var c = new Uint8Array(b);
a[0] = 0xdeadbeef;
if (c[0] == 0xef) { isLittleEndian = function() {return true }; return true; }
if (c[0] == 0xde) { isLittleEndian = function() {return false }; return false; }
throw new Error('unknown endianness');
}
function reverseUint32 (uint32) {
var s32 = new Uint32Array(4);
var s8 = new Uint8Array(s32.buffer);
var t32 = new Uint32Array(4);
var t8 = new Uint8Array(t32.buffer);
reverseUint32 = function (x) {
s32[0] = x;
t8[0] = s8[3];
t8[1] = s8[2];
t8[2] = s8[1];
t8[3] = s8[0];
return t32[0];
}
return reverseUint32(uint32);
};

Additionally to what GameAlchemist said, if you want to get or set all the colors of a pixel simultaneously, but you don't want to check endianness, you can use a DataView:
var data = context.getImageData(0, 0, canvas.width, canvas.height);
var view = new DataView(data.data.buffer);
// Read or set pixel (x,y) as #RRGGBBAA (big endian)
view.getUint32(4 * (x + y*canvas.width));
view.setUint32(4 * (x + y*canvas.width), 0xRRGGBBAA);
// Read or set pixel (x,y) as #AABBGGRR (little endian)
view.getUint32(4 * (x + y*canvas.width), true);
view.setUint32(4 * (x + y*canvas.width), 0xAABBGGRR, true);
// Save changes
ctx.putImageData(data, 0, 0);

It depends on what exactly you're doing, but I'd suggest grabbing it all at once, and then looping through it.
Grabbing it all at once is faster than grabbing it pixel by pixel, since searching through an array is a lot faster than searching through a canvas, once for each pixel.
If you're really in need of speed, look into web workers. You can set each one to grab a specific section of the canvas, and since they can run simultaneously, they'll make much better use out of your CPU.
getImageData() isn't really slow enough for you to notice the difference if you were to grab it all at once or individually, in my experiences using the function.

Getting the size of text without using the boundingBox function in Raphael javascript library

I am trying to create some buttons with text in javascript using the Rahpael library. I would like to know the size of the styled text, before drawing to avoid that so I can create proper background (the button). Also I would like to avoid drawing the text outside of the canvas/paper (the position of the text is the position of its center).
I can use Raphaels getBBox() method, but I have to create (draw) the text in first place to do this. So I draw text to get the size to be able to draw it on the right position. This is ugly. So I am searching for some general method to estimate the metrics of styled text for given font, font-size, family ...
There is a possibility to do this using the HTML5 canvas http://www.html5canvastutorials.com/tutorials/html5-canvas-text-metrics/ but Raphael do not use canvas. Is there any possibility to get the text metrics using Raphael or plain Javascript?

There are a couple of ways to slice up this cat. Two obvious ones come readily to mind.
Out-of-view Ruler Technique
This one's the easiest. Just create a text element outside of the canvas's viewBox, populate it with your font information and text, and measure it.
// set it up -- put the variable somewhere globally or contextually accessible, as necessary
var textRuler = paper.text( -10000, -10000, '' ).attr( { fill: 'none', stroke: 'none' } );
function getTextWidth( text, fontFamily, fontSize )
{
textResult.attr( { text: text, 'font-family': fontFamily, 'font-size': fontSize } );
var bbox = textResult.getBBox();
return bbox.width;
}
It's not elegant by any stretch of the imagination. But it will do you want with relatively little overhead and no complexity.
Cufonized Font
If you were willing to consider using a cufonized font, you could calculate the size of a given text string without needing to mess with the DOM at all. In fact, this is probably approximately what the canvas's elements measureText method does behind the scenes. Given an imported font, you would simply do something like this (consider this protocode!)
// font should be the result of a call to paper.[getFont][2] for a cufonized font
function getCufonWidth( text, font, fontSize )
{
var textLength = text.length, cufonWidth = 0;
for ( var i = 0; i < textLength; i++ )
{
var thisChar = text[i];
if ( ! font.glyphs[thisChar] || ! font.glyphs[thisChar].w )
continue; // skip missing glyphs and/or 0-width entities
cufonWidth += font.glyphs[thisChar].w / font.face['units-per-em'] * fontSize;
}
return cufonWidth;
}
I really like working with cufonized fonts -- in terms of their capacity for being animated in interesting ways, they are far more useful than text. But this second approach may be more additional complexity than you need or want.

I know it seems sloppy, but you should have no problem drawing the text, measuring it, then drawing the button and moving the label. To be safe, just draw it off the screen:
var paper = Raphael(0, 0, 500, 500);
var text = paper.text(-100, -100, "My name is Chris");
//outputs 80 12
console.log(text.getBBox().width, text.getBBox().height);
If this REALLY offends your sensibilities, however -- and I would understand! -- you can easily generate an object to remember the width off each character for a given font:
var paper = Raphael(0, 0, 500, 500),
alphabet = "abcdefghijklmnopqrstuvwxyz";
font = "Arial",
charLengths = {},
ascii_lower_bound = 32,
ascii_upper_bound = 126;
document.getElementById("widths").style.fontFamily = font;
for (var c = ascii_lower_bound; c <= ascii_upper_bound; c += 1) {
var letter = String.fromCharCode(c);
var L = paper.text(-50, -50, letter).attr("font-family", font);
charLengths[letter] = L.getBBox().width;
}
//output
for (var key in charLengths) if (charLengths.hasOwnProperty(key)) {
var row = document.createElement("tr");
row.innerHTML = "<td>" + key + "</td><td>" + charLengths[key] + "</td>";
document.getElementById("widths").appendChild(row);
}

Develop Reference

JavaScript is the programming language of the Web.