How to use dynamic programming through Levenshtein algorithm (in Javascript) - javascript

I'm trying to understand dynamic programming through Levenshtein algorithm, but I have been stuck on this for a few hours now. I know my attempt at the following problem is the 'brute force' one. How would I use "dynamic programming" to change my approach? I'm pretty lost....
Problem: Given two strings, s and t, with lengths of n and m, create a
function that returns one of the following strings: "insert C" if
string t can be obtained from s by inserting character C "delete C"
(same logic as above) "swap c d" if string t can be obtained from
string s by swapping two adjacent characters (c and d) which appear in
that order in the original string. "Nothing" if no operation is
needed "impossible" if none of the above works ie LevenShtein distance is greater than 1.
Here is my brute-force attempt. The "tuple" variable is misnamed, as I originally wanted to push the indices and values to the matrix but got stuck on that.
/**
 * Work-in-progress classifier built on a full Levenshtein distance matrix.
 *
 * Builds d[i][j] = edit distance between the first i characters of str1 and
 * the first j characters of str2 (insert, delete and substitute each cost 1).
 *
 * Returns "IMPOSSIBLE" when the distance is greater than 1 and "NOTHING" when
 * it is 0. When the distance is exactly 1 the final branch is still empty, so
 * the function falls off the end and returns undefined.
 *
 * NOTE(review): swapping two adjacent, different characters has distance 2 in
 * this matrix, so such inputs are reported as "IMPOSSIBLE".
 */
function levenshtein(str1, str2) {
var m = str1.length,
n = str2.length,
d = [],
i, j,
vals = [],
vals2 = [];
// First column of the matrix: d[i][0] = i. The loop runs up to i === m, so the
// last str1[i] is undefined; it is removed again by the filter below.
for (i = 0; i <= m ; i++) {
var tuple = [str1[i]];
//console.log(tuple);
// console.log(tuple);
d[i] = [i];
// console.log(str1[i]);
vals.push(tuple);
}
// Flatten the one-element arrays and drop falsy entries: vals ends up holding
// the characters of str1. It is only logged, never used by the matrix code.
vals = [].concat.apply([], vals);
vals = vals.filter(function(n){ return n; });
console.log(vals);
// First row of the matrix: d[0][j] = j; vals2 collects the characters of str2.
for (j = 0; j <= n; j++) {
d[0][j] = j;
var tuple2 = [str2[j]];
// console.log(tuple2);
vals2.push(tuple2);
// console.log(vals2);
}
vals2 = [].concat.apply([], vals2);
vals2 = vals2.filter(function(n){ return n ;});
console.log(vals2);
// Fill the rest of the matrix: equal characters carry the diagonal value over,
// otherwise take the cheapest neighbouring cell plus one edit.
for (j = 1; j <= n; j++) {
for (i = 1; i <= m; i++) {
if (str1[i - 1] == str2[j - 1]) d[i][j] = d[i - 1][j - 1];
else d[i][j] = Math.min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]) + 1;
}
}
// Bottom-right cell is the distance between the complete strings.
var val = d[m][n];
// console.log(d);
if(val > 1){
return "IMPOSSIBLE";
}
if(val === 0){
return "NOTHING";
}
//console.log(d);
// Unfinished: nothing is returned from this branch yet.
if(val === 1){
//find the missing element between the vals
//return "INSERT " + missing element
//find the extra element
//return "DELETE + " extra element
//find the out of place element and swap with another
}
}
// "kitten" -> "mitten" is a single substitution (distance 1), so this call
// currently logs undefined.
console.log(levenshtein("kitten", "mitten"));
// console.log(levenshtein("stop", "tops"));
// console.log(levenshtein("blahblah", "blahblah"));

The problem as described cannot be optimized using dynamic programming because it only involves a single decision, not a series of decisions.
Note that the problem specifically states that you should return "impossible" when the Levenshtein distance is greater than 1, i.e., the strings can't be made equal through a single operation. You need to be searching for a sequence of zero or more operations that cumulatively result in the optimal solution if you want to apply dynamic programming. (This is what the dynamic programming wikipedia article is talking about when it says you need "optimal substructure" and "overlapping subproblems" for dynamic programming to be applicable.)
If you change the problem to calculate the full edit distance between two strings, then you can optimize using dynamic programming because you can reuse the result of choosing to do certain operations at a particular location in the string in order to reduce the complexity of the search.
Your current solution looks overly complex for the given problem. Below is a simpler approach you can study. This solution takes advantage of the fact that you can do at most one operation, and that you can infer which operation to attempt from the difference between the lengths of the two strings. We also know that it only makes sense to try the given operation at the first point where the two strings differ, rather than at every position.
/**
 * Names the single edit that turns `s` into `t`, if there is one.
 *
 * The length difference decides which edit is worth trying (same length: swap
 * of two adjacent characters, one longer: delete, one shorter: insert), and
 * the edit is only attempted at the first index where the strings disagree.
 *
 * @param {string} s - original string
 * @param {string} t - target string
 * @returns {string} "nothing", "swap c d", "delete c", "insert c" or "impossible"
 */
function lev(s, t) {
  if (s == t) return "nothing";

  const lengthGap = s.length - t.length;
  const source = s.split("");
  const target = t.split("");

  // Index of the first disagreement; an insert has to scan up to the end of t.
  const scanLimit = lengthGap === -1 ? t.length : s.length;
  let at = 0;
  while (at < scanLimit && source[at] === target[at]) {
    at += 1;
  }

  switch (lengthGap) {
    case 0: {
      // Exchange the mismatching character with its right-hand neighbour.
      const swapped = source.slice();
      swapped[at] = source[at + 1];
      swapped[at + 1] = source[at];
      if (swapped.join("") == t) {
        return `swap ${source[at]} ${source[at + 1]}`;
      }
      break;
    }
    case 1: {
      // Drop the mismatching character from s.
      const shortened = source.filter((_, index) => index !== at);
      if (shortened.join("") == t) {
        return `delete ${source[at]}`;
      }
      break;
    }
    case -1: {
      // Put t's character into s at the mismatch position.
      const extended = [...source.slice(0, at), target[at], ...source.slice(at)];
      if (extended.join("") == t) {
        return `insert ${target[at]}`;
      }
      break;
    }
    default:
      break;
  }

  // More than one edit would be needed.
  return "impossible";
}
// Output helper: appends `msg` as the text of a new <div> at the end of <body>
// (relies on the jQuery script loaded by this snippet).
function out(msg) {
  const page = $("body");
  page.append($("<div/>").text(msg));
}

// Tests: one output line per [source, target] pair, in order.
[
  ["kitten", "mitten"],
  ["kitten", "kitten"],
  ["kitten", "kitetn"],
  ["kiten", "kitten"],
  ["kitten", "kittn"],
].forEach(([from, to]) => {
  out(lev(from, to));
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

Related

Optimizing node.js solution for HackerRank QHEAP1

Hi I'm trying to familiarize myself a bit better with Heaps so wanted to try and implement a solution to HackerRanks>Practice>Data Structures>Heaps>QHEAP1 using primitives, however I'm getting a timeout error for two of the tests.
A quick summary: I need to be able to parse a standardized input and handle the following 3 types of queries:
Add an element to the heap.
Delete a specific element from the heap.
Print the minimum of all the elements in the heap.
I'm wondering where this could be optimized? From what I can tell my del() will be performed in O(n) since I need to search for the element provided.
// Searches linearly for the first element equal to x and deletes it from the
// heap by overwriting it with the last element and shrinking the array.
// NOTE(review): only downHeap runs afterwards; if the moved element is smaller
// than its new parent nothing moves it up - confirm whether that can occur.
function del(arr, x){
let i = 0;
let found = false;
let n = arr.length;
// i is incremented once more after the match, so on exit the match sits at
// arr[i-1]; i itself is the 1-based position that downHeap expects.
while(!found && i < n){
if(arr[i] == x) found = true;
i++;
}
if(found){
arr[i-1] = arr[n-1]; // take the last element and overwrite to delete
arr.length = n - 1; // shorten array
downHeap(arr, i); // perform downHeap operation from the (1-based) position deleted
}
}
// NOTE: customized for minHeap due to requirement to print minimum value
// Sifts the element at 1-based position t down while a child is smaller.
function downHeap(arr, t){
// use array as binary tree - next index looking down is double current index
// NOTE: i and t are 1 indexed for heap lookahead
let i = 2 * t;
// NOTE(review): i is 1-based here, so i === arr.length still names an existing
// left child (arr[i-1]); this guard returns without examining it.
if(i >= arr.length) return; // no more room
// checks if right child is smallest - if so updates index to right child
// NOTE(review): the right child is arr[i] (1-based i + 1) and exists whenever
// i < arr.length; the stricter bound skips it when it is the last element.
if(i < arr.length - 1 && arr[i - 1] > arr[i]) i = i + 1;
// if lower element is smaller than current element, swap em
if(arr[i-1] < arr[t-1]){
swap(arr, i-1, t-1);
downHeap(arr,i); // downHeap again at the next level
}
}
// insert x into heap: append it, then sift it up from the last (1-based) position
function insert(arr, x){
const n = arr.length;
arr.length = n + 1; // increasing array size
arr[n] = x; // adding el to end of array
upHeap(arr, arr.length)
}
//NOTE: customized as minHeap due to requirement to print minimum value.
// Sifts the element at 1-based position t up while its parent is larger.
function upHeap(arr, t){
// using array as binary tree - looking up - parent is half of current index
const i = Math.floor(t/2);
// if we've hit zero gone too far - NOTE: i, and t are 1 indexed for heap reference
// also nothing to do if parent is smaller than (or equal to) the current element
if(i == 0 || arr[i-1] <= arr[t-1]) return;
// child is smaller than parent swap and upHeap from parent
swap(arr, t-1, i-1)
upHeap(arr, i)
}
// Exchanges the elements stored at indexes l and r of arr, in place.
function swap(arr, l, r){
  [arr[l], arr[r]] = [arr[r], arr[l]];
}
PS. as a side question, I'm kind of switching between a 1 indexed for heap operations, and a 0 index for array operations (e.g. you'll notices a lot of i-1 statements inside the up and downHeap methods) - wondering if there's a smarter way of having done that?
Support Code:
/**
 * Runs every heap query found in the raw stdin text.
 *
 * The first line holds the number of queries; each following line is
 * "1 v" (insert v), "2 v" (delete v) or "3" (print the current minimum).
 *
 * @param {string} input - complete contents of stdin
 */
function processData(input) {
  const inputs = input.split('\n');
  const n = Number.parseInt(inputs[0], 10);
  const arr = [];
  for (let i = 1; i <= n; i++) {
    const line = inputs[i];
    // Stop quietly when fewer query lines arrive than were announced.
    if (line === undefined) break;
    // trim() removes the "\r" left behind by CRLF line endings.
    const query = line.trim().split(' ');
    const op = query[0];
    if (op === "1") {
      insert(arr, Number.parseInt(query[1], 10));
    } else if (op === "2") {
      del(arr, Number.parseInt(query[1], 10));
    } else if (op === "3") {
      console.log(arr[0]);
    } else {
      console.log("Error reading op");
    }
  }
}

// Buffer all of stdin, then process it in one go once the stream ends.
process.stdin.resume();
process.stdin.setEncoding("ascii");
let _input = "";
process.stdin.on("data", (chunk) => {
  _input += chunk;
});
process.stdin.on("end", () => {
  processData(_input);
});
Example Input
22
1 286789035
1 255653921
1 274310529
1 494521015
3
2 255653921
2 286789035
3
1 236295092
1 254828111
2 254828111
1 465995753
1 85886315
1 7959587
1 20842598
2 7959587
3
1 -51159108
3
2 -51159108
3
1 789534713
The code is indeed confusing because (as you write) it sometimes uses 1-based indexes, while other times it uses them as 0-based.
For instance, in downHeap, the following line shows that you intend t and i to be 1-based indexes, since you convert them on-the-fly to 0-based indexes:
if(arr[i-1] < arr[t-1])
...but then in this line, you treat i as a 0-based index (arr.length would be an admissible value of i if it is 1-based):
if(i >= arr.length) return; // no more room
And the same mix-up happens here:
if(i < arr.length - 1 && arr[i - 1] > arr[i]) i = i + 1;
As a consequence, you will get wrong results.
It is confusing to work with 1-based indexes when JavaScript is expecting 0-based indexes everywhere indexes are used. I didn't feel the courage to further debug your code in that state. I would suggest to use 0-based indexes throughout your code, which means that the left child of a value at index t is at index t*2+1.
Some other remarks:
To find the index where a value occurs in the heap, you don't have to write an explicit loop. Just use the built-in indexOf method.
Recursion is nice, but the downHeap and upHeap functions will work more efficiently with an iterative method, because then -- instead of swapping values -- you can take a copy of the value to bubble up or down, and then only move (not swap) the conflicting values to finally insert the copied value in its right place. This will perform fewer assignments than swapping repeatedly.
To insert a value you can just use the push method instead of updating the length "manually".
Instead of Math.floor for the integer division by 2, you can use a shift operator.
So here is a correction of your code:
/**
 * Removes one occurrence of `x` from the min-heap stored in `arr`.
 * Does nothing when `x` is not present.
 *
 * The last element is moved into the freed slot. Because that element comes
 * from an unrelated part of the tree it can be smaller than its new parent
 * as well as larger than its new children, so it is sifted in whichever
 * direction restores the heap order.
 *
 * @param {number[]} arr - 0-based binary min-heap, modified in place
 * @param {number} x - value to remove
 */
function del(arr, x) {
  const i = arr.indexOf(x); // built-in search instead of an explicit loop
  if (i < 0) return;
  const value = arr.pop();
  if (i === arr.length) return; // the removed element was the last one
  arr[i] = value;
  const parent = (i - 1) >> 1;
  if (i > 0 && arr[parent] > value) {
    upHeap(arr, i);
  } else {
    downHeap(arr, i);
  }
}

/**
 * Sifts the value at index `t` down until both children are >= it.
 * Children are shifted up into the hole instead of swapping at every level.
 */
function downHeap(arr, t) {
  if (t < 0 || t >= arr.length) return; // nothing stored at t
  const val = arr[t];
  while (true) {
    let i = t * 2 + 1; // left child
    // Prefer the right child when it exists and is the smaller of the two
    if (i < arr.length - 1 && arr[i] > arr[i + 1]) i = i + 1;
    if (i >= arr.length || arr[i] >= val) break;
    arr[t] = arr[i]; // Don't swap to gain time
    // No recursion to save stack space
    t = i;
  }
  arr[t] = val;
}

/** Adds `x` to the min-heap stored in `arr`. */
function insert(arr, x) {
  arr.push(x); // adding element to end of array
  upHeap(arr, arr.length - 1);
}

/**
 * Sifts the value at index `t` up until its parent is <= it.
 * Parents are shifted down into the hole instead of swapping at every level.
 */
function upHeap(arr, t) {
  if (t < 0 || t >= arr.length) return; // nothing stored at t
  const val = arr[t];
  while (true) {
    const i = (t - 1) >> 1; // parent index; becomes -1 once t is the root
    if (i < 0 || arr[i] <= val) break;
    arr[t] = arr[i]; // Don't swap to gain time
    // No recursion to save stack space
    t = i;
  }
  arr[t] = val;
}

How do you most efficiently sort an integer variable? [duplicate]

I've seen versions of this question for other languages, but not for JS.
Is it possible to do this recursively in one function?
I understand that I need to take the first element in the string, and then append it to each solution to the recursion on the remainder of the string.
So logically, I understand how the recursion needs to go. I just don't understand how to append the first char onto each of the recursive solutions
var myString = "xyz";
// Asker's unfinished outline of a recursive permutation routine.
// As written, only the base cases return a value: an empty or one-character
// input is returned unchanged; the recursive case has no working body yet.
function printPermut(inputString){
var outputString;
if(inputString.length === 0){
return inputString;
}
if(inputString.length === 1){
return inputString;
}
else{
// NOTE(review): this loop header is not valid JavaScript - `int` is not a
// declaration keyword and `length` is a property, not a method.
for(int i = 0; i<inputString.length(); i++){
//something here like:
//outputString = outputString.concat(printPermut(inputString.slice(1))??
//maybe store each unique permutation to an array or something?
}
}
}
Let's write a function that returns all permutations of a string as an array. As you don't want any global variables, returning the permutations is crucial.
/**
 * Returns every distinct permutation of `string` as an array of strings.
 *
 * The result is always an array: a one-character input yields [string] and
 * the empty string yields [""], its single (empty) arrangement.
 *
 * @param {string} string - characters to arrange
 * @returns {string[]} permutations, ordered by the first occurrence of each
 *   leading character
 */
function permut(string) {
  if (string.length < 2) return [string]; // This is our break condition

  const permutations = []; // This array will hold our permutations
  const usedLeads = new Set(); // leading characters that were already expanded
  for (let i = 0; i < string.length; i++) {
    const char = string[i];
    // A repeated character would only reproduce permutations we already have
    if (usedLeads.has(char)) continue;
    usedLeads.add(char);

    const remainingString = string.slice(0, i) + string.slice(i + 1);
    for (const subPermutation of permut(remainingString)) {
      permutations.push(char + subPermutation);
    }
  }
  return permutations;
}
To print them, just iterate over the array afterwards:
const myString = "xyz";
// Declared explicitly: assigning to undeclared names creates implicit globals
// and throws a ReferenceError in strict mode and in ES modules.
const permutations = permut(myString);
for (const permutation of permutations) {
  print(permutation); // Use the output method of your choice
}
Hope I could help you with your question.
The problem of permutations has been studied to death. Heap's algorithm is one well-known solution. Here is a version in JS, using a generator:
/**
 * Generator form of Heap's algorithm.
 *
 * Yields a fresh copy of `a` for every arrangement of its first `n` elements.
 * `a` itself is rearranged in place while the generator runs.
 *
 * @param {Array} a - elements to permute (mutated)
 * @param {number} [n=a.length] - size of the prefix that is permuted
 */
function* permute(a, n = a.length) {
  if (n <= 1) {
    yield a.slice();
    return;
  }

  const last = n - 1;
  const oddSize = n % 2 !== 0;
  for (let i = 0; i < n; i += 1) {
    yield* permute(a, last);
    // Odd prefix sizes always exchange with the first slot, even ones with slot i.
    const j = oddSize ? 0 : i;
    const held = a[last];
    a[last] = a[j];
    a[j] = held;
  }
}
// Join each generated arrangement back into a string and drop repeats.
// A Set keeps the first occurrence of every value in generation order, which
// avoids rescanning the whole result with indexOf for each of its elements.
const uniquePermutations = [
  ...new Set(Array.from(permute("abcabad".split('')), (perm) => perm.join(''))),
];
console.log(uniquePermutations);
permute is designed to take and generate arrays, not strings, so we split the string into characters before calling it, and paste the characters back into strings before printing out the results.
Use Recursive Function to iterate through the string
/**
 * Recursively lists every ordering of the characters of `string`.
 * Repeated characters produce repeated entries; an empty string produces [].
 *
 * @param {string} string - characters to arrange
 * @returns {string[]} all orderings, grouped by leading character position
 */
function getPermutations(string) {
  if (string.length === 1) {
    return [string];
  }

  const results = [];
  for (let i = 0; i < string.length; i += 1) {
    const lead = string[i];
    const rest = string.substring(0, i) + string.substring(i + 1);
    // Prefix the chosen character onto every ordering of what is left.
    for (const tail of getPermutations(rest)) {
      results.push(lead + tail);
    }
  }
  return results;
}

// Keep only the first occurrence of each ordering, then report.
const permutation = getPermutations('YES').filter(
  (entry, position, list) => list.indexOf(entry) === position
);
console.log("Total permutation: " + permutation.length);
console.log(permutation);
Problem classification: You can look at this problem as an exploration problem, i.e., given a set of input characters explore the different ways you can arrange them.
Solution: Backtracking algorithm excels in solving exploratory problems, although it comes with high time complexity. To demonstrate a solution, imagine how you would solve this problem by hand for a small set of input characters: [a, b, c].
Here are the steps:
Take the left most character. This is the character at index 0 and swap it with target right character at index 0, i.e. with itself. This is because [a, b, c] is a valid permutation on its own therefore we want to keep it. Swapping characters normally requires two pointers which point to each of the characters. So let's say we will have a left and right pointer.
With the same left most character (at index 0) do the swapping with target right character at index 0 + 1 = 1, i.e. move the target right pointer with 1 step further. This will give you the output: [b, a, c]
With the same left most character (at index 0) do the swapping with the next next target right character (i.e. index 0 + 1 + 1 = 2). This will give you the output: [c, b, a]
Ok, now we need to stop as there are no more target right characters to be swapped with the left most character. So our right pointer needs to stay less than the max index in the input. Moving the right pointer with a step at a time we can do with a for loop which starts from the left index and ends with the input length - 1.
Now you need to do exact same steps from above but move the left pointer so that it points to the next left most character. However, keeping the input from step 2 and 3. Another way to imagine this situation is to say: 'Hey, I am done with the left most character. Now I do not want to work with it anymore but I would love to continue with the second left most from the results I have so far.'
When do we stop? When the left pointer has reached the length of the input string - 1, 'cause there is no more characters after this index. In recursive algorithms (such as the backtracking), the case where you need to stop is called base case. In our example the base case is: left === input.length - 1.
Here is a graphical visualisation:
left index| Input String:
-------------------------------------------------------------------------------
left = 0 | in=[a, b, c]
(swap in[0] with in[0]) (swap in[0] with in[1]) (swap in[0] with in[2])
left = 1 | in=[a, b, c] in=[b, a, c] in=[c, b, a]
(swap in[1] with in[1]) (swap in[1] with in[2]) (swap in[1] with in[1])(swap in[1] with in[2]) (swap in[1] with in[1])(swap in[1] with in[2])
left = 2 | [a, b, c] [a, c, b] [b, a, c] [b, c, a] [c, b, a] [c, a, b]
Summary:
To move the left pointer to the right we will use recursive increment
To move the right pointer to the right we will use a for loop, however we need to start always from the left pointer or else we will explore things we have already explored.
Backtracking:
A pseudo-code for backtracking algorithm takes the form of:
fun(input)
if(base_case_check(input)) {
//do final step
} else {
//choose
fun(reduce(input)) //explore
//un-choose
}
Our solution:
/**
 * Collects every distinct permutation of `string` using backtracking.
 *
 * @param {string} string - characters to arrange
 * @returns {Set<string>} the permutations; a set holding only '' for empty input
 */
function permutate(string) {
  const result = new Set();
  if (!string || string.length === 0) {
    result.add('');
    return result;
  }
  permutationHelper(string, result, 0);
  return result;
}

/**
 * Fixes the character at index `left` to each candidate in turn and recurses
 * on the remaining positions.
 */
function permutationHelper(string, result, left) {
  const lastIndex = string.length - 1;

  // base case: only one position left, so the arrangement is complete
  if (left === lastIndex) {
    result.add(string);
    return;
  }

  // recursive case
  let working = string;
  for (let right = left; right <= lastIndex; right += 1) {
    working = swap(working, left, right); // choose
    permutationHelper(working, result, left + 1); // explore
    working = swap(working, left, right); // un-choose
  }
}

/** Returns a copy of `string` with the characters at `left` and `right` exchanged. */
function swap(string, left, right) {
  const chars = string.split('');
  [chars[left], chars[right]] = [chars[right], chars[left]];
  return chars.join('');
}
/* End of solution */
/* End of solution */
/* Tests */
// Generate the permutations of 'abc' and compare them with the known answer.
const input = 'abc';
const result = permutate(input);
const expected = new Set(['abc', 'acb', 'bac', 'bca', 'cab', 'cba']);
const verdict = setsEquality(result, expected)
  ? 'Congrats, you generated all permuations'
  : 'Sorry, not all permuations are generated';
console.log(verdict);
/**
 * Tells whether two sets hold exactly the same members.
 *
 * @param {Set} actualResult
 * @param {Set} expectedResult
 * @returns {boolean} true when the sizes match and every actual member is expected
 */
function setsEquality(actualResult, expectedResult) {
  const sameSize = actualResult.size === expectedResult.size;
  return sameSize && [...actualResult].every((member) => expectedResult.has(member));
}
// Minimal test reporter: logs "<desc> ... PASS" when `condition` is truthy and
// "<desc> ... FAIL" otherwise. It only logs; it never throws.
// The test code shown with it does not call it.
function assert(condition, desc) {
if (condition) {
console.log(`${desc} ... PASS`);
} else {
console.log(`${desc} ... FAIL`);
}
}
Summary & Time Complexity:
We make our choice by swapping characters in the existing input string
We explore what is left to be explored once we increment our left index by 1. This in fact means that we are reducing our input set for all subsequent recursions by 1. Therefore the work we need to do is: Nx(N-1)x(N-2)x(N-3)x...x1 = N!. However, as we needed a for loop to explore among the input we have, the total time complexity would be: O(N*N!)
We revert our choice by swapping characters back in the modified input string
/**
 * Logs every permutation of `str`, one per line.
 *
 * @param {string} str - characters still to be placed
 * @param {string} [prefix=""] - characters already placed, in order
 */
const permutation = (str, prefix = "") => {
  if (str.length === 0) {
    // Nothing left to place: the prefix is one finished permutation.
    console.log(prefix);
    return;
  }
  for (let i = 0; i < str.length; i++) {
    const rem = str.substring(0, i) + str.substring(i + 1);
    permutation(rem, prefix + str[i]);
  }
};

const str = "ABC";
permutation(str, "");
Semi-Off topic:
random permutation of a given string is as simple as rndperm:
// Declared explicitly so the element handles are not created as implicit globals.
const i = document.getElementById("word"); // text input holding the word
const b = document.getElementById("butt"); // start/stop button
/**
 * Returns the characters of `z` in a uniformly random order.
 *
 * Uses a Fisher-Yates shuffle. Sorting with a random comparator gives the
 * sort an inconsistent ordering, so the result depends on the engine's sort
 * algorithm and is not evenly distributed.
 *
 * @param {string} z - text to scramble
 * @returns {string} the same characters, shuffled
 */
const rndperm = (z) => {
  const chars = z.split("");
  for (let last = chars.length - 1; last > 0; last -= 1) {
    // Pick any slot from 0..last (inclusive) to trade places with `last`.
    const pick = Math.floor(Math.random() * (last + 1));
    [chars[last], chars[pick]] = [chars[pick], chars[last]];
  }
  return chars.join("");
};
// Replaces the input's text with a shuffled version of itself.
function scramble() {
  const shuffled = rndperm(i.value);
  i.value = shuffled;
}

// Timer id of the running scramble loop; undefined while stopped.
var z;

// Button handler: starts the 100 ms scramble loop, or stops it when running.
function sci() {
  if (z == undefined) {
    z = setInterval(scramble, 100);
    b.innerText = "Running...";
    return;
  }
  clearInterval(z);
  b.innerText = "Scramble";
  z = undefined;
}
<center><input id="word" value="HelloWorld"></input><button id="butt" onclick=sci()>Scramble</button></center>
My interviewer asked me this same question yesterday, and I could not work out the correct logic at the time. I then came to Stack Overflow and found this page; now that I have my own solution, I want to share it with everyone.
/**
 * Lists every distinct permutation of `str`.
 *
 * Starting from the string itself, each character in turn is moved to the
 * front; every arrangement that was not seen before is recorded in `ar` and
 * explored the same way. Only the character at index i is moved, so repeated
 * letters are kept.
 *
 * @param {*} str - value to permute; converted to a string first
 * @param {string[]} [ar=[]] - shared array that collects the results
 * @returns {string[]|false} a copy of the collected permutations, or false
 *   when `str` is already present in `ar`
 */
const str_Permutations = (str, ar = []) => {
  const start = String(str); // ensure type **String**
  if (ar.includes(start)) return false; // value is already there

  const visit = (text) => {
    ar.push(text);
    for (let i = 0; i < text.length; i++) {
      const next = text[i] + text.slice(0, i) + text.slice(i + 1);
      if (!ar.includes(next)) visit(next);
    }
  };
  visit(start);

  return Array.from(ar); // hand back a copy of the shared array
};
str_Permutations("ABC")
//Result: (6) ["ABC", "BAC", "CBA", "BCA", "ACB", "CAB"]
There is used reference feature of Array to hold the values in same Array by passing. I hope you got my point!!!!
/**
 * Returns all orderings of the characters in `str` (repeats included).
 * Strings shorter than two characters come back as [str].
 *
 * @param {string} str
 * @returns {string[]}
 */
const permut = (str) => {
  if (str.length < 2) return [str];
  if (str.length === 2) return [str, str[1] + str[0]];

  const orderings = [];
  for (let i = 0; i < str.length; i += 1) {
    const withoutCurrent = str.slice(0, i) + str.slice(i + 1);
    // Put the current letter in front of every ordering of the others.
    for (const tail of permut(withoutCurrent)) {
      orderings.push(str[i] + tail);
    }
  }
  return orderings;
};
found here
This does the job, recursively
/**
 * Logs each permutation of `str` on its own line.
 *
 * @param {string} str - characters that still have to be placed
 * @param {string} [res=''] - permutation built so far
 */
function printPermutations(str, res = '') {
  if (!str.length) {
    // Every character has been placed.
    console.log(res);
    return;
  }
  for (let i = 0; i < str.length; i += 1) {
    const remaining = str.slice(0, i) + str.slice(i + 1);
    printPermutations(remaining, res + str.charAt(i));
  }
}
printPermutations("abc")
// result
// abc, acb, bac, bca, cab, cba
Simple and readable approach but only limited to 3 chars
/**
 * Builds every three-character string whose characters, taken from three
 * positions of `str`, are pairwise different.
 *
 * @param {string} str
 * @returns {string[]} results in nested-loop order (first, second, third position)
 */
const stringPermutation = (str) => {
  const permutations = [];
  const size = str.length;
  for (let first = 0; first < size; first += 1) {
    for (let second = 0; second < size; second += 1) {
      for (let third = 0; third < size; third += 1) {
        const x = str[first];
        const y = str[second];
        const z = str[third];
        const allDifferent = x !== y && y !== z && x !== z;
        if (allDifferent) {
          permutations.push(x + y + z);
        }
      }
    }
  }
  return permutations;
};
console.log(stringPermutation("abc"));
// Logs every non-empty contiguous substring of `str`, grouped by start index
// and growing in length within each group.
const str = "abcdefgh";
for (let start = 0; start < str.length; start += 1) {
  for (let end = start + 1; end <= str.length; end += 1) {
    console.log(str.slice(start, end));
  }
}

Using two for loops to compare two strings

I am working through exercises on exercism.io and the third one asks us to compare two DNA strings and return the difference (hamming distance) between them.
So for example:
GAGCCTACTAACGGGAT
CATCGTAATGACGGCCT
^ ^ ^ ^ ^ ^^
There are 7 different characters lined up in that comparison. My question is whether I'm taking the right approach to solve this. I created two empty arrays, created a function that loops through both strings and pushes the different letters when they meet.
I tried running it through a console and I always get an unexpected input error.
// Asker's attempt: collects differing and matching letters in two global arrays.
var diff = [];
var same = [];
// NOTE(review): this snippet does not parse - the brace opened by the inner
// `for` is closed by the last `}` below, so the function body is never closed.
function ham(dna1, dna2) {
for (var i = 0; i < dna1.length; i++)
// NOTE(review): the inner loop tests j but increments i, so j never changes.
for (var j = 0; j < dna2.length; i++){
// Compares position i of dna1 with position j of dna2, not the same position.
if (dna1[i] !== dna2[j]) {
console.log(dna1[i]);
diff.push(dna1[i]);
}
else {
console.log(dna1[i]);
same.push(dna1[i]);
}
return diff.length;
}
ham("GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT");
console.log("The Hamming distance between both DNA types is " + diff.length + ".");
Do not use globals.
Do not use nested loops if you don't have to.
Do not store useless things in arrays.
/**
 * Hamming distance: the number of positions at which two equally long
 * strings hold different characters.
 *
 * @param {string} dna1
 * @param {string} dna2
 * @returns {number} count of differing positions
 * @throws {Error} when the strings differ in length
 */
function ham(dna1, dna2) {
  if (dna1.length !== dna2.length) {
    throw new Error("Strings have different length.");
  }

  let mismatches = 0;
  let position = dna1.length;
  // Walk from the end towards the start; the order does not affect the count.
  while (position-- > 0) {
    if (dna1[position] !== dna2[position]) {
      mismatches += 1;
    }
  }
  return mismatches;
}

const diff = ham("GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT");
console.log(`The Hamming distance between both DNA types is ${diff}.`);
The first problem is that you're missing a closing }. I think you want it right before the return statement.
Secondly, there's a problem with your algorithm. You compare every item in dna1 (i) with every item in dna2 instead of comparing the items in the same position.
To use a shorter example so we can step through it, consider comparing 'CAT' and 'CBT'. you want to compare the characters in the same position in each string. So you don't actually want 2 for loops, you only want 1. You'd compare C to C ([0]), A to B ([1]), and T to T ( [2] ) to find the 1 difference at [1]. Now step through that with your 2 for loops in your head, and you'll see that you'll get many more differences than exist.
Once you use the same offset for the characters in each string to compare, you have to start worrying that one might be shorter than the other. In JavaScript, reading an offset past the end of a string gives undefined rather than an error, so every extra position would silently count as a mismatch. So we have to take that into account too, and presumably count the difference in string length as differences. But perhaps this is out of scope for you, and the strings will always be the same length.
You only need to have one single loop like below:
// Letters of dna1 that differed from / matched dna2 in the most recent call.
var diff = [];
var same = [];

/**
 * Compares two strings position by position with a single loop.
 *
 * The letters of `dna1` are sorted into the global `diff` and `same` arrays.
 * Both arrays are emptied first, so the return value describes this call only
 * instead of growing with every earlier call.
 *
 * @param {string} dna1
 * @param {string} dna2
 * @returns {number} number of positions of dna1 that differ from dna2
 */
function ham(dna1, dna2) {
  diff.length = 0;
  same.length = 0;
  for (let i = 0; i < dna1.length; i++) {
    if (dna1[i] !== dna2[i]) {
      console.log("not same");
      diff.push(dna1[i]);
    } else {
      console.log("same");
      same.push(dna1[i]);
    }
  }
  return diff.length;
}
ham("GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT");
console.log("The Hamming distance between both DNA types is " + diff.length + ".");
The Hamming distance is not really hard to calculate. Most of the code is needed to cover the edge cases in parameter values.
/**
 * Counts the positions at which the string forms of two values differ.
 * Positions that exist only in the longer string count as differences.
 *
 * @param {*} str1 - anything with a toString() method
 * @param {*} str2 - anything with a toString() method
 * @returns {number|undefined} the count, or undefined when either argument is
 *   null or undefined
 */
function hamming(str1, str2) {
  // `== null` is true for both null and undefined
  if (str1 == null || str2 == null) return;

  // every other argument type is treated as its string form
  const left = str1.toString();
  const right = str2.toString();

  // the longer string decides how many positions are examined
  const span = Math.max(left.length, right.length);

  let distance = 0;
  for (let index = 0; index < span; index += 1) {
    if (left[index] !== right[index]) {
      distance += 1;
    }
  }
  return distance;
}
Execution of function:
ham( "GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT" );
of the following function definition:
/**
 * Logs A, B and a marker line, each on its own line. The marker line has a
 * "^" under every position where the two strings differ and a space elsewhere;
 * it is as long as the longer of the two strings.
 *
 * @param {string} A
 * @param {string} B
 */
function ham(A, B) {
  const longer = A.length > B.length ? A : B;
  let markers = "";
  for (let x = 0; x < longer.length; x += 1) {
    markers += A[x] === B[x] ? " " : "^";
  }
  console.log(`${A}\n${B}\n${markers}`);
}
will output the log of:
GAGCCTACTAACGGGAT
CATCGTAATGACGGCCT
^ ^ ^ ^ ^ ^^
Is capable of receiving different length strings, which depending on the requirement and data representation comparison can be modified to fill the blank with adequate standard symbols etc.
Demo:
ham("GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT");

/**
 * Logs both strings followed by a line that marks each differing position
 * with "^" (matching positions get a space). The marker line spans the longer
 * of the two strings.
 */
function ham(A, B) {
  const width = (A.length > B.length ? A : B).length;
  const markers = Array.from({ length: width }, (_, x) => (A[x] === B[x] ? " " : "^"));
  console.log([A, B, markers.join("")].join("\n"));
}
I think that you would want to do something like this:
const dna1 = "GAGCCTACTAACGGGAT";
const dna2 = "CATCGTAATGACGGCCT";

/**
 * Counts the positions of `string1` whose character differs from the
 * character at the same position of `string2`.
 *
 * @param {string} string1 - decides how many positions are checked
 * @param {string} string2
 * @returns {number} number of differing positions
 */
function ham(string1, string2) {
  let counter = 0;
  // `i` is declared here; an undeclared loop variable becomes an implicit
  // global and throws a ReferenceError in strict mode and in ES modules.
  for (let i = 0; i < string1.length; i++) {
    if (string1[i] !== string2[i]) {
      counter++;
    }
  }
  return counter;
}
console.log("insert text here " + ham(dna1, dna2));
It checks each character of the string against the corresponding character of the other string, and adds 1 to the counter whenever the 2 characters are not equal.
You can use Array#reduce to iterate the 1st string, by using Function#call, and compare each letter to the letter of the corresponding index in the 2nd string.
/**
 * Counts the positions of `dna1` whose letter differs from the letter at the
 * same index of `dna2`, by borrowing Array's reduce for the string.
 *
 * @param {string} dna1
 * @param {string} dna2
 * @returns {number}
 */
function ham(dna1, dna2) {
  const addMismatch = (total, letter, index) => total + (letter !== dna2[index] ? 1 : 0);
  return Array.prototype.reduce.call(dna1, addMismatch, 0);
}

const diff = ham("GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT");
console.log(`The Hamming distance between both DNA types is ${diff}.`);

Algorithm of the greatest intersect of word in set of words

The story behind
I am creating a voice controlled application using x-webkit-speech which is surprisingly good (the feature, not my app), but sometimes the user (me) mumbles a bit. It would be nice to accept the command if some reasonable part of the word matches some reasonable part of some reasonable command. So I search for the holy grail called Algorithm of the Greatest Intersect of Word in Set of Words. Could some fresh bright mind drive me out of the cave of despair?
Example
"rotation" in ["notable","tattoo","onclick","statistically"]
should match tattoo because it has the longest intersect with rotation (tat_o). statistically is the second best (tati intersect), because longer part of the word needs to be ignored (but this is bonus condition, it would be acceptable without it).
Notes
I use Czech language where the pronunciation is very close to its written form
JavaScript is the preferred language, but any pseudocode is acceptable
the minimal length of the intersect should be a parameter of the algorithm
What have I tried?
Well, it is pretty embarrassing....
// Unfinished sketch of a brute-force search; the innermost loop has no body.
// NOTE(review): for...in yields keys, so if `words` is an array then `word`
// is an index string rather than the word itself - confirm the intent.
for(var i=10; i>=4; --i) // reasonable substring
for(var word in words) // for all words in the set
for(var j=0; j<word.length-i; ++j) // search for any i substring
// aaargh... three levels of abstraction is too much for me
This is an algorithm that seems to work. I have no idea how good it performs compared to other already established algorithms (I suspect it perform worse) but maybe it gives you an idea how you could do it:
FIDDLE
// Smallest number of matching letters that counts as a usable result.
const minInt = 3;
const arr = ["notable", "tattoo", "onclick", "statistically"];
const word = "rotation";
// res[i] = best number of aligned matching letters for arr[i], or null when
// that number stays below minInt.
const res = [];

// Counts the positions, within the shorter of the two strings, at which both
// strings hold the same letter.
function countAligned(left, right) {
  const span = Math.min(left.length, right.length);
  let hits = 0;
  for (let pos = 0; pos < span; pos += 1) {
    if (left[pos] === right[pos]) hits += 1;
  }
  return hits;
}

if (word.length >= minInt) {
  // Number of leading letters of `word` that can be cut while keeping minInt.
  const slack = word.length - minInt;

  arr.forEach((candidate, index) => {
    let best = 0;
    if (candidate.length >= minInt) {
      // Slide the word across the candidate: early shifts compare only the
      // tail of the word, late shifts compare only the tail of the candidate.
      const lastShift = candidate.length - minInt + slack;
      for (let shift = 0; shift <= lastShift; shift += 1) {
        const candidatePart = shift > slack ? candidate.substring(shift - slack) : candidate;
        const wordPart = shift < slack ? word.substring(slack - shift) : word;
        best = Math.max(best, countAligned(candidatePart, wordPart));
      }
    }
    res[index] = best >= minInt ? best : null;
  });
}
console.log(res);
What happens is that it compares the two strings by "moving" one against the other and counts the matching letters at each position. Here you see the compared "sub"words for rotation vs. notable:
ion / notable --> one match on index 1
tion / notable --> no match
ation / notable --> no match
tation / notable --> one match on index 2
otation / notable --> no match
rotation / notable --> three matches on index 1,2,3
rotation / otable --> no match
rotation / table --> no match
rotation / able --> no match
rotation / ble --> no match
As you see, the maximum number of matches is 3 and that is what it would return.
Here's an implementation of a Levenshtein Distance Calculator in Javascript.
It returns an object containing the matching command and distance.
const commandArr = ["cat", "dog", "fish", "copy", "delete"];
const testCommand = "bopy";

/**
 * Finds the entry of `arr` with the smallest Levenshtein distance to `str`.
 *
 * @param {string} str - text to look up
 * @param {string[]} arr - candidate strings
 * @returns {{match: (string|undefined), distance: (number|undefined)}} the
 *   first candidate with the smallest distance; both fields are undefined
 *   when `arr` is empty
 */
function closestMatch(str, arr) {
  let best = { match: undefined, distance: undefined };
  for (const candidate of arr) {
    const distance = calcLevDist(str, candidate);
    // Strict "<" keeps the earliest candidate when distances tie.
    if (best.distance === undefined || distance < best.distance) {
      best = { match: candidate, distance };
    }
  }
  return best;
}

/**
 * Levenshtein distance: the minimum number of single-character insertions,
 * deletions and substitutions needed to turn `str1` into `str2`.
 *
 * Only two rows of the distance matrix are kept at a time. Comparing the
 * strings from their ends and adding the length difference is not enough:
 * it misses cheaper alignments such as "flaw" -> "lawn" (2 edits).
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number}
 */
function calcLevDist(str1, str2) {
  // Row for the empty prefix of str1: j insertions build the first j letters.
  let previous = Array.from({ length: str2.length + 1 }, (_, j) => j);
  for (let i = 1; i <= str1.length; i++) {
    const current = [i]; // i deletions reach the empty prefix of str2
    for (let j = 1; j <= str2.length; j++) {
      const substitution = previous[j - 1] + (str1[i - 1] === str2[j - 1] ? 0 : 1);
      current[j] = Math.min(previous[j] + 1, current[j - 1] + 1, substitution);
    }
    previous = current;
  }
  return previous[str2.length];
}
// Look up the closest command and show the outcome in the #result element.
const { match, distance: dist } = closestMatch(testCommand, commandArr);
const summary = `Closest match to ${testCommand} = ${match} with a Lev Distance of ${dist}.`;
$("#result").html(summary);
You can mess around with the fiddle here.
Thank you basilikum and JasonNichols and also Mike and Andrew for the comments, it really helped me to finish the algorithm. I came up with my own brute force O(n^3) solution in case someone runs into this question with the same problem.
Anyone is invited to play with the fiddle to improve it.
The algorithm
/**
 * Fuzzy match for a word in an array of strings with a given accuracy.
 *
 * Two strings are scored by sliding one along the other and, for every
 * offset, counting the positions that hold the same character. The score is
 * the highest count found over all offsets, trying both strings as the one
 * being slid.
 *
 * @param {string} needle word to search for
 * @param {number} accurancy minimum number of matching characters required
 * @param {string[]} haystack array of strings to examine
 * @return {string|undefined} the best-scoring word (the earliest one on
 *         ties), or undefined if no word reaches `accurancy`
 */
function fuzzyMatch(needle,accurancy,haystack) {
    // Counts the positions i at which a[i] equals b[i + shift].
    function strcmpshift(a,b,shift) {
        var match = 0;
        // Indexed loop rather than for...in, so inherited enumerable
        // properties are never mistaken for character positions.
        for (var i = 0; i < a.length; ++i) {
            if (a[i] === b[i + shift]) ++match;
        }
        return match;
    }
    // Best strcmpshift count over every shift from 0 to b.length - 1.
    function strcmp(a,b) {
        var max = 0, now;
        for (var i = 0; i < b.length; ++i) {
            now = strcmpshift(a,b,i);
            if (now > max) max = now;
        }
        return max;
    }
    // A candidate must score strictly above `best`, i.e. at least `accurancy`.
    var word, best = accurancy - 1, step, item;
    for (var i = 0; i < haystack.length; ++i) {
        item = haystack[i];
        // Slide in both directions: item over needle and needle over item.
        step = Math.max(strcmp(item,needle), strcmp(needle,item));
        if (step <= best) continue;
        best = step;
        word = item;
    }
    return word;
}
Example
// Example: pick the command closest to "rotation", requiring at least
// 3 matching characters.
var word = "rotation";
var commands = [
    "notable",
    "tattoo",
    "onclick",
    "statistically"
];
var command = fuzzyMatch(word, 3, commands);
alert(command); // tattoo

Sorting function?

I need to organize an array of strings of random length into the least number of new strings with a max size. Is there a function or something in javascript, or something that can be translated to javascript, that will do this?
For example, the new strings might have max lengths of 1000 characters. The array might have strings of lengths 100, 48, 29, etc. I would want to combine those strings into as few new strings as possible.
edit: Sorry if this doesn't make sense, I tried my best.
No standard method in Javascript, but plenty of theoretical work has been done on this (i.e. the bin packing problem).
http://en.wikipedia.org/wiki/Bin_packing_problem
Some sample pseudo code in the link - should be trivial to translate to javascript.
The algorithm shown isn't going to be optimal in every case. To find the optimal solution to your example you'll just need to iterate over every possibility which might not be that bad depending on how many strings you have.
For my own entertainment, I wrote a simple bin packing algorithm. I picked a simple algorithm which is to sort the input strings by length. Create a new bin. Put the first (longest remaining) string into the bin and then keep filling it up with the longest strings that will fit until no more strings will fit. Create a new bin, repeat. To test it, I allocate an array of strings of random lengths and use that as input. You can see the output visually here: http://jsfiddle.net/jfriend00/FqPKe/.
Running it a bunch of times, it gets a fill percentage of between 91-98%, usually around 96%. Obviously the fill percentage is higher if there are more short strings to fill with.
Here's the code:
/**
 * Builds an array of test strings of random length. Each string consists of
 * a single repeated character; consecutive strings use consecutive
 * characters of an internal alphabet, wrapping around at its end.
 *
 * @param {number} num how many strings to generate
 * @param {number} maxLen exclusive upper bound on each string's length
 *        (lengths are drawn from 0 to maxLen - 1)
 * @return {string[]} the generated strings (always primitive strings,
 *         including the zero-length ones)
 */
function generateRandomLengthStringArrays(num, maxLen) {
    // NOTE: "Z" is absent from this alphabet; left unchanged so the
    // fill-character cycle stays the same.
    var sourceChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY1234567890";
    var sourceIndex = 0;
    var result = [];
    var len, temp, fill;

    // Returns the next alphabet character, wrapping back to the start.
    function getNextSourceChar() {
        var ch = sourceChars.charAt(sourceIndex++);
        if (sourceIndex >= sourceChars.length) {
            sourceIndex = 0;
        }
        return ch;
    }

    for (var i = 0; i < num; i++) {
        len = Math.floor(Math.random() * maxLen);
        // Start from a primitive "" rather than `new String()`: with len === 0
        // nothing is appended, and a String wrapper object would otherwise
        // end up in the result.
        temp = "";
        fill = getNextSourceChar();
        for (var j = 0; j < len; j++) {
            temp += fill;
        }
        result.push(temp);
    }
    return result;
}
/**
 * Greedily packs strings into as few concatenated "bins" as it can, where
 * each bin holds at most `maxLen` characters.
 *
 * Strategy: sort by length (longest first); open a bin with the longest
 * remaining string, then keep adding the longest remaining string that still
 * fits until nothing more fits; repeat with a new bin. This is a heuristic
 * and does not guarantee the optimal number of bins.
 *
 * The caller's `input` array is left untouched.
 *
 * @param {string[]} input strings to pack; none is assumed to be longer than
 *        `maxLen` (a longer one simply ends up alone in an over-long bin)
 * @param {number} maxLen maximum length of a bin
 * @return {string[]} the bins, each the concatenation of the strings placed in it
 */
function packIntoFewestBins(input, maxLen) {
    var result = [];
    var bin, i, tryAgain, binLen;

    // Work on a sorted copy (longest first) so the caller's array is neither
    // reordered nor emptied.
    var remaining = input.slice().sort(function(a, b) { return b.length - a.length; });

    while (remaining.length > 0) {
        // Open a new bin with the longest string we have left.
        bin = "" + remaining.shift();
        tryAgain = true;
        while (bin.length < maxLen && tryAgain) {
            tryAgain = false; // stop after this pass unless something fits
            binLen = bin.length;
            // `remaining` is sorted longest-first, so the first string that
            // fits is the longest one that fits.
            for (i = 0; i < remaining.length; i++) {
                if (remaining[i].length + binLen <= maxLen) {
                    bin += remaining[i];
                    remaining.splice(i, 1);
                    tryAgain = true;
                    break;
                }
            }
        }
        result.push(bin);
    }
    return result;
}
// Demo driver: generate random strings, pack them into bins, print each bin
// into #result and a short statistics summary into #summary.
var binLength = 60;
var numStrings = 100;
var list = generateRandomLengthStringArrays(numStrings, binLength);
var result = packIntoFewestBins(list, binLength);

// Total room across all bins versus the characters actually stored in them.
var capacity = result.length * binLength;
var fillage = 0;
var i = 0;
while (i < result.length) {
    fillage = fillage + result[i].length;
    $("#result").append(result[i] + "<br>");
    i++;
}

$("#summary").html(
    "Fill percentage: " + ((fillage/capacity) * 100).toFixed(1) + "%<br>"
    + "Number of Input Strings: " + numStrings + "<br>"
    + "Number of Output Bins: " + result.length + "<br>"
    + "Bin Legnth: " + binLength + "<br>"
);

Categories

Resources