Javascript Text Statistic Algorithm Improvement

Javascript Text Statistic Algorithm Improvement - javascript

I am trying to solve an algorithm problem in JavaScript: the user supplies an input sentence, and the program has to compute statistics for it as shown in the following screenshot
I have done with following code
/**
 * Per-character statistics for a sentence.
 */
class TextAnalytics {
  /**
   * Analyses a sentence after stripping all whitespace and upper-casing it.
   *
   * @param {string} sentence - Text to analyse.
   * @returns {Object<string, {count: number, prevChar: string[], nextChar: string[], index: number[]}>}
   *   One entry per distinct character: how often it occurs, the characters
   *   found immediately before/after each occurrence, and the positions
   *   (in the stripped text) at which it occurs.
   */
  getAnalytics(sentence) {
    const analyzedResult = {};
    const trimmed = sentence.replace(/\s/g, '').toUpperCase();
    const lastPosition = trimmed.length - 1;

    for (let i = 0; i < trimmed.length; i++) {
      const currentChar = trimmed[i];

      // Create the record on first sight, then update it the same way for
      // every occurrence (no separate "first time" code path).
      let stats = analyzedResult[currentChar];
      if (!stats) {
        stats = { count: 0, prevChar: [], nextChar: [], index: [] };
        analyzedResult[currentChar] = stats;
      }

      stats.count++;
      // The first character has no predecessor, the last one no successor.
      if (i > 0) {
        stats.prevChar.push(trimmed[i - 1]);
      }
      if (i < lastPosition) {
        stats.nextChar.push(trimmed[i + 1]);
      }
      stats.index.push(i);
    }
    return analyzedResult;
  }

  /**
   * Distance between the largest and the smallest number in a list.
   *
   * @param {number[]} arr - Positions, e.g. the `index` list of one character.
   * @returns {number} `max - min`; `-Infinity` for an empty list.
   */
  getMaxDistance(arr) {
    return Math.max(...arr) - Math.min(...arr);
  }
}

const textAnalytics = new TextAnalytics();
console.log(textAnalytics.getAnalytics("its cool and awesome"));
I would like to know whether there is another way to solve this problem, or whether any refactoring is required.
Help will be appreciated.
Thanks

You can write it more elegantly:
/**
 * Accumulates the neighbours and positions recorded for one character.
 */
class CharStats {
  constructor () {
    this.prevs = [];
    this.nexts = [];
    this.indexes = [];
  }

  /**
   * Records one occurrence.
   *
   * @param {string} [prev] - Preceding character; ignored when falsy.
   * @param {string} [next] - Following character; ignored when falsy.
   * @param {number} index - Position of the occurrence.
   * @returns {CharStats} This instance, so calls can be chained.
   */
  add (prev, next, index) {
    if (prev) {
      this.prevs.push(prev);
    }
    if (next) {
      this.nexts.push(next);
    }
    this.indexes.push(index);
    return this;
  }

  /** Number of occurrences recorded so far. */
  get count () {
    const { length } = this.indexes;
    return length;
  }

  /**
   * Span between the largest and the smallest recorded position.
   * With no positions recorded this evaluates to -Infinity.
   */
  get maxDistance () {
    const highest = Math.max(...this.indexes);
    const lowest = Math.min(...this.indexes);
    return highest - lowest;
  }
}
/**
 * Builds a Map from each character of the sentence (whitespace removed,
 * upper-cased) to the CharStats collected for it.
 */
const getAnalytics = (sentence) => {
  const chars = Array.from(sentence.replace(/\s/g, '').toUpperCase());
  const statsByChar = new Map();
  chars.forEach((char, position) => {
    if (!statsByChar.has(char)) {
      statsByChar.set(char, new CharStats);
    }
    // Neighbours outside the text are undefined and are skipped by add().
    statsByChar.get(char).add(chars[position - 1], chars[position + 1], position);
  });
  return statsByChar;
};
console.log(getAnalytics('its cool and awesome'));

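1) Convert the string to an array of characters, remove the whitespace characters, and change everything to upper case.
2) Use reduce to go through each character and build an object whose keys are the characters and whose values hold the character before, the character after, and the index.
3) If the character already exists in the object, append the new stats and recalculate the max distance.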
/**
 * Collects, for every non-whitespace character of `str` (upper-cased), the
 * list of its occurrences and the distance between its first and last one.
 *
 * @param {string} str - Text to analyse.
 * @returns {Object<string, {chars: {before: string, after: string, index: number}[], max_distance: number}>}
 *   `before`/`after` are the neighbouring characters ('' at the text edges);
 *   `index` is the position within the whitespace-free text.
 */
const getAnalytics = (str) => {
  const caps = Array.from(str.toUpperCase()).filter((x) => x.trim());
  const stats = {};

  caps.forEach((char, index) => {
    const prepost = {
      before: caps[index - 1] || '',
      after: caps[index + 1] || '',
      index,
    };
    const entry = stats[char];
    if (entry === undefined) {
      stats[char] = { chars: [prepost], max_distance: 0 };
      return;
    }
    entry.chars.push(prepost);
    // Positions are visited in increasing order, so the span is always
    // "current position minus first position" - no need to rescan the list.
    entry.max_distance = index - entry.chars[0].index;
  });

  return stats;
};
console.log(getAnalytics('its cool and awesome'));

Related

Javascript, getting past values for an array of objects

I have a JavaScript array of objects which looks like
// Sample input: every object has a single key whose value is a CSV string
// ("date,value1,value2"); an empty field between commas is a blank to fill.
var myarr = [
{'xx':'2023-01-01,,1'},
{'ss':'2023-01-01,2,1.2'},
{'dd':'2023-01-01,4,'},
{'rr':'2023-01-01,,'},
{'ff':'2023-01-01,,'},
{'gg':'2023-01-01,,'}
];
The array is actually much bigger than that, but I have cut it down for testing purposes, some of my arrays are thousands of lines long
Each object contains a date and two comma-separated values, although I have some rows which contain 3 or 4 comma separate values
What I need to do is this: if a blank comma-separated value is found on any row, take the value at the same position from a previous row, going back at most 2 rows (although I may need to change that to a bigger number in the future)
So with my example, I would get the following output
// Expected result: blanks replaced by the value found in the same column of
// one of the 2 preceding rows; blanks with no such value stay empty.
// NOTE(review): the first row shows 1.6 although the sample input has 1 in
// that position - presumably a typo in the example; confirm.
var myarr = [
{'xx':'2023-01-01,,1.6'},
{'ss':'2023-01-01,2,1.2'},
{'dd':'2023-01-01,4,1.2'},
{'rr':'2023-01-01,4,1.2'},
{'ff':'2023-01-01,4,'},
{'gg':'2023-01-01,,'}
];
I have tried to solve this with
// Attempt from the question: for every row, print the date and the two
// values, trying to replace blank values with values from earlier rows.
var myarr = [
{'xx':'2023-01-01,,1'},
{'ss':'2023-01-01,2,1.2'},
{'dd':'2023-01-01,4,'},
{'rr':'2023-01-01,,'},
{'ff':'2023-01-01,,'},
{'gg':'2023-01-01,,'}
];
// NOTE(review): the description asks for a look-back of 2 rows, but 3 is used here.
var maxAttempts = 3;
for (var i = 0; i < myarr.length; i++) {
var obj = myarr[i];
// Each object has one key; its value is the CSV string for the row.
var values = Object.values(obj)[0].split(",");
var date = values[0];
var value1 = values[1];
var value2 = values[2];
// j is how many rows back to look (1 = the previous row).
for (var j = 1; j <= maxAttempts; j++) {
if (!value1) {
// When the earlier row is missing or blank in this column, the fallback " "
// is assigned. A single space is truthy, so `!value1` is false from then on
// and rows further back are never consulted.
value1 = (myarr[i-j] && Object.values(myarr[i-j])[0].split(",")[1]) || " ";
}
if (!value2) {
// Same fallback behaviour as for value1 above.
value2 = (myarr[i-j] && Object.values(myarr[i-j])[0].split(",")[2]) || " ";
}
if (value1 && value2) {
break;
}
}
// Only prints the values; myarr itself is not modified.
console.log(date, value1, value2);
// Any columns beyond the third are printed as-is, without look-back.
for (var k = 3; k < values.length; k++) {
var value = values[k];
console.log(value);
}
}
but it doesn't seem to provide the expected output.
Can someone help me with what might be wrong?

Maybe you can use something like this.
const myarr = [
  { "xx": "2023-01-01,,1" },
  { "ss": "2023-01-01,2,1.2" },
  { "dd": "2023-01-01,4," },
  { "rr": "2023-01-01,," },
  { "ff": "2023-01-01,," },
  { "gg": "2023-01-01,," }
];

/**
 * Returns a copy of `arr` in which every blank CSV field is replaced by the
 * nearest non-blank value found in the same column of the preceding rows.
 *
 * Look-ups read the ORIGINAL rows, so a value filled into one row is not
 * propagated further down. Fields with no value within reach stay blank.
 *
 * @param {Object<string, string>[]} arr - Rows; each is an object with a
 *   single key whose value is a comma-separated string.
 * @param {number} maxLookBack - How many preceding rows may be consulted.
 * @returns {Object<string, string>[]} New row objects; `arr` is not modified.
 */
function fillInBlanks(arr, maxLookBack) {
  // Split every row once instead of re-splitting earlier rows per lookup.
  const rows = arr.map((obj) => Object.values(obj)[0].split(","));

  return arr.map((obj, index) => {
    const key = Object.keys(obj)[0];
    const earliest = Math.max(0, index - maxLookBack);

    const filled = rows[index].map((cell, column) => {
      if (cell !== "") {
        return cell;
      }
      for (let i = index - 1; i >= earliest; --i) {
        const candidate = rows[i][column];
        // A shorter earlier row has no such column (undefined): treat it as
        // blank and keep looking instead of giving up.
        if (candidate !== undefined && candidate !== "") {
          return candidate;
        }
      }
      return "";
    });

    return Object.fromEntries([
      [key, filled.join(",")]
    ]);
  });
}

fillInBlanks(myarr, 2).forEach((x) => console.log(x));

Here's my attempt. This will also work with any number of values per row.
// Maximum number of preceding rows consulted for a blank field.
const maxAttempts = 2;
// Builds a new array of single-key objects in which each blank CSV field is
// taken from the nearest of the preceding `maxAttempts` rows of `myarr`
// (original, unfilled values) that is non-blank in the same column.
// The resulting array is the value of this expression; it is not stored.
myarr.reduce((rows, row, rowIndex) => {
  const [key, csv] = Object.entries(row)[0];
  // Rows that may be consulted, oldest first.
  const reachable = myarr.slice(Math.max(rowIndex - maxAttempts, 0), rowIndex);

  const cells = csv.split(",").map((cell, column) => {
    if (cell !== "") {
      return cell;
    }
    // Walk from the nearest row backwards; keep the first non-blank hit.
    return reachable.reduceRight((found, pastRow) => {
      if (found !== "") {
        return found;
      }
      const candidate = Object.entries(pastRow)[0][1].split(",")[column];
      return candidate !== "" ? candidate : "";
    }, "");
  });

  rows.push({ [key]: cells.join(",") });
  return rows;
}, []);

This approach creates a 'window' array containing the last few entries, which is used to look up prior column values.
const myarr = [
  {"xx":"2023-01-01,,1"},
  {"ss":"2023-01-01,2,1.2"},
  {"dd":"2023-01-01,4,"},
  {"rr":"2023-01-01,,"},
  {"ff":"2023-01-01,,"},
  {"gg":"2023-01-01,,"}
];
// Number of earlier rows kept available for look-ups.
const windowSize = 2;
// w: sliding window, newest row first (current row plus up to windowSize earlier ones).
const w = [];
// r: resulting rows, in input order.
const r = [];

for (const entry of myarr) {
  // Flatten the row to [key, field0, field1, ...].
  const cells = Object.entries(entry).flatMap(([key, csv]) => [key, ...csv.split(',')]);

  w.unshift(cells);
  if (w.length > windowSize + 1) {
    w.pop();
  }

  // For every column take the value from the newest window row that has a
  // non-empty value there (the current row itself comes first).
  const resolved = cells.map((_, column) => {
    const donor = w.find((candidate) => candidate[column]);
    return donor === undefined ? undefined : donor[column];
  });

  const [key, ...fields] = resolved;
  r.push(Object.fromEntries([[key, fields.join()]]));
}
console.log(r)

Better approach for evaluating expressions

I am trying to create a function that accepts 2 parameters -
expression (string eg. 'x+y')
variables (object eg. { 'x': [7, 1], 'y': [2, 9] })
My goal is to return the maximum value possible from the function by substituting the variables with their corresponding values.
For eg., the maximum value possible from the above expression is 16 when x = 7 and y = 9.
My Code -
/**
 * Substitutes every pairing of the candidate values of the first two
 * variables into `exp`, evaluates each result and returns the largest one.
 * The running maximum starts at 0. Evaluation is done with eval().
 */
const findMax = (exp, variables) => {
  // One list per variable, each item tagged as "name:value".
  const tagged = [];
  for (const name in variables) {
    tagged.push(variables[name].map((candidate) => name + ':' + candidate));
  }

  // Pair every tagged value of the first variable with every one of the second.
  const pairs = tagged[0].flatMap((first) =>
    tagged[1].map((second) => first + ',' + second));

  let best = 0;
  for (const pair of pairs) {
    const [left, right] = pair.split(',');
    const arr1 = left.split(':');
    const arr2 = right.split(':');
    console.log("arr1 values = ", arr1[0], arr1[1]);
    console.log("arr2 values = ", arr2[0], arr2[1]);
    // Only the first occurrence of each variable name is replaced.
    const substituted = exp.replace(arr1[0], arr1[1]).replace(arr2[0], arr2[1]);
    const res = eval(substituted);
    if (res > best) {
      best = res;
    }
  }
  return best;
}
The above function works, but I have few issues that I wish to resolve -
The above function is too complicated. Looking for an efficient/better way to process it.
It can only process for 2 variables. I want to process 1 or more variables. For eg., the expression can be - x + 2y - 3z and variables - { 'x': [7, 8], 'y': [2, 7], 'z': [4, 5] }
Want to avoid using eval()
Possible inputs -
exp - 2*x+3, variables - {'x': [1, 2]}
exp - 2*3-1, variables - {}
exp - 8/y-z+k, variables - {'y': [1, 2], 'z': [5, 2], 'k': [2, 6]}

If you don't want to use eval you'll have to write your own parser, which is a gruesome and error-prone task.
Many such parsers exist, but you'll have to adapt them to your "multiple-valued variables" idea.
Also, a brute-force search for the maximal result will have an exponential computation time, so don't expect it to work on hundreds or thousands of variables.
Parsing by hand
Here is an example of bare bone expression evaluator using a generator to enumerate all possible values of your variables.
// Finds the assignment of values to variables that maximizes an arithmetic
// expression, without using eval().
//
// expression: string using numbers, variable names, + - * / and parentheses.
// variables:  object mapping each variable name to an array of candidate values.
// Returns { vars, val }: the best assignment found and the value it produces.
// Throws a string when the parenthesis counts differ or operators are left
// over after parsing. Every tried combination is written to the console.
function evaluate (expression, variables) {
// minimal sanity check
if ((expression.match(/[(]/g) ?? []).length !== (expression.match(/[)]/g) ?? []).length)
throw "Unbalanced parenthesis";
// simplistic parser (minimal handling of unary minus)
const tokens = ('('+expression+')') // extra enclosing parenthesis
.replace(/\s/g,'') // remove blanks
.replace(/\(-/g,'(:') // mark unary minus as :
.replace(/(\w+|\d+|[+\-/*:()])/g,"$1#") // tag tokens with #
.slice(0,-1) // remove trailing #
.split("#"); // split into tokens
// shunting-yard to produce a postfix expression
// postfix: output queue; s_op: operator stack
let postfix = [], s_op = [];
const prio = { '(':0, '+':1, '-':1, '*':2, '/':2, ':':3 }; // operator priorities
for (let tok of tokens) {
if (!isNaN(+tok)) postfix.push(+tok); // constant
else if (tok.match(/\w+/)) postfix.push( tok); // variable
else if (tok === '(') s_op.push(tok);
else if (tok === ')') {
// will fail if parenthesis are unbalanced
let op; while ((op = s_op.pop()) !== '(') postfix.push(op);
}
else {
// pop operators of higher or equal priority before pushing the new one
while ((s_op.length > 0) && (prio[s_op[s_op.length-1]] >= prio[tok])) {
postfix.push(s_op.pop());
}
s_op.push(tok);
}
}
if (s_op.length != 0) throw "operator stack should be empty";
// postfix evaluator
// (this inner function shadows the outer `evaluate` inside this scope)
// values: object mapping variable names to the numbers to use for this run
const evaluate = function (values) {
let s_val = []; // operand stack (numerical values)
for (let tok of postfix) {
switch (tok) {
case ':' : s_val.push(-s_val.pop() ); break;
// binary operators: the right operand is popped first, then the left one
case '+' : var a = s_val.pop(); s_val.push (s_val.pop() + a); break;
case '-' : var a = s_val.pop(); s_val.push (s_val.pop() - a); break;
case '*' : var a = s_val.pop(); s_val.push (s_val.pop() * a); break;
case '/' : var a = s_val.pop(); s_val.push (s_val.pop() / a); break;
// numbers are pushed as-is; any other token is looked up as a variable name
default : s_val.push ((typeof (tok) == 'number') ? tok : values[tok]); break;
}
}
return s_val[0];
}
// generates all possible tuples of values
const combinations = function* (variables) {
// max = product of the candidate-list lengths = number of combinations
let max = 1; for (let values of Object.values(variables)) max *= values.length;
for (let iter = 0 ; iter != max ; iter++) {
// decode iter like a mixed-radix number: one "digit" per variable
let values = {}, i = iter;
for (let name of Object.keys(variables)) {
let len = variables[name].length
values[name] = variables[name][i % len];
i = Math.floor(i/len);
}
yield values;
}
}
// maximize the expression over all possible combinations of values
// vars_max stays undefined if no result is greater than -Number.MAX_VALUE
let val_max = -Number.MAX_VALUE, vars_max;
for (let vars of combinations(variables)) {
let val = evaluate (vars);
console.log (JSON.stringify(vars),val);
if (val > val_max) {
vars_max = vars;
val_max = val;
}
}
console.log (">>>",JSON.stringify(vars_max),val_max);
return { vars:vars_max, val:val_max };
}
// Sample runs
evaluate("2*x+3", { x:[1,2] });
evaluate("2*3-1", {});
evaluate("8/y-z+k", { y:[1, 2], z:[5, 2], k:[2, 6] });
evaluate("(x-5)*(y+5)/((z-3)*(t+5))", { x:[-5,3], y:[-1,3], z:[5, 3, 2], t:[-5, 6] });
Beware: a divide by zero can generate +Infinity, which might lead to the selection of an unexpected set of values.
The parser only barely supports the unary minus, i.e. -5 or -(-5) will work, but not --5 or other fancy stuff a proper grammar would allow.
Unary + is not supported at all.
Error checking is minimal and nothing is optimized.
An ill-formed expression is very likely to cause a disaster (infinite loop, out of memory, you name it).
Using the Boooh! function
The Function That Shall Not Be Used Ever allows to write far less code, but God might kill some kittens for this blasphemy...
/**
 * Finds the assignment of values to variables that maximizes `expression`,
 * letting the JavaScript engine do the parsing.
 *
 * SECURITY: `expression` is compiled and executed as JavaScript code. Never
 * call this with text that comes from an untrusted source.
 *
 * The expression is compiled once into a function whose parameters are the
 * variable names. Unlike evaluating "x=1,<expression>" with eval(), this
 * creates no global variables and also works in strict-mode / module code.
 *
 * @param {string} expression - JavaScript expression using the variable names.
 * @param {Object<string, Array>} variables - Candidate values per variable name.
 * @returns {{vars: (string|undefined), val: number}} `vars` lists the winning
 *   assignment as "name=value," pairs (undefined if no result exceeded
 *   -Number.MAX_VALUE); `val` is the best value found.
 */
function evaluate (expression, variables) {
  const names = Object.keys(variables);
  const compute = new Function(...names, '"use strict"; return (' + expression + ');');

  // generates all possible tuples of values, in the order of `names`
  const combinations = function* () {
    let total = 1;
    for (const name of names) total *= variables[name].length;
    for (let iter = 0; iter !== total; iter++) {
      // decode iter like a mixed-radix number: one "digit" per variable
      const picked = [];
      let rest = iter;
      for (const name of names) {
        const len = variables[name].length;
        picked.push(variables[name][rest % len]);
        rest = Math.floor(rest / len);
      }
      yield picked;
    }
  };

  // maximize the expression over all possible combinations of values
  let val_max = -Number.MAX_VALUE, vars_max;
  for (const picked of combinations()) {
    const setter = names.map((name, i) => name + "=" + picked[i] + ",").join("");
    const val = compute(...picked);
    console.log (setter," -> ",val);
    if (val > val_max) {
      vars_max = setter;
      val_max = val;
    }
  }
  console.log (">>>",vars_max,val_max);
  return { vars:vars_max, val:val_max };
}
evaluate("2*x+3", { x:[1,2] });
evaluate("2*3-1", {});
evaluate("8/y-z+k", { y:[1, 2], z:[5, 2], k:[2, 6] });
evaluate("(x-5)*(y+5)/((z-3)*(t+5))", { x:[-5,3], y:[-1,3], z:[5, 3, 2], t:[-5, 6] });
The result is a bit awkward, it's just the variable initializations list, but you can easily reformat it to your liking.

There it is:
/**
 * Finds the largest value `exp` can take when each variable is replaced by
 * one of its candidate values.
 *
 * SECURITY: the substituted expression is executed with eval(). Never call
 * this with an expression that comes from an untrusted source.
 *
 * @param {string} exp - JavaScript arithmetic expression, e.g. '8/y-z+k'.
 * @param {Object<string, number[]>} variables - Candidate values per variable
 *   name; may be empty when the expression contains no variables.
 * @returns {{max: number, combinationUsed: string}} The best value and the
 *   assignment that produced it, formatted like '{ x: 7, y: 9 }' with the
 *   names in locale order. If no combination yields a comparable number,
 *   `max` is -Infinity and `combinationUsed` is ''.
 */
let findMax = (exp, variables) => {
  const names = Object.keys(variables).sort((a, b) => a.localeCompare(b));

  // Cartesian product of the candidate lists: each combination is built
  // exactly once. With no variables this is the single empty combination,
  // so a constant expression is still evaluated.
  const combos = names.reduce(
    (partial, name) =>
      partial.flatMap((values) => variables[name].map((value) => [...values, value])),
    [[]],
  );

  // Whole identifiers only, so a variable `x` does not clobber e.g. `max`.
  const identifier = /[A-Za-z_$][\w$]*/g;

  // Start below every number so that all-negative results are handled too.
  let max = -Infinity;
  let combinationUsed = '';

  for (const values of combos) {
    const lookup = new Map(names.map((name, i) => [name, values[i]]));
    // Values are parenthesized so negative numbers stay valid: x-(-5).
    const expression = exp.replace(identifier, (token) =>
      (lookup.has(token) ? `(${lookup.get(token)})` : token));
    const value = eval(expression);
    if (value > max) {
      max = value;
      combinationUsed = `{ ${names.map((name, i) => `${name}: ${values[i]}`).join(', ')} }`;
    }
  }
  return { max, combinationUsed };
};

Dynamically update value of a key in Map in JavaScript

I currently have a function below to find the first nonrepeating letter. For example, for the string carro, that letter would be c; for the string total, that letter would be o.
I have the following code that works:
/**
 * Returns the first character of `str` that occurs exactly once.
 *
 * @param {string} str - Text to inspect.
 * @returns {string|undefined} The character, or undefined if every
 *   character repeats (or the string is empty).
 */
function findFirstNonrepeatedChar(str) {
  const store = {};
  const arr = str.split('');
  arr.forEach((item) => {
    store[item] = (store[item] || 0) + 1;
  });
  // Scan the characters in their original order rather than the object's
  // keys: for...in visits integer-like keys ("0".."9") before all others,
  // which would report a digit ahead of an earlier letter.
  return arr.find((char) => store[char] === 1);
}
However, now I want to use a Map instead of just a plain object, and I'm having difficulty to update the frequency of the duplicate word like below:
// Attempted Map-based version from the question (does not work as intended;
// the comments below mark the two problem spots).
function findFirstNonrepeatedChar(str) {
const store = new Map();
const arr = str.split('');
arr.forEach(item => {
if(!store.has(item)) {
store.set(item, 1);
} else {
// Bracket access reads/writes an ordinary property on the Map object, not
// the Map entry stored with set(); the entry's count therefore stays at 1.
store[item]++;
}
})
console.log(store, 'store')
// for...in enumerates the object's enumerable properties, not the Map's
// entries, and store[char] is again a property lookup rather than get().
for(let char in store) {
if(store[char] === 1) return char;
}
}
What would be the best way to do so?

There are 2 things here:
Use set to save a key-value pair in the store, and use get to read the value for a key:
store.set(item, (store.get(item) || 0) + 1);
Iterate over the key-value pairs of a Map with for..of, not for..in:
/**
 * Returns the first character of `str` that occurs exactly once, or
 * undefined when there is none. Counts are kept in a Map, which iterates
 * its keys in insertion order.
 */
function findFirstNonrepeatedChar(str) {
  const tally = new Map();
  for (const unit of str.split("")) {
    const seenSoFar = tally.has(unit) ? tally.get(unit) : 0;
    tally.set(unit, seenSoFar + 1);
  }
  const unique = [...tally].find(([, total]) => total === 1);
  return unique === undefined ? undefined : unique[0];
}
console.log(findFirstNonrepeatedChar("carro"));
console.log(findFirstNonrepeatedChar("total"));

Here's how I'd do it using Array.prototype.find() if you're interested in alternative solutions.
/**
 * Returns the first character of `str` that occurs exactly once, or
 * undefined when every character repeats.
 *
 * A character is unique when its first and last positions coincide. Plain
 * string search is used instead of building a RegExp from the character, so
 * characters such as '.', '+' or '(' are matched literally.
 */
const findFirstNonrepeatedChar = (str) => str.split('').find(
  (val) => str.indexOf(val) === str.lastIndexOf(val)
);
console.log(findFirstNonrepeatedChar('total'));
console.log(findFirstNonrepeatedChar("carro"));
console.log(findFirstNonrepeatedChar("aabbcc"));

Testing a js function with Jest returns a bad value in for loop

It is my first use of Jest and I try to test a function with many many values like this :
const convertConfig = require('../tools/convertNumWord/config');
const numToWordConstructor = require('../tools/convertNumWord/num2words/numToWords');
const wordToNumConstructor = require('../tools/convertNumWord/words2num/wordsToNum');
// Round-trip test: converting i to words and back must give i again.
// NOTE(review): numToWord and wordToNum are created once here and shared by
// every generated test, so any state kept inside them carries over from one
// test to the next.
describe('EN-CONFIG', () => {
const config = convertConfig['en']; // It returns an object with configuration values for the language
const numToWord = numToWordConstructor(config); // It return a function builds with config object
const wordToNum = wordToNumConstructor(config);
// Registers one test per value 0..4.
for (let i = 0; i <= 4; i++) {
it(`Test for value ${i}`, () => {
expect(wordToNum(numToWord(i))).toBe(Number(i));
});
}
});
Jest returns this error :
● ENGLISH CONFIG › Test for value 2
expect(received).toBe(expected) // Object.is equality
Expected: 2
Received: 1
69 | for (let i = 0; i <= 4; i++) {
70 | it(`Test for value ${i}`, () => {
> 71 | expect(wordToNum(numToWord(i))).toBe(Number(i));
| ^
72 | });
73 | }
74 | });
at Object.<anonymous> (__tests__/loopConvertNum.test.js:71:39)
● ENGLISH CONFIG › Test for value 3
expect(received).toBe(expected) // Object.is equality
Expected: 3
Received: 1
69 | for (let i = 0; i <= 4; i++) {
70 | it(`Test for value ${i}`, () => {
> 71 | expect(wordToNum(numToWord(i))).toBe(Number(i));
| ^
72 | });
73 | }
74 | });
at Object.<anonymous> (__tests__/loopConvertNum.test.js:71:39)
● ENGLISH CONFIG › Test for value 4
expect(received).toBe(expected) // Object.is equality
Expected: 4
Received: 1
69 | for (let i = 0; i <= 4; i++) {
70 | it(`Test for value ${i}`, () => {
> 71 | expect(wordToNum(numToWord(i))).toBe(Number(i));
| ^
72 | });
73 | }
74 | });
at Object.<anonymous> (__tests__/loopConvertNum.test.js:71:39)
Test Suites: 1 failed, 2 passed, 3 total
Tests: 3 failed, 1 todo, 3 passed, 7 total
Snapshots: 0 total
Time: 1.773s
Ran all test suites.
My first test worked, but for the past 2 days Jest does not seem to execute my function afresh on each round of the for loop: it behaves as if it kept a value in memory (often that of the second round of the loop), so the toBe assertion ultimately fails. In some cases, the result of my functions is completely inconsistent (null, or some kind of increment from the previous result). When I run my function directly with Node using the same arguments, it works well.
I tried with it.each and the problem persists. Each mode is not good for me because I want to test my function for many many many values 😅.
After your feedback, here are the two functions imported and the config constant. But these functions work well with the arguments sent in my loop. The problem does not seem to come from what returns the functions because when I launch them individually with node they work well with the same arguments as in the for loop of my Jest test.
This is the function imported in numToWordConstructor :
const config = require('../config');
const _ = require('lodash');
// Builds a number-to-words converter for one language configuration.
// Takes the language config object (deep-cloned before use) and returns the
// function num2Words(number), which returns the words joined by spaces.
const numToWordsConstructor = config => {
config = _.cloneDeep(config);
// Inverted lookup built from the three digit tables.
const letters = {
..._.invertBy(config.oneDigits),
..._.invertBy(config.doubleDigits),
..._.invertBy(config.tripleDigits),
};
const hundredWords = config.hundredWords;
const oneDigits = config.oneDigits;
const doubleDigits = config.doubleDigits;
const tripleDigits = config.isEachHundredsWords ? config.tripleDigits : null;
const separators = config.separators;
// NOTE(review): `words` lives in the constructor's closure and is never
// reset inside num2Words, so words from earlier calls are still present
// when the next call appends to it.
let words = [];
// Conversion function
const num2Words = number => {
// NOTE(review): `result` is assigned below but is not part of the returned value.
let result = '';
const numbersWords = Object.keys(letters);
const nbString = number.toString();
const nb = parseFloat(nbString.replace(/ /gi, ''));
// NOTE(review): Error() uses only its first argument as the message, so the
// number and the second text do not appear in the thrown error's message.
if (nb > 999999999999)
throw new Error(
'Function numToWords for number',
number,
': Number to big',
);
if (isNaN(nb))
throw new Error(
'Function numToWords for number',
number,
'Not a valid number',
);
// Non-integer input: convert the parts before and after the decimal point
// separately and join them with the literal word ' virgule '.
if (Math.ceil(nb) != nb) {
// NOTE(review): assigned without a declaration (implicit global; throws in strict mode).
separateFloatNumbers = nbString.split('.');
return (
num2Words(separateFloatNumbers[0]) +
' virgule ' +
num2Words(separateFloatNumbers[1])
);
}
if (Number(number) === 0) return oneDigits[0][0];
// Build numByThousand object to split number to 3 digits arrays
let numByThousand = [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]];
if (numbersWords.includes(nbString)) {
result = letters[nbString][0];
} else {
// Digits in reverse order (units first) so groups can be cut off from the front.
const nbReversedDigits = nbString
.split('')
.map(digit => parseInt(digit))
.reverse();
numByThousand = numByThousand
.map(thousand => {
if (nbReversedDigits.length >= 3) {
return (thousand = nbReversedDigits.splice(0, 3).reverse());
} else {
// Fewer than 3 digits left: take what remains and pad with zeros.
return (thousand = [0, 0, 0]
.map(
defaultDigit =>
nbReversedDigits.splice(0, 1)[0] || defaultDigit,
)
.reverse());
}
})
.reverse();
}
// Build array of words depending of numByThousand
// (the parameter is one [hundreds, tens, units] group and shadows the outer array)
const convertNumByThousandToWords = numByThousand => {
const hundred = numByThousand[0] * 100;
const ten = numByThousand[1] * 10 + numByThousand[2];
const newWords = [];
if (numByThousand[0] > 0) {
if (config.isEachHundredsWords) {
newWords.push(tripleDigits[hundred][0]);
} else if (numByThousand[0] === 1 && !config.isOneBeforeOnlyOne[0]) {
newWords.push(config.hundredWords[0]);
} else {
newWords.push(oneDigits[numByThousand[0]][0]);
newWords.push(config.hundredWords[0]);
}
}
if (ten > 0) {
// Use the dedicated word when the two-digit value has one; otherwise
// compose it from the tens word and the units word.
if (Object.keys(doubleDigits).includes(ten.toString())) {
newWords.push(doubleDigits[ten][0]);
return newWords;
} else {
if (numByThousand[1] > 0)
newWords.push(doubleDigits[numByThousand[1] * 10][0]);
if (numByThousand[2] > 0)
newWords.push(oneDigits[numByThousand[2]][0]);
return newWords;
}
}
return newWords;
};
// NOTE(review): this runs on every call and appends to the closure's
// `separators` array (the same array as config.separators), which therefore
// grows with each conversion when the flag is set.
if (config.noMoreMillionSeparator) separators.push(config.separators[0]);
// map() is used only for its side effects on `words`.
numByThousand.map((thousand, i) => {
if (thousand.reduce((acc, cur) => acc + cur) > 0) {
words = _.concat(words, convertNumByThousandToWords(thousand));
// Handle thousand separators
separators[config.separators.length - i - 1] &&
words.push(separators[config.separators.length - i - 1][0]);
}
});
//Handle specifics spelling rules
words = config.spellingRules(words, oneDigits, doubleDigits);
// console.log('words : ', words.join(' '));
return words.join(' ');
};
return num2Words;
};
module.exports = numToWordsConstructor;
This is the function imported in wordToNumConstructor :
const _ = require('lodash');
// Inverts a { key: [value, ...] } table into { value: key }: every element
// of each value collection becomes a key that points back to the original key.
const arrayAwareInvert = obj => {
  const inverted = {};
  _.forEach(obj, (values, key) => {
    _.forEach(values, value => {
      inverted[value] = key;
    });
  });
  return inverted;
};
// Builds a words-to-number converter for one language configuration.
// Takes the language config object (deep-cloned before use) and returns a
// function that takes the number written in words and returns the number,
// or null when one of the word groups could not be converted.
const word2numConstructor = config => {
config = _.cloneDeep(config);
const separators = config.separators;
const hundredWords = config.hundredWords;
// Word -> numeric key lookups.
const oneDigits = arrayAwareInvert(config.oneDigits);
const doubleDigits = arrayAwareInvert(config.doubleDigits);
const tripleDigits = config.isEachHundredsWords
? arrayAwareInvert(config.tripleDigits)
: null;
// Flat lists of all known words of each kind.
const oneDigitsWords = Object.values(config.oneDigits).reduce(
(acc, cur) => acc.concat(cur),
[],
);
const doubleDigitsWords = Object.values(config.doubleDigits).reduce(
(acc, cur) => acc.concat(cur),
[],
);
// NOTE(review): config.tripleDigits is read here regardless of
// isEachHundredsWords, so the config presumably always has to provide it - confirm.
const tripleDigitsWords = Object.values(config.tripleDigits).reduce(
(acc, cur) => acc.concat(cur),
[],
);
return stringNumber => {
separators.forEach(thousandSeparators =>
thousandSeparators.forEach(separator => {
// Remove spaces from separator's word in sentence to not split it with split(' ') later
const regex = new RegExp(`${separator}`, 'gi');
stringNumber = stringNumber.replace(
regex,
separator.split(' ').join(''),
);
// Remove spaces from separator's word
// NOTE(review): forEach ignores the callback's return value, so this has no effect.
return separator.split(' ').join('');
}),
);
let thousandsWords = [];
if (config.noMoreMillionSeparator) {
thousandsWords = separators[0];
} else {
thousandsWords = _.flatten(separators);
}
// NOTE(review): the trimmed string is discarded; stringNumber is unchanged by this line.
stringNumber.trim();
// Apply on stringNumber the rules of locale key of config object
stringNumber = config.rulesOnSentence(stringNumber);
// Apply lowerCase on stringNumber and split words at each spaces
let words = stringNumber.toLowerCase().split(' ');
// Apply on each words the rules of locale key of config object
words = config.rulesOnWords(words);
// Converts each group of words (one group per thousand block) to its number;
// an empty group gives 0, an unrecognized one gives null.
const convertWordsByThousandToNumber = wordsByThousand => {
return wordsByThousand.map(values => {
if (values.length === 0) return 0;
let thousand = null;
let temp;
// Function to handle ten and unit if temp is hundred value
// i: index in `values` where the tens/units words start; coef: multiplier applied to temp
const handleTen_Unit = (i, coef = 100) => {
if (!values[i] || thousandsWords.includes(values[i])) {
thousand = temp * coef;
} else if (oneDigitsWords.includes(values[i])) {
thousand = temp * coef + Number(oneDigits[values[i]]);
} else if (doubleDigitsWords.includes(values[i])) {
if (!values[i + 1] || thousandsWords.includes(values[i + 1])) {
thousand = temp * coef + Number(doubleDigits[values[i]]);
} else if (oneDigitsWords.includes(values[i + 1])) {
thousand =
temp * coef +
Number(doubleDigits[values[i]]) +
Number(oneDigits[values[i + 1]]);
}
}
};
//Convert values
// The branch is chosen by the kind of the first word of the group.
if (values.length === 1 && thousandsWords.includes(values[0])) {
thousand = 1;
} else if (hundredWords.includes(values[0])) {
temp = 1;
handleTen_Unit(1);
} else if (doubleDigitsWords.includes(values[0])) {
temp = Number(doubleDigits[values[0]]);
if (!values[1] || thousandsWords.includes(values[1])) {
thousand = temp;
} else if (oneDigitsWords.includes(values[1])) {
thousand = temp + Number(oneDigits[values[1]]);
}
} else if (oneDigitsWords.includes(values[0])) {
temp = Number(oneDigits[values[0]]);
if (!values[1] || thousandsWords.includes(values[1])) {
thousand = temp;
} else if (hundredWords.includes(values[1])) {
handleTen_Unit(2);
} else {
thousand = temp;
}
} else if (tripleDigitsWords.includes(values[0])) {
temp = Number(tripleDigits[values[0]]);
if (!values[1] || thousandsWords.includes(values[1])) {
thousand = temp;
} else {
handleTen_Unit(1, 1);
}
}
return thousand;
});
};
// Cuts `words` into one group per separator level (largest first), each group
// ending with its separator word; the remaining words form the last group.
// The passed array is consumed by splice().
const buildWordsByThousand = words => {
const wordsByThousand = [];
separators
.slice(0) // Make a shallow copy
.reverse()
.map(thousandSeparators => {
const index = _.findIndex(words, word =>
thousandSeparators.includes(word),
);
index > -1
? wordsByThousand.push(words.splice(0, index + 1))
: wordsByThousand.push([]);
});
wordsByThousand.push(words); // Push the rest of words for hundred's part
return wordsByThousand;
};
let results = [];
// Position of the last word found in separators[1], or -1 (map() is used as a loop).
let indexOfMillionWords = -1;
words.map((word, i) => {
if (separators[1].includes(word)) indexOfMillionWords = i;
});
if (config.noMoreMillionSeparator && indexOfMillionWords >= 0) {
// Split at that word: wordsAboveMillion receives the words after it,
// wordsOverMillion the words before it.
const wordsAboveMillion = words.splice(indexOfMillionWords + 1);
const wordsOverMillion = words.splice(0, words.length - 1);
const wordsByThousandOverMillion = buildWordsByThousand(
wordsOverMillion,
).splice(1); // Splice is necessary to remove array of million thousand part
const wordsByThousandAboveMillion = buildWordsByThousand(
wordsAboveMillion,
).splice(1); // Splice is necessary to remove array of million thousand part
results = results.concat(
convertWordsByThousandToNumber(wordsByThousandOverMillion),
);
results = results.concat(
convertWordsByThousandToNumber(wordsByThousandAboveMillion),
);
} else {
const wordsByThousand = buildWordsByThousand(words);
results = results.concat(convertWordsByThousandToNumber(wordsByThousand));
}
// NOTE(review): the next line is an expression statement with no effect.
results;
// Combine the groups: after reversing, the group at position i is weighted
// by 1000^i; a null anywhere makes the whole result null.
return results
.reverse()
.reduce(
(acc, cur, i) =>
acc === null || cur === null ? null : acc + cur * Math.pow(1000, i),
0,
);
};
};
module.exports = word2numConstructor;
And this is the result of const config = convertConfig['en'] :
{
rulesOnSentence: sentence => {
// Write your rules's function to apply on sentence before splitted it
return sentence
.normalize('NFD')
.replace(/[\u0300-\u036f]/g, '')
.replace(/[\-]/g, ' ')
.replace(/[^a-zA-Z ]/g, '');
},
rulesOnWords: words => {
// Write your rules's function to apply on words array (after to have splitted sentence)
return words.filter(word => word !== 'and');
},
spellingRules: (words, oneDigits, doubleDigits) => {
const unit = _.flatten(Object.values(oneDigits).slice(1));
const ten = _.flatten(
[20, 30, 40, 50, 60, 70, 80, 90].map(num => doubleDigits[num]),
);
const newWords = [...words];
let j = 0;
words.forEach((word, i) => {
// Hyphenate all compound numbers
if (ten.includes(word) && unit.includes(words[i + 1])) {
newWords.splice(i - j, 2, word + '-' + words[i + 1]);
j++;
}
});
return newWords;
},
noMoreMillionSeparator: false,
separators: [
['thousand', 'thousands'],
['million', 'millions'],
['billion', 'billions'],
],
hundredWords: ['hundred', 'hundreds'],
isOneBeforeOnlyOne: [true, true, true, true],
isEachHundredsWords: false,
tripleDigits: {},
oneDigits: {
0: ['zero'],
1: ['one'],
2: ['two'],
3: ['three'],
4: ['four'],
5: ['five'],
6: ['six'],
7: ['seven'],
8: ['eight'],
9: ['nine'],
},
doubleDigits: {
10: ['ten'],
11: ['eleven'],
12: ['twelve'],
13: ['thirteen'],
14: ['fourteen'],
15: ['fifteen'],
16: ['sixteen'],
17: ['seventeen'],
18: ['eighteen'],
19: ['nineteen'],
20: ['twenty'],
30: ['thirty'],
40: ['forty'],
50: ['fifty'],
60: ['sixty'],
70: ['seventy'],
80: ['eighty'],
90: ['ninety'],
},
}
Thank you in advance for your valuable feedback.

Can you try putting the loop inside the it block.
// Single test that round-trips every value from 0 to 4
// (number -> words -> number); it stops at the first mismatch.
it('convert range of 0..4 to word and back to number', () => {
  for (let i = 0; i <= 4; i++) {
    expect(wordToNum(numToWord(i))).toBe(i);
  }
});
Also remove Number() the iterator i is already a number.
If this doesn't work you might need to post the function body of wordToNum & numToWord.

The problem was that my functions are stateful and the results of the second test are cross-contaminated by the first test.
Thank you to Estus Flask (you can see more details in comments).
This test work good :
// Round-trip test (number -> words -> number) for the English configuration.
describe('ENGLISH CONFIG', () => {
  let config, numToWord, wordToNum;
  // Fresh converters for every test, so no state is carried from one test
  // to the next.
  beforeEach(() => {
    config = convertConfig['en'];
    numToWord = numToWordConstructor(config);
    wordToNum = wordToNumConstructor(config);
  });
  for (let i = 0; i <= 10000; i++) {
    // The value is part of the title so that a failure names the number.
    it(`Test for value ${i}`, () => {
      expect(wordToNum(numToWord(i))).toBe(i);
    });
  }
});
I will work on my functions so that they do not modify the variables of closure (in constructor) 😅.
EDIT :
Better keep my first jest function (faster) and just put the variable words
let words = []; // line 17 of numToWords function
inside the returned function of constructor 😅 :
// [...]
const tripleDigits = config.isEachHundredsWords ? config.tripleDigits : null;
const separators = config.separators;
// Convertion function
const num2Words = number => {
let words = [];
let result = '';
// [...]

Alibaba interview: print a sentence with min spaces

I saw this interview question and gave a go. I got stuck. The interview question is:
Given a string
var s = "ilikealibaba";
and a dictionary
var d = ["i", "like", "ali", "liba", "baba", "alibaba"];
insert spaces into s so that it splits into words from the dictionary, using the minimum number of spaces
The output may be
i like alibaba (2 spaces)
i like ali baba (3 spaces)
but pick no.1
I have some code, but got stuck in the printing.
If you have better way to do this question, let me know.
// True when string s begins with part.
function isStartSub(part, s) {
  return s.startsWith(part);
}
// Returns what is left of s after its first part.length characters.
function getRestStr(part, s) {
  return s.substring(part.length);
}
// Concatenates the elements of arr from last to first into one string.
// The array is emptied in the process.
function recPrint(arr) {
  let joined = '';
  while (arr.length !== 0) {
    joined += arr.pop();
  }
  return joined;
}
// NOTE: printing the result is the unsolved part of this attempt; better
// approaches to the whole exercise are welcome.
// Turns the nested result array into a string by delegating to recPrint.
function myPrint(arr) {
  const printed = recPrint(arr);
  return printed;
}
/**
 * Picks the element of `arr` with the smallest `length`.
 * When several elements tie, the earliest one wins; an empty `arr` yields
 * `arr[0]`, i.e. undefined.
 * @param {Array} arr - elements that expose a `length` property
 * @returns {*} the shortest element
 */
function getMinArr(arr) {
  let shortestLength = Number.MAX_SAFE_INTEGER;
  let shortestAt = 0;
  for (const [position, candidate] of arr.entries()) {
    // Strict "<" keeps the first element among equally short ones.
    if (candidate.length < shortestLength) {
      shortestLength = candidate.length;
      shortestAt = position;
    }
  }
  return arr[shortestAt];
}
/**
 * Recursively collects every way the start of `s` can be matched by words
 * from `d`, writing the result into `buf` as nested arrays.
 * Each match becomes `[word, ...branchesForTheRest]` and is put at the FRONT
 * of `buf`, so later dictionary words end up before earlier ones.
 * @param {string} s - remaining text to split
 * @param {string[]} d - dictionary
 * @param {Array} buf - output array, mutated in place
 * @returns {undefined}
 */
function rec(s, d, buf) {
  // Base case: nothing left to split.
  if (s.length == 0) {
    return;
  }
  for (const part of d) {
    if (!isStartSub(part, s)) {
      continue;
    }
    // Split what follows this word into its own branch list ...
    const branch = [];
    rec(getRestStr(part, s), d, branch);
    // ... then put the word itself in front of its branches.
    branch.unshift(part);
    buf.unshift(branch);
  }
}
/**
 * Entry point: builds the nested match structure for `s` with `rec`, dumps it
 * to the console for debugging, and returns its printed form from `myPrint`.
 * @param {string} s - text without spaces
 * @param {string[]} d - dictionary
 * @returns {*} the value produced by myPrint
 */
function myfunc(s, d) {
  const collected = [];
  rec(s, d, collected);
  console.log('-- test --');
  // depth:null prints the nested arrays in full instead of truncating them.
  console.dir(collected, { depth: null });
  return myPrint(collected);
}
// Desired candidate outputs:
// 1. i like alibaba (with 2 spaces)
// 2. i like ali baba (with 3 spaces)
// No. 1 should be picked, because it needs fewer spaces.
var s = "ilikealibaba";
var d = ["i", "like", "ali", "liba", "baba", "alibaba"];
// Run the search on the sample input and print whatever myfunc returns.
var out = myfunc(s, d);
console.log(out);
Basically, my output is the nested structure below, and I am not sure how to print it:
[ [ 'i', [ 'like', [ 'alibaba' ], [ 'ali', [ 'baba' ] ] ] ] ]

This problem is best suited for a dynamic programming approach. The subproblem is, "what is the best way to create a prefix of s". Then, for a given prefix of s, we consider all words that match the end of the prefix, and choose the best one using the results from the earlier prefixes.
Here is an implementation:
var s = "ilikealibaba";
var arr = ["i", "like", "ali", "liba", "baba", "alibaba"];
// dp[k] is the shortest spaced rendering of the first k characters of s,
// or null when that prefix cannot be assembled from the words in arr.
// The empty prefix needs no words, hence the initial "".
var dp = [""];
for (let end = 1; end <= s.length; end++) {
  let shortest = null; // best rendering found so far for s.substring(0, end)
  for (const word of arr) {
    // Only a word that finishes exactly at `end` can close this prefix.
    if (!s.endsWith(word, end)) {
      continue;
    }
    if (word.length === end) {
      // The whole prefix is one dictionary word; nothing can beat that.
      shortest = word;
      break;
    }
    const head = dp[end - word.length];
    if (head === null) {
      continue; // the part before this word cannot be built at all
    }
    // Shorter total length means fewer spaces, since the letters are fixed.
    const candidateLength = head.length + word.length + 1;
    if (shortest === null || candidateLength < shortest.length) {
      shortest = head + " " + word;
    }
  }
  dp.push(shortest);
}
console.log(dp[s.length]);

pkpnd's answer is along the right track. But word dictionaries tend to be quite large sets, and iterating over the entire dictionary at every character of the string is going to be inefficient. (Also, saving the entire sequence for each dp cell may consume a large amount of space.) Rather, we can frame the question, as we iterate over the string, as: given all the previous indexes of the string that had dictionary matches extending back (either to the start or to another match), which one is both a dictionary match when we include the current character, and has a smaller length in total. Generally:
f(i) = min(
f(j) + length(i - j) + (1 if j is after the start of the string)
)
for all j < i, where string[j] ended a dictionary match
and string[j+1..i] is in the dictionary
Since we only add another j when there is a match and a new match can only extend back to a previous match or to the start of the string, our data structure could be an array of tuples, (best index this match extends back to, total length up to here). We add another tuple if the current character can extend a dictionary match back to another record we already have. We can also optimize by exiting early from the backwards search once the matched substring would be greater than the longest word in the dictionary, and building the substring to compare against the dictionary as we iterate backwards.
JavaScript code:
/**
 * Builds the table of positions in `str` where a chain of dictionary words
 * can end, keeping for each the cheapest way to get there.
 *
 * Every record is `[endIndex, previousRecordIndex, totalLength]`; the first
 * record `[-1, -1, -1]` is a sentinel for "before the string starts".
 * A record is appended for index i only if some earlier record can be extended
 * to i by a word that is in `dict`.
 *
 * @param {string} str - text without spaces
 * @param {{has: function(string): boolean}} dict - word set (e.g. a Set)
 * @returns {Array<Array<number>>} the record table, sentinel included
 */
function f(str, dict) {
  const records = [[-1, -1, -1]];
  for (let end = 0; end < str.length; end++) {
    let winner = [null, null, Infinity];
    let candidate = '';
    let right = end;
    // Walk the records backwards, growing `candidate` to the left one
    // record gap at a time instead of re-slicing from scratch.
    for (let k = records.length - 1; k >= 0; k--) {
      const [left, , lengthSoFar] = records[k];
      candidate = str.slice(left + 1, right + 1) + candidate;
      right = left;
      if (!dict.has(candidate)) {
        continue;
      }
      // +1 accounts for the separating space (cancelled by the sentinel's -1).
      const total = lengthSoFar + 1 + end - left;
      if (total < winner[2]) {
        winner = [end, k, total];
      }
    }
    if (winner[0] !== null) {
      records.push(winner);
    }
  }
  return records;
}
var s = "ilikealibaba";
// The dictionary is a Set because f looks words up with dict.has().
var d = new Set(["i", "like", "ali", "liba", "baba", "alibaba"]);
// Print the full record table returned by f.
console.log(JSON.stringify(f(s,d)));
We can track back our result:
[[-1,-1,-1],[0,0,1],[4,1,6],[7,2,10],[11,2,14]]
[11, 2, 14] means a total length of 14,
where the previous index in m is 2 and the right index
of the substr is 11
=> follow it back to m[2] = [4, 1, 6]
this substr ended at index 4 (which means the
first was "alibaba"), and followed m[1]
=> [0, 0, 1], means this substr ended at index 0
so the previous one was "like"
And there you have it: "i like alibaba"

As you're asked to find a shortest answer probably Breadth-First Search would be a possible solution. Or you could look into A* Search.
Here is a working example with A* (because it's less boring to write than BFS :)), basically just copied from the Wikipedia article. All the "turning the string into a graph" magic happens in the getNeighbors function.
https://jsfiddle.net/yLeps4v5/4/
var str = 'ilikealibaba';
var dictionary = ['i', 'like', 'ali', 'baba', 'alibaba'];
// A node is the index of the last character consumed so far, so the start
// node sits one position before the string and the finish node is its last index.
var START = -1;
var FINISH = str.length - 1;

/**
 * Lists the positions reachable from position `i` in one step: for every
 * dictionary word that occurs in `str` right after `i`, the index of that
 * word's last character.
 * @param {number} i - index of the last consumed character
 * @returns {number[]} reachable positions, in dictionary order
 */
function getNeighbors(i) {
  const from = i + 1;
  const reachable = [];
  for (const word of dictionary) {
    if (str.slice(from, from + word.length) == word) {
      reachable.push(i + word.length);
    }
  }
  return reachable;
}
/**
 * Searches for a path from `start` to `goal` that uses the fewest steps,
 * where the steps available from a node come from getNeighbors.
 *
 * The open set is processed first-in-first-out and no heuristic is applied,
 * so with the unit step cost used here the search proceeds breadth-first.
 *
 * @param {number} start - node to start from
 * @param {number} goal - node to reach
 * @returns {*} whatever reconstruct_path builds for the path that was found
 * @throws {Error} 'path not found' when `goal` cannot be reached
 */
function aStar(start, goal) {
  // Nodes that have already been expanded.
  const closedSet = new Set();
  // Discovered nodes waiting to be expanded; initially only the start node.
  const openSet = [start];
  // For each node, the node it is most cheaply reached from.
  const cameFrom = {};
  // Cost of the cheapest known route from start to each node. It is keyed by
  // node, and a node without an entry counts as infinitely far away.
  const gScore = new Map([[start, 0]]);

  while (openSet.length > 0) {
    const current = openSet.shift();
    if (current === goal) {
      return reconstruct_path(cameFrom, current);
    }
    closedSet.add(current);

    for (const neighbor of getNeighbors(current)) {
      if (closedSet.has(neighbor)) {
        continue; // already expanded
      }
      if (!openSet.includes(neighbor)) {
        openSet.push(neighbor); // newly discovered node
      }
      // Every step costs 1, whatever the length of the word.
      const tentative = gScore.get(current) + 1;
      const known = gScore.has(neighbor) ? gScore.get(neighbor) : Infinity;
      if (tentative >= known) {
        continue; // not a better route
      }
      // Best route to this neighbor so far: record it.
      cameFrom[neighbor] = current;
      gScore.set(neighbor, tentative);
    }
  }
  throw new Error('path not found');
}
/**
 * Follows the `cameFrom` links back from `current` and returns the pieces of
 * `str` covered by each step, ordered from the first step to the last.
 * @param {Object} cameFrom - maps a node to the node it was reached from
 * @param {number} current - node to trace back from
 * @returns {string[]} substrings of `str`, one per step of the path
 */
function reconstruct_path(cameFrom, current) {
  const pieces = [];
  let node = current;
  let parent = cameFrom[node];
  // A parent of 0 is falsy but still a valid node, hence the extra check.
  while (parent || parent == 0) {
    // Walking backwards, so each new piece goes in front.
    pieces.unshift(str.slice(parent + 1, node + 1));
    node = parent;
    parent = cameFrom[node];
  }
  return pieces;
}
// Search from the node before the first character to the last index and print the result.
console.log(aStar(START, FINISH));

You could collect all possible combinations of the string by checking the starting string and render then the result.
If more than one result has the minimum length, all results are taken.
It might not work for extreme cases in which the string consists only of repetitions of the same base string, such as the string 'abcabc' with the word 'abc'. In that case I suggest starting with the shortest word and then updating each partial result by looking for longer words and replacing where possible.
/**
 * Collects every way `string` can be split into words from the outer `words`
 * list; each complete split is pushed onto the outer `result` array.
 * @param {string} string - text still to be split
 * @param {string[]} [array=[]] - words chosen so far (not mutated)
 * @returns {undefined}
 */
function getWords(string, array = []) {
  for (const candidate of words) {
    if (!string.startsWith(candidate)) {
      continue;
    }
    const chosen = [...array, candidate];
    const remainder = string.slice(candidate.length);
    if (remainder) {
      // Text is left over: keep splitting it.
      getWords(remainder, chosen);
    } else {
      // The whole string is consumed: this split is complete.
      result.push(chosen);
    }
  }
}
// Sample input, the dictionary, and the accumulator that getWords pushes
// complete splits into.
var string = "ilikealibaba",
words = ["i", "like", "ali", "liba", "baba", "alibaba"],
result = [];
getWords(string);
console.log('all possible combinations:', result);
// Keep every split that ties for the fewest words (i.e. the fewest spaces).
console.log('result:', result.reduce((r, a) => {
// First candidate, or one with strictly fewer words: restart the list with it.
if (!r || r[0].length > a.length) {
return [a];
}
// Same word count as the current best: keep it alongside.
if (r[0].length === a.length) {
r.push(a);
}
return r;
}, undefined))

Use trie data structure
Construct a trie data structure based on the dictionary data
Search the sentence for all possible slices and build a solution tree
Deep traverse the solution tree and sort the final combinations
// Sample input: the sentence without spaces and the dictionary used to split it.
const sentence = 'ilikealibaba';
const words = ['i', 'like', 'ali', 'liba', 'baba', 'alibaba',];
/**
 * One node of the trie. Children are stored directly as properties keyed by
 * character; a child stored under '_' marks that a word ends at this node.
 */
class TrieNode {
  constructor() { }

  /**
   * Returns the child stored under `a`, creating it first if there is none.
   * @param {string} a - property key (a character, or '_' as end marker)
   * @returns {TrieNode} the existing or newly created child
   */
  set(a) {
    if (!this[a]) {
      this[a] = new TrieNode();
    }
    return this[a];
  }

  /**
   * Walks down the trie along the characters of `word` and records in `marks`
   * the length of every prefix that is a complete word.
   * An array argument is consumed: characters are shifted off as they are read.
   * @param {string|string[]} word - text to match from its first character
   * @param {number[]} marks - output list of matching prefix lengths
   * @param {number} [depth=1] - length of the prefix ending at the next character
   * @returns {number|undefined} 0 when this node has no child for the next
   *   character, otherwise undefined
   */
  search(word, marks, depth = 1) {
    const chars = Array.isArray(word) ? word : word.split('');
    const head = chars.shift();
    const child = this[head];
    if (!child) {
      return 0;
    }
    if (child._) {
      marks.push(depth);
    }
    child.search(chars, marks, depth + 1);
  }
}
/**
 * Builds a trie containing every word in `words`.
 * Each word is inserted one character at a time, and the node reached after
 * its last character receives a '_' child as the end-of-word marker.
 * @param {string[]} words - dictionary
 * @returns {TrieNode} root of the new trie
 */
TrieNode.createTree = words => {
  const root = new TrieNode();
  words.forEach(word => {
    let node = root;
    let position = 0;
    while (position < word.length) {
      node = node.set(word[position]);
      position += 1;
    }
    node.set('_');
  });
  return root;
};
// Trie built once from the dictionary; searchSentence looks words up in it.
const t = TrieNode.createTree(words);
/**
 * Builds the tree of all dictionary words that can start `sentence`, and
 * recursively of all words that can follow each of them.
 * Keys are the lengths of the matching words; each value is the same kind of
 * tree for the text that follows that word ({} when nothing matches).
 * @param {string} sentence - remaining text without spaces
 * @returns {Object} nested object keyed by matched word length
 */
function searchSentence(sentence) {
  const lengths = [];
  t.search(sentence, lengths);
  const branches = {};
  for (const length of lengths) {
    branches[length] = searchSentence(sentence.slice(length));
  }
  return branches;
}
// Nested object produced by searchSentence for the sample sentence; passed to deepTraverse below.
const solutionTree = searchSentence(sentence);
/**
 * Walks the solution tree depth-first and returns every root-to-node path
 * whose word lengths add up to `targetLen`, converted into the actual words.
 * @param {Object} tree - nested object keyed by word length
 * @param {string} sentence - the text the lengths refer to
 * @param {number} [targetLen=sentence.length] - total length a path must reach
 * @returns {string[][]} one array of words per matching path, in visit order
 */
function deepTraverse(tree, sentence, targetLen = sentence.length) {
  const solutions = [];
  // Word lengths along the path currently being explored.
  const path = [];

  const walk = (node) => {
    for (const key of Object.keys(node)) {
      path.push(Number(key));
      const covered = path.reduce((total, length) => total + length, 0);
      if (covered === targetLen) {
        // Cut the sentence into words of the recorded lengths.
        const pieces = [];
        let remaining = sentence;
        for (const length of path) {
          pieces.push(remaining.slice(0, length));
          remaining = remaining.slice(length);
        }
        solutions.push(pieces);
      }
      if (node[key]) {
        walk(node[key]);
      }
      // Backtrack before trying the next sibling.
      path.pop();
    }
  };

  walk(tree);
  return solutions;
}
const solutions = deepTraverse(solutionTree, sentence);
// Fewest words first, so entry no. 1 is the split with the fewest spaces.
solutions.sort((s1, s2) => s1.length - s2.length).forEach((s, i) => {
// A split into k words needs k - 1 spaces.
console.log(`${i + 1}. ${s.join(' ')} (${s.length - 1} spaces)`);
});
console.log('pick no.1');

Develop Reference

JavaScript is the programming language of the Web.

Javascript Text Statistic Algorithm Improvement - javascript

Related

Javascript, getting past values for an array of objects

Better approach for evaluating expressions

Dynamically update value of a key in Map in JavaScript

Testing a js function with Jest returns a bad value in for loop

Alibaba interview: print a sentence with min spaces

Categories

Resources