Find line by character position

Find line by character position - javascript

I have a string of around 4MB (4 million characters) and around 30.000 lines in a variable. Next I have the index of a character, lets say 3605506, what would be the quickest most efficient way to find on which line this character is? I need to do this hundreds of times after each other, so that's why it's relatively important it's efficient.

Pass the string and the index to the function below. It splits the string on newline characters and checks whether the running character count has passed the index value.
/**
 * Returns the 1-based line number that contains the character at `index`.
 *
 * A '\n' character is counted as part of the line it terminates. The text is
 * scanned with indexOf instead of being split, so no per-line array is
 * allocated on each call.
 *
 * @param {string} data The full text.
 * @param {number} index 0-based character position; may equal data.length
 *   (the position just past the last character).
 * @return {number|undefined} The 1-based line number, or undefined when
 *   `index` is not an integer inside [0, data.length].
 */
function getlineNumberofChar(data, index) {
  if (!Number.isInteger(index) || index < 0 || index > data.length) {
    return undefined;
  }
  let lineNumber = 1;
  let newlineAt = data.indexOf('\n');
  // Every newline strictly before `index` pushes the target one line further down.
  while (newlineAt !== -1 && newlineAt < index) {
    lineNumber += 1;
    newlineAt = data.indexOf('\n', newlineAt + 1);
  }
  return lineNumber;
}

Similar to brute_force but with the off-by-1 error fixed. Also returns column number.
// Split once up front so every lookup can reuse the same array of lines.
// NOTE(review): `code` is not defined in this snippet — presumably it holds the full source text.
const lines = code.split('\n')
/**
 * Maps a character position to a [line, column] pair.
 *
 * @param {string[]} lines The text already split on '\n'.
 * @param {number} index 0-based character position in the original text.
 * @return {Array|undefined} [1-based line number, 0-based column], or
 *   undefined when the position lies beyond the last line.
 */
function findLineColForByte(lines, index) {
  let lineStart = 0;
  let lineNumber = 0;
  for (const text of lines) {
    lineNumber += 1;
    // +1 restores the '\n' that split() removed from the end of the line.
    const nextLineStart = lineStart + text.length + 1;
    if (nextLineStart > index) {
      return [lineNumber, index - lineStart];
    }
    lineStart = nextLineStart;
  }
  return undefined;
}

You mentioned that
I need to do this hundreds of times after each other, so that's why it's relatively important it's efficient.
Most of these solutions require the computations to be done for each lookup, which means you are doing a lot of work over and over again.
To checkpoint some of these computations would (could) improve efficiency greatly.
Of course, first things first we need to split the lines up:
/**
 * Splits a string into lines and reports whether it ended with a newline.
 *
 * A trailing newline would otherwise leave an empty string as the last
 * "line"; that empty entry is dropped.
 *
 * @param {string} stringData The string to split
 *
 * @return {array} a tuple [lines, endedWithNewLine]: the array of lines
 * and a boolean that is true when the last line had a newLine
 */
function splitLines( stringData ) {
  const endedWithNewLine = stringData.endsWith("\n");
  const pieces = stringData.split("\n");
  if (endedWithNewLine) {
    pieces.pop(); // Drop the empty entry that follows the final newline
  }
  return [pieces, endedWithNewLine];
}
This will ensure that our last line is not an empty string, if this is arbitrary, you don't need to check for this.
Next up is computing the cumulative character count for each line, that is, after line x there have been n total characters.
/**
 * Builds the running character total at the end of every line.
 *
 * Each line contributes its length plus one for the newline that was
 * stripped by the split; the last line only gets the extra one when the
 * original text ended with a newline.
 *
 * @param {array} lineData The lines of the string
 * @param {boolean} lastLineHasNewLineChar Whether or not the last line had a newLineChar
 *
 * @return {array} The cumulative character counts for each line
 * (e.g.) Line 0 has 18 chars plus a newLine, so 19; Line 1 has 8 chars plus a newLine, so 28, etc.
 */
function buildLineEndingPositions( lineData, lastLineHasNewLineChar = false ) {
  const lastPosition = lineData.length - 1;
  const endings = [];
  let runningTotal = 0;
  for (let position = 0; position < lineData.length; position++) {
    const hadNewLine = position !== lastPosition || lastLineHasNewLineChar;
    runningTotal += lineData[position].length + (hadNewLine ? 1 : 0);
    endings.push(runningTotal);
  }
  return endings;
}
Finally, we can compute these once, and access them for any number of future lookups based on character position to determine the line (the first index to be less than or equal to the cumulative character count)
// Demo input: four lines with no trailing newline.
const testString = "There once was a guy from france\nHe really liked to dance\nUntil one day, his legs ran away\nIdk where I was going with this";
// Split once and build the cumulative totals once; both results can then be reused for every lookup.
const [testLines, lastLineHadNewLineChar] = splitLines(testString);
const cumulativeCharCounts = buildLineEndingPositions(testLines, lastLineHadNewLineChar);
// Each entry is the total number of characters up to and including that line.
console.log(cumulativeCharCounts); //[33, 58, 91, 122]
By iterating through the cumulativeCharCounts we can now use the index to determine the line number with a simple boolean compare to the desired char position, until we reach the first cumulative position that is less than or equal to our desired position. The split and cumulative counts are figured out 1x and reused, thus less overhead for each of the hundreds of calls.

// Let this be your 4MB string.
var str = "This \n is a\n test\n string."
// Let this be the index of the character you are finding within the 4MB string.
var index = str.indexOf("test")
// Create substring from beginning to index of character.
var substr = str.substring(0, index)
// Count the newlines before the character and add 1 to account for the first line.
// match() returns null when there is no newline at all; that case is handled
// explicitly instead of relying on a thrown TypeError, so unrelated errors are not hidden.
var numberOfLines = (substr.match(/\n/g) ?? []).length + 1

Related

Find the longest anagram with array javascript

I try to find the longest anagram in Javascript. For this, I have an array with 10 letters and a dictionary that contains every words.
I would like that the program test every combination possible.
We started from 10 (the array length of letters) and we check if it's an anagram
If not, we remove the char at the very end, and we check, if not, we shift the removed char by one to the left... When the entire combinations with 9 letters is tested, we test for 8, 7, 6, 5, 4, 3, 2 letters.
// Module-level state shared by the functions below.
var wordFound = '' // The longest word found so far
var copyArr = [] // Working copy of lettersChosen, so the lettersChosen array itself is never modified
var savedWord = [] // Intended as a copy of copyArr; not referenced by the code shown below
var lengthLetters = 0 // Intended as the number of letters left; not referenced by the code shown below
var lettersChosen = ['A', 'S', 'V', 'T', 'S', 'E', 'A', 'M', 'N'] // The letters available for building a word
/**
 * Tells whether two strings are anagrams of each other.
 *
 * The comparison is case-insensitive and ignores every character that is
 * not a letter or digit (underscores are ignored too).
 *
 * @param {string} stringA First string.
 * @param {string} stringB Second string.
 * @return {boolean} true when both strings contain the same characters.
 */
function isAnagram(stringA, stringB) {
  // Reduce a string to its sorted, lower-cased alphanumeric characters.
  const canonical = (text) =>
    text
      .toLowerCase()
      .replace(/[\W_]+/g, "")
      .split("")
      .sort()
      .join("");
  return canonical(stringA) === canonical(stringB);
}
/**
 * Looks for a dictionary entry that is an anagram of the given letters.
 *
 * Reads the dictionary from the outer `file` variable and, on the first
 * match, stores the matching entry in the outer `wordFound` variable.
 *
 * @param {string[]} arr The letters to test, one per element.
 * @return {boolean} true when a matching dictionary entry was found.
 */
function checkForEachWord(arr) {
  // Local constant: the original assigned to an undeclared variable, which
  // leaks a global in sloppy mode and throws in strict/module code.
  const strLetters = arr.join('');
  // The original used for...in, which also walks plain objects; keep accepting both shapes.
  const words = Array.isArray(file) ? file : Object.values(file);
  for (const word of words) {
    if (isAnagram(strLetters, word)) {
      wordFound = word;
      return true;
    }
  }
  return false;
}
/**
 * Searches for a dictionary word that is an anagram of the chosen letters,
 * first with all of them and then with one letter left out at a time.
 *
 * Works on the shared copyArr array and relies on checkForEachWord, which
 * records a hit in wordFound.
 *
 * NOTE(review): copyArr is rebuilt from lettersChosen at the end of every
 * loop pass, so each candidate tested in the loop is missing exactly one
 * letter. countLetter is incremented but never read, and the loop only ends
 * once checkForEachWord returns true.
 *
 * @return {boolean} always true (the only exits are the two `return true`).
 */
function getOneOfTheLongestWord() {
lettersChosen.forEach(letter => {
copyArr.push(letter) // Copy every chosen letter into the working array
})
var index = 1 // Offset, counted from the end of the array, of the letter to remove
var countLetter = 1 // Meant to be how many letters to remove; never read below
var position = copyArr.length - index // The actual position to remove
var savedArray = [] // Declared but not used in this function
var iteration = 0 // Declared but not used in this function
var test = checkForEachWord(copyArr) // First attempt: all the letters
if (test == true)
return true // I found the longest word
while (test == false) {
copyArr.splice(position, 1) // I remove the char at current position
index++ // Change letter to remove
if (index > copyArr.length + 1) { // If I hit the first character, then restart from the end
index = 1
countLetter++ // Remove one more letter
}
console.log(copyArr + ' | ' + position)
position = copyArr.length - index // Get the position based on the actual size of the array letters
test = checkForEachWord(copyArr) // Test the anagram
copyArr = [] // Reset array
lettersChosen.forEach(letter => { // Recreate the array
copyArr.push(letter)
})
}
return true // Word found
}
// Run the search once.
getOneOfTheLongestWord()
My code is not optimal there is so many way to improve it.
Actually my output is good with 9 letters.
copyArr | position
A,S,V,T,S,E,A,M | 8
A,S,V,T,S,E,M,N | 6
A,S,V,T,S,A,M,N | 5
A,S,V,T,E,A,M,N | 4
A,S,V,S,E,A,M,N | 3
A,S,T,S,E,A,M,N | 2
A,V,T,S,E,A,M,N | 1
S,V,T,S,E,A,M,N | 0
But not with 8 letters, I don't see how I can use my countLetter to test all combinations...
Thank you very much.

Short answer, put the sorted versions of dictionary words into a trie, then do an A* search.
Longer answer because you probably haven't encountered those things.
A trie is a data structure which at each point gives you a lookup by character of the next level of the trie. You can just use a blank object as a trie. Here is some simple code to add a word to one.
/**
 * Inserts a word into the trie, keyed by the word's letters in sorted order.
 *
 * Each trie level is a plain object mapping one letter to the next level;
 * the node reached after the last letter stores the original word under
 * the key 'final'.
 *
 * @param {Object} trie Root of the trie; modified in place.
 * @param {string} word The word to add.
 */
function add_to_trie (trie, word) {
  const leaf = word
    .split('')
    .sort()
    .reduce((node, letter) => {
      if (! node[letter]) {
        node[letter] = {};
      }
      return node[letter];
    }, trie);
  leaf['final'] = word;
}
An A* search simply means that we have a priority queue that gives us the best option to look at next. Rather than implement my own priority queue I will simply use an existing one at flatqueue. It returns the lowest priority possible. So I'll use as a priority one that puts the longest possible word first, and if there is a tie then goes with whatever word we are farthest along on. Here is an implementation.
import FlatQueue from "flatqueue";
/**
 * Finds the longest dictionary word that can be built from the given letters.
 *
 * Runs a best-first (A*) search over states [position, current_length, trie node]:
 * at each position of the sorted letters we either use the letter (if the
 * trie allows it) or skip it. A state's priority is the longest word still
 * reachable from it, i.e. the letter count minus the letters skipped so far,
 * with ties broken in favour of the state that is furthest along. The queue
 * returns the lowest value first, so priorities are negated.
 *
 * @param {Object} trie Trie built with add_to_trie (sorted letters, word under 'final').
 * @param {string[]} letters Available letters, one per element. Not modified.
 * @return {string|null} A longest matching word, or null when none exists.
 */
function longest_word_from (trie, letters) {
  // Sort a copy: sorting in place would reorder the caller's array.
  const sorted_letters = [...letters].sort();
  const total = sorted_letters.length;
  const queue = new FlatQueue();
  // Nothing skipped yet, so the best possible word uses every letter.
  queue.push([0, 0, trie], - (total ** 2));
  while (0 < queue.length) {
    const [position, word_length, this_trie] = queue.pop();
    if (position === total) {
      // All letters decided: this is an answer only if a word ends here.
      if ('final' in this_trie) {
        return this_trie['final'];
      }
      continue;
    }
    const skipped = position - word_length;
    const letter = sorted_letters[position];
    if (letter in this_trie) {
      queue.push(
        [
          position + 1, // Advance the position
          word_length + 1, // We added a letter
          this_trie[letter] // And the sub-trie after that letter
        ],
        // Still `skipped` letters dropped, one more letter matched.
        - total * (total - skipped) - (word_length + 1)
      );
    }
    queue.push(
      [
        position + 1, // Advance the position
        word_length, // We didn't add a letter
        this_trie // And stayed at the same trie node.
      ],
      // One more letter dropped, so the best reachable length shrinks by one.
      - total * (total - skipped - 1) - word_length
    );
  }
  return null;
}
If the import doesn't work for you, you can simply replace that line with the code from index.js. Simply remove the leading export default and the rest will work.
And with that, here is sample code that demonstrates it in action.
// Tiny dictionary and letter set for the demonstration.
let file = ['foo', 'bar', 'baz', 'floop'];
let letters = 'fleaopo'.split('')
// Load every dictionary word into one trie, then query it.
let this_trie = {};
file.forEach((word) => {
  add_to_trie(this_trie, word);
});
console.log(longest_word_from(this_trie, letters));
If you have a long dictionary, loading the dictionary into the trie is most of your time. But once you've done that you can call it over and over again with different letters, and get answers quite quickly.

Is there a way to avoid number to string conversion & nested loops for performance?

I just took a coding test online and this one question really bothered me. My solution was correct but was rejected for being unoptimized. The question is as following:
Write a function combineTheGivenNumber taking two arguments:
numArray: number[]
num: a number
The function should check all the concatenation pairs that can result in making a number equal to num and return their count.
E.g. if numArray = [1, 212, 12, 12] & num = 1212 then we will have return value of 3 from combineTheGivenNumber
The pairs are as following:
numArray[0]+numArray[1]
numArray[2]+numArray[3]
numArray[3]+numArray[2]
The function I wrote for this purpose is as following:
/**
 * Counts the ordered pairs of distinct array elements whose decimal
 * concatenation equals `num`.
 *
 * @param {number[]} numArray Candidate numbers.
 * @param {number} num The number the concatenation must produce.
 * @return {number} How many (first, second) index pairs match.
 */
function combineTheGivenNumber(numArray, num) {
  // Work on strings so that "+" concatenates instead of adding.
  const pieces = numArray.map((value) => `${value}`);
  const target = `${num}`;
  let pairCounts = 0;
  // Try every ordered pair of different positions.
  for (let first = 0; first < pieces.length; first++) {
    for (let second = 0; second < pieces.length; second++) {
      if (first === second) continue;
      if (pieces[first] + pieces[second] === target) {
        pairCounts += 1;
      }
    }
  }
  return pairCounts;
}
console.log('Test 1: ', combineTheGivenNumber([1,212,12,12],1212));
console.log('Test 2: ', combineTheGivenNumber([4,21,42,1],421));
From my experience, I know conversion of number to string is slow in JS, but I am not sure whether my approach is wrong/lack of knowledge or does the tester is ignorant of this fact. Can anyone suggest further optimization of the code snipped?
Elimination of string to number to string will be a significant speed boost but I am not sure how to check for concatenated numbers otherwise.

Elimination of string to number to string will be a significant speed boost
No, it won't.
Firstly, you're not converting strings to numbers anywhere, but more importantly the exercise asks for concatenation so working with strings is exactly what you should do. No idea why they're even passing numbers. You're doing fine already by doing the conversion only once for each number input, not every time your form a pair. And last but not least, avoiding the conversion will not be a significant improvement.
To get a significant improvement, you should use a better algorithm. @derpirscher is correct in his comment: "[It's] the nested loop checking every possible combination which hits the time limit. For instance for your example, when the outer loop points at 212 you don't need to do any checks, because regardless, whatever you concatenate to 212, it can never result in 1212".
So use
// Fragment: counts the same pairs as the nested loops, but the inner scan only
// runs for elements that are a prefix of the target.
// NOTE(review): `numArray` and `num` are not defined here — presumably they are
// the already string-converted values from the function this fragment replaces part of.
let pairCounts = 0;
numArray.forEach((e,i) => {
if (num.startsWith(e)) {
//^^^^^^^^^^^^^^^^^^^^^^
numArray.forEach((f,j) => {
if (i !== j && num === e+f) {
pairCounts++;
}
});
}
});
You might do the same with suffixes, but it becomes more complicated to rule out concatenation to oneself there.
Optimising further, you can even achieve a linear complexity solution by putting the strings in a lookup structure, then when finding a viable prefix just checking whether the missing part is an available suffix:
/**
 * Counts the ordered pairs of distinct array elements whose decimal
 * concatenation equals `num`, using a lookup table instead of nested loops.
 *
 * @param {number[]} numArray Candidate numbers.
 * @param {number} num The number the concatenation must produce.
 * @return {number} How many (first, second) index pairs match.
 */
function combineTheGivenNumber(numArray, num) {
  // How often each distinct string occurs in the input.
  const occurrences = new Map();
  for (const value of numArray) {
    const key = String(value);
    occurrences.set(key, (occurrences.get(key) ?? 0) + 1);
  }
  const target = String(num);
  let pairCounts = 0;
  occurrences.forEach((prefixCount, prefix) => {
    if (target.startsWith(prefix)) {
      const rest = target.slice(prefix.length);
      const restCount = occurrences.get(rest);
      if (restCount !== undefined) {
        // When both halves are the same string, an element cannot pair with itself.
        const partners = rest === prefix ? restCount - 1 : restCount;
        pairCounts += prefixCount * partners;
      }
    }
  });
  return pairCounts;
}
(the proper handling of duplicates is a bit difficile)

I like your approach of going to strings early. I can suggest a couple of simple optimizations.
You only need the numbers that are valid "first parts" and those that are valid "second parts"
You can use the javascript .startsWith and .endsWith to test for those conditions. All other strings can be thrown away.
The lengths of the strings must add up to the length of the desired answer
Suppose your target string is 8 digits long. If you have 2 valid 3-digit "first parts", then you only need to know how many valid 5-digit "second parts" you have. Suppose you have 9 of them. Those first parts can only combine with those second parts, and give you 2 * 9 = 18 valid pairs.
You don't actually need to keep the strings!
It struck me that if you know you have 2 valid 3-digit "first parts", you don't need to keep those actual strings. Knowing that they are valid 3-digit first parts is all you need to know.
So let's build an array containing:
How many valid 1-digit first parts do we have?,
How many valid 2-digit first parts do we have?,
How many valid 3-digit first parts do we have?,
etc.
And similarly an array containing the number of valid 1-digit second parts, etc.
X first parts and Y second parts can be combined in X * Y ways
Except if the parts are the same length, in which case we are reusing the same list, and so it is just X * (Y-1).
So not only do we not need to keep the strings, but we only need to do the multiplication of the appropriate elements of the arrays.
5 1-char first parts & 7 3-char second parts = 5 * 7 = 35 pairs
6 2-char first part & 4 2-char second parts = 6 * (4-1) = 18 pairs
etc
So this becomes extremely easy. One pass over the strings, tallying the "first part" and "second part" matches of each length. This can be done with an if and a ++ of the relevant array element.
Then one pass over the lengths, which will be very quick as the array of lengths will be very much shorter than the array of actual strings.
/**
 * Counts the ordered pairs of distinct array elements whose decimal
 * concatenation equals `num`, by tallying valid "first parts" and
 * "second parts" per length.
 *
 * @param {number[]} numArray Candidate numbers.
 * @param {number} num The number the concatenation must produce.
 * @return {number} How many (first, second) index pairs match.
 */
function combineTheGivenNumber(numArray, num) {
  const sTarget = String(num);
  const targetLength = sTarget.length;
  const startsByLen = new Array(targetLength).fill(0);
  const endsByLen = new Array(targetLength).fill(0);
  for (const element of numArray) {
    const sElement = String(element);
    // A part must be non-empty and leave room for the other part.
    if (sElement.length === 0 || sElement.length >= targetLength) {
      continue;
    }
    if (sTarget.startsWith(sElement)) {
      startsByLen[sElement.length]++;
    }
    if (sTarget.endsWith(sElement)) {
      endsByLen[sElement.length]++;
    }
  }
  // We can now throw away the strings. We have two separate arrays:
  // startsByLen[1] is the count of strings (without attempting to remove duplicates) which are the first character of the required answer
  // startsByLen[2] similarly the count of strings which are the first 2 characters of the required answer
  // etc.
  // and endsByLen[1] is the count of strings which are the last character ...
  // and endsByLen[2] is the count of strings which are the last 2 characters, etc.
  let pairCounts = 0;
  for (let firstElementLength = 1; firstElementLength < targetLength; firstElementLength++) {
    const secondElementLength = targetLength - firstElementLength;
    const firsts = startsByLen[firstElementLength];
    const seconds = endsByLen[secondElementLength];
    // An element can only collide with itself when the first and second part
    // are the same string, i.e. the target is one string repeated twice.
    // Equal lengths alone are not enough ("12" + "34").
    const samePart =
      firstElementLength === secondElementLength &&
      sTarget.slice(0, firstElementLength) === sTarget.slice(firstElementLength);
    pairCounts += samePart ? firsts * (seconds - 1) : firsts * seconds;
  }
  return pairCounts;
}
console.log('Test 1: ', combineTheGivenNumber([1, 212, 12, 12], 1212));
console.log('Test 2: ', combineTheGivenNumber([4, 21, 42, 1], 421));

Depending on a setup, the integer slicing can be marginally faster
Although in the end it falls short
Also, when tested on higher N values, the previous answer exploded in jsfiddle. Possibly a memory error.
As far as I have tested with both random and hand-crafted values, my solution holds. It is based on the observation that if X and Y concatenated equal Z, then the following must be true:
Z - Y == X * 10^(floor(log10(Y)) + 1)
an example of this:
1212 - 12 = 1200
12 * 10^(floor((log10(12)) + 1) = 12 * 10^(1+1) = 12 * 100 = 1200
Now in theory, this should be faster then manipulating strings. And in many other languages it most likely would be. However in Javascript as I just learned, the situation is a bit more complicated. Javascript does some weird things with casting that I haven't figured out yet. In short - when I tried storing the numbers(and their counts) in a map, the code got significantly slower making any possible gains from this logarithm shenanigans evaporate. Furthermore, storing them in a custom-crafted data structure isn't guaranteed to be faster since you have to build it etc. Also it would be quite a lot of work.
As it stands this log comparison is ~ 8 times faster in a case without(or with just a few) matches since the quadratic factor is yet to kick in. As long as the possible postfix count isn't too high, it will outperform the linear solution. Unfortunately it is still quadratic in nature with the breaking point depending on a total number of strings as well as their length.
So if you are searching for a needle in a haystack - for example you are looking for a few pairs in a huge heap of numbers, this can help. In the other case of searching for many matches, this won't help. Similarly, if the input array was sorted, you could use binary search to push the breaking point further up.
In the end, unless you manage to figure out how to store ints in a map(or some custom implementation of it) in a way that doesn't completely kill the performance, the linear solution of the previous answer will be faster. It can still be useful even with the performance hit if your computation is going to be memory heavy. Storing numbers takes less space then storing strings.
// Natural logarithm of 10. Kept for any code that still reads it;
// log10floored no longer divides by it.
var log10 = Math.log(10)
/**
 * Returns floor(log10(num)), i.e. the number of decimal digits minus one
 * for a positive integer.
 *
 * Dividing natural logarithms (Math.log(num) / Math.log(10)) yields
 * 2.9999999999999996 for 1000, which floors to 2. Math.log10 is used
 * instead, and the result is checked against the neighbouring powers of
 * ten so a rounding error in either direction is corrected.
 *
 * @param {number} num The number to measure.
 * @return {number} The floored base-10 logarithm (-Infinity for 0, NaN for negatives).
 */
function log10floored(num) {
  let exponent = Math.floor(Math.log10(num));
  if (10 ** (exponent + 1) <= num) {
    exponent += 1;
  } else if (10 ** exponent > num) {
    exponent -= 1;
  }
  return exponent;
}
/**
 * Counts the ordered pairs of distinct array elements whose decimal
 * concatenation equals `num`, using integer arithmetic only.
 *
 * For a suffix Y with d digits, X followed by Y equals num exactly when
 * num - Y is a positive multiple of 10^d and X is that multiple's quotient.
 *
 * @param {number[]} numArray Candidate numbers (non-negative integers; other values are ignored as suffixes).
 * @param {number} num The number the concatenation must produce.
 * @return {number} How many (first, second) index pairs match.
 */
function combineTheGivenNumber(numArray, num) {
  let count = 0;
  for (let i = 0; i < numArray.length; i++) {
    const suffix = numArray[i];
    // Also guards the digit loop below against Infinity and NaN.
    if (!Number.isInteger(suffix) || suffix < 0) {
      continue;
    }
    // 10^(number of digits of suffix), found by exact integer multiplication
    // rather than a floating-point logarithm. A suffix of 0 has one digit.
    let shift = 10;
    while (shift <= suffix) {
      shift *= 10;
    }
    const portion = num - suffix;
    // portion <= 0 would mean an empty, zero or negative prefix; "0" + "12" is "012", not "12".
    if (portion <= 0 || portion % shift !== 0) {
      continue;
    }
    const prefix = portion / shift;
    for (let j = 0; j < numArray.length; j++) {
      if (j !== i && numArray[j] === prefix) {
        count += 1;
      }
    }
  }
  return count;
}
//The previous solution, that I used for timing, comparison and check purposes
/**
 * Reference implementation: counts the ordered pairs of distinct array
 * elements whose decimal concatenation equals `num`, via a string tally.
 *
 * @param {number[]} numArray Candidate numbers.
 * @param {number} num The number the concatenation must produce.
 * @return {number} How many (first, second) index pairs match.
 */
function combineTheGivenNumber2(numArray, num) {
  // Tally of how many times each decimal string appears.
  const tally = new Map();
  for (const value of numArray) {
    const text = String(value);
    const seenSoFar = tally.has(text) ? tally.get(text) : 0;
    tally.set(text, seenSoFar + 1);
  }
  const target = String(num);
  let pairCounts = 0;
  for (const [head, headCount] of tally) {
    if (target.startsWith(head)) {
      const tail = target.slice(head.length);
      if (tally.has(tail)) {
        // Identical head and tail: exclude pairing an element with itself.
        const tailCount = tail === head ? tally.get(tail) - 1 : tally.get(tail);
        pairCounts += headCount * tailCount;
      }
    }
  }
  return pairCounts;
}
// Benchmark: time both implementations on the same 10 million random numbers below 1,000,000.
var myArray = []
for (let i = 0; i < 10000000; i++) {
  myArray.push(Math.floor(Math.random() * 1000000))
}
// The timestamps are declared explicitly; assigning to undeclared names
// creates globals in sloppy mode and throws in strict/module code.
const t1 = Date.now()
console.log('Test 1: ', combineTheGivenNumber(myArray,15285656));
const t2 = Date.now()
console.log('Test 2: ', combineTheGivenNumber2(myArray,15285656));
const t3 = Date.now()
console.log('Test1 time: ', t2 - t1)
console.log('test2 time: ', t3 - t2)
Small update
As long as you are willing to take a performance hit with the setup and settle for the ~2 times performance, using a simple "hashing" table can help.(Hashing tables are nice and tidy, this is a simple modulo lookup table. The principle is similar though.)
Technically this isn't linear, practicaly it is enough for the most cases - unless you are extremely unlucky and all your numbers fall in the same bucket.
/**
 * Counts the ordered pairs of distinct array elements whose decimal
 * concatenation equals `num`, using integer arithmetic and a modulo
 * lookup table for the prefix search.
 *
 * @param {number[]} numArray Candidate numbers (non-negative integers; other values are ignored).
 * @param {number} num The number the concatenation must produce.
 * @return {number} How many (first, second) index pairs match.
 */
function combineTheGivenNumber(numArray, num) {
  let count = 0;
  const size = 1000000;
  // numTable[v % size] lists the POSITIONS in numArray whose value falls in
  // that bucket. Storing positions rather than values is what allows a
  // correct "not the same element" check further down.
  const numTable = new Array(size);
  for (let i = 0; i < numArray.length; i++) {
    const value = numArray[i];
    if (!Number.isInteger(value) || value < 0) {
      continue;
    }
    const idx = value % size;
    if (numTable[idx] === undefined) {
      numTable[idx] = [i];
    } else {
      numTable[idx].push(i);
    }
  }
  for (let i = 0; i < numArray.length; i++) {
    const suffix = numArray[i];
    // Also guards the digit loop below against Infinity and NaN.
    if (!Number.isInteger(suffix) || suffix < 0) {
      continue;
    }
    // 10^(number of digits of suffix), by exact integer multiplication.
    let shift = 10;
    while (shift <= suffix) {
      shift *= 10;
    }
    const portion = num - suffix;
    // A prefix must be a positive integer: "0" + "12" is "012", not "12".
    if (portion <= 0 || portion % shift !== 0) {
      continue;
    }
    const prefix = portion / shift;
    const bucket = numTable[prefix % size];
    if (bucket === undefined) {
      continue;
    }
    for (const j of bucket) {
      // Different values can share a bucket, so the value is compared too.
      if (j !== i && numArray[j] === prefix) {
        count += 1;
      }
    }
  }
  return count;
}

Here's a simplified, and partially optimised approach with 2 loops:
// let's optimise 'combineTheGivenNumber', where
// a=array of numbers AND n=number to match
/**
 * Counts the ordered pairs of distinct elements of `a` whose decimal
 * concatenation equals `n`.
 *
 * @param {number[]} a Candidate numbers.
 * @param {number} n The number the concatenation must produce.
 * @return {number} How many (first, second) index pairs match.
 */
const ctgn = (a, n) => {
  const target = n.toString();
  let matches = 0;
  a.forEach((suffixNumber, suffixIndex) => {
    const suffixText = suffixNumber.toString();
    a.forEach((prefixNumber, prefixIndex) => {
      // number + string concatenates, so this builds "prefix" + "suffix".
      if (prefixIndex !== suffixIndex && prefixNumber + suffixText === target) {
        matches += 1;
      }
    });
  });
  return matches;
};

Given this hash function, an expected output, and the length of the input string, how do I find the input string that returns the given result?

I have this hash function below.
I know that for an input string of length 8 I get a hash with the value of 16530092119764772
The input string can only consist of the characters "abcdefghijklmnop"
What is the best approach to find the input string?
Is there a way to break down the problem mathematically without relying on a brute-force approach to find the string?
Would a recursive solution overflow the stack?
/**
 * Folds a string into a number: starting from 8, each character multiplies
 * the running value by 82 and adds the character's position in
 * "abcdefghijklmnop" (-1 for any other character).
 *
 * @param {string} str The input string.
 * @return {number} The resulting value.
 */
function hash(str) {
  const charset = "abcdefghijklmnop";
  return str
    .split("")
    .reduce((g, character) => g * 82 + charset.indexOf(character), 8);
}
As an example for the string "agile" it hashes to 29662550362

That’s not even really a hash, because charset doesn’t have 82 characters in it. It’s more like parsing a string as a base-82 number where you can only use the first 16 symbols. It’d be completely reversible if it didn’t use floating-point numbers, which are imprecise for integers that big. In case you’re not familiar with why, the simplified version is that the operation inside the loop:
g * 82 + d
gives a different result for every possible value of g and d as long as d is less than 82, because there’s enough space between g * 82 and (g + 1) * 82 to fit 82 different ds (from 0 to 81). Each different result is reversible back to g and d by dividing by 82; the whole value is g and the remainder is d. When every operation inside the loop is reversible, you can reverse the whole thing.
So, like you might convert a number to decimal manually with a loop that divides out one digit at a time, you can convert this imprecise number into base 82:
/**
 * Breaks a value into its digits in the given base, most significant first.
 *
 * The loop stops once the remaining value is falsy; the callers in this
 * snippet pass BigInt values, whose division truncates and reaches 0n.
 *
 * @param {bigint} value The value to convert.
 * @param {bigint} base The base to convert to.
 * @return {Array} The digits, most significant first (empty for a zero value).
 */
const getDigits = (value, base) => {
  const digits = [];
  for (let rest = value; rest; rest /= base) {
    // The least significant digit comes out first, so prepend each one.
    digits.unshift(rest % base);
  }
  return digits;
};
/**
 * Maps a 0-based index to a lowercase letter (0 -> 'a', 1 -> 'b', ...).
 *
 * @param {number} index Offset from the letter 'a'.
 * @return {string} The single-character string at that offset.
 */
const getLetter = (index) => {
  const codeOfA = 97;
  return String.fromCharCode(codeOfA + index);
};
/**
 * Rebuilds a string from a hash value by reading the value as a base-82
 * number and turning every digit into a letter.
 *
 * @param {bigint} value The hash value.
 * @return {string} The letters for the base-82 digits, most significant first.
 */
const getPreimage = (value) => {
  const letters = [];
  for (const digit of getDigits(value, 82n)) {
    letters.push(getLetter(Number(digit)));
  }
  return letters.join('');
};
console.log(getPreimage(29662550362n));
console.log(getPreimage(16530092119764772n));
The results start with “i” because g starts at 8 instead of 0. The second number is also big enough to not be unique (in contrast to agile’s “hash”, which can be represented exactly by a JavaScript number), but if you were just trying to find any preimage, it’s good enough.
/**
 * Folds a string into a number: starting from 8, each character multiplies
 * the running value by 82 and adds the character's position in
 * "abcdefghijklmnop" (-1 for any other character).
 *
 * @param {string} str The input string.
 * @return {number} The resulting value.
 */
function hash(str) {
  const charset = "abcdefghijklmnop";
  let g = 8;
  str.split("").forEach((character) => {
    g = g * 82 + charset.indexOf(character);
  });
  return g;
}
// Print, for several neighbouring strings, whether each one produces the target value.
['hijackec', 'hijacked', 'hijackee', 'hijackef', 'hijackeg'].forEach((s) => {
  console.log(s, hash(s) === 16530092119764772);
});

You could make a recursive function that starts from 8, iterates over the charset indices and stops (returns) whenever the current value gets over the passed hash.
Check the comments below for more details:
const charset = 'abcdefghijklmnop';
/**
 * Depth-first search for a string over `charset` whose hash equals `hash`.
 *
 * Each level multiplies the running value by 82 and tries every character
 * index in ascending order. Because the values only grow, a level is
 * abandoned as soon as a candidate exceeds the target.
 *
 * @param {number} hash The target hash value.
 * @param {number} base The running value so far (8 for an empty string).
 * @param {{value: string}} result Accumulates the matched characters.
 * @param {boolean} isRoot true for the outermost call. Recursive calls pass
 *   false so that only the outermost call returns the string; this replaces
 *   the hard-coded `base === 656` test, which only recognised the root for
 *   the default starting value.
 * @return {string|number|null} The matching string (outermost call), the
 *   hash itself (inner calls signalling success), or null when nothing matches.
 */
function bruteforce(hash, base = 8, result = {value: ''}, isRoot = true) {
  // Always multiply the previous value by 82
  const scaled = base * 82;
  for (let i = 0; i < charset.length; i++) {
    // Add the char index to the value. Declared locally: the original
    // assigned to an undeclared variable, which throws in strict/module code.
    let value = scaled + i;
    // If we found the hash, append the current char and return
    if (value === hash) {
      result.value += charset[i];
      return isRoot ? result.value : value;
    }
    // If we went past the hash, return null to mark this iteration as failed
    if (value > hash) {
      return null;
    }
    // Otherwise, attempt next level starting from current value
    value = bruteforce(hash, value, result, false);
    // If we found the hash from there, prepend the current char and return
    if (value === hash) {
      result.value = charset[i] + result.value;
      return isRoot ? result.value : value;
    }
  }
  // We tried everything, no match found :(
  return null;
}
console.log(bruteforce(29662550362));

JavaScript: Subtracting ranges of numbers

I'm trying to write a JS function which has two parameters, include and exclude, each an array of objects {X, Y} which represents a range of numbers from X to Y, both included.
The output is the subtraction of all the ranges in include with all the ranges in exclude.
For example:
include = [ {1,7}, {9,10}, {12,14} ]
exclude = [ {4,5}, {11,20} ]
output = [ {1,3}, {6,7}, {9,10} ]
{4,5} broke {1,7} into two range objects: {1,3} and {6,7}
{9,10} was not affected
{12,14} was removed entirely

You can use sweep line algorithm. For every number save what it represents (start and end, inclusion and exclusion ). Then put all the number in an array and sort it. Then iteratively remove elements from the array and perform the appropriate operation.
include_list = [[1,7]]
exclude_list = [[4,5]]
(1,start,inclusion),(4,start,exclusion),(5,end,exclusion),(7,end,inclusion)
include = 0
exclude = 0
cur_element = (1,start,inclusion) -> include = 1, has_open_range = 1, range_start = 1 // we start a new range starting at 1
cur_element = (4,start,exclusion) -> exclude = 1, has_open_range = 0, result.append ( [1,4] ) // we close the open range and add range to result
cur_element = (5,end,exclusion) -> exclude = 0, has_open_range = 1, range_start = 5 // because include was 1 and exclude become 0 we must create a new range starting at 5
cur_element = (7,end,inclusion) -> include = 0, has_open_range = 0, result.append([5,7]) // include became zero so we must close the current open range so we add [5,7] to result
Maintain the counters include and exclude: increment the matching counter when you reach a start element and decrement it when you reach the corresponding end element. From the values of include and exclude you can determine whether you should start a new range, close the open range, or do nothing at all.
After the initial sort, which costs O(n log n), the sweep itself runs in linear time O(n).

The rule for integer set arithmetic for subtraction of two sets X,Y is
X − Y := {x − y | x ∈ X, y ∈ Y }
but that's not what you want, as it seems.
You can assume ordered sets in your example which allows you to set every occurrence of x==y as an arbitrary value in a JavaScript array and use that to split there. But you don't need that.
The set difference {1...7}\{4...5} gets expanded to {1,2,3,4,5,6,7}\{4,5}. As you can easily see, a subtraction with the rule of set arithmetic would leave {1,2,3,0,0,6,7} and with normal set subtraction (symbol \) you get {1,2,3,6,7}.
The set difference {12...14}\{11...20} gets expanded to {12,13,14}\{11,12,13,14,15,16,17,18,19,20}; the set arithm. difference is {-11,0,0,0,-15,-16,...,-20} but the normal set-subtraction leaves the empty set {}.
Handling operations with the empty set is equivalent to normal arithmetic {x}-{}={x} and {}-{x} = {-x} for arithmetic set rules and {x}\{}={x},{}\{x}= {} with normal rules
So what you have to use here, according to your example, are the normal set rules. There is no need to expand the sets, they can be assumed to be dense.
You can use relative differences(you may call them distances).
With {1...7}\{4...5} the first start is smaller than the second start and the first end is greater than the second end, which results in two different sets.
With {12...14}\{11...20} the first start is greater than the second start and the first end is lower than the second end, which results in an empty set.
The third example makes use of the empty-set rule.
Do you need an example snippet?

Here's an answer that works with fractions and that isn't just brute forcing. I've added comments to explain how it works. It may seem big, but the premise is simple:
create a method p1_excluding_p2 that accepts points p1 and p2 and returns of an array of points that exist after doing p1 - p2
create a method points_excluding_p2 which performs the EXACT same operation as above, but this time allow us to pass an array of points, and return an array of points that exist after subtracting p2 from all the points in our array, so now we have (points) - p2
create a method p1_excluding_all which takes the opposite input as above. This time, accept one point p1 and many exclusion points, and return the array of points remaining after subtracting all the exclusion points. This is actually very easy to create now. We simply start off with [p1] and the first exclusion point (exclusion1) and feed this into points_excluding_p2. We take the array that comes back (which will be p1 - exclusion1) and feed this into points_excluding_p2 only this time with exclusion2. We continue this process until we've excluded every exclusion point, and we're left with an array of p1 - (all exclusion points)
now that we have the power to perform p1 - (all exclusion points), its just a matter of looping over all our points and calling p1_excluding_all, and we're left with an array of every point subtract every exclusion point. We run our results through remove_duplicates incase we have any duplicate entries, and that's about it.
The code:
// Sample data: inclusive [start, end] ranges to keep, and the ranges to subtract from them.
var include = [
  [1, 7],
  [9, 10],
  [12, 14]
];
var exclude = [
  [4, 5],
  [11, 20]
];
/**
 * Returns a new array with duplicate entries removed, keeping the first
 * occurrence of each entry in its original order.
 *
 * Two entries count as duplicates when their string forms are equal, so two
 * distinct array objects that both stringify to "1,3" collapse into one entry.
 *
 * @param {Array} arr The entries to de-duplicate; it is not modified.
 * @return {Array} A new array holding the first occurrence of each entry.
 */
function remove_duplicates(arr) {
  // A Set is used instead of a plain object: a plain-object lookup reports
  // keys such as "constructor" or "toString" as already seen, because they
  // are inherited from Object.prototype.
  var seen = new Set();
  var results = [];
  for (var i = 0; i < arr.length; i++) {
    var el = arr[i];
    var key = String(el);
    if (seen.has(key)) continue;
    seen.add(key);
    results.push(el);
  }
  return results;
}
/**
 * Subtracts the inclusive range p2 from the inclusive range p1.
 *
 * Examples: p1 = [1,7], p2 = [4,5] returns [[1,3],[6,7]]
 *           p1 = [1,7], p2 = [2,5] returns [[1,1],[6,7]]
 *
 * @param {number[]} p1 The [start, end] range to keep.
 * @param {number[]} p2 The [start, end] range to remove.
 * @return {number[][]} The zero, one or two pieces of p1 that lie outside p2.
 *     When the ranges do not overlap this is [p1], holding p1 itself (not a copy).
 */
function p1_excluding_p2(p1, p2) {
  if (p1[1] < p2[0]) return [p1]; // p1 finishes before the exclusion range p2 starts
  if (p1[0] > p2[1]) return [p1]; // p1 starts after the exclusion range p2 ends
  var lines = [];
  // the part of p1 that lies before p2 starts
  var line1 = [p1[0], Math.min(p1[1], p2[0] - 1)];
  // <= rather than <, so that a single-value piece such as [1,1] is kept
  if (line1[0] <= line1[1]) lines.push(line1);
  // the part of p1 that lies after p2 ends
  var line2 = [p2[1] + 1, p1[1]];
  if (line2[0] <= line2[1]) lines.push(line2);
  return lines;
}
/**
 * Runs p1_excluding_p2 on every range in `points` against the single
 * exclusion range p2 and gathers all resulting pieces into one flat array.
 * Pieces appear in the order of the input ranges, e.g.
 * points = [[1,7],[0,1]], p2 = [4,5] returns [[1,3],[6,7],[0,1]].
 *
 * @param {number[][]} points The ranges to subtract from.
 * @param {number[]} p2 The single range to remove from each of them.
 * @return {number[][]} The remaining pieces of every input range.
 */
function points_excluding_p2(points, p2) {
  const remaining = [];
  for (const point of points) {
    // each call yields zero, one or two pieces; add them all
    remaining.push(...p1_excluding_p2(point, p2));
  }
  return remaining;
}
/**
 * Subtracts every range in `excluded_pts` from the single range p1.
 *
 * Starts with [p1] and feeds the current list of pieces through
 * points_excluding_p2 once per exclusion range, so each exclusion is applied
 * to what the previous ones left over.
 *
 * @param {number[]} p1 The [start, end] range to keep.
 * @param {number[][]} excluded_pts The ranges to remove from p1.
 * @return {number[][]} The pieces of p1 left over, with duplicates removed.
 */
function p1_excluding_all(p1, excluded_pts) {
  var checking = [p1];
  // iterate over the parameter, not the global `exclude`, so the function
  // works with whatever exclusion list the caller passes in
  for (var i = 0; i < excluded_pts.length; i++) {
    checking = points_excluding_p2(checking, excluded_pts[i]);
  }
  return remove_duplicates(checking);
}
/* Driver: subtract the whole exclusion list from each included range in
 * turn, collect every leftover piece in one array, de-duplicate that array
 * and print it.
 */
var results = [];
var i = 0;
while (i < include.length) {
  var lines = p1_excluding_all(include[i], exclude);
  Array.prototype.push.apply(results, lines); // add each leftover piece to results
  i += 1;
}
results = remove_duplicates(results);
console.log(results);
which returns:
[[1,3],[6,7],[9,10]]

NOTE: include = [ {1,7}, {9,10}, {12,14} ] is not valid JavaScript, so I assumed you are passing in arrays of arrays instead, such as:
include = [ [1,7], [9,10], [12,14] ]
Brute force method (a solution, may not be the most elegant):
/**
 * Brute-force range subtraction: marks every number covered by an included
 * range, unmarks every number covered by an excluded range, then collects
 * the runs of consecutive numbers that are still marked.
 *
 * The scan walks array indexes starting at 0, so bounds are expected to be
 * non-negative integers.
 *
 * @param {number[][]} include Inclusive [start, end] ranges to keep.
 * @param {number[][]} exclude Inclusive [start, end] ranges to remove.
 * @return {number[][]} The remaining inclusive ranges, in ascending order.
 */
function solve_range(include, exclude) {
  // numbers[n] === true means n is still part of the result
  const numbers = [];
  include.forEach(function (range) {
    for (let n = range[0]; n <= range[1]; n++) {
      numbers[n] = true;
    }
  });
  exclude.forEach(function (range) {
    for (let n = range[0]; n <= range[1]; n++) {
      numbers[n] = false;
    }
  });
  const results = [];
  let contiguous_start = null;
  // Scan one index past the end: numbers[numbers.length] is undefined, which
  // closes a run that reaches the last marked number.
  for (let n = 0; n <= numbers.length; n++) {
    if (numbers[n] === true) {
      if (contiguous_start === null) {
        contiguous_start = n;
      }
    } else if (contiguous_start !== null) {
      results.push([contiguous_start, n - 1]);
      contiguous_start = null;
    }
  }
  return results;
}
// Sample ranges run through the brute-force solver; the result lands in `output`.
var include = [[1, 7], [9, 10], [12, 14]];
var exclude = [[4, 5], [11, 20]];
var output = solve_range(include, exclude);
https://jsfiddle.net/dwyk631d/2/

Here's a working solution that handles the 4 possible overlap scenarios for an exclusion range.
var include = [{from:1, to: 7},{from: 9, to: 10},{from: 12, to: 14}];
var exclude = [{from:4, to: 5}, {from: 11, to: 20}];
//result: {1,3}, {6,7}, {9,10}

/**
 * Subtracts every exclusion range from every inclusion range.
 * Ranges are {from, to} objects with inclusive integer bounds.
 *
 * Each inclusion range is cut down one exclusion at a time, so several
 * exclusions touching the same inclusion range combine correctly. An
 * exclusion that reaches an edge of a piece leaves no empty or inverted
 * piece on that side.
 *
 * @param {{from: number, to: number}[]} includeRanges Ranges to keep.
 * @param {{from: number, to: number}[]} excludeRanges Ranges to remove.
 * @return {{from: number, to: number}[]} The remaining ranges; an inclusion
 *     range no exclusion touches is returned as the same object.
 */
function subtractRanges(includeRanges, excludeRanges) {
  var remaining = [];
  includeRanges.forEach(function (inc) {
    var pieces = [inc];
    excludeRanges.forEach(function (exc) {
      var next = [];
      pieces.forEach(function (piece) {
        if (exc.to < piece.from || exc.from > piece.to) {
          // the exclusion does not touch this piece
          next.push(piece);
          return;
        }
        if (exc.from > piece.from) {
          // something survives to the left of the exclusion
          next.push({from: piece.from, to: exc.from - 1});
        }
        if (exc.to < piece.to) {
          // something survives to the right of the exclusion
          next.push({from: exc.to + 1, to: piece.to});
        }
      });
      pieces = next;
    });
    remaining.push.apply(remaining, pieces);
  });
  return remaining;
}

var resultList = subtractRanges(include, exclude);
console.log(resultList);

Perhaps we can make it slightly more efficient by merging labeled intervals into one sorted list:
include = [ {1,7}, {9,10}, {12,14} ]
exclude = [ {4,5}, {11,20} ]
merged = [ [1,7,0], [4,5,1], [9,10,0], [11,20,1], [12,14,0] ];
Then, traverse the list and for any excluded interval, update any surrounding affected intervals.

try this
/**
 * Subtracts every range in `exclude` from every range in `data`.
 * Ranges are [start, end] pairs with inclusive integer bounds.
 *
 * Each exclusion builds a fresh list of ranges instead of splicing the list
 * while iterating over it; splicing during forEach makes later ranges get
 * skipped whenever a range is removed or split in two.
 *
 * @param {number[][]} data The ranges to keep; the array is not modified.
 * @param {number[][]} exclude The ranges to remove.
 * @return {number[][]} A new array with the remaining ranges.
 */
function excludeRange(data, exclude) {
  return exclude.reduce(
    (remaining, [exStart, exEnd]) =>
      remaining.flatMap((range) => {
        const [start, end] = range;
        const intersects = start <= exEnd && exStart <= end;
        if (!intersects) return [range];
        // split into [start, exStart-1] and [exEnd+1, end], keeping only
        // the halves that are valid (start <= end)
        return [
          [start, exStart - 1],
          [exEnd + 1, end],
        ].filter(([lo, hi]) => lo <= hi);
      }),
    [...data] // start from a copy so the result is never the caller's array
  );
}
I tried to implement this as short and simple as possible.
Edit: added an explanation and made the code more readable.
The algorithm for A - B:
if they intersect -> we split A into two ranges: [Ax, Bx-1] and [By+1, Ay]
then we filter out invalid ranges (where x > y)
else: keep A

get the number of n digit in a 2+ digit number

For example, getting "5" in "256". The closest I've gotten is Math.floor(256/10)), but that'll still return the numbers in front. Is there any simple way to get what I want or would I have to make a big function for it? Also, for clarity: "n digit" would be defined. Example, getDigit(2,256) would return 5 (second digit)

Math.floor((256 / 10) % 10)
or more generally:
Math.floor(N / (Math.pow(10, n)) % 10)
where N is the number to be extracted, and n is the position of the digit. Note that this counts from 0 starting from the right (i.e., the least significant digit = 0), and doesn't account for invalid values of n.

how about
(12345 + "")[3]
or
(12345 + "").charAt(3)
to count from the other end
[length of string - digit you want] so if you want the 2 it's:
5 - 4 = 1
(12345 + "")[1] = "2"
/**
 * Returns the digit of `num` at position `pos`, counted from the right
 * starting at 1 (pos 1 is the last character of the stringified number).
 *
 * @param {number} num The number to read a digit from.
 * @param {number} pos 1-based position, counted from the right.
 * @return {string} The character at that position, or "" when pos is less
 *     than 1 or greater than the length of the stringified number.
 */
function getNumber(num, pos) { // parameters take no `var` keyword
  var sNum = num + "";
  if (pos > sNum.length || pos <= 0) { return ""; }
  return sNum[sNum.length - pos];
}

First, you need to cast the number to a string, then you can access the character as normal:
var num = 256;
// String(num) is "256"; index 1 (counting from 0) picks its second character.
var char = String(num)[1];
Edit: Note also that, if you want to access it without setting the number as a variable first, you need a double dot .. to access the function:
// `256.` is a complete numeric literal (trailing decimal point), so the second dot is the property access.
var char = 256..toString()[1];
The first dot tells the interpreter "this is a number"; the second accesses the function.

Convert to string and substring(2,2)?

This should do it:
/**
 * Returns the digit of `number` at `position`, counted from the left
 * starting at 1, e.g. getDigit(2, 256) returns "5".
 *
 * @param {number} position 1-based position, counted from the left.
 * @param {number} number The number to read a digit from.
 * @return {string} The character at that position, or "" when the position
 *     is outside the stringified number.
 */
function getDigit ( position, number ) {
  var digits = number + ""; // convert number to string
  // subtract 1 from position: string indexes start at 0, positions at 1
  return digits.charAt ( position - 1 );
}

Try this, last line is key:
var number = 12345;
var n = 2; // 1-based position of the wanted digit, counted from the left
// Index with n - 1 (instead of a hard-coded 1) so changing n changes the
// digit that is read; the radix keeps parseInt on base 10.
var nDigit = parseInt((number + '').charAt(n - 1), 10);

If you want to try to do everything mathematically:
var number = 256;
var digitNum = 2; // 1-based position, counted from the right
// JavaScript has no (int) cast; Math.floor drops the fractional part after
// the wanted digit has been shifted into the ones place, and % 10 isolates it.
var digit = Math.floor(number / Math.pow(10, digitNum - 1)) % 10;
This code counts the digit from the right starting with 1, not 0. If you wish to change it to start at 0, delete the -1 portion in the call.
If you wish to count from the left, it gets more complicated and similar to other solutions:
var number = 256;
var digitNum = 2; // 1-based position, counted from the left
// The number of digits minus the position gives how many places to shift
// right; Math.floor replaces the (int) cast, which JavaScript does not have.
var digit = Math.floor(number / Math.pow(10, number.toString().length - digitNum)) % 10;
edit:
Also, this assumes you want base 10 for your number system, but both of those will work with other bases. All you need to do is change instances of 10 in the final line of code to the number representing the base for the number system you'd like to use. (ie. hexadecimal =16, binary = 2)

// You do not say if you allow decimal fractions or negative numbers-
// the strings of those need adjusting.
/**
 * Returns the digit at index `n` of this number, counted from the left
 * starting at 0. Every non-digit character of the stringified number (sign,
 * decimal point) is removed before indexing, so (-12.5).nthDigit(2) is 5.
 *
 * NOTE(review): this adds a method to the built-in Number prototype.
 *
 * @param {number} n 0-based index of the wanted digit.
 * @return {?number} The digit, or null when n is negative or past the last digit.
 */
Number.prototype.nthDigit = function (n) {
  var s = String(this).replace(/\D+/g, '');
  // without the n < 0 check, charAt returns "" and Number("") gives 0
  if (n < 0 || s.length <= n) return null;
  return Number(s.charAt(n));
};

use variable "count" to control loop
var count = 1; //starting 1
// `i` is declared here; assigning to an undeclared `i` throws a
// ReferenceError in strict mode and in ES modules.
// count is logged on every one of the 100 iterations and is incremented
// after the iterations where i is a multiple of 10 (0, 10, ..., 90).
for (var i = 0; i < 100; i++) {
  console.log(count);
  if (i % 10 === 0) count++;
}
output will fill
1
2
3
4
5
6
7
8
9

Develop Reference

JavaScript is the programming language of the Web.

Find line by character position - javascript

Related

Find the longest anagram with array javascript

Is there a way to avoid number to string conversion & nested loops for performance?

Given this hash function, an expected output, and the length of the input string, how do I find the input string that returns the given result?

JavaScript: Subtracting ranges of numbers

get the number of n digit in a 2+ digit number

Categories

Resources