Let's say I have the following array of strings: var array_of_strings = ["abc", "abcd"];
My goal is to run a function and have this return roughly 75% (0.75). Implying that the results are roughly 75% in common. Roughly being defined as within a certain error range, let us say 5% or some settable number.
I'm currently using the Levenshtein algorithm to compute the differences between the strings; however, this is extremely slow and CPU-intensive in my situation, as the strings I'm using are thousands and thousands of lines long.
Levenshtein gives me what the differences are; and while useful in certain situations, my particular use case is simply looking to see what percentage the strings are roughly different from each other and not what each difference actually is necessarily.
The current levenshtein algorithm I'm using is below (which I borrowed from another answer here on stackoverflow). It will return how many differences it found which I can then use to calculate a percentage difference, but it's very slow! Sometimes taking a couple of seconds to run and freezes up the computer as well.
/**
 * Computes the Levenshtein edit distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions (compared by UTF-16 code unit) needed to turn
 * `s` into `t`. Only one row of the dynamic-programming table is kept, and
 * four characters of `t` are processed per pass over `s`.
 *
 * Every exit path returns through the async function itself, so the returned
 * promise always settles, including for equal or empty inputs.
 *
 * @param {string} s - First string.
 * @param {string} t - Second string.
 * @returns {Promise<number>} Resolves with the edit distance.
 */
async function levenshtein(s, t) {
  console.log("levenshtein active");
  if (s === t) {
    return 0;
  }
  const n = s.length;
  const m = t.length;
  if (n === 0 || m === 0) {
    // One side is empty: the distance is the length of the other side.
    return n + m;
  }
  let x = 0;
  let y;
  let a;
  let b;
  let c;
  let d;
  let g;
  let h;
  let k;
  // p[y] holds the distance between s[0..y] and the prefix of t handled so far.
  const p = new Array(n);
  for (y = 0; y < n;) {
    p[y] = ++y;
  }
  // Main loop: consume four characters of t per sweep over s.
  for (; (x + 3) < m; x += 4) {
    const e1 = t.charCodeAt(x);
    const e2 = t.charCodeAt(x + 1);
    const e3 = t.charCodeAt(x + 2);
    const e4 = t.charCodeAt(x + 3);
    c = x;
    b = x + 1;
    d = x + 2;
    g = x + 3;
    h = x + 4;
    for (y = 0; y < n; y++) {
      k = s.charCodeAt(y);
      a = p[y];
      if (a < c || b < c) {
        c = (a > b ? b + 1 : a + 1);
      } else if (e1 !== k) {
        c++;
      }
      if (c < b || d < b) {
        b = (c > d ? d + 1 : c + 1);
      } else if (e2 !== k) {
        b++;
      }
      if (b < d || g < d) {
        d = (b > g ? g + 1 : b + 1);
      } else if (e3 !== k) {
        d++;
      }
      if (d < g || h < g) {
        g = (d > h ? h + 1 : d + 1);
      } else if (e4 !== k) {
        g++;
      }
      p[y] = h = g;
      g = d;
      d = b;
      b = c;
      c = a;
    }
  }
  // Tail loop: handle the remaining (m mod 4) characters of t one at a time.
  for (; x < m;) {
    const e = t.charCodeAt(x);
    c = x;
    d = ++x;
    for (y = 0; y < n; y++) {
      a = p[y];
      if (a < c || d < c) {
        d = (a > d ? d + 1 : a + 1);
      } else if (e !== s.charCodeAt(y)) {
        d = c + 1;
      } else {
        d = c;
      }
      p[y] = d;
      c = a;
    }
    h = d;
  }
  return h;
}
My question is, is there a way to calculate the difference faster when large string sets are used? In my case accuracy is not too important just as long as a rough difference is known of a certain percentage.
For example, if a research paper was published and I have the original paper and a student's paper, I want to know whether roughly 10% of the student's paper is plagiarized.
Maybe cutting random parts out of the strings could help save time, but this feels very dirty/inefficient.
RSA is an encryption algorithm whose security is based on the difficulty of factoring large integers. In RSA, two large prime numbers are generated; their product, together with a supplementary value (the public exponent), forms the public key. Anyone can use the public key to encrypt a message, but only those who know the prime factors can decode the message. There are three phases in the process:
key generation - The public key and private key are generated. The construction method of the keys
generated should be secret.
encryption - The message can be encrypted via public key
decryption - Only the private key can be used to decrypt the
message
Encryption process is as shown:
m - message:
m^e % n = c
c - encrypted message
Decryption process is as shown:
c^d % n = m
This is the implementation of calculating d:
/**
 * Computes the modular multiplicative inverse of `e` modulo `phi` with the
 * extended Euclidean algorithm.
 *
 * @param {number} e - Integer whose inverse is wanted (any sign).
 * @param {number} phi - Positive integer modulus.
 * @returns {number} `d` in `[0, phi)` such that `(e * d) % phi === 1`;
 *   `0` when `phi` is 1; `NaN` when the inputs are not integers, `phi < 1`,
 *   or `e` and `phi` are not coprime (no inverse exists).
 */
function modInverse(e, phi) {
  if (!Number.isInteger(e) || !Number.isInteger(phi) || phi < 1) {
    return NaN;
  }
  if (phi === 1) {
    return 0;
  }
  // Work on copies so the parameters are never mutated; reduce e into [0, phi).
  let a = ((e % phi) + phi) % phi;
  let b = phi;
  let x0 = 0;
  let x1 = 1;
  while (a > 1) {
    if (b === 0) {
      // The remainder hit zero while a > 1, so gcd(e, phi) > 1: no inverse.
      return NaN;
    }
    const q = Math.floor(a / b);
    [a, b] = [b, a % b];
    [x0, x1] = [x1 - q * x0, x0];
  }
  if (a !== 1) {
    // e is a multiple of phi.
    return NaN;
  }
  // Make the coefficient positive.
  return x1 < 0 ? x1 + phi : x1;
}
modInverse(7, 40); // 23
Key pairs of a public key and a private key also need to be generated. Let’s pick 5 and 11 as the primes:
/**
 * Computes the modular multiplicative inverse of `e` modulo `phi` with the
 * extended Euclidean algorithm.
 *
 * @param {number} e - Integer whose inverse is wanted (any sign).
 * @param {number} phi - Positive integer modulus.
 * @returns {number} `d` in `[0, phi)` such that `(e * d) % phi === 1`;
 *   `0` when `phi` is 1; `NaN` when the inputs are not integers, `phi < 1`,
 *   or `e` and `phi` are not coprime (no inverse exists).
 */
function modInverse(e, phi) {
  if (!Number.isInteger(e) || !Number.isInteger(phi) || phi < 1) {
    return NaN;
  }
  if (phi === 1) {
    return 0;
  }
  // Work on copies so the parameters are never mutated; reduce e into [0, phi).
  let a = ((e % phi) + phi) % phi;
  let b = phi;
  let x0 = 0;
  let x1 = 1;
  while (a > 1) {
    if (b === 0) {
      // The remainder hit zero while a > 1, so gcd(e, phi) > 1: no inverse.
      return NaN;
    }
    const q = Math.floor(a / b);
    [a, b] = [b, a % b];
    [x0, x1] = [x1 - q * x0, x0];
  }
  if (a !== 1) {
    // e is a multiple of phi.
    return NaN;
  }
  // Make the coefficient positive.
  return x1 < 0 ? x1 + phi : x1;
}
/**
 * Tests whether `n` is a prime number using trial division by odd factors.
 *
 * @param {number} n - Candidate value.
 * @returns {boolean} `true` when `n` is a prime safe integer, otherwise `false`
 *   (including for non-integers, non-numbers and values below 2).
 */
function isPrime(n) {
  if (!Number.isSafeInteger(n) || n < 2) {
    return false;
  }
  if (n % 2 === 0) {
    return n === 2;
  }
  // Only odd divisors up to sqrt(n) need to be checked.
  for (let factor = 3; factor * factor <= n; factor += 2) {
    if (n % factor === 0) {
      return false;
    }
  }
  return true;
}
/**
 * Builds a toy RSA key pair from two distinct primes.
 *
 * The public exponent is the smallest odd integer >= 3 that is coprime to
 * phi = (p - 1)(q - 1); a fixed e = 3 would yield an invalid key whenever
 * 3 divides phi (e.g. p = 7).
 *
 * @param {number} p - First prime.
 * @param {number} q - Second prime, different from `p`.
 * @returns {Array<Array<number>>|undefined} `[[e, n], [d, n]]` (public key,
 *   private key), or `undefined` when `p`/`q` are not prime or are equal.
 */
function RSAKeyPair(p, q) {
  // Need to check that they are primes
  if (!(isPrime(p) && isPrime(q))) {
    return;
  }
  // Need to check that they're not the same
  if (p === q) {
    return;
  }
  const n = p * q;
  const phi = (p - 1) * (q - 1);
  const gcd = (a, b) => {
    while (b !== 0) {
      [a, b] = [b, a % b];
    }
    return a;
  };
  let e = 3;
  while (gcd(e, phi) !== 1) {
    e += 2;
  }
  const d = modInverse(e, phi);
  // Public key: [e,n], Private key: [d,n]
  return [[e, n], [d, n]];
}
RSAKeyPair(5, 11); // Public key: [3,55], Private key: [27,55]
Complete: Encryption and Decryption
/**
 * Computes the modular multiplicative inverse of `e` modulo `phi` with the
 * extended Euclidean algorithm.
 *
 * @param {number} e - Integer whose inverse is wanted (any sign).
 * @param {number} phi - Positive integer modulus.
 * @returns {number} `d` in `[0, phi)` such that `(e * d) % phi === 1`;
 *   `0` when `phi` is 1; `NaN` when the inputs are not integers, `phi < 1`,
 *   or `e` and `phi` are not coprime (no inverse exists).
 */
function modInverse(e, phi) {
  if (!Number.isInteger(e) || !Number.isInteger(phi) || phi < 1) {
    return NaN;
  }
  if (phi === 1) {
    return 0;
  }
  // Work on copies so the parameters are never mutated; reduce e into [0, phi).
  let a = ((e % phi) + phi) % phi;
  let b = phi;
  let x0 = 0;
  let x1 = 1;
  while (a > 1) {
    if (b === 0) {
      // The remainder hit zero while a > 1, so gcd(e, phi) > 1: no inverse.
      return NaN;
    }
    // q is the quotient; (a, b) then advance exactly as in Euclid's algorithm.
    const q = Math.floor(a / b);
    [a, b] = [b, a % b];
    [x0, x1] = [x1 - q * x0, x0];
  }
  if (a !== 1) {
    // e is a multiple of phi.
    return NaN;
  }
  // Make the coefficient positive.
  return x1 < 0 ? x1 + phi : x1;
}
/**
 * Tests whether `n` is a prime number using trial division by odd factors.
 *
 * @param {number} n - Candidate value.
 * @returns {boolean} `true` when `n` is a prime safe integer, otherwise `false`
 *   (including for non-integers, non-numbers and values below 2).
 */
function isPrime(n) {
  if (!Number.isSafeInteger(n) || n < 2) {
    return false;
  }
  if (n % 2 === 0) {
    return n === 2;
  }
  // Only odd divisors up to sqrt(n) need to be checked.
  for (let factor = 3; factor * factor <= n; factor += 2) {
    if (n % factor === 0) {
      return false;
    }
  }
  return true;
}
/**
 * Builds a toy RSA key pair from two distinct primes.
 *
 * The public exponent is the smallest odd integer >= 3 that is coprime to
 * phi = (p - 1)(q - 1); a fixed e = 3 would yield an invalid key whenever
 * 3 divides phi (e.g. p = 7).
 *
 * @param {number} p - First prime.
 * @param {number} q - Second prime, different from `p`.
 * @returns {Array<Array<number>>|undefined} `[[e, n], [d, n]]` (public key,
 *   private key), or `undefined` when `p`/`q` are not prime or are equal.
 */
function RSAKeyPair(p, q) {
  // Need to check that they are primes
  if (!(isPrime(p) && isPrime(q))) {
    return;
  }
  // Need to check that they're not the same
  if (p === q) {
    return;
  }
  const n = p * q;
  const phi = (p - 1) * (q - 1);
  const gcd = (a, b) => {
    while (b !== 0) {
      [a, b] = [b, a % b];
    }
    return a;
  };
  let e = 3;
  while (gcd(e, phi) !== 1) {
    e += 2;
  }
  const d = modInverse(e, phi);
  // Public key: [e,n], Private key: [d,n]
  return [[e, n], [d, n]];
}
// Demo: encrypt the message 2 with the public key of RSAKeyPair(5, 11) and
// decrypt it again with the private key, logging both results.
var encrypted_message;
{
  /**
   * Modular exponentiation (square-and-multiply) on BigInt, so intermediate
   * values never exceed Number.MAX_SAFE_INTEGER the way `base ** exponent`
   * evaluated on Numbers can.
   */
  const modPow = (base, exponent, modulus) => {
    const mod = BigInt(modulus);
    let factor = BigInt(base) % mod;
    let remaining = BigInt(exponent);
    let result = 1n;
    while (remaining > 0n) {
      if ((remaining & 1n) === 1n) {
        result = (result * factor) % mod;
      }
      factor = (factor * factor) % mod;
      remaining >>= 1n;
    }
    return Number(result);
  };

  // Generate the key pair once instead of once per use.
  const [publicKey, privateKey] = RSAKeyPair(5, 11);
  const m = 2;

  encrypted_message = modPow(m, publicKey[0], publicKey[1]);
  console.log("Encryption: " + encrypted_message);

  const decrypted_message = modPow(encrypted_message, privateKey[0], privateKey[1]);
  console.log("Decryption: " + decrypted_message);
}
Run it:
/**
 * Computes the modular multiplicative inverse of `e` modulo `phi` with the
 * extended Euclidean algorithm.
 *
 * @param {number} e - Integer whose inverse is wanted (any sign).
 * @param {number} phi - Positive integer modulus.
 * @returns {number} `d` in `[0, phi)` such that `(e * d) % phi === 1`;
 *   `0` when `phi` is 1; `NaN` when the inputs are not integers, `phi < 1`,
 *   or `e` and `phi` are not coprime (no inverse exists).
 */
function modInverse(e, phi) {
  if (!Number.isInteger(e) || !Number.isInteger(phi) || phi < 1) {
    return NaN;
  }
  if (phi === 1) {
    return 0;
  }
  // Work on copies so the parameters are never mutated; reduce e into [0, phi).
  let a = ((e % phi) + phi) % phi;
  let b = phi;
  let x0 = 0;
  let x1 = 1;
  while (a > 1) {
    if (b === 0) {
      // The remainder hit zero while a > 1, so gcd(e, phi) > 1: no inverse.
      return NaN;
    }
    // q is the quotient; (a, b) then advance exactly as in Euclid's algorithm.
    const q = Math.floor(a / b);
    [a, b] = [b, a % b];
    [x0, x1] = [x1 - q * x0, x0];
  }
  if (a !== 1) {
    // e is a multiple of phi.
    return NaN;
  }
  // Make the coefficient positive.
  return x1 < 0 ? x1 + phi : x1;
}
/**
 * Tests whether `n` is a prime number using trial division by odd factors.
 *
 * @param {number} n - Candidate value.
 * @returns {boolean} `true` when `n` is a prime safe integer, otherwise `false`
 *   (including for non-integers, non-numbers and values below 2).
 */
function isPrime(n) {
  if (!Number.isSafeInteger(n) || n < 2) {
    return false;
  }
  if (n % 2 === 0) {
    return n === 2;
  }
  // Only odd divisors up to sqrt(n) need to be checked.
  for (let factor = 3; factor * factor <= n; factor += 2) {
    if (n % factor === 0) {
      return false;
    }
  }
  return true;
}
/**
 * Builds a toy RSA key pair from two distinct primes.
 *
 * The public exponent is the smallest odd integer >= 3 that is coprime to
 * phi = (p - 1)(q - 1); a fixed e = 3 would yield an invalid key whenever
 * 3 divides phi (e.g. p = 7).
 *
 * @param {number} p - First prime.
 * @param {number} q - Second prime, different from `p`.
 * @returns {Array<Array<number>>|undefined} `[[e, n], [d, n]]` (public key,
 *   private key), or `undefined` when `p`/`q` are not prime or are equal.
 */
function RSAKeyPair(p, q) {
  // Need to check that they are primes
  if (!(isPrime(p) && isPrime(q))) {
    return;
  }
  // Need to check that they're not the same
  if (p === q) {
    return;
  }
  const n = p * q;
  const phi = (p - 1) * (q - 1);
  const gcd = (a, b) => {
    while (b !== 0) {
      [a, b] = [b, a % b];
    }
    return a;
  };
  let e = 3;
  while (gcd(e, phi) !== 1) {
    e += 2;
  }
  const d = modInverse(e, phi);
  // Public key: [e,n], Private key: [d,n]
  return [[e, n], [d, n]];
}
// Demo: encrypt the message 2 with the public key of RSAKeyPair(5, 11) and
// decrypt it again with the private key, logging both results.
var encrypted_message;
{
  /**
   * Modular exponentiation (square-and-multiply) on BigInt, so intermediate
   * values never exceed Number.MAX_SAFE_INTEGER the way `base ** exponent`
   * evaluated on Numbers can.
   */
  const modPow = (base, exponent, modulus) => {
    const mod = BigInt(modulus);
    let factor = BigInt(base) % mod;
    let remaining = BigInt(exponent);
    let result = 1n;
    while (remaining > 0n) {
      if ((remaining & 1n) === 1n) {
        result = (result * factor) % mod;
      }
      factor = (factor * factor) % mod;
      remaining >>= 1n;
    }
    return Number(result);
  };

  // Generate the key pair once instead of once per use.
  const [publicKey, privateKey] = RSAKeyPair(5, 11);
  const m = 2;

  encrypted_message = modPow(m, publicKey[0], publicKey[1]);
  console.log("Encryption: " + encrypted_message);

  const decrypted_message = modPow(encrypted_message, privateKey[0], privateKey[1]);
  console.log("Decryption: " + decrypted_message);
}
This encrypts the message 2, and the receiver can decrypt that back to 2. However, when I change the message 2 to 3:
/**
 * Computes the modular multiplicative inverse of `e` modulo `phi` with the
 * extended Euclidean algorithm.
 *
 * @param {number} e - Integer whose inverse is wanted (any sign).
 * @param {number} phi - Positive integer modulus.
 * @returns {number} `d` in `[0, phi)` such that `(e * d) % phi === 1`;
 *   `0` when `phi` is 1; `NaN` when the inputs are not integers, `phi < 1`,
 *   or `e` and `phi` are not coprime (no inverse exists).
 */
function modInverse(e, phi) {
  if (!Number.isInteger(e) || !Number.isInteger(phi) || phi < 1) {
    return NaN;
  }
  if (phi === 1) {
    return 0;
  }
  // Work on copies so the parameters are never mutated; reduce e into [0, phi).
  let a = ((e % phi) + phi) % phi;
  let b = phi;
  let x0 = 0;
  let x1 = 1;
  while (a > 1) {
    if (b === 0) {
      // The remainder hit zero while a > 1, so gcd(e, phi) > 1: no inverse.
      return NaN;
    }
    // q is the quotient; (a, b) then advance exactly as in Euclid's algorithm.
    const q = Math.floor(a / b);
    [a, b] = [b, a % b];
    [x0, x1] = [x1 - q * x0, x0];
  }
  if (a !== 1) {
    // e is a multiple of phi.
    return NaN;
  }
  // Make the coefficient positive.
  return x1 < 0 ? x1 + phi : x1;
}
/**
 * Tests whether `n` is a prime number using trial division by odd factors.
 *
 * @param {number} n - Candidate value.
 * @returns {boolean} `true` when `n` is a prime safe integer, otherwise `false`
 *   (including for non-integers, non-numbers and values below 2).
 */
function isPrime(n) {
  if (!Number.isSafeInteger(n) || n < 2) {
    return false;
  }
  if (n % 2 === 0) {
    return n === 2;
  }
  // Only odd divisors up to sqrt(n) need to be checked.
  for (let factor = 3; factor * factor <= n; factor += 2) {
    if (n % factor === 0) {
      return false;
    }
  }
  return true;
}
/**
 * Builds a toy RSA key pair from two distinct primes.
 *
 * The public exponent is the smallest odd integer >= 3 that is coprime to
 * phi = (p - 1)(q - 1); a fixed e = 3 would yield an invalid key whenever
 * 3 divides phi (e.g. p = 7).
 *
 * @param {number} p - First prime.
 * @param {number} q - Second prime, different from `p`.
 * @returns {Array<Array<number>>|undefined} `[[e, n], [d, n]]` (public key,
 *   private key), or `undefined` when `p`/`q` are not prime or are equal.
 */
function RSAKeyPair(p, q) {
  // Need to check that they are primes
  if (!(isPrime(p) && isPrime(q))) {
    return;
  }
  // Need to check that they're not the same
  if (p === q) {
    return;
  }
  const n = p * q;
  const phi = (p - 1) * (q - 1);
  const gcd = (a, b) => {
    while (b !== 0) {
      [a, b] = [b, a % b];
    }
    return a;
  };
  let e = 3;
  while (gcd(e, phi) !== 1) {
    e += 2;
  }
  const d = modInverse(e, phi);
  // Public key: [e,n], Private key: [d,n]
  return [[e, n], [d, n]];
}
// Reproducer: encrypt the message 3 with the public key of RSAKeyPair(5, 11)
// and decrypt it with the private key, logging both results.
// NOTE: both powers are evaluated with the Number `**` operator before the
// modulo is taken, so a large intermediate power loses precision and the
// logged decryption can differ from the original message.
var encrypted_message;
{
  const [publicKey, privateKey] = RSAKeyPair(5, 11);
  const plainMessage = 3;

  // c = m^e mod n, computed on Numbers.
  encrypted_message = plainMessage ** publicKey[0] % publicKey[1];
  console.log("Encryption: " + encrypted_message);

  // m = c^d mod n, computed on Numbers.
  const decryptedMessage = encrypted_message ** privateKey[0] % privateKey[1];
  console.log("Decryption: " + decryptedMessage);
}
It gives a different result. I expected the answer to be 3; what is wrong?
The posted example uses p = 5 and q = 11 and determines for the modulus N = 55, the public exponent e = 3 and the private exponent d = 27 (returned by RSAKeyPair(5, 11)). This corresponds to a valid key pair.
Although small values are used, the intermediate results can be quite large.
With the plaintext m = 3, encryption yields the ciphertext $c = m^e \bmod 55 = 3^3 \bmod 55 = 27$. The intermediate value $3^3 = 27$ is obviously uncritical.
For decryption, however, the plaintext is $m = c^d \bmod 55 = 27^{27} \bmod 55$. The value $27^{27}$ (approx. $4.4 \times 10^{38}$) is critical, since it is above the maximum safe integer in JavaScript, Number.MAX_SAFE_INTEGER $= 2^{53} - 1$ = 9,007,199,254,740,991. This generally results in a wrong plaintext during decryption.
The problem can be solved by using BigInt for larger numbers:
// Toy RSA parameters: public exponent, private exponent and modulus.
var e = 3, d = 27, N = 55;

// Small logging helper: prints a label immediately followed by a value.
const report = (label, value) => console.log(label + value);

// Encryption
var m = 3; // getRandomInt(N) // For arbitrary plaintexts uncomment getRandomInt(N)
var c = (m ** e) % N;
report("Plaintext : ", m);
report("Ciphertext : ", c);

// Decryption on plain Numbers: the power is computed in double precision first.
var dec = (c ** d) % N;
report("Result without BigInt: ", dec); // Wrong

// Decryption on BigInt: the power is computed exactly before the modulo.
dec = (BigInt(c) ** BigInt(d)) % BigInt(N);
report("Result with BigInt : ", dec); // Correct
/**
 * Returns a pseudo-random integer obtained by scaling Math.random() by the
 * floored `max` and flooring the product.
 *
 * @param {number} max - Upper bound (floored before use).
 * @returns {number} Floored product of a Math.random() sample and floor(max).
 */
function getRandomInt(max) {
  const sample = Math.random();
  const bound = Math.floor(max);
  return Math.floor(sample * bound);
}
Of course this applies in general, i.e. not only to encryption and decryption, but also to the key generation, as soon as the values (including intermediate results) become accordingly large.
Edit: As mentioned in the comment, there are more efficient implementations for modular exponentiation than the direct one (= exponentiate, then taking the result modulo). For this, also existing libraries can be used, e.g. bigint-mod-arith, which applies the right-to-left binary method for modular exponentiation.
Is there any way to deobfuscate some javascript code that produced with webpack 4 and is also splitChunked?
It's a little more than 1MB js code and I only need to understand a small portion of the code, which is this function :
// Minified helper: copies the string `e` into the byte buffer returned by a(),
// calls t.d(8, offset, byteLength), then decodes and returns a value read back
// through an Int32Array view over t.e.buffer.
// `t`, `a`, `o`, `u`, and the outer `i` and `d` are free variables defined
// outside this function.
// NOTE(review): the shape resembles generated WebAssembly string-passing glue
// (allocate / encode / call / decode / free) — confirm against the module that
// defines `t`.
function l(e) {
// The IIFE below writes `e` into the buffer and returns the offset `r`; it also
// stores the number of bytes written in the outer `u`. Arguments are evaluated
// left to right, so `u` is read only after the IIFE has assigned it.
t.d(8, function(e) {
// n = length in UTF-16 units; r = t.b(n) (presumably an allocation returning an
// offset — TODO confirm); f = current byte view from a().
for (var n = e.length, r = t.b(n), f = a(), c = 0; c < n; c++) {
// Note: this `i` is a `var` local to the IIFE and shadows the outer `i`.
var i = e.charCodeAt(c);
// Stop at the first char code above 127; only the leading codes <= 127 are
// copied one per slot.
if (i > 127)
break;
f[r + c] = i
}
// c !== n means the loop stopped early on a char code above 127.
if (c !== n) {
// Drop the part already copied, then call t.c with the old size `n` and the new
// size c + 3 * e.length (the third argument reassigns `n` after the second
// argument has been read). Presumably a reallocation sized for up to 3 bytes
// per remaining UTF-16 unit — TODO confirm.
0 !== c && (e = e.slice(c)),
r = t.c(r, n, n = c + 3 * e.length);
// Note: this `d` is a `var` local to the IIFE and shadows the outer `d`.
var d = a().subarray(r + c, r + n);
// o(e, d) presumably encodes `e` into `d` and reports the bytes written
// (TextEncoder.encodeInto-like) — TODO confirm.
c += o(e, d).written
}
// Publish the byte count through the outer `u` and return the offset.
return u = c,
r
}(e), u);
// f: the Int32Array view cached in the outer `i`, re-created when it is null or
// no longer backed by t.e.buffer.
// c: the value decoded by the outer `d.decode` from the n = f[2], r = f[3]
// region of a() (presumably offset and length of a result string), then
// `.slice()`d.
var n, r, f = (null !== i && i.buffer === t.e.buffer || (i = new Int32Array(t.e.buffer)),
i), c = (n = f[2],
r = f[3],
d.decode(a().subarray(n, n + r))).slice();
// t.a(offset, length) is called before returning — presumably releases the
// result region; TODO confirm.
return t.a(f[2], 1 * f[3]),
c
}
I used chrome debugger and set some breakpoints and I was able to grasp what it's doing but I need to do the exact same thing in my project So I need a more readable code to do that.
As far as I know, there is no easy way to do so, but I want to share some tips with you:
Replace all ',' with ',\n' (i.e., insert a line break after every comma)
Review and correct the possible code breaks
Use a for loop to iterate between static value arrays and replace all the usages.
Change the encoding in case of language dependent breaks
Replace all ; with ; \n and correct possible code breaks
Know it is easier to rename functions and write comments for them.
I am trying to take ed = 1 mod((p-1)(q-1)) and solve for d, just like the RSA algorithm.
e = 5, (p-1)*(q-1) = 249996
I've tried a lot of code in javascript such as:
/**
 * Computes the RSA private exponent d for e = 5, p = 499, q = 503, i.e. the
 * modular inverse of e modulo (p - 1)(q - 1), and passes it to DisplayResult
 * together with the "privateKeyResultLabel" label id.
 *
 * JavaScript numbers have no built-in `modInverse` method, so the inverse is
 * computed here with the extended Euclidean algorithm.
 *
 * @throws {RangeError} When e has no inverse modulo (p - 1)(q - 1).
 */
function modInverse(){
  const e = 5;
  const p = 499;
  const q = 503;
  const phi = (p - 1) * (q - 1);

  // Invariant: t0 * e ≡ r0 and t1 * e ≡ r1 (mod phi).
  let r0 = phi;
  let r1 = e % phi;
  let t0 = 0;
  let t1 = 1;
  while (r1 !== 0) {
    const quotient = Math.floor(r0 / r1);
    [r0, r1] = [r1, r0 - quotient * r1];
    [t0, t1] = [t1, t0 - quotient * t1];
  }
  if (r0 !== 1) {
    throw new RangeError("e has no inverse modulo (p - 1) * (q - 1)");
  }

  // Normalize the coefficient into [0, phi).
  const d = ((t0 % phi) + phi) % phi;
  DisplayResult(d, "privateKeyResultLabel");
}
or
/**
 * Prints the modular inverse of 5 modulo 249996.
 *
 * JavaScript port of the Java expression
 * `BigInteger.valueOf(5).modInverse(BigInteger.valueOf(249996))`, using BigInt
 * and the extended Euclidean algorithm.
 *
 * @throws {RangeError} When the value has no inverse for the modulus.
 */
function modInverse(){
  const value = 5n;
  const modulus = 249996n;

  // Invariant: t0 * value ≡ r0 and t1 * value ≡ r1 (mod modulus).
  let r0 = modulus;
  let r1 = value % modulus;
  let t0 = 0n;
  let t1 = 1n;
  while (r1 !== 0n) {
    const quotient = r0 / r1; // BigInt division truncates.
    [r0, r1] = [r1, r0 - quotient * r1];
    [t0, t1] = [t1, t0 - quotient * t1];
  }
  if (r0 !== 1n) {
    throw new RangeError("BigInteger not invertible.");
  }

  // Normalize into [0, modulus); String() avoids the trailing "n" in the output.
  console.log(String(((t0 % modulus) + modulus) % modulus));
}
I just can't figure out the correct way to solve for d, the modular inverse, in javascript.
I was just going through the definition of modular multiplicative inverse and from what I understand:
ax = 1 (mod m)
=> m is a divisor of ax -1 and x is the inverse we are looking for
=> ax - 1 = q*m (where q is some integer)
And the most important thing is gcd(a, m) = 1
i.e. a and m are co-primes
In your case:
ed = 1 mod((p-1)(q-1)) //p, q and e are given
=> ed - 1 = z*((p-1)(q-1)) //where z is some integer and we need to find d
Again from the wikipedia entry, one can compute the modular inverse using the extended Euclidean GCD Algorithm which does the following:
ax + by = g //where g = gcd(a,b); g = 1 exactly when a and b are co-primes
//The extended gcd algorithm gives us the value of x and y as well.
In your case the equation would be something like this:
ed - z*((p-1)(q-1)) = 1; //Compare it with the structure given above
a -> e
x -> d
b -> (p-1)(q-1)
y -> z
So if we just apply that algorithm to this case, we will get the values of d and z.
For ax + by = gcd(a,b), the extended gcd algorithm could look something like (source):
/**
 * Extended Euclidean algorithm (recursive).
 *
 * @param {number} a - First integer.
 * @param {number} b - Second integer.
 * @returns {number[]} `[x, y, g]` where `g` is the greatest common divisor
 *   reached by the Euclidean recursion and `a * x + b * y === g`.
 */
function xgcd(a, b) {
  if (b === 0) {
    // gcd(a, 0) = a = 1 * a + 0 * b
    return [1, 0, a];
  }
  // Locals are declared explicitly so nothing leaks into the global scope
  // (undeclared assignments throw in strict mode and ES modules).
  const [x, y, d] = xgcd(b, a % b);
  return [y, x - y * Math.floor(a / b), d];
}
This algorithm runs in time O(log(m)^2), assuming |a| < m, and is generally more efficient than exponentiation.
I don't know if there is an inbuilt function for this in javascript. I doubt if there is, and I am a fan of algorithms, so I thought you might want to give this approach a try. You can fiddle with it and change it to handle your range of values and I hope it gets you started in the right direction.
This implementation of modular inverse can accept any type of inputs. If input types are not supported, NaN is returned. Also, it does not use recursion.
/**
 * Non-recursive modular inverse of `a` modulo `m`.
 *
 * Both arguments are converted with Number(). The Euclidean quotients are
 * recorded on the way down and replayed backwards to rebuild the coefficient.
 *
 * @param {*} a - Value to invert.
 * @param {*} m - Modulus.
 * @returns {number} The inverse in `[0, m)`, or `NaN` when an input converts
 *   to NaN, `a` reduces to 0 modulo `m`, `m < 2`, or the gcd is not 1.
 */
function modInverse(a, m) {
  // Coerce and validate the inputs.
  a = Number(a);
  m = Number(m);
  if (Number.isNaN(a) || Number.isNaN(m)) {
    return NaN; // invalid input
  }
  a = ((a % m) + m) % m;
  if (!a || m < 2) {
    return NaN; // invalid input
  }

  // Euclidean descent, remembering the quotient of every (dividend, divisor) pair.
  const quotients = [];
  let dividend = a;
  let divisor = m;
  while (divisor) {
    const remainder = dividend % divisor;
    dividend = divisor;
    divisor = remainder;
    quotients.push(Math.floor(dividend / divisor));
  }
  if (dividend !== 1) {
    return NaN; // inverse does not exist
  }

  // Replay the quotients backwards, skipping the final pair whose divisor is 0.
  let previous = 1;
  let current = 0;
  for (let index = quotients.length - 2; index >= 0; index -= 1) {
    const next = previous - current * quotients[index];
    previous = current;
    current = next;
  }
  return ((current % m) + m) % m;
}
// Tests
// Each entry is an [a, m] argument pair; the trailing comment is the value
// that modInverse(a, m) is expected to log.
const modInverseCases = [
  [1, 2], // = 1
  [3, 6], // = NaN
  [25, 87], // = 7
  [7, 87], // = 25
  [19, 1212393831], // = 701912218
  [31, 73714876143], // = 45180085378
  [3, 73714876143], // = NaN
  [-7, 87], // = 62
  [-25, 87], // = 80
  [0, 3], // = NaN
  [0, 0], // = NaN
];
for (const [value, modulus] of modInverseCases) {
  console.log(modInverse(value, modulus));
}