Match a string and get what is after the match - javascript

I have this content from a torrent file
d8:announce39:http://torrent.ubuntu.com:6969/announce13:announce-listll39:http://torrent.ubuntu.com:6969/announceel44:http://ipv6.torrent.ubuntu.com:6969/announceee7:comment29:Ubuntu CD releases.ubuntu.com13:creation datei1539860537e4:infod6:lengthi1999503360e4:name30:ubuntu-18.10-desktop-amd64.iso12:piece lengthi524288e6:pieces76280
or I can have also
d8:announce39:http://torrent.centos.org:6969/announce13:announce-listll39:http://torrent.centos.org:6969/announceel44:http://ipv6.torrent.centos.org:6969/announceee7:comment27:CentOS x86_64 LiveGNOME ISO10:created by13:mktorrent 1.013:creation datei1526053398e4:infod5:filesld6:lengthi1388314624e4:pathl34:CentOS-7-x86_64-LiveGNOME-1804.isoeed6:lengthi454e4:pathl11:sha1sum.txteed6:lengthi1314e4:pathl15:sha1sum.txt.asceed6:lengthi598e4:pathl13:sha256sum.txteed6:lengthi1458e4:pathl17:sha256sum.txt.asceee4:name30:CentOS-7-x86_64-LiveGNOME-180412:piece lengthi524288e6:pieces52980:
I want to retrieve the name part like ubuntu-18.10-desktop-amd64.iso
Here is what I tried but it check the full content :
$.get('search', function(data) {
var lines = data.split("\n");
var $result = false
var url_check
var search= [];
$.each(lines, function(n, data) {
search.push(data)
})
}).done(function(search){
search= search.split("\n");
var $result = [];
$.each(search, function(n, search) {
var regex = new RegExp('^(?=.*' + search.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').split(/\\?[\s,_.:*-]+/).join(')(?=.*') + ')', 'gi');
if(regex.test(url_check) === true){
$result.push('true');
}else{
$result.push('false');
}
})
console.log($result)
if($result.indexOf("true") !== -1){
alert('Found !')
}else {
alert('Not found !');
}
})
Content of the file search :
*Ubuntu.18*
*centos 7*

According to the BitTorrent specification, those trailing digits match the length of the following string. For example
ubuntu-18.10-desktop-amd64.iso
is 30 characters long, thus the number that comes before it is 30:
30:ubuntu-18.10-desktop-amd64.iso
So, a pure regex solution won't really work, at least not in any concise fashion. You could extract the name<digits>: and the rest of the string following it, and then in Javascript use slice to extract <digits> number of characters after the colon:
const input = [
'd8:announce39:http://torrent.ubuntu.com:6969/announce13:announce-listll39:http://torrent.ubuntu.com:6969/announceel44:http://ipv6.torrent.ubuntu.com:6969/announceee7:comment29:Ubuntu CD releases.ubuntu.com13:creation datei1539860537e4:infod6:lengthi1999503360e4:name30:ubuntu-18.10-desktop-amd64.iso12:piece lengthi524288e6:pieces76280',
'd8:announce39:http://torrent.centos.org:6969/announce13:announce-listll39:http://torrent.centos.org:6969/announceel44:http://ipv6.torrent.centos.org:6969/announceee7:comment27:CentOS x86_64 LiveGNOME ISO10:created by13:mktorrent 1.013:creation datei1526053398e4:infod5:filesld6:lengthi1388314624e4:pathl34:CentOS-7-x86_64-LiveGNOME-1804.isoeed6:lengthi454e4:pathl11:sha1sum.txteed6:lengthi1314e4:pathl15:sha1sum.txt.asceed6:lengthi598e4:pathl13:sha256sum.txteed6:lengthi1458e4:pathl17:sha256sum.txt.asceee4:name30:CentOS-7-x86_64-LiveGNOME-180412:piece lengthi524288e6:pieces52980:'
];
function getName(str) {
const match = str.match(/:name(\d+):(.+)$/);
if (!match) {
return console.log('No name found');
}
const [, length, rest] = match;
console.log(
rest.slice(0, length)
);
}
input.forEach(getName)
If there may be line-terminators in the string, then (.+)$ won't work because . does not match newlines - either remove the $, or, if there may be newlines in the name that you want to match, use ([\s\S]+)$ instead, to match any character, including newlines.

Related

Truncate characters in each word on php

I need to have no more than 25 characters in each word. Yes, I could use break-word:break-all, however, I don't like how it all works against long words.
I wrote a JavaScript function that truncates these words by letter and adds a separator. Could you suggest a better option or rewrite this program in php, since I don't know php very well yet
if(document.getElementById('title_parent') && document.getElementById('title')) {
document.getElementById('title').oninput = function() {
const parent = document.getElementsByClassName('edit-project-title')[0];
parent.innerHTML = this.value ? truncate(this.value,20,'...') : " ";
}
}
function truncate(str,maxWordLength,endLetters) {
if ("string" !== typeof str) {
return '';
}
const words = str.split(/\s+/g);
const completedWords = [];
for (let word of words) {
if (word.length > maxWordLength)
completedWords.push(word.slice(0,maxWordLength+1) + endLetters);
else
completedWords.push(word);
}
return completedWords.join(' ').replace(/\</g, "<");
}
Try this for a PHP rewrite of your JavaScript function. See comments for step-by-step explanation.
Outputs: here are some short words and a <really long one> now: pneumonoultramicrosco...
<?php
// Set some default values for max word length and end letters arguments.
// If you pass these, your paremeter values will be used.
function truncate(string $input, int $maxWordLength = 20, string $endLetters = '...')
{
// No manual type checking required if you use declare(strict_types=1),
// in combination with type hinting in the argument list.
// foreach() replaces for ... of
// preg_split() replaces String.prototype.split()
foreach (preg_split('/\s+/', $input) as $word)
{
// strlen() replaces .length
if (strlen($word) > $maxWordLength)
// substr() replaces String.prototype.slice()
$completedWords[] = substr($word, 0, $maxWordLength + 1) . $endLetters;
else
$completedWords[] = $word;
}
// implode() replaces .join()
// str_replace() replaces .replace()
return str_replace('<', '<', implode(' ', $completedWords));
}
$input = 'here are some short words and a <really long one> now: pneumonoultramicroscopicsilicovolcanoconiosis';
echo truncate($input);

How I run the replace function one time?

I want the replaceFunction to run only one time. For now works correctly only on first time, E-1 return Ε-1 (APPLE) but when user try to edit text field again system detect
Ε-1 and return Ε-1 (APPLE) (APPLE)..
td.onchange = function(e) {
this.value = this.value.replace(/(\E-(\d+))/g, replaceFunction);
function replaceFunction(match) {
// add additional rules here for more cases
if (match === "E-1") return "Ε-1 (APPLE)";
if (match === "E-2") return "Ε-2 (SUMSUNG)";
.
.
.
if(match === "E-99") return "Ε-99 (LG)";
return match;
}
}
How I stop this?
You can use something like this one more condition:
if (match === "E-1" && match !== "Ε-1 (APPLE)") return "Ε-1 (APPLE)";
this can be optimized, if you put the mapping into object:
var map = {
"E-1": "Ε-1 (APPLE)",
...
}
if (map[match] && !map[match] !== match) { return map[match]; }
and for this to work you will need regex that also match the word after in bracket:
var names = ['APPLE', 'SAMSUNG'];
var re = new RegExp('(E-(\\d+))(?! \\((?:' + names.join('|') + ')\\))', 'g');
Yet another solution is to use only array (this will only work if you E-NUM match index in array)
var names = ['APPLE', 'SAMSUNG'];
var re = new RegExp('(E-(\\d+))(?! \\((?:' + names.join('|') + ')\\))', 'g');
// regex explanation, same as yours but \\d is because it's a string
// we create negative look ahead so we check if next text
// after E-1 is not " (" and any of the names.
// we use (?: to group what's inside it's the same as with ()
// but the value will not be captured so there will be
// no param in function for this group
// so this regex will be the same as yours but will not match "E-1 (APPLE)"
// only "E-1"
this.value = this.value.replace(re, replaceFunction);
function replaceFunction(match, eg, num) {
// convert string to number E starts
var i = parseInt(num, 10) - 1;
if (i <= names.length) {
return match + ' (' + names[i] + ')';
}
}
the regex and function can be created outside of the change function, so it don't create new function on each change.
When replacing, also optionally lookahead for a space and parentheses that come after. This way, in the replacer function, you can check to see if what follows is already the value you want (eg, (APPLE)). If it is, then do nothing - otherwise, replace with the new string:
const replacementsE = [
, // nothing for E-0
'APPLE',
'SUMSUNG',
];
td.onchange = function(e) {
td.value = td.value.replace(/E-(\d+)(?= \(([^)]+)\)|)/g, replaceFunction);
function replaceFunction(match, digits, followingString) {
const replacement = replacementsE[digits];
if (!replacement || replacement === followingString) {
return match;
}
return `E-${digits} (${replacement})`;
}
}
<input id="td">
What /E-(\d+)(?= \(([^)]+)\)|)/ does is:
E- - Match E-
(\d+) - Capture digits in a group
(?= \(([^)]+)\)|) Lookahead for either:
\(([^)]+)\) A literal (, followed by non-) characters, followed by ). If this is matched, the non-) characters will be the second capture group
| - OR match the empty string (so that the lookahead works)
The digits will be the first capture group; the digits variable in the callback. The non-) characters will be the second capture group; the followingString variable in the callback.
If you also want to permit the final ) to be deleted, then make the final ) optional, and also make sure the character set does not match spaces (so that the space following APPLE, with no end ), doesn't get matched):
const replacementsE = [
, // nothing for E-0
'APPLE',
'SUMSUNG',
];
td.onchange = function(e) {
td.value = td.value.replace(/E-(\d+)(?= \(([^) ]+)\)?|)/g, replaceFunction);
function replaceFunction(match, digits, followingString) {
const replacement = replacementsE[digits];
if (!replacement || replacement === followingString) {
return match;
}
console.log(followingString)
return `E-${digits} (${replacement})`;
}
}
<input id="td">
If you want to permit any number of characters before the final ) to be deleted, then check if the replacement startsWith the following string:
const replacementsE = [
, // nothing for E-0
'APPLE',
'SUMSUNG',
];
td.onchange = function(e) {
td.value = td.value.replace(/E-(\d+)(?= \(([^) ]+)\)?|)/g, replaceFunction);
function replaceFunction(match, digits, followingString, possibleTrailingParentheses) {
const replacement = replacementsE[digits];
if (!replacement || replacement === followingString || replacement.startsWith(followingString)) {
return match;
}
return `E-${digits} (${replacement})`;
}
}
<input id="td">

Split a CSV with javascript

Is there a way to split a CSV string with javascript where the separator can also occur as an escaped value. Other regex implementations solve this problem with a lookbehind, but since javascript does not support lookbehind I wonder how I could accomplish this in a neatly fashion using a regex expression.
A csv line might look like this
"This is\, a value",Hello,4,'This is also\, possible',true
This must be split into (strings containing)
[0] => "This is\, a value"
[1] => Hello
[2] => 4
[3] => 'This is also\, possible'
[4] => true
Instead of trying to split you can try a global match for all that is not a , with this pattern:
/"[^"]+"|'[^']+'|[^,]+/g
for example you can use this regex:
(.*?[^\\])(,|$)
regex takes everything .*? until first comma, which does not have \ in front of it, or end of line
Here's some code that changes csv to json (assuming the first row it prop names). You can take the first part (array2d) and do other things with it very easily.
// split rows by \r\n. Not sure if all csv has this, but mine did
const rows = rawCsvFile.split("\r\n");
// find all commas, or chunks of text in quotes. If not in quotes, consider it a split point
const splitPointsRegex = /"(""|[^"])+?"|,/g;
const array2d = rows.map((row) => {
let lastPoint = 0;
const cols: string[] = [];
let match: RegExpExecArray;
while ((match = splitPointsRegex.exec(row)) !== null) {
if (match[0] === ",") {
cols.push(row.substring(lastPoint, match.index));
lastPoint = match.index + 1;
}
}
cols.push(row.slice(lastPoint));
// remove leading commas, wrapping quotes, and unneeded \r
return cols.map((datum) =>
datum.replace(/^,?"?|"$/g, "")
.replace(/""/g, `\"`)
.replace(/\r/g, "")
);
})
// assuming first row it props name, create an array of objects with prop names of the values given
const out = [];
const propsRow = array2d[0];
array2d.forEach((row, i) => {
if (i === 0) { return; }
const addMe: any = {};
row.forEach((datum, j) => {
let parsedData: any;
if (isNaN(Number(datum)) === false) {
parsedData = Number(datum);
} else if (datum === "TRUE") {
parsedData = true;
} else if (datum === "FALSE") {
parsedData = false;
} else {
parsedData = datum;
}
addMe[propsRow[j]] = parsedData;
});
out.push(addMe);
});
console.log(out);
Unfortunately this doesn't work with Firefox, only in Chrome and Edge:
"abc\\,cde,efg".split(/(?<!\\),/) will result in ["abc\,cde", "efg"].
You will need to remove all (unescaped) escapes in a second step.

Regex to capture Ids from text

I have the following regex where I am trying to capture the Ids of each start comment. But for some reason I am only able to capture the first one. It won't grab the Id of the nested comment. It only prints 1000 to the console. I am trying to get it to capture both 1000 and 2000. Can anyone spot the error in my regex?
<script type="text/javascript">
function ExtractText() {
var regex = /\<!--Start([0-9]{4})-->([\s\S]*?)<!--End[0-9]{4}-->/gm;
var match;
while (match = regex.exec($("#myHtml").html())) {
console.log(match[1]);
}
}
</script>
<div id="myHtml">
<!--Start1000-->Text on<!--Start2000-->the left<!--End1000-->Text on the right<!--End2000-->
</div>
Based on Mike Samuel's answer I updated my JS to the following:
function GetAllIds() {
var regex = /<!--Start([0-9]{4})-->([\s\S]*?)<!--End\1-->/g;
var text = $("#myHtml").html();
var match;
while (regex.test(text)) {
text = text.replace(
regex,
function (_, id, content) {
console.log(id);
return content;
});
}
}
In
<!--Start1000-->Text on<!--Start2000-->the left<!--End1000-->Text on the right<!--End2000-->
the "1000" region overlaps the "2000" region, but the exec loop only finds non-overlapping matches since each call to exec with the same regex and string starts at the end of the last match. To solve this problem, try
var regex = /<!--Start([0-9]{4})-->([\s\S]*?)<!--End\1-->/g;
for (var s = $("#myHtml").html(), sWithoutComment;
// Keep going until we fail to replace a comment bracketed chunk
// with the chunk minus comments.
true;
s = sWithoutComment) {
// Replace one group of non-overlapping comment pairs.
sWithoutComment = s.replace(
regex,
function (_, id, content) {
console.log(id);
// Replace the whole thing with the body.
return content;
});
if (s === sWithoutComment) { break; }
}
You can use grouping and then another regexp:
var regex = /(<!--Start)([0-9]{4})/ig;
var str = document.getElementById('myHtml').innerHTML;
var matches = str.match(regex);
for(var i=0;i<matches.length;i++){
var m = matches[i];
var num = m.match(/(\d+)/)[1];
console.log(num);
}

Live replacement for regular expressions with Javascript

I'm writing a code for live replacement of specific words in a text field as the user types.
I'm using regex and javascript:
The first array has the regular expressions to be found, and the second array has the words that should replace any them.
source = new Array(/\srsrs\s/,/\sñ\s/,/\snaum\s/,/\svc\s/,/\scd\s/,/\sOq\s/,/\soke\s/,/\so\sq\s/,
/\soque\s/,/\soqe\s/,/\spq\s/,/\sq\s/,/\sp\/\s/g,/\spra\s/,/\sp\s/,/\stbm\s/,
/\stb\s/,/\std\s/,/\sblz\s/,/\saki\s/,/\svlw\s/,/\smara\s/,/\sqlq\s/,/\sqq\s/,
/\srpz\s/,/\smsm\s/,/\smto\s/,/\smtu\s/,/\sqro\s/,/\sqdo\s/,/\sqd\s/,/\sqnd\s/,
/\sqto\s/,/\sqm\s/,/\sjah\s/, /\sc\/\s/,/\scmg\s/,/\s\+\sou\s\-\s/,/\sflw\s/,
/\sxau\s/,/\sto\s/,/\sta\s/);
after = new Array("risos","não","não","você","cadê","o que","o que","o que","o que","o que","porque",
"que","para","para","para","também","também","tudo","beleza","aqui","valeu","maravilhoso",
"qualquer","qualquer","rapaz","mesmo","muito","muito","quero","quando","quando","quando",
"quanto","quem","Já","com","comego","mais ou menos","falow","tchau","estou","está");
This is the function that does the replacement:
function replacement(){
for(i=0; i<source.length; i++){
newtext = " "+document.getElementById("translation").value+" ";
console.log(newtext);
if(myregex = newtext.match(source[i])){
newafter = after[i];
rafael = myregex+" ";
document.getElementById("translation").value = document.getElementById("translation").value.replace(rafael, newafter);
}
}
}
My problem is every time the function is called to replace an expression with only one letter, the replacement is being made on the first occurrence of that letter, even within a word. I thought looking for that letter with \s before and after would solve it, but it didn't.
If you're looking only to match a word, you should put \b before and after (word boundary). This will ensure that you don't match parts of words. Also note that you are corrupting your regex by concatenating a string. Try this instead:
var in = document.getElementById("translation").value;
if( in.charAt(in.length-1) == " ") { // user has just finished typing a word
// this avoids interrupting the word being typed
var l = source.length, i;
for( i=0; i<l; i++) in = in.replace(source[i],after[i]);
document.getElementById("translation").value = in;
}
You need to add a g (global) modified to regexes so that it will replace all occurrences and use \b instead of \s to mark word boundaries.
source = new Array(/\brsrs\b/g,/\bñ\b/g, etc
On a side note, since all your regexes follow the same pattern it might be easier to just do:
source = new Array( 'rsr', 'ñ', 'naum', etc );
if( myregex = newtext.match( new Regexp( "\b"+source[i]+"\b", 'g' ) ) ) {
...
If by "live replacement" you mean calling function replacement at each keystroke then \b at the end will not help you, you should indeed use \s. However in your replacement function your are adding a space to the text field value so your single character words are triggering the replacement.
Here is my refactoring of your code :
(function () { // wrap in immediate function to hide local variables
source = [ [/\brsrs\s$/, "risos"], // place reg exp and replacement next to each other
[/\b(ñ|naum)\s$/, "não"], // note combined regexps
[/\bvc\s$/, "você"]
// ...
]; // not also use of array literals in place of new Array
document.getElementById ("translation"​​​​​​​).addEventListener ('keyup', function (ev) {
var t = this.value // fetch text area value
, m
, i = source.length;
while (i--) // for each possible match
if ((m = t.match(source[i][0]))) { // does this one match ?
// replace match : first remove the match string (m[0]) from the end of
// the text string, then add the replacement word followed by a space
this.value = t.slice (0, -m[0].length) + source[i][1] + ' ';
return; // done
}
}, false);
}) ();​
And the fiddle is : http://jsfiddle.net/jFYuV
In a somewhat different style, you could create a function that encapsulated the list of substitutions:
var substitutions = {
"rsrs": "risos",
"ñ": "não",
"naum": "não",
"vc": "você",
// ...
};
var createSubstitutionFunction = function(subs) {
var keys = [];
for (var key in subs) {
if (subs.hasOwnProperty(key)) {
keys[keys.length] = key;
}
}
var regex = new RegExp("\\b" + keys.join("\\b|\\b") + "\\b", "g");
return function(text) {
return text.replace(regex, function(match) {
return subs[match];
});
};
};
var replacer = createSubstitutionFunction(substitutions);
You would use it like this:
replacer("Some text with rsrs and naum and more rsrs and vc")
// ==> "Some text with risos and não and more risos and você"

Categories

Resources