add quotes to each item in a text file - javascript

I want to use this list of bad words for my input filtering. It's a plain list right now, but I need to convert it to JSON for my server to use.
I don't want to go through each line and add quotes and a ,. Is there a regex or fast way to add " ", to each line in a txt file?
Such that:
2g1c
2 girls 1 cup
acrotomophilia
alabama hot pocket
alaskan pipeline
Becomes
"2g1c",
"2 girls 1 cup",
"acrotomophilia",
"alabama hot pocket",
"alaskan pipeline",
...

Use backtick `
var txt=`2g1c
2 girls 1 cup
acrotomophilia
alabama hot pocket
alaskan pipeline`;
var arrayUntrimmed = txt.split("\n");
var array=arrayUntrimmed.map(function(a){return a.trim()});
(Note: This ECMAScript 6 feature supported from Firefox 34 and Chrome 41)

All you have to do is split the string at the new lines and drop the last item in the array (since it's empty).
var txt = '2g1c\n2 girls 1 cup\nacrotomophilia\nalabama hot pocket\nalaskan pipeline\n';
var array = txt.split('\n').slice(0, -1);
console.log(array)
You can then use Array.prototype.some as a predicate method to find out if a given string contains one or more of the blacklisted words.
var txt = '2g1c\n2 girls 1 cup\nacrotomophilia\nalabama hot pocket\nalaskan pipeline\n';
var array = txt.split('\n').slice(0, -1);
var input1 = 'not bad';
var input2 = 'An alaskan pipeline is quite creative...';
var input1HasBadWords = array.some(function (word) {
return input1.indexOf(word) > -1;
});
var input2HasBadWords = array.some(function (word) {
return input2.indexOf(word) > -1;
});
console.log('input1 is: ' + input1HasBadWords);
console.log('input2 is: ' + input2HasBadWords);
Your controller would look something like so:
const fs = require('fs');
app.post('/route', (req, res) => {
fs.readFile('/etc/hosts', 'utf8', (err, data) => {
if (err) {
res.sendStatus(500);
}
const badWords = data.split('\n').slice(0, -1);
const hasBadWords = badWords.some((word) => {
return req.body.input.indexOf(word) > -1;
});
if(hasBadWords) {
res.send('Dirty mouth? Clean it with orbit!');
} else {
res.send('You are very polite');
}
});
});

http://pastebin.com/U5phzWUM
I guess the easiest way is to use a software for this. It took me 30 sec to do this with SublimeText
http://www.sublimetext.com/docs/selection

You can use readline module. read and add quotes in each line.
readline: https://nodejs.org/api/readline.html

Related

JavaScript search using includes method in js with exact match

Here is my code and the problem is whether you search for "remember" or "member" it returns the result.As 'remember' has 'member' and I just want to search for exact matches. I should not return anything when I search for 'member'.
txt= `38
00:04:17.795 --> 00:04:23.551
Two previous cases
were open and shut.
39
00:04:23.601 --> 00:04:29.140
It was January 3, 1995,
my daughter's birthday.
40
00:04:29.140 --> 00:04:30.441
I remember.`
const searchedWord = 'member';
var res = txt
.toLowerCase()
.split('\n\n')
.filter((x) => {
return x.includes(searchedWord.toLowerCase());
});
console.log(res);
It sounds like you want to require word breaks on either side of the word you're searching for. You can do that with a regular expression:
const searchRegex = /\bmember\b/i; // ***
var res = txt
.toLowerCase()
.split('\n\n')
.filter((x) => {
return searchRegex.test(x); // ***
});
Live Example:
const txt = `38
00:04:17.795 --> 00:04:23.551
Two previous cases
were open and shut.
39
00:04:23.601 --> 00:04:29.140
It was January 3, 1995,
my daughter's birthday.
40
00:04:29.140 --> 00:04:30.441
I remember.
41
He was a member of the club
42
Something else`;
const searchRegex = /\bmember\b/i;
var res = txt
.toLowerCase()
.split('\n\n')
.filter((x) => {
return searchRegex.test(x);
});
console.log(res);
If you need to create the regular expression dynamically (that is, starting with a string variable or similar containing "member"), see this question's answers and, if relevant, this one's.

Telegram app to remove multiple words from input message

I'm trying to create a small app that reads a chat message and echoes a filtered text
I need it to take this message:
INPUT:
#notthis/remove
remove this line
This one too
Output:
notthis
At the moment all it does is remove the second word + the first hashtag:
"hello#hello" which becomes hello
I tried adding the / like this input.split("#","/"); but all it does is crash the program.
Not asking to do the program for me, but I'd really appreciate any hints.
Thank you!
const Telegraf = require('telegraf');
const bot = new Telegraf('182049');
const helpMessage = `
Say something to me
/start - start the bot
/help - command reference
`;
bot.start((ctx) => {
ctx.reply("Hi I am echoo bot");
ctx.reply(helpMessage);
});
bot.help((ctx) => {
ctx.reply(helpMessage)
});
bot.on("text", (ctx) => {
let input = ctx.message.text;
let inputArray = input.split("#");
console.log(inputArray);
let message = "";
if (inputArray.length == 1) {
message = "no separator";
} else {
inputArray.shift();
message = inputArray.join(" ");
}
ctx.reply(message);
});
bot.launch()
Regular Expression (regex) is a great way to find substrings in an unknown string, even though it might be a bit daunting to work with. You can use this website to create your regex.
To find a substring starting with # and ending with /, you can use the following expression:
/#\w+\//g
This will match 1 or more word characters that are in between a # and a /.
For example:
#foo/ will match
#f/ will match
#/ will not match
#foo bar/ will not match (whitespace is not a word character)
An example JavaScript code:
const regex = /#\w+\//g;
const text = "something#fooo/bar #lorem/ipsum";
const found = text.match(regex);
// For each found item, remove the first and last characters
const cleaned = found.map((a) => a.slice(1, -1));
console.log(cleaned);
I think I made it
const Telegraf = require('telegraf');
const bot = new Telegraf('18204DjYLa9o9Y');
const regex = /#\w+\//g;
const helpMessage = `
Say something to me
/start - start the bot
/help - command reference
`;
bot.start((ctx) => {
ctx.reply("Hi I am echoo bot");
ctx.reply(helpMessage);
})
bot.help((ctx) => {
ctx.reply(helpMessage)
})
bot.on("text", (ctx) => {
let input = ctx.message.text;
let inputArray = input.split("#");
console.log(inputArray);
let message = "";
inputArray.shift();
message = inputArray.join(" ");
let messageArray = message.split("/");
console.log(messageArray);
for(let index = 0; message[index] != '/'; index++ )
{
ctx.reply("/ta " + message[index]);
}
return null
})
bot.hears("cat", (ctx) =>{
ctx.reply("Meow");
})
bot.launch()
Only a little issue: if my coin has more than 3 letters it gets truncated.
For loop solves the problem but every letter starts with a new line, and the second imput gets messed up
Eg: echobot, [13.05.21 00:54]
E
echobot, [13.05.21 00:54]
O
echobot, [13.05.21 00:54]
S
2nd attempt:
echobot, [13.05.21 00:54]
S
echobot, [13.05.21 00:54]
O
echobot, [13.05.21 00:54]
E

How do I split text by every other line?

Or in other words by every two lines? Right now i only seem to be able to split by every line.
the list would be something like this copied into the text area:
Style:
CGV7
Fabric:
95% Polyester, 5% Elastane
Source:
Imported
Guarantee:
Lifetime Warranty
this is the result I want
Style: CGV7
Fabric: 95% Polyester, 5% Elastane
Source: Imported
Guarantee: Lifetime Warranty
here is my code:
<textarea id="tables" name="" cols="78" rows="10" onchange="splitIt()">
</textarea>
function splitIt(){
var items = [];
var tablevalues = document.getElementById("tables").value;
var splitItems =tablevalues.split(/\n/);
items.push(splitItems);
console.log(items);
}
let p = text.split('\n');
let results = '';
p.forEach((x, index) => {
if (index%2 !== 0) {
results = results + x + '\n';
}
else {
results = results +x;
}
});
console.log(results);
You can achieve this by splitting the whole string on \n and then you can add it back on every odd element concat.
I hope that this is what you are looking for:
str.replace(/[\r\n]/g, " ")
.split(" ")
.filter((x) => x.trim())
.join("\n")
Updated, this might be short and cool :)
a.split(/:\n/).join(":")

Get initials and full last name from a string containing names

Assume there are some strings containing names in different format (each line is a possible user input):
'Guilcher, G.M., Harvey, M. & Hand, J.P.'
'Ri Liesner, Peter Tom Collins, Michael Richards'
'Manco-Johnson M, Santagostino E, Ljung R.'
I need to transform those names to get the format Lastname ABC. So each surename should be transformed to its initial which are appended to the lastname.
The example should result in
Guilcher GM, Harvey M, Hand JP
Liesner R, Collins PT, Richards M
Manco-Johnson M, Santagostino E, Ljung R
The problem is the different (possible) input format. I think my attempts are not very smart, so I'm asking for
Some hints to optimize the transformation code
How do I put those in a single function at all? I think first of all I have to test which format the string has...??
So let me explain how far I tried to solve that:
First example string
In the first example there are initials followed by a dot. The dots should be removed and the comma between the name and the initals should be removed.
firstString
.replace('.', '')
.replace(' &', ', ')
I think I do need an regex to get the comma after the name and before the initials.
Second example string
In the second example the name should be splitted by space and the last element is handled as lastname:
const elm = secondString.split(/\s+/)
const lastname = elm[elm.length - 1]
const initials = elm.map((n,i) => {
if (i !== elm.length - 1) return capitalizeFirstLetter(n)
})
return lastname + ' ' + initals.join('')
...not very elegant
Third example string
The third example has the already the correct format - only the dot at the end has to be removed. So nothing else has to be done with that input.
It wouldn't be possible without calling multiple replace() methods. The steps in provided solution is as following:
Remove all dots in abbreviated names
Substitute lastname with firstname
Replace lastnames with their beginning letter
Remove unwanted characters
Demo:
var s = `Guilcher, G.M., Harvey, M. & Hand, J.P.
Ri Liesner, Peter Tom Collins, Michael Richards
Manco-Johnson M, Santagostino E, Ljung R.`
// Remove all dots in abbreviated names
var b = s.replace(/\b([A-Z])\./g, '$1')
// Substitute first names and lastnames
.replace(/([A-Z][\w-]+(?: +[A-Z][\w-]+)*) +([A-Z][\w-]+)\b/g, ($0, $1, $2) => {
// Replace full lastnames with their first letter
return $2 + " " + $1.replace(/\b([A-Z])\w+ */g, '$1');
})
// Remove unwanted preceding / following commas and ampersands
.replace(/(,) +([A-Z]+)\b *[,&]?/g, ' $2$1');
console.log(b);
Given your example data i would try to make guesses based on name part count = 2, since it is very hard to rely on any ,, & or \n - which means treat them all as ,.
Try this against your data and let me know of any use-cases where this fails because i am highly confident that this script will fail at some point with more data :)
let testString = "Guilcher, G.M., Harvey, M. & Hand, J.P.\nRi Liesner, Peter Tom Collins, Michael Richards\nManco-Johnson M, Santagostino E, Ljung R.";
const inputToArray = i => i
.replace(/\./g, "")
.replace(/[\n&]/g, ",")
.replace(/ ?, ?/g, ",")
.split(',');
const reducer = function(accumulator, value, index, array) {
let pos = accumulator.length - 1;
let names = value.split(' ');
if(names.length > 1) {
accumulator.push(names);
} else {
if(accumulator[pos].length > 1) accumulator[++pos] = [];
accumulator[pos].push(value);
}
return accumulator.filter(n => n.length > 0);
};
console.log(inputToArray(testString).reduce(reducer, [[]]));
Here's my approach. I tried to keep it short but complexity was surprisingly high to get the edge cases.
First I'm formatting the input, to replace & for ,, and removing ..
Then, I'm splitting the input by \n, then , and finally (spaces).
Next I'm processing the chunks. On each new segment (delimited by ,), I process the previous segment. I do this because I need to be sure that the current segment isn't an initial. If that's the case, I do my best to skip that inital-only segment and process the previous one. The previous one will have the correct initial and surname, as I have all the information I neeed.
I get the initial on the segment if there's one. This will be used on the start of the next segment to process the current one.
After finishing each line, I process again the last segment, as it wont be called otherwise.
I understand the complexity is high without using regexp, and probably would have been better to use a state machine to parse the input instead.
const isInitial = s => [...s].every(c => c === c.toUpperCase());
const generateInitial = arr => arr.reduce((a, c, i) => a + (i < arr.length - 1 ? c[0].toUpperCase() : ''), '');
const formatSegment = (words, initial) => {
if (!initial) {
initial = generateInitial(words);
}
const surname = words[words.length - 1];
return {initial, surname};
}
const doDisplay = x => x.map(x => x.surname + ' ' + x.initial).join(', ');
const doProcess = _ => {
const formatted = input.value.replace(/\./g, '').replace(/&/g, ',');
const chunks = formatted.split('\n').map(x => x.split(',').map(x => x.trim().split(' ')));
const peoples = [];
chunks.forEach(line => {
let lastSegment = null;
let lastInitial = null;
let lastInitialOnly = false;
line.forEach(segment => {
if (lastSegment) {
// if segment only contains an initial, it's the initial corresponding
// to the previous segment
const initialOnly = segment.length === 1 && isInitial(segment[0]);
if (initialOnly) {
lastInitial = segment[0];
}
// avoid processing last segments that were only initials
// this prevents adding a segment twice
if (!lastInitialOnly) {
// if segment isn't an initial, we need to generate an initial
// for the previous segment, if it doesn't already have one
const people = formatSegment(lastSegment, lastInitial);
peoples.push(people);
}
lastInitialOnly = initialOnly;
// Skip initial only segments
if (initialOnly) {
return;
}
}
lastInitial = null;
// Remove the initial from the words
// to avoid getting the initial calculated for the initial
segment = segment.filter(word => {
if (isInitial(word)) {
lastInitial = word;
return false;
}
return true;
});
lastSegment = segment;
});
// Process last segment
if (!lastInitialOnly) {
const people = formatSegment(lastSegment, lastInitial);
peoples.push(people);
}
});
return peoples;
}
process.addEventListener('click', _ => {
const peoples = doProcess();
const display = doDisplay(peoples);
output.value = display;
});
.row {
display: flex;
}
.row > * {
flex: 1 0;
}
<div class="row">
<h3>Input</h3>
<h3>Output</h3>
</div>
<div class="row">
<textarea id="input" rows="10">Guilcher, G.M., Harvey, M. & Hand, J.P.
Ri Liesner, Peter Tom Collins, Michael Richards
Manco-Johnson M, Santagostino E, Ljung R.
Jordan M, Michael Jackson & Willis B.</textarea>
<textarea id="output" rows="10"></textarea>
</div>
<button id="process" style="display: block;">Process</button>

Javascript REGEX: need to retrieve the ID as well as start and end time from embed URL

Here is an example of url structure I'll be working with (ignore the age of electric video :) )
http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50
Basically I want to be able to grab the video id, the chosen start time (20) and end chosen time (50) and save them as variables from any URL that follows the pattern above.
So a simple setup is this:
var url = 'http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50'
// get youtube id
function youtubeid(url) {
var ytid = url.match(dont know);
ytid = ytid[1];
return ytid;
}
// get youtube start time
function youtubeStart(url) {
var ytStart = url.match(dont know);
ytStart=ytStart[1];
return ytStart;
}
// get youtube end time
function youtubeEnd(url) {
var ytEnd = url.match(dont know);
ytEnd=ytEnd[1];
return ytEnd;
}
If you could help me fill in the blanks that would be most amazing. I've been staring at regex documentation for a while now and just getting more and more confused.
This other Stack Overflow answer may help you. I used Peter Mortensen's answer below.
Get query string values in JavaScript
To obtain the actual YouTube Id, you can use this regular expression:
http:\/\/www.youtube.com\/embed\/(.{11})
That regex will return the value in parenthesis. You can test it here:
http://www.pagecolumn.com/tool/regtest.htm
Sample code:
var url = 'http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50'
// get youtube id
function youtubeid(url) {
var ytid = url.match(/http:\/\/www.youtube.com\/embed\/(.{11})/);
ytid = ytid[1];
return ytid;
}
alert(youtubeid(url));
function getParameterByName(name, url) {
var match = RegExp('[?&]' + name + '=([^&]*)')
.exec(url);
return match && decodeURIComponent(match[1].replace(/\+/g, ' '));
}
alert(getParameterByName('start', url));
alert(getParameterByName('end', url));
1
/http:\/\/www\.youtube\.com\/embed\/([^?]+)/
2
/http:\/\/www\.youtube\.com\/embed\/[^?]+.*[?&]start=(\d+)(?:&|$)/
3
/http:\/\/www\.youtube\.com\/embed\/[^?]+.*[?&]end=(\d+)(?:&|$)/
This'll only work if you know your URLs will look exactly like the one you gave (no extra query parameters; start and end always in that order; no HTTPS; etc.). But you can get them all at once:
js> str = 'http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50'
http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50
js> rxp = /http:\/\/www.youtube.com\/embed\/(.*)\?&start=(\d+)?&end=(\d+)?/
/http:\/\/www.youtube.com\/embed\/(.*)\?&start=(\d+)?&end=(\d+)?/
js> result = rxp.exec(str)
http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50,ABCumLrphFA,20,50
js> result[0]
http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50
js> result[1]
ABCumLrphFA
js> result[2]
20
js> result[3]
50
I believe it's possible to write a regex that can cope with all the quirks I mentioned above, but it's way uglier and makes it harder to understand. Anyway - hope this helps!
See also: JavaScript Regex Escape Sequences and JavaScript Regex Methods
var url = "http://www.youtube.com/embed/ABCumLrphFA?&start=20&end=50";
// get youtube id
function youtubeid(url) {
q = url.substring(url.lastIndexOf("/")+1);
var ytid = q.substring(q.lastIndexOf("?"), -1);
return ytid;
}
// get youtube start time
function youtubeStart(url) {
q = url.substring(url.lastIndexOf("/")+1);
var ytStart = q.substring(q.indexOf("&start")+7,q.indexOf("&end"));
return ytStart;
}
// get youtube end time
function youtubeEnd(url) {
q = url.substring(url.lastIndexOf("/")+1);
var ytEnd = q.substring(q.indexOf("&end")+5);
return ytEnd;
}
console.log(youtubeid(url));
console.log(youtubeStart(url));
console.log(youtubeEnd(url));
To retrieve the id
url.match(/embed\/(.*)\?/)
The best way to retrieve URL params (start and end) is to do something like Get Querystring with Dojo Then you could use the following to retrieve start and end
var qs = getUrlParams();
console.log("start is " + qs.start + " and end is " + qs.end )

Categories

Resources