Using Google NodeJS speech-to-text with audio from client microphone - javascript

I am trying to use Google's speech-to-text Node.js library (https://github.com/googleapis/nodejs-speech) and stream audio captured from the client's microphone with navigator.mediaDevices.getUserMedia.
I was able to stream the microphone with sox into nodejs-speech streamingRecognize and it worked.
I was also able to stream audio from the client side and pipe it to the speakers on the server side. However, when I pipe it to streamingRecognize, it doesn't recognize a single word.
Server-side
var io = require("socket.io")(server);
const speech = require('@google-cloud/speech');
const speechClient = new speech.SpeechClient();

const request = {
  config: {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
  },
  interimResults: true,
  //singleUtterance: false
};

let recognizeStream = speechClient
  .streamingRecognize(request)
  .on('data', data => {
    console.log(data);
    process.stdout.write(
      data.results[0] && data.results[0].alternatives[0] ?
        `Transcription: ${data.results[0].alternatives[0].transcript}\n` :
        `\n\nReached transcription time limit, press Ctrl+C\n`
    );
  });

io.on("connection", function (client) {
  client.on("userSpeaking", function (data) {
    if (recognizeStream !== null) {
      recognizeStream.write(new Uint8Array(data));
    }
  });
});
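The config values (encoding, sampleRateHertz, languageCode) are referenced but not shown in the question. For reference, a minimal sketch of what they would look like for raw 16-bit PCM coming from the browser; these are hypothetical values, not the original ones:

// Hypothetical config values, assuming raw 16-bit PCM is sent from the browser.
// LINEAR16 is the Speech API encoding for uncompressed little-endian PCM.
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;   // must match the rate of the audio actually sent
const languageCode = 'en-US';

Whatever values are used, sampleRateHertz has to match the rate of the samples the client actually emits; a ScriptProcessorNode delivers audio at the AudioContext's native rate (often 44100 or 48000 Hz) unless it is resampled first.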
Client-side
function convertFloat32ToInt16(buffer) {
  let l = buffer.length;
  let buf = new Int16Array(l);
  while (l--) {
    buf[l] = Math.min(1, buffer[l]) * 0x7FFF;
  }
  return buf.buffer;
}
AudioContext = window.AudioContext || window.webkitAudioContext;
context = new AudioContext();
processor = context.createScriptProcessor(bufferSize, 1, 1);
processor.connect(context.destination);
context.resume();

function microphoneProcess(e) {
  var left = e.inputBuffer.getChannelData(0);
  var left16 = convertFloat32ToInt16(left);
  socket.emit('userSpeaking', left16);
}

navigator.mediaDevices
  .getUserMedia({
    video: false,
    audio: true
  }, {
    type: 'audio',
    sampleRate: 16000,
    desiredSampleRate: 16000,
    audioBitsPerSecond: 16000,
    mimeType: 'audio/webm;codecs=opus'
  })
  .then((stream) => {
    globalStream = stream;
    input = context.createMediaStreamSource(stream);
    input.connect(processor);
    processor.onaudioprocess = function (e) {
      microphoneProcess(e);
    };
  })
  .catch(console.error);
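One likely mismatch here: navigator.mediaDevices.getUserMedia takes a single constraints object, so the second argument (sampleRate, mimeType, etc.) is ignored, and the ScriptProcessorNode delivers samples at the AudioContext's native rate (typically 44100 or 48000 Hz), not 16000 Hz. If the server config declares sampleRateHertz: 16000, the audio has to be downsampled on the client first. A minimal sketch, where downsampleTo16k is an illustrative helper that is not part of the original code:

// Hypothetical helper: naive decimation from the context's native rate to 16 kHz.
// A production version would low-pass filter first to avoid aliasing.
function downsampleTo16k(float32Buffer, inputRate) {
  const ratio = inputRate / 16000;
  const outLength = Math.floor(float32Buffer.length / ratio);
  const out = new Float32Array(outLength);
  for (let i = 0; i < outLength; i++) {
    out[i] = float32Buffer[Math.floor(i * ratio)];
  }
  return out;
}

// Possible usage inside the onaudioprocess handler:
// const left = e.inputBuffer.getChannelData(0);
// const left16k = downsampleTo16k(left, context.sampleRate);
// socket.emit('userSpeaking', convertFloat32ToInt16(left16k));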

Related

Audio Worklet - How to capture microphone streams

I am trying to capture the live microphone stream from my desktop. When converting the Float32 samples to an Int16Array, all the values come out as 0. I'm not sure what I have done wrong here. How can I capture the live microphone stream in an AudioWorklet?
Below is the JavaScript code:
try {
  navigator.getUserMedia = navigator.getUserMedia
    || navigator.webkitGetUserMedia
    || navigator.mozGetUserMedia;
  microphone = navigator.getUserMedia({
    audio: true,
    video: false
  }, onMicrophoneGranted, onMicrophoneDenied);
} catch (e) {
  alert(e);
}

async function onMicrophoneGranted(stream) {
  console.log(stream);
  context = new AudioContext();
  source = context.createMediaStreamSource(stream);
  await context.audioWorklet.addModule('/assets/js/buffer-detector.js');
  // Create our custom node.
  bufferDetectorNode = new AudioWorkletNode(context, 'buffer-detector');
  bufferDetectorNode.port.onmessage = (event) => {
  };
  source.connect(bufferDetectorNode);
  bufferDetectorNode.connect(context.destination);
  //source.connect(context.destination);
}

function onMicrophoneDenied() {
  console.log('denied');
}
Below is the AudioWorklet processor:
class BufferProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.initBuffer();
  }

  process(inputs) {
    console.log(inputs);
    inputs.forEach(floatArray => {
      floatArray.forEach(elem => {
        const intArray = Int16Array.from(elem);
        console.log(intArray);
      });
    });
    //const input = inputs[0];
    //this.append(input[0])
    return true;
  }

  static get parameterDescriptors() {
    return [{
      name: 'Buffer Detector',
    }];
  }
}

registerProcessor('buffer-detector', BufferProcessor);
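The zeros are expected with this conversion: Int16Array.from truncates each float to an integer, and Web Audio samples are floats in the range [-1, 1], so almost every value truncates to 0. The samples need to be scaled to the 16-bit range before the cast. A minimal sketch of a processor that does this scaling (the processor name and postMessage forwarding are illustrative, not the original buffer-detector logic):

// Minimal sketch: scale Float32 samples to Int16 before posting to the main thread.
class Int16ForwardProcessor extends AudioWorkletProcessor {
  process(inputs) {
    const channel = inputs[0] && inputs[0][0];            // first input, first channel
    if (channel && channel.length) {
      const int16 = new Int16Array(channel.length);
      for (let i = 0; i < channel.length; i++) {
        const s = Math.max(-1, Math.min(1, channel[i]));  // clamp to [-1, 1]
        int16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;       // scale to 16-bit range
      }
      this.port.postMessage(int16.buffer, [int16.buffer]); // transfer to main thread
    }
    return true; // keep the processor alive
  }
}
registerProcessor('int16-forward', Int16ForwardProcessor);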

Chrome extension video recording blob not able to convert into a video file

I am creating a Chrome extension to record the screen and am facing an issue converting the recording blob into a video file. In background.js the video is recorded correctly, but in content.js I am not able to convert the video blob into a video file.
function startRecording() {
  var constraints = {
    audio: true,
    video: true,
    maxframeRate: fps,
  };
  navigator.mediaDevices.getDisplayMedia(constraints).then(function (stream) {
    let output = new MediaStream();
    if (output.getAudioTracks().length == 0) {
      // Get microphone audio (system audio is unreliable & doesn't work on Mac)
      if (micable) {
        micsource.connect(destination);
        output.addTrack(destination.stream.getAudioTracks()[0]);
      }
    } else {
      syssource = audioCtx.createMediaStreamSource(stream);
      if (micable) {
        micsource.connect(destination);
      }
      syssource.connect(destination);
      output.addTrack(destination.stream.getAudioTracks()[0]);
    }
    output.addTrack(stream.getVideoTracks()[0]);

    mediaConstraints = {
      audio: true,
      video: true,
      mimeType: "video/webm;codecs=vp8,opus",
    };
    mediaRecorder = new MediaRecorder(stream, mediaConstraints);
    mediaRecorder.start(1000);

    var recordedBlobs = [];
    let writer = "";
    mediaRecorder.ondataavailable = (event) => {
      if (event.data && event.data.size > 0) {
        recordedBlobs.push(event.data);
      }
      console.log("recordedBlobs", recordedBlobs);
    };
    mediaRecorder.onstop = () => {
      chrome.tabs.getSelected(null, (tab) => {
        chrome.tabs.sendMessage(tab.id, {
          message: "download-video",
          obj: {
            blobs: recordedBlobs,
          },
          // camerasize: camerasize
        });
      });
      endRecording(stream, writer, recordedBlobs);
    };
    stream.getVideoTracks()[0].onended = function () {
      cancel = false;
      mediaRecorder.stop();
    };
  });
}
content.js
function convertVideoBlobToVideo(obj) {
  let chunks = obj.blobs;
  // mediaRecorder.onstop = () => {
  var superBuffer;
  superBuffer = new Blob(chunks, {
    type: "video/webm",
  });
  chunks = [];
  // Create a video or audio element
  // that stores the recorded media
  const recordedMedia = document.createElement("video");
  recordedMedia.controls = true;
  const recordedMediaURL = URL.createObjectURL(superBuffer);
  recordedMedia.src = recordedMediaURL;

  const downloadButton = document.createElement("a");
  downloadButton.download = "Recorded-Media";
  downloadButton.href = recordedMediaURL;
  downloadButton.innerText = "Download it!";
  downloadButton.onclick = () => {
    URL.revokeObjectURL(recordedMedia);
  };
  document.body.appendChild(recordedMedia, downloadButton);
  // };
}
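One thing to note about this setup: chrome.tabs.sendMessage serializes messages as JSON, and Blob objects do not survive that serialization, so the content script receives empty objects rather than the recorded chunks. A common workaround is to convert the combined recording into a data URL in the background script and send the string instead. A minimal sketch, assuming the same recordedBlobs array and message shape as above (sendRecordingToTab is an illustrative helper, not part of the original code):

// Background side: turn the recorded chunks into a JSON-safe data URL
// before messaging it to the content script.
function sendRecordingToTab(tabId, recordedBlobs) {
  const superBuffer = new Blob(recordedBlobs, { type: "video/webm" });
  const reader = new FileReader();
  reader.onload = () => {
    chrome.tabs.sendMessage(tabId, {
      message: "download-video",
      obj: { dataUrl: reader.result },   // a string survives JSON serialization
    });
  };
  reader.readAsDataURL(superBuffer);
}

On the content-script side, the received data URL can then be used directly as the video element's src or as the href of the download link.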

Live stream output from JS MediaRecorder to Python speech recognition server via socket.io

I'm trying to stream the microphone from my browser to a server running a Python service connected to Google Cloud Speech-to-Text. For the transfer I'm using socket.io. Everything seems to work, but the speech recognition doesn't return any result. I suspect a problem with the format of the sent data.
In the browser I'm using a MediaRecorder with the MIME type audio/webm;codecs=opus.
// simpleTest.js
'use strict';

// Configuration
var language = 'fr-FR';
var mimeType = 'audio/webm;codecs=opus';
var format = 'WEBM_OPUS'; // Valid sample rates: 8000, 12000, 16000, 24000, 48000
var sampleRate = 16000;
var recording = false;
var audioStream = null;
var mediaRecorder = null;
var audioChunks = [];
var namespace = '/ingestor'; // change to an empty string to use the global namespace

// Initialize socket
var socket = io(namespace);

socket.on('connect', function () {
  console.log("connected to the SocketServer " + namespace);
});

socket.on('my_response', function (msg, callback) {
  console.log("received message from the SocketServer " + namespace);
  $('#log').append('<br>' + $('<div/>').text('logs #' + msg.count + ': ' + msg.data).html());
  if (callback)
    callback();
});

socket.on('connect_error', (error) => {
  console.error("Socket connection error: " + error);
});

socket.on('disconnect', (reason) => {
  console.log("Socket disconnected: " + reason);
  if (reason === "io server disconnect") {
    // the disconnection was initiated by the server, you need to reconnect manually
    socket.connect();
  }
});

const sendMessage = async (aSocket, msg) => {
  if (aSocket.connected) {
    aSocket.emit('my_event', {data: msg});
  }
};

const initRecording = () => {
  recording = false;
  window.AudioContext = window.AudioContext || window.webkitAudioContext;
  if (navigator.mediaDevices === undefined) {
    navigator.mediaDevices = {};
  }
  if (navigator.mediaDevices.getUserMedia === undefined) {
    navigator.mediaDevices.getUserMedia = function (constraints) {
      // First get ahold of the legacy getUserMedia, if present
      const getUserMedia = navigator.webkitGetUserMedia || navigator.mozGetUserMedia;
      // Some browsers just don't implement it - return a rejected promise with an error
      // to keep a consistent interface
      if (!getUserMedia) {
        return Promise.reject(new Error('getUserMedia is not implemented in this browser'));
      }
      // Otherwise, wrap the call to the old navigator.getUserMedia with a Promise
      return new Promise(function (resolve, reject) {
        getUserMedia.call(navigator, constraints, resolve, reject);
      });
    };
  }

  // Initialize audio stream
  console.log("Creating audio stream");
  navigator.mediaDevices.getUserMedia({audio: true})
    .then((stream) => {
      console.log("Audio stream successfully created");
      audioStream = stream;
      mediaRecorder = new MediaRecorder(stream, {
        audioBitsPerSecond: sampleRate,
        mimeType: mimeType
      });
      console.log('mimeType: ' + mediaRecorder.mimeType);
      mediaRecorder.ondataavailable = handleDataAvailable;
    }).catch((error) => {
      console.log("Error while creating the audio stream");
      console.log(error);
    });
};

const startRecording = () => {
  recording = true;
  console.log('startRecording');
  mediaRecorder.start(1000);
};

const stopRecording = () => {
  recording = false;
  console.log('stopRecording');
  mediaRecorder.stop();
};

const handleDataAvailable = (event) => {
  console.log('handleDataAvailable');
  if (event.data && event.data.size > 0) {
    console.log(event.data);
    handleBlob(event.data);
  }
};

const handleBlob = (blob) => {
  console.log('handleBlob - blob type: ' + blob.type);
  blob.arrayBuffer()
    .then((buffer) => {
      console.log(buffer);
      console.log(audioChunks.length + '. ' + buffer);
      sendMessage(socket, JSON.stringify({
        type: 'audio',
        content: {
          command: 'stream',
          audioData: new Uint8Array(buffer)
        }
      }));
    })
    .catch(function(err) {
      console.log(err);
    });
};

window.toggleRecording = () => {
  if (!recording) {
    startRecording();
  } else {
    stopRecording();
  }
};

initRecording();
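A small point worth flagging in the recorder options above: audioBitsPerSecond is the target bitrate of the encoded Opus stream, not the sample rate, so passing sampleRate (16000) there requests a 16 kbps encoding; the sample rate itself is carried inside the WebM/Opus container. A hedged sketch of options that keep the two separate (the 32000 bps figure is just an illustrative bitrate):

// Sketch: the MIME type selects the container/codec, audioBitsPerSecond sets
// the encoder bitrate; the capture sample rate is chosen by the browser and
// embedded in the WebM/Opus stream that WEBM_OPUS decoding expects.
const recorderOptions = {
  mimeType: 'audio/webm;codecs=opus',
  audioBitsPerSecond: 32000   // illustrative encoder bitrate, not a sample rate
};
// const mediaRecorder = new MediaRecorder(stream, recorderOptions);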
On the server side I specify google.cloud.speech.AudioEncoding.WEBM_OPUS as the encoding in the google.cloud.speech.RecognitionConfig, so I assume I'm declaring the same container and codec. Right?
The server is divided into two parts:
the ingestor, which reads the socket and writes the data as received to a Redis queue
the transcriber, which reads the Redis queue and forwards the data to Google Cloud Speech-to-Text
# Ingestor
import queue
import time
import eventlet
from flask import Flask, render_template, session, copy_current_request_context
from flask_socketio import SocketIO, emit, disconnect
import redis

eventlet.monkey_patch()

host = '127.0.0.1'
port = 5000
redisHost = 's2t_memory_store'
redisPort = 6379
redisQueue = 'livequeue'
id = 'ingestor'
maxPackets = 500
async_mode = None

app = Flask(__name__)
socketio = SocketIO(app, async_mode='eventlet')
thread = None
redisDatabase = redis.Redis(host=redisHost, port=redisPort, db=0,
                            health_check_interval=2, socket_timeout=3)
buffer = queue.Queue()


@socketio.on('connect', namespace='/ingestor')
def connect():
    print('%s socket connected!' % id)
    global thread
    if thread is None:
        thread = socketio.start_background_task(_enqueue_audio, redisQueue)


@socketio.on('my_event', namespace='/ingestor')
def handle_data(data):
    """Stores the received audio data in a local buffer."""
    buffer.put(data['data'], block=False)
    session['receive_count'] = session.get('receive_count', 0) + 1
    emit('my_response',
         {'data': data['data'], 'count': session['receive_count']})


def _enqueue_audio(redis_queue):
    """Blocking-reads data from the buffer and adds to Redis queue."""
    print('%s enqueue_audio thread started!' % id)
    while True:
        try:
            chunk = buffer.get(block=True)
            print('Buffer read: {}'.format(chunk))
            val = redisDatabase.lpush(redis_queue, chunk)
            # debugging; under normal circumstances audio should not be accumulating
            if val > 5:
                print('Ingested audio queue length: %d' % val)
        except redis.exceptions.RedisError as err:
            print('Error pushing into Redis queue: %s' % err)


@app.route('/')
def index():
    return render_template('test.html', sync_mode=socketio.async_mode)


if __name__ == '__main__':
    print("Starting ingestor")
    socketio.init_app(app)
# Transcriber
import redis
import json
from google.cloud import speech

encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
sample_rate = 16000
language_code = 'fr-FR'

host = '127.0.0.1'
port = 5000
redis_host = 's2t_memory_store'
redis_port = 6379
redis_queue = 'livequeue'
id = 'transcriber'


class redisStream(object):
    def __init__(self, host, port, queue):
        self.host = host
        self.port = port
        self.queue = queue
        self.redis_conn = redis.Redis(host=self.host, port=self.port)


def redis_generator(redis_conn, redis_queue):
    while True:
        yield redis_conn.blpop(redis_queue)[1]


def main():
    redis_conn = redis.Redis(host=redis_host, port=redis_port, db=0,
                             health_check_interval=2,
                             socket_timeout=None,
                             socket_connect_timeout=None)
    speech_client = speech.SpeechClient()
    recognition_config = speech.RecognitionConfig(
        encoding=encoding,
        sample_rate_hertz=sample_rate,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True)

    for message in redis_generator(redis_conn, redis_queue):
        print(f'REDIS STREAM: {message}')
        messageData = json.loads(message)
        if messageData['content']['command'] == 'stream':
            print('AUDIO DATA: %s' % messageData['content']['audioData'])
            chunk = bytes(messageData['content']['audioData'].values())
            print('CHUNK: %s' % chunk)
            request = speech.StreamingRecognizeRequest(audio_content=chunk)
            responses = speech_client.streaming_recognize(config=streaming_config, requests=[request])
            print('RESPONSES: %s' % responses)
            if responses:
                for response in responses:
                    for i, result in enumerate(response.results):
                        alternative = result.alternatives[0]
                        print("-" * 20)
                        print(u"First alternative of result {}".format(i))
                        print(u"Transcript: {}".format(alternative.transcript))
                        print(u"Confidence: {}".format(alternative.confidence))
                        print("-" * 20)
            else:
                print('No response')


if __name__ == "__main__":
    print("Starting transcriber")
    main()
What is wrong? Is there an example somewhere of the right way to transfer such a live stream?
I have read many threads and publications on the web but was never able to make it work correctly.
Thanks for your answer.
You need to write some Python code, or show the code you tried that did not fix the issue you were seeing.

How to send MediaStream AUDIO data with socket.io

I have been having trouble taking audio data that is being recorded from a mic and sending it to the other clients in the room so that people can speak to each other in real time. I have a method of doing this, but it is inefficient and choppy...
setInterval(() => {
  navigator.mediaDevices.getUserMedia({ audio: true })
    .then(stream => {
      const mediaRecorder = new MediaRecorder(stream);
      mediaRecorder.start();
      const audioChunks = [];
      mediaRecorder.addEventListener("dataavailable", (event) => {
        audioChunks.push(event.data);
      });
      mediaRecorder.addEventListener("stop", () => {
        socket.emit('liveAudioToServer', audioChunks);
      });
      setTimeout(() => {
        mediaRecorder.stop();
      }, 2000);
    });
}, 2000);
This snippet records audio and sends a buffer every two seconds so that the receiving clients assemble it and play it on arrival. I know there has to be a better way to do this. I tried a different method, but I just get an error.
socket.on('active', () => {
  if (navigator.getUserMedia) {
    navigator.getUserMedia(
      { audio: true },
      function (stream) {
        const audioContext3 = new AudioContext();
        const audioSource3 = audioContext3.createMediaStreamSource(stream);
        const analyser3 = audioContext3.createAnalyser();
        audioSource3.connect(analyser3);
        analyser3.fftSize = 256;
        const bufferLength = analyser3.frequencyBinCount;
        const dataArray = new Uint8Array(bufferLength);
        function sendAudioChunks() {
          analyser3.getByteFrequencyData(dataArray);
          requestAnimationFrame(sendAudioChunks);
          socket.emit('liveAudioToServer', dataArray);
        }
        sendAudioChunks();
      },
      function () { console.log("Error 003."); }
    );
  }
});
Can anyone help me?
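A less choppy pattern (a sketch, not a drop-in fix for the code above) is to create the MediaRecorder once and let it emit chunks continuously via the timeslice argument to start(), instead of tearing the recorder down every two seconds:

// Sketch: one long-lived recorder, emitting a chunk roughly every 250 ms.
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
  const mediaRecorder = new MediaRecorder(stream);
  mediaRecorder.addEventListener("dataavailable", (event) => {
    if (event.data && event.data.size > 0) {
      socket.emit('liveAudioToServer', event.data);  // forward each chunk as it arrives
    }
  });
  mediaRecorder.start(250);  // timeslice: fire dataavailable about every 250 ms
});

Note that only the first chunk from a given recorder contains the WebM header, so receivers generally need to concatenate chunks from the same recorder rather than play each chunk in isolation.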

How to transform an audio blob into a WAV file using ReactJS or JavaScript?

I am working on a VUI interface with a ReactJS frontend. I have a blob that I can play, but I want to convert it to a .wav file using React or JavaScript so I can send it to my server.
I tried a lot of things but found no solution.
toggleRecording() {
  if (this.state.start === 1) {
    console.log("we start recording", this.state.start);
    this.setState({ start: 0, recognition: "" });
    const constraints = {
      audio: {
        sampleRate: 16000,
        channelCount: 1,
      }
    };
    navigator.mediaDevices.getUserMedia(constraints)
      .then(stream => {
        console.log(this);
        this.recorder = new MediaRecorder(stream);
        this.recorder.start();
        const audioChunks = [];
        this.recorder.addEventListener("dataavailable", event => {
          audioChunks.push(event.data);
        });
        this.recorder.addEventListener("stop", () => {
          const audioBlob = new Blob(audioChunks, { 'type': 'audio/wav' });
          const audioUrl = URL.createObjectURL(audioBlob);
          console.log("test: ", audioUrl);
          console.log(audioBlob.type);
          fetch('http://127.0.0.1:6060/api/sendaudio', {
            method: "post",
            headers: { 'Content-Type': 'audio/wav' },
            body: audioBlob
          })
            .then(response => {
              return response.text();
            }).then(text => {
              console.log(text);
              this.setState({ recognition: text });
            });
          //to play the audio file:
          const audio = new Audio(audioUrl);
          audio.play();
        });
      });
  }
}
I expect to get a WAV file to post to my server but don't know how to do that.
You can try this package if you don't mind adding a new dependency: https://www.npmjs.com/package/audiobuffer-to-wav
Hope it will work for you.
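A rough usage sketch with that package (hedged: per its README, the exported function takes a decoded AudioBuffer and returns an ArrayBuffer of WAV bytes; blobToWavAndUpload is an illustrative helper and the endpoint URL is the one from the question):

import toWav from 'audiobuffer-to-wav';

// Decode the recorded blob (whatever container MediaRecorder produced),
// then re-encode the PCM as WAV and upload it.
async function blobToWavAndUpload(audioBlob) {
  const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
  const arrayBuffer = await audioBlob.arrayBuffer();
  const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
  const wavArrayBuffer = toWav(audioBuffer);               // ArrayBuffer of WAV data
  const wavBlob = new Blob([wavArrayBuffer], { type: 'audio/wav' });
  return fetch('http://127.0.0.1:6060/api/sendaudio', {
    method: 'post',
    headers: { 'Content-Type': 'audio/wav' },
    body: wavBlob,
  });
}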
