I would like to do speech analysis in the browser. I have a microphone input as my main stream, created when I start the speech recognition object, and I would like to get frequency data from that same stream. How do I connect an audio context source to the same microphone stream that the voice recognition uses? Do I have to request microphone permission twice? I tried the code below, but getMicData() only logs '0' values.
JS
var SpeechRecognition = SpeechRecognition || webkitSpeechRecognition;
var requestAnimationFrame = window.requestAnimationFrame || window.mozRequestAnimationFrame ||
    window.webkitRequestAnimationFrame || window.msRequestAnimationFrame;
var cancelAnimationFrame = window.cancelAnimationFrame || window.mozCancelAnimationFrame;

let audioCtx, analyser;
let amplitude;
let bufferLength;
let dataArray;
let bassArray;
let trebleArray;

let recognition = new SpeechRecognition();
recognition.continuous = false;
recognition.lang = 'en-US';
recognition.interimResults = false;
recognition.maxAlternatives = 1;

let animationRequest;
const recordbtn = document.getElementById('record');

recordbtn.addEventListener('click', () => {
  // start speech rec
  recognition.start();
  audioCtx = new (window.AudioContext || window.webkitAudioContext)();
  analyser = audioCtx.createAnalyser();
  analyser.fftSize = 512;
  analyser.smoothingTimeConstant = 0.85;
});

recognition.onstart = function () {
  document.getElementById('font-name').innerHTML = "START SPEAKING";
  getMicData();
};

recognition.onspeechend = function () {
  cancelAnimationFrame(animationRequest);
};

function getMicData() {
  animationRequest = window.requestAnimationFrame(getMicData);
  bufferLength = analyser.fftSize;
  dataArray = new Uint8Array(bufferLength);
  analyser.getByteFrequencyData(dataArray);

  let maxAmp = 0;
  let sumOfAmplitudes = 0;
  for (let i = 0; i < bufferLength; i++) {
    let thisAmp = dataArray[i]; // amplitude of current bin
    if (thisAmp > maxAmp) {
      sumOfAmplitudes = sumOfAmplitudes + thisAmp;
    }
  }
  let averageAmplitude = sumOfAmplitudes / bufferLength;
  console.log(averageAmplitude);
  return averageAmplitude;
}
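Note that in the snippet above nothing is ever connected to the analyser: SpeechRecognition captures the microphone internally and does not expose its stream, so the analyser has no input and every bin stays at zero. Below is a minimal sketch of one way to wire it up, assuming a separate getUserMedia call (browsers normally remember the permission per origin, so this should not produce a second prompt once access has been granted):

// Sketch only: request the mic explicitly and feed the analyser from it;
// SpeechRecognition keeps using its own internal capture in parallel.
recordbtn.addEventListener('click', async () => {
  recognition.start();
  audioCtx = new (window.AudioContext || window.webkitAudioContext)();
  analyser = audioCtx.createAnalyser();
  analyser.fftSize = 512;
  analyser.smoothingTimeConstant = 0.85;

  // Without this connection, getByteFrequencyData() only returns zeros.
  const micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const source = audioCtx.createMediaStreamSource(micStream);
  source.connect(analyser);
});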
Related
I have a web application of my own, based on the peerjs library (it is a video conference).
I'm trying to make a recording with MediaRecorder, but I've run into a very unpleasant case.
The code for capturing my desktop stream is the following:
let chooseScreen = document.querySelector('.chooseScreenBtn')
chooseScreen.onclick = async () => {
let desktopStream = await navigator.mediaDevices.getDisplayMedia({ video:true, audio: true });
}
I then successfully attach the received desktopStream to a videoElement in the DOM:
const videoElement = doc.querySelector('.videoElement')
videoElement.srcObject = desktopStream
videoElement.muted = false;
videoElement.onloadedmetadata = ()=>{videoElement.play();}
For example, I capture desktopStream on a page with an active conference where everyone hears and sees each other.
To check the video and audio in desktopStream, I play a video in a desktop video player.
I can hear the audio from my desktop, but the audio from the other participants cannot be heard.
Of course, when I feed desktopStream into MediaRecorder, I get a video file with no sound from anyone except my desktop. Any ideas on how to solve this?
Chrome's MediaRecorder API will only record a single audio track, so the desktop and microphone audio have to be mixed into one track first.
createMediaStreamSource can take both the desktop and microphone streams; by connecting them to a single node created with createMediaStreamDestination, you get one combined stream that can be piped into the MediaRecorder API.
const mergeAudioStreams = (desktopStream, voiceStream) => {
  const context = new AudioContext();

  // Create a couple of sources
  const source1 = context.createMediaStreamSource(desktopStream);
  const source2 = context.createMediaStreamSource(voiceStream);
  const destination = context.createMediaStreamDestination();

  const desktopGain = context.createGain();
  const voiceGain = context.createGain();
  desktopGain.gain.value = 0.7;
  voiceGain.gain.value = 0.7;

  source1.connect(desktopGain).connect(destination);
  // Connect source2
  source2.connect(voiceGain).connect(destination);

  return destination.stream.getAudioTracks();
};
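For example (a sketch only; the fuller example below does the same thing with UI wiring), the returned audio tracks can be combined with the desktop video track and handed to MediaRecorder:

// Sketch: combine the desktop video track with the mixed audio tracks
// and record the result as a single webm file.
const tracks = [
  ...desktopStream.getVideoTracks(),
  ...mergeAudioStreams(desktopStream, voiceStream)
];
const recorder = new MediaRecorder(new MediaStream(tracks), {
  mimeType: 'video/webm; codecs=vp8,opus'
});
recorder.start();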
It is also possible to use two or more audio inputs plus a video input; here is a fuller example:
window.onload = () => {
  const warningEl = document.getElementById('warning');
  const videoElement = document.getElementById('videoElement');
  const captureBtn = document.getElementById('captureBtn');
  const startBtn = document.getElementById('startBtn');
  const stopBtn = document.getElementById('stopBtn');
  const download = document.getElementById('download');
  const audioToggle = document.getElementById('audioToggle');
  const micAudioToggle = document.getElementById('micAudioToggle');

  if ('getDisplayMedia' in navigator.mediaDevices) warningEl.style.display = 'none';

  let blobs;
  let blob;
  let rec;
  let stream;
  let voiceStream;
  let desktopStream;

  const mergeAudioStreams = (desktopStream, voiceStream) => {
    const context = new AudioContext();
    const destination = context.createMediaStreamDestination();
    let hasDesktop = false;
    let hasVoice = false;

    if (desktopStream && desktopStream.getAudioTracks().length > 0) {
      // If you don't want to share Audio from the desktop it should still work with just the voice.
      const source1 = context.createMediaStreamSource(desktopStream);
      const desktopGain = context.createGain();
      desktopGain.gain.value = 0.7;
      source1.connect(desktopGain).connect(destination);
      hasDesktop = true;
    }

    if (voiceStream && voiceStream.getAudioTracks().length > 0) {
      const source2 = context.createMediaStreamSource(voiceStream);
      const voiceGain = context.createGain();
      voiceGain.gain.value = 0.7;
      source2.connect(voiceGain).connect(destination);
      hasVoice = true;
    }

    return (hasDesktop || hasVoice) ? destination.stream.getAudioTracks() : [];
  };

  captureBtn.onclick = async () => {
    download.style.display = 'none';
    const audio = audioToggle.checked || false;
    const mic = micAudioToggle.checked || false;

    desktopStream = await navigator.mediaDevices.getDisplayMedia({ video: true, audio: audio });

    if (mic === true) {
      voiceStream = await navigator.mediaDevices.getUserMedia({ video: false, audio: mic });
    }

    const tracks = [
      ...desktopStream.getVideoTracks(),
      ...mergeAudioStreams(desktopStream, voiceStream)
    ];
    console.log('Tracks to add to stream', tracks);

    stream = new MediaStream(tracks);
    console.log('Stream', stream);
    videoElement.srcObject = stream;
    videoElement.muted = true;

    blobs = [];
    rec = new MediaRecorder(stream, { mimeType: 'video/webm; codecs=vp8,opus' });
    rec.ondataavailable = (e) => blobs.push(e.data);
    rec.onstop = async () => {
      blob = new Blob(blobs, { type: 'video/webm' });
      let url = window.URL.createObjectURL(blob);
      download.href = url;
      download.download = 'test.webm';
      download.style.display = 'block';
    };

    startBtn.disabled = false;
    captureBtn.disabled = true;
    audioToggle.disabled = true;
    micAudioToggle.disabled = true;
  };

  startBtn.onclick = () => {
    startBtn.disabled = true;
    stopBtn.disabled = false;
    rec.start();
  };

  stopBtn.onclick = () => {
    captureBtn.disabled = false;
    audioToggle.disabled = false;
    micAudioToggle.disabled = false;
    startBtn.disabled = true;
    stopBtn.disabled = true;
    rec.stop();
    stream.getTracks().forEach(s => s.stop());
    videoElement.srcObject = null;
    stream = null;
  };
};
Audio capture with getDisplayMedia is only fully supported in Chrome on Windows. Other platforms have a number of limitations:
there is no support for audio capture at all under Firefox or Safari;
on Chrome/Chromium for Linux and macOS, only the audio of a Chrome/Chromium tab can be captured, not the audio of a non-browser application window.
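Because of these differences, you cannot know in advance whether system audio will actually be captured. One hedged workaround (a sketch, not part of the original answer) is to inspect the returned stream at runtime:

// Sketch: detect whether the chosen surface actually yielded an audio track;
// on platforms without system-audio capture the stream will be video-only
// even when { audio: true } was requested.
async function captureWithAudioCheck() {
  const desktopStream = await navigator.mediaDevices.getDisplayMedia({ video: true, audio: true });
  if (desktopStream.getAudioTracks().length === 0) {
    console.warn('No system audio on this platform; only the microphone will be recorded.');
  }
  return desktopStream;
}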
I am looking for a solution that allows a website to access and process a stream of audio from the user's microphone. It's unfamiliar territory for me. I've been working with WebRTC examples, but so far I have only got it working on:
Firefox and Chrome on a 2011 MacBook Air running Sierra;
Firefox on Windows 10.
My script throws errors on some other browser/OS combinations; on others it doesn't, but regardless it doesn't function.
Is there a better solution?
!function(t,e){
"use strict";
t.AudioContext = t.AudioContext||t.webkitAudioContext,
t.OfflineAudioContext = t.OfflineAudioContext||t.webkitOfflineAudioContext;
var o=AudioContext.prototype,
r=new AudioContext,n=function(t,e){
return void 0===t&&void 0!==e
},
c=r.createBufferSource().constructor.prototype;
if(n(c.start,c.noteOn)||n(c.stop,c.noteOff)){
var i=o.createBufferSource;
o.createBufferSource=function(){
var t=i.call(this);
return t.start=t.start||t.noteOn,t.stop=t.stop||t.noteOff,t
}
}
if("function"==typeof r.createOscillator){
var a=r.createOscillator().constructor.prototype;
if(n(a.start,a.noteOn)||n(a.stop,a.noteOff)){
var s=o.createOscillator;o.createOscillator=function(){
var t=s.call(this);
return t.start=t.start||t.noteOn,t.stop=t.stop||t.noteOff,t
}
}
}
if(void 0===o.createGain&&void 0!==o.createGainNode&&(o.createGain=o.createGainNode),void 0===o.createDelay&&void 0!==o.createDelayNode&&(o.createDelay=o.createGainNode),void 0===o.createScriptProcessor&&void 0!==o.createJavaScriptNode&&(o.createScriptProcessor=o.createJavaScriptNode),-1!==navigator.userAgent.indexOf("like Mac OS X")){
var u=AudioContext;t.AudioContext=function(){
function t(){
r.start(0),r.connect(n),n.connect(e.destination)
}
var e=new u,
o=document.body,
r=e.createBufferSource(),
n=e.createScriptProcessor(256,1,1);
return o.addEventListener("touchstart",t,!1),n.onaudioprocess=function(){
r.disconnect(),
n.disconnect(),
o.removeEventListener("touchstart",t,!1),
n.onaudioprocess=null
},e
}
}
}(window);
var context, analyser, gUM, dataArray, bufferLength, connect_source;
if (AudioContext){
context = new AudioContext();
analyser = context.createAnalyser();
function success(stream){
// Create a new volume meter and connect it.
var source = context.createMediaStreamSource(stream);
compressor = context.createDynamicsCompressor();
compressor.threshold.value = -50;
compressor.knee.value = 40;
compressor.ratio.value = 12;
compressor.reduction.value = -20;
compressor.attack.value = 0;
compressor.release.value = 0.25;
filter = context.createBiquadFilter();
filter.Q.value = 8.30;
filter.frequency.value = 355;
filter.gain.value = 3.0;
filter.type = 'bandpass';
filter.connect(compressor);
source.connect( filter );
source.connect(analyser);
analyser.fftSize = 512;
bufferLength = analyser.frequencyBinCount; // half the FFT value
dataArray = new Uint8Array(bufferLength); // create an array to store the data
};
function fail(e){
if(e){}
console.log(e);
aizuchi.error();
};
var select = document.getElementById("AudioSourceSelect");
function generateSelector(devices){
while(select.firstChild) select.removeChild(select.firstChild);
var opt;
for(var l = devices.length; l--;){
console.log(devices[l]);
if(devices[l].kind == "audioinput"){
opt = document.createElement("option")
opt.text = devices[l].label
opt.value = devices[l].deviceId
if(devices[l].deviceId == "default") opt.setAttribute("selected","")
select.appendChild( opt );
}
}
select.onchange = function(){
connect_source(this.value);
}
select.onchange();
}
try {
navigator.mediaDevices.enumerateDevices().then(generateSelector)
} catch(e){
fail(e);
}
connect_source = function(audioSource){
try {
if(Modernizr.getusermedia){
gUM = Modernizr.prefixed('getUserMedia', navigator);
gUM({video:false, audio : {deviceId: audioSource ? {exact: audioSource} : undefined}},success,fail);
} else {
navigator.mediaDevices.getUserMedia({video:false, audio : {deviceId: audioSource ? {exact: audioSource} : undefined}}).then(success,fail);
}
} catch(e){
fail(e);
}
}
}
Try
var AudioContext = window.AudioContext || window.webkitAudioContext;
var context = new AudioContext();
It doesn't look like the browsers have unified on the syntax of this one yet.
Source: MDN
I am currently trying to simulate audio auto-recording: the user speaks, and after he stops, the audio should be submitted to the backend.
I already have a sample script that submits audio with start and stop click handlers.
I'm trying to get some kind of value such as amplitude, volume, or a threshold, but I'm not sure whether MediaRecorder supports this or whether I need to look at the Web Audio API or other solutions.
Can I achieve this with MediaRecorder?
Regarding the audio analysis of the mic input, the following example shows how to take the audio captured by the mic, create an analyser with the createAnalyser method of the AudioContext (webkitAudioContext in older WebKit browsers), connect the stream to the analyser, and compute an FFT of the specified size in order to calculate the pitch and display the output sound wave.
window.AudioContext = window.AudioContext || window.webkitAudioContext;
var audioContext = null;
var isPlaying = false;
var sourceNode = null;
var analyser = null;
var theBuffer = null;
var audioCtx = null;
var mediaStreamSource = null;
var rafID = null;
var j = 0;
var waveCanvas = null;
window.onload = function() {
audioContext = new AudioContext();
audioCtx = document.getElementById( "waveform" );
canvasCtx = audioCtx.getContext("2d");
};
function getUserMedia(dictionary, callback) {
try {
navigator.getUserMedia =
navigator.getUserMedia ||
navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia;
navigator.getUserMedia(dictionary, callback, error);
} catch (e) {
alert('getUserMedia threw exception :' + e);
}
}
function gotStream(stream) {
// Create an AudioNode from the stream.
mediaStreamSource = audioContext.createMediaStreamSource(stream);
// Connect it to the destination.
analyser = audioContext.createAnalyser();
analyser.fftSize = 1024;
mediaStreamSource.connect( analyser );
updatePitch();
}
function toggleLiveInput()
{
canvasCtx.clearRect(0, 0, audioCtx.width, audioCtx.height);
canvasCtx.beginPath();
j = 0;
buflen = 1024;
buf = new Float32Array( buflen );
document.getElementById('toggleLiveInput').disabled = true;
document.getElementById('toggleLiveInputStop').disabled = false;
if (isPlaying) {
//stop playing and return
sourceNode.stop( 0 );
sourceNode = null;
//analyser = null;
isPlaying = false;
if (!window.cancelAnimationFrame)
window.cancelAnimationFrame = window.webkitCancelAnimationFrame;
window.cancelAnimationFrame( rafID );
}
getUserMedia(
{
"audio": {
"mandatory": {
"googEchoCancellation": "false",
"googAutoGainControl": "false",
"googNoiseSuppression": "false",
"googHighpassFilter": "false"
},
"optional": []
},
}, gotStream);
}
function stop()
{
document.getElementById('toggleLiveInput').disabled = false;
document.getElementById('toggleLiveInputStop').disabled = true;
//waveCanvas.closePath();
if (!window.cancelAnimationFrame)
window.cancelAnimationFrame = window.webkitCancelAnimationFrame;
window.cancelAnimationFrame( rafID );
return "start";
}
function updatePitch()
{
analyser.fftSize = 1024;
analyser.getFloatTimeDomainData(buf);
canvasCtx.strokeStyle = "red";
for (var i=0;i<2;i+=2)
{
x = j*5;
if(audioCtx.width < x)
{
x = audioCtx.width - 5;
previousImage = canvasCtx.getImageData(5, 0, audioCtx.width, audioCtx.height);
canvasCtx.putImageData(previousImage, 0, 0);
canvasCtx.beginPath();
canvasCtx.lineWidth = 2;
canvasCtx.strokeStyle = "red";
prex = prex - 5;
canvasCtx.lineTo(prex,prey);
prex = x;
prey = 128+(buf[i]*128);
canvasCtx.lineTo(x,128+(buf[i]*128));
canvasCtx.stroke();
}
else
{
prex = x;
prey = 128+(buf[i]*128);
canvasCtx.lineWidth = 2;
canvasCtx.lineTo(x,128+(buf[i]*128));
canvasCtx.stroke();
}
j++;
}
if (!window.requestAnimationFrame)
window.requestAnimationFrame = window.webkitRequestAnimationFrame;
rafID = window.requestAnimationFrame( updatePitch );
}
function error() {
console.error(new Error('error while generating audio'));
}
Try the demo here.
Example adapted from pitch-liveinput.
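MediaRecorder itself does not expose amplitude, so to auto-stop after silence one possible approach (a sketch, assuming an AnalyserNode named analyser is already connected to the mic stream as in the example above, and that a MediaRecorder instance called recorder exists; the threshold and timeout values are illustrative only) is to poll the analyser and stop the recorder once the level stays low for a while:

// Sketch: stop a MediaRecorder after roughly 1.5 s of silence.
const data = new Uint8Array(analyser.fftSize);
const SILENCE_THRESHOLD = 10;   // average deviation on the 0-255 byte scale, pick empirically
const SILENCE_DURATION = 1500;  // ms of continuous silence before stopping

let silenceStart = performance.now();

function checkSilence(now) {
  analyser.getByteTimeDomainData(data);
  // Average deviation from the 128 midpoint approximates loudness.
  let sum = 0;
  for (let i = 0; i < data.length; i++) sum += Math.abs(data[i] - 128);
  const level = sum / data.length;

  if (level > SILENCE_THRESHOLD) silenceStart = now; // still speaking
  if (now - silenceStart > SILENCE_DURATION && recorder.state === 'recording') {
    recorder.stop(); // onstop fires, where the blob can be submitted to the backend
    return;
  }
  requestAnimationFrame(checkSilence);
}
requestAnimationFrame(checkSilence);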
I'm trying to get a stream of data from my microphone (e.g. volume, pitch).
So far, I've been using getUserMedia to access my microphone audio, but I couldn't find a way to extract the data from it.
My code:
$(function () {
var audioContext = new AudioContext();
var audioInput = null,
realAudioInput = null,
inputPoint = null,
analyserNode = null;
if (!navigator.getUserMedia)
navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia || navigator.msGetUserMedia;
if (navigator.getUserMedia){
navigator.getUserMedia({audio:true}, success, function(e) {
alert('Error capturing audio.');
});
} else alert('getUserMedia not supported in this browser.');
function success(stream){
inputPoint = audioContext.createGain();
realAudioInput = audioContext.createMediaStreamSource(stream);
audioInput = realAudioInput;
audioInput.connect(inputPoint);
analyserNode = audioContext.createAnalyser();
analyserNode.fftSize = 2048;
inputPoint.connect( analyserNode );
}
function live(){
requestAnimationFrame(live);
var freqByteData = new Uint8Array(analyserNode.frequencyBinCount);
analyserNode.getByteFrequencyData(freqByteData);
console.log(analyserNode.frequencyBinCount);
}
});
Here is a version of your code which does two things:
it retrieves the raw PCM audio buffer from the live microphone and sends it to console.log (to show the JavaScript console, hit Ctrl+Shift+I); this is the raw PCM audio curve of the streaming mic data in the time domain;
it also runs the same audio data through an FFT (fast Fourier transform), also sent to console.log; this is the frequency-domain representation of the same Web Audio API event-loop buffer.
NOTE: either wear headphones or turn down your speaker volume, otherwise you will hear the squeal of audio feedback, since the mic will pick up the speaker audio à la Jimi Hendrix!
<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>capture microphone then show time & frequency domain output</title>
<script type="text/javascript">
var webaudio_tooling_obj = function () {
var audioContext = new AudioContext();
console.log("audio is starting up ...");
var BUFF_SIZE_RENDERER = 16384;
var audioInput = null,
microphone_stream = null,
gain_node = null,
script_processor_node = null,
script_processor_analysis_node = null,
analyser_node = null;
if (!navigator.getUserMedia)
navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia || navigator.msGetUserMedia;
if (navigator.getUserMedia){
navigator.getUserMedia({audio:true},
function(stream) {
start_microphone(stream);
},
function(e) {
alert('Error capturing audio.');
}
);
} else { alert('getUserMedia not supported in this browser.'); }
// ---
function show_some_data(given_typed_array, num_row_to_display, label) {
var size_buffer = given_typed_array.length;
var index = 0;
console.log("__________ " + label);
if (label === "time") {
for (; index < num_row_to_display && index < size_buffer; index += 1) {
var curr_value_time = (given_typed_array[index] / 128) - 1.0;
console.log(curr_value_time);
}
} else if (label === "frequency") {
for (; index < num_row_to_display && index < size_buffer; index += 1) {
console.log(given_typed_array[index]);
}
} else {
throw new Error("ERROR - must pass time or frequency");
}
}
function process_microphone_buffer(event) {
var i, N, inp, microphone_output_buffer;
microphone_output_buffer = event.inputBuffer.getChannelData(0); // just mono - 1 channel for now
}
function start_microphone(stream){
gain_node = audioContext.createGain();
gain_node.connect( audioContext.destination );
microphone_stream = audioContext.createMediaStreamSource(stream);
microphone_stream.connect(gain_node);
script_processor_node = audioContext.createScriptProcessor(BUFF_SIZE_RENDERER, 1, 1);
script_processor_node.onaudioprocess = process_microphone_buffer;
microphone_stream.connect(script_processor_node);
// --- enable volume control for output speakers
document.getElementById('volume').addEventListener('change', function() {
var curr_volume = this.value;
gain_node.gain.value = curr_volume;
console.log("curr_volume ", curr_volume);
});
// --- setup FFT
script_processor_analysis_node = audioContext.createScriptProcessor(2048, 1, 1);
script_processor_analysis_node.connect(gain_node);
analyser_node = audioContext.createAnalyser();
analyser_node.smoothingTimeConstant = 0;
analyser_node.fftSize = 2048;
microphone_stream.connect(analyser_node);
analyser_node.connect(script_processor_analysis_node);
var buffer_length = analyser_node.frequencyBinCount;
var array_freq_domain = new Uint8Array(buffer_length);
var array_time_domain = new Uint8Array(buffer_length);
console.log("buffer_length " + buffer_length);
script_processor_analysis_node.onaudioprocess = function() {
// get the average for the first channel
analyser_node.getByteFrequencyData(array_freq_domain);
analyser_node.getByteTimeDomainData(array_time_domain);
// draw the spectrogram
if (microphone_stream.playbackState == microphone_stream.PLAYING_STATE) {
show_some_data(array_freq_domain, 5, "frequency");
show_some_data(array_time_domain, 5, "time"); // store this to record to aggregate buffer/file
}
};
}
}(); // webaudio_tooling_obj = function()
</script>
</head>
<body>
<p>Volume</p>
<input id="volume" type="range" min="0" max="1" step="0.1" value="0.5"/>
</body>
</html>
How do I fix this?
It's failing; it was working fine in the past, but not anymore.
(Normally it shows a green bar in the canvas if you speak into the mic.)
<script type="text/javascript">
var navigator = window.navigator;
navigator.getMedia = ( navigator.getUserMedia ||
navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia ||
navigator.msGetUserMedia);
navigator.getMedia({ video: false, audio: true}, function(stream) {
console.log('doing....');
audioContext = new webkitAudioContext();
analyser = audioContext.createAnalyser();
microphone = audioContext.createMediaStreamSource(stream);
javascriptNode = audioContext.createJavaScriptNode(2048, 1, 1);
analyser.smoothingTimeConstant = 0.3;
analyser.fftSize = 1024;
microphone.connect(analyser);
analyser.connect(javascriptNode);
javascriptNode.connect(audioContext.destination);
canvasContext = document.getElementById("test");
canvasContext= canvasContext.getContext("2d");
javascriptNode.onaudioprocess = function() {
console.log('doing.... bla bla');
var array = new Uint8Array(analyser.frequencyBinCount);
analyser.getByteFrequencyData(array);
var values = 0;
var length = array.length;
for (var i = 0; i < length; i++) {
values += array[i];
}
var average = values / length;
canvasContext.clearRect(0, 0, 300, 130);
canvasContext.fillStyle = '#00ff00';
canvasContext.fillRect(0,130-average,300,130);
}
console.log('doing.... done');
}, function(err) {
console.log("An error occured! " + err);
});
</script>
webkitAudioContext() does not have createJavaScriptNode, and I believe you should not use it anywhere.
Try javascriptNode = audioContext.createScriptProcessor(2048, 1, 1);
createJavaScriptNode() has been marked as obsolete (https://developer.mozilla.org/en-US/docs/Web/API/AudioContext.createJavaScriptNode), and its use is now discouraged. The method has been renamed to createScriptProcessor(); here is some documentation on it: https://developer.mozilla.org/en-US/docs/Web/API/AudioContext.createScriptProcessor
Hope this helps!
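Applied to the snippet above, a minimal corrected setup might look like the sketch below. It assumes the same canvas element with id "test" as the original, and it also swaps the deprecated navigator.getMedia call for the promise-based navigator.mediaDevices.getUserMedia, which is the currently supported API:

// Sketch: same green-bar level meter, using the standard AudioContext,
// createScriptProcessor and promise-based getUserMedia.
navigator.mediaDevices.getUserMedia({ video: false, audio: true }).then(function (stream) {
  var audioContext = new (window.AudioContext || window.webkitAudioContext)();
  var analyser = audioContext.createAnalyser();
  var microphone = audioContext.createMediaStreamSource(stream);
  var javascriptNode = audioContext.createScriptProcessor(2048, 1, 1);

  analyser.smoothingTimeConstant = 0.3;
  analyser.fftSize = 1024;

  microphone.connect(analyser);
  analyser.connect(javascriptNode);
  javascriptNode.connect(audioContext.destination);

  var canvasContext = document.getElementById('test').getContext('2d');

  javascriptNode.onaudioprocess = function () {
    var array = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(array);
    var values = 0;
    for (var i = 0; i < array.length; i++) values += array[i];
    var average = values / array.length;
    canvasContext.clearRect(0, 0, 300, 130);
    canvasContext.fillStyle = '#00ff00';
    canvasContext.fillRect(0, 130 - average, 300, 130);
  };
}).catch(function (err) {
  console.log('An error occurred! ' + err);
});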