Streaming into <audio> element - javascript

I would like to play audio from a web socket that sends packages of sound data of unknown total length. The playback should start as soon as the first package arrives and it should not be interrupted by new packages.
What I have done so far:
ws.onmessage = e => {
const soundDataBase64 = JSON.parse(e.data);
const bytes = window.atob(soundDataBase64);
const arrayBuffer = new window.ArrayBuffer(bytes.length);
const bufferView = new window.Uint8Array(arrayBuffer);
for (let i = 0; i < bytes.length; i++) {
bufferView[i] = bytes.charCodeAt(i);
}
const blob = new Blob([arrayBuffer], {"type": "audio/mp3"});
const objectURL = window.URL.createObjectURL(blob);
const audio = document.createElement("audio");
audio.src = objectURL;
audio.controls = "controls";
document.body.appendChild(audio);
};
However, to my knowledge, it is not possible to extend the size of ArrayBuffer and Uint8Array. I would have to create a new blob, object URL and assign it to the audio element. But I guess, this would interrupt the audio playback.
On the MDN page of <audio>, there is a hint to MediaStream, which looks promising. However, I am not quite sure how to write data onto a media stream and how to connect the media stream to an audio element.
Is it currently possible with JS to write something like pipe where I can input data on one end, which is then streamed to a consumer? How would seamless streaming be achieved in JS (preferably without a lot of micro management code)?

As #Kaiido pointed out in the comments, I can use the MediaSource object. After connecting a MediaSource object to an <audio> element in the DOM, I can add a SourceBuffer to an opened MediaSource object and then append ArrayBuffers to the SourceBuffer.
Example:
const ws = new window.WebSocket(url);
ws.onmessage = _ => {
console.log("Media source not ready yet... discard this package");
};
const mediaSource = new window.MediaSource();
const audio = document.createElement("audio");
audio.src = window.URL.createObjectURL(mediaSource);
audio.controls = true;
document.body.appendChild(audio);
mediaSource.onsourceopen = _ => {
const sourceBuffer = mediaSource.addSourceBuffer("audio/mpeg"); // mpeg appears to not work in Firefox, unfortunately :(
ws.onmessage = e => {
const soundDataBase64 = JSON.parse(e.data);
const bytes = window.atob(soundDataBase64);
const arrayBuffer = new window.ArrayBuffer(bytes.length);
const bufferView = new window.Uint8Array(arrayBuffer);
for (let i = 0; i < bytes.length; i++) {
bufferView[i] = bytes.charCodeAt(i);
}
sourceBuffer.appendBuffer(arrayBuffer);
};
};
I tested this successfully in Google Chrome 94. Unfortunately, in Firefox 92, the MIME type audio/mpeg seems not working. There, I get the error Uncaught DOMException: MediaSource.addSourceBuffer: Type not supported in MediaSource and the warning Cannot play media. No decoders for requested formats: audio/mpeg.

Related

How to continuously capture & save .jpg image frames DURING real time video stream?

Goal: I want to continuously capture & save .jpg image frames DURING real time video stream in Google Colab. I know how to convert the saved recorded video to .jpg image frames. I do not want that. I want to continuously capture & save .jpg image frames DURING real time video stream.
Current Code: I can successfully start, record, and save a real time video stream in Google Colab.
Thank you in advance for any help.
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
def record_video(filename='video.mp4'):
# This function uses the take_photo() function provided by the Colab team as a
# starting point, along with a bunch of stuff from Stack overflow, and some sample code
# from: https://developer.mozilla.org/enUS/docs/Web/API/MediaStream_Recording_API
js = Javascript("""
async function recordVideo() {
const options = { mimeType: "video/webm; codecs=vp9" };
const div = document.createElement('div');
const capture = document.createElement('button');
const stopCapture = document.createElement("button");
capture.textContent = "Start Recording";
capture.style.background = "green";
capture.style.color = "white";
stopCapture.textContent = "Stop Recording";
stopCapture.style.background = "red";
stopCapture.style.color = "white";
div.appendChild(capture);
const video = document.createElement('video');
const recordingVid = document.createElement("video");
video.style.display = 'block';
const stream = await navigator.mediaDevices.getUserMedia({video: true});
let recorder = new MediaRecorder(stream, options);
document.body.appendChild(div);
div.appendChild(video);
video.srcObject = stream;
await video.play();
// Resize the output to fit the video element.
google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
await new Promise((resolve) => {
capture.onclick = resolve;
});
recorder.start();
capture.replaceWith(stopCapture);
/*Want to convert and save video stream to .jpg image frames continuously in real time, until stop is clicked.
Trying to access video that has been recorded to this point before stop is clicked.
while{
Blob realTime = recorder.requestData();
print("BLOB")
}
https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder/requestData
*/
await new Promise((resolve) => stopCapture.onclick = resolve);
recorder.stop();
let recData = await new Promise((resolve) => recorder.ondataavailable = resolve);
let arrBuff = await recData.data.arrayBuffer();
stream.getVideoTracks()[0].stop();
div.remove();
let binaryString = "";
let bytes = new Uint8Array(arrBuff);
bytes.forEach((byte) => {
binaryString += String.fromCharCode(byte);
})
return btoa(binaryString);
}
""")
try:
display(js)
data = eval_js('recordVideo({})')
binary = b64decode(data)
with open(filename, "wb") as video_file:
video_file.write(binary)
print(
f"Finished recording video. Saved binary under filename in current working directory: {filename}"
)
except Exception as err:
# In case any exceptions arise
print(str(err))
return filename
# Run the function, get the video path as saved in your notebook, and play it back here.
from IPython.display import HTML
from base64 import b64encode
video_width = 300
video_path = record_video()
video_file = open(video_path, "r+b").read()
video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

What's the best way to get an audio buffer into a blob that can be played by an audio element?

I have an AudioBuffer stored as a variable, and I would like to have it be played by an Audio element. Here is my current non-functioning code:
const blob = new Blob(audioBuffer.getChannelData(1), { type: "audio/wav" });
const url = window.URL.createObjectURL(blob);
audioElement.src = url;
When I try to play audioElement, I get the following error:
Uncaught (in promise) DOMException: The element has no supported sources.
Does anyone have any ideas on how to solve this? Thanks in advance!
AudioBuffer is PCM data, not encoded as WAV yet. If you need WAV you should get a library to do the encoding for you, such as https://www.npmjs.com/package/audiobuffer-to-wav
After including above code (you can just copy the audioBufferToWav function and the functions it calls below it out of index.js).
const blob = new Blob([audioBufferToWav(audioBuffer.getChannelData(1))], { type: "audio/wav" });
const url = window.URL.createObjectURL(blob);
audioElement.src = url;
Below using Web Audio API to playback the PCM AudioBuffer directly.
var myArrayBuffer = audioBuffer;
var audioCtx = new (window.AudioContext || window.webkitAudioContext)();
var source = audioCtx.createBufferSource();
source.buffer = myArrayBuffer;
source.connect(audioCtx.destination);
source.start();

How to downsample audio recorded from mic realtime in javascript?

I am using following javascript to record audio and send it to a websocket server:
const recordAudio = () =>
new Promise(async resolve => {
const constraints = {
audio: {
sampleSize: 16,
channelCount: 1,
sampleRate: 8000
},
video: false
};
var mediaRecorder;
const stream = await navigator.mediaDevices.getUserMedia(constraints);
var options = {
audioBitsPerSecond: 128000,
mimeType: 'audio/webm;codecs=pcm'
};
mediaRecorder = new MediaRecorder(stream, options);
var track = stream.getAudioTracks()[0];
var constraints2 = track.getConstraints();
var settings = track.getSettings();
const audioChunks = [];
mediaRecorder.addEventListener("dataavailable", event => {
audioChunks.push(event.data);
webSocket.send(event.data);
});
const start = () => mediaRecorder.start(30);
const stop = () =>
new Promise(resolve => {
mediaRecorder.addEventListener("stop", () => {
const audioBlob = new Blob(audioChunks);
const audioUrl = URL.createObjectURL(audioBlob);
const audio = new Audio(audioUrl);
const play = () => audio.play();
resolve({
audioBlob,
audioUrl,
play
});
});
mediaRecorder.stop();
});
resolve({
start,
stop
});
});
This is for realtime STT and the websocket server refused to send any response. I checked by debugging that the sampleRate is not changing to 8Khz.Upon researching, I found out that this is a known bug on both chrome and firefox. I found some other resources like stackoverflow1 and IBM_STT but I have no idea on how to adapt it to my code.
The above helpful resources refers to buffer but all i have is mediaStream(stream) and event.data(blob) in my code.
I am new to both javascript and Audio Api, so please pardon me if i did something wrong.
If this helps, I have an equivalent code of python to send data from mic to websocket server which works. Library used = Pyaudio. Code :
p = pyaudio.PyAudio()
stream = p.open(format="pyaudio.paInt16",
channels=1,
rate= 8000,
input=True,
frames_per_buffer=10)
print("* recording, please speak")
packet_size = int((30/1000)*8000) # normally 240 packets or 480 bytes
frames = []
#while True:
for i in range(0, 1000):
packet = stream.read(packet_size)
ws.send(packet, binary=True)
To do realtime downsampling follow these steps:
First get stream instance using this:
const stream = await navigator.mediaDevices.getUserMedia(constraints);
Create media stream source from this stream.
var input = audioContext.createMediaStreamSource(stream);
Create script Processor so that you can play with buffers. I am going to create a script processor which takes 4096 samples from the stream at a time, continuously, has 1 input channel and 1 output channel.
var scriptNode = audioContext.createScriptProcessor(4096, 1, 1);
Connect your input with scriptNode. You can connect script Node to the destination as per your requirement.
input.connect(scriptNode);
scriptNode.connect(audioContext.destination);
Now there is a function onaudioprocess in scriptProcessor where you can do whatever you want with 4096 samples. var downsample will contain (1/sampling ratio) number of packets. floatTo16BitPCM will convert that to your required format since the original data is in 32 bit float format.
var inputBuffer = audioProcessingEvent.inputBuffer;
// The output buffer contains the samples that will be modified and played
var outputBuffer = audioProcessingEvent.outputBuffer;
// Loop through the output channels (in this case there is only one)
for (var channel = 0; channel < outputBuffer.numberOfChannels; channel++) {
var inputData = inputBuffer.getChannelData(channel);
var outputData = outputBuffer.getChannelData(channel);
var downsampled = downsample(inputData);
var sixteenBitBuffer = floatTo16BitPCM(downsampled);
}
Your sixteenBitBuffer will contain the data you require.
Functions for downsampling and floatTo16BitPCM are explained in this link of Watson API:IBM Watson Speech to Text Api
You won't need MediaRecorder instance. Watson API is opensource and you can look for a better streamline approach on how they implemented it for their use case. You should be able to salvage important functions from their code.

web audio analyze entire buffer

I have the audio buffer of a prerecorded audio file in my application.
I'm trying to get the frequency domain data of the ENTIRE audio track, this is what I've tried:
getAudioDataFromBuffer: function(buf){
var src = g.audioContext.createBufferSource();
src.buffer = buf;
var anal = src.context.createAnalyser();
src.connect(anal);
var dataArray = new Uint8Array(buf.length);
anal.fftSize = 2048;
anal.getByteFrequencyData(dataArray);
return dataArray;
},
But this only gives me an array full of zeros.
I need this to compare two audio tracks, one is prerecorded and the other is recorded in the application. I'm thinking I could measure the correlation between their frequency domains.
I arrived to the solution seeing this answer and this discussion.
Basically you need to use an OfflineAudioContext. Here the code staring from an already loaded audio buffer:
var offline = new OfflineAudioContext(2, buffer.length ,44100);
var bufferSource = offline.createBufferSource();
bufferSource.buffer = buffer;
var analyser = offline.createAnalyser();
var scp = offline.createScriptProcessor(256, 0, 1);
bufferSource.connect(analyser);
scp.connect(offline.destination); // this is necessary for the script processor to start
var freqData = new Uint8Array(analyser.frequencyBinCount);
scp.onaudioprocess = function(){
analyser.getByteFrequencyData(freqData);
console.log(freqData);
};
bufferSource.start(0);
offline.oncomplete = function(e){
console.log('analysed');
};
offline.startRendering();
Here's a working example using the latest version of the Web Audio API:
Note: You need to start with an audioBuffer.. you can get one using the new File System Access API:
const [fileHandle] = await window.showOpenFilePicker();
const file = await fileHandle.getFile();
const arrayBuffer = await file.arrayBuffer();
const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
Once you have the audioBuffer, you can access it's contents using offlineAudioContext:
const offlineAudioContext = new OfflineAudioContext(
audioBuffer.numberOfChannels,
audioBuffer.length,
audioBuffer.sampleRate
);
const bufferSourceNode = offlineAudioContext.createBufferSource();
bufferSourceNode.start(0);
offlineAudioContext
.startRendering()
.then(renderedBuffer => {
const data = renderedBuffer.getChannelData(0);
for (let i = 0, length = data.length; i < length; i += 1) {
// careful here, as you can hang the browser by logging this data
// because 1 second of audio contains 22k ~ 96k samples!
if (!(i % 1000) && i < 250000) console.log(data[i]);
}
}
I think you need more something like
AudioBuffer.getChannelData()
Returns a Float32Array containing the PCM data associated with the channel, defined by the channel parameter (with 0 representing the first channel).
Lookup at Mozilla or W3C documentation.
Cheers
Kilian

Concatenating audio blobs

I tried concatenating audio blobs using Web RTC experiment by Muaz Khan, but when I play the concatenated audio, the HTML audio element does not show the full length of the audio file and also if you download and play, the issue will persist. I used ffmpeg to concate these blobs, though is there a way which can be used for concatenating audio blobs using the Web RTC js experiment by Muaz Khan. A similar attempt which also did not work out : Combine two audio blob recordings
The best way is to convert the blobs into AudioBuffers (Convert blob into ArrayBuffer using FileReader and then decode those arrayBuffers into AudioBuffers). You can then merge/combine more than one AudioBuffers and get the resultant.
Following code will work in such situation:
var blob="YOUR AUDIO BLOB";
var f = new FileReader();
f.onload = function (e) {
audioContext.decodeAudioData(e.target.result, function (buffer) {
arrayBuffer.push(buffer);
if (arrayBuffer.length > 1) {
resultantbuffer = appendBuffer(arrayBuffer[0], arrayBuffer[1]);
arrayBuffer = [];
arrayBuffer.push(resultantbuffer);
}
else {
resultantbuffer = buffer;
}
}, function (e) {
console.warn(e);
});
};
f.readAsArrayBuffer(blob);
This code read the blob and convert into arrayBuffer (e.target.result) and decode those buffers into AudioBuffers (buffer). I used appendBuffer method for appending more than one audioBuffers. Here is the method:
function appendBuffer(buffer1, buffer2) {
///Using AudioBuffer
var numberOfChannels = Math.min(buffer1.numberOfChannels, buffer2.numberOfChannels);
var tmp = recordingAudioContext.createBuffer(numberOfChannels, (buffer1.length + buffer2.length), buffer1.sampleRate);
for (var i = 0; i < numberOfChannels; i++) {
var channel = tmp.getChannelData(i);
channel.set(buffer1.getChannelData(i), 0);
channel.set(buffer2.getChannelData(i), buffer1.length);
}
return tmp;
}
Do let me know if you face any problem.

Categories

Resources