This is my script:
<script src="https://sdk.amazonaws.com/js/aws-sdk-2.7.13.min.js"></script>
<script>
// Global SDK configuration: region plus the placeholder credentials.
AWS.config.region = 'eu-west-1';
AWS.config.accessKeyId = 'FOO';
AWS.config.secretAccessKey = 'BAR';

// Polly client pinned to the 2016-06-10 API version.
var polly = new AWS.Polly({apiVersion: '2016-06-10'});

// Synthesis request: plain text, delivered as MP3.
var params = {
  OutputFormat: 'mp3', /* required */
  Text: 'Hello world', /* required */
  VoiceId: 'Joanna', /* required */
  SampleRate: '22050',
  TextType: 'text'
};

// Log either the failure (with its stack) or the raw response object.
polly.synthesizeSpeech(params, function(err, data) {
  if (err) {
    // an error occurred
    console.log(err, err.stack);
    return;
  }
  // successful response
  console.log(data);
});
</script>
The request succeeds, and I get this kind of response:
How do I use this kind of response? I understand that the response is deserialized audio, but how do I actually play it, say, inside an HTML5 audio element?
Furthermore, this answer on SO explains why this type of array is suitable for audio data: https://stackoverflow.com/a/26320913/1325575
// Copy the synthesized bytes into a typed array so they can back a Blob.
// audioStream is assumed to be the audio data from the Polly response.
var uInt8Array = new Uint8Array(audioStream);
var arrayBuffer = uInt8Array.buffer;
// Label the Blob as MP3 (the request asked for OutputFormat 'mp3'); Safari
// will not play a Blob that carries no content type.
var blob = new Blob([arrayBuffer], {type: 'audio/mpeg'});
var url = URL.createObjectURL(blob);
// audioElement is assumed to be an existing HTML5 <audio> element.
audioElement.src = url;
audioElement.play();
I created a Javascript library called ChattyKathy that will handle the entire process for you if you want to take the easy way out.
Just pass it an AWS Credentials object and then tell her what to say. She'll call AWS, transform the response, and play the audio.
// Settings handed to ChattyKathy. awsCredentials is not defined in this
// snippet; it is expected to be an AWS Credentials object created elsewhere.
var settings = {
awsCredentials: awsCredentials,
awsRegion: "us-west-2",
pollyVoiceId: "Justin",
cacheSpeech: true
}
// Build the speaker from the settings, then pass it each sentence to say.
var kathy = ChattyKathy(settings);
kathy.Speak("Hello world, my name is Kathy!");
kathy.Speak("I can be used for an amazing user experience!");
Elliott's Chatty Kathy code worked beautifully for me, but there are two separate issues with Safari and mobile.
Safari: When creating the blob, the content type MUST be specified:
// Safari requires the Blob to carry an explicit content type.
var blob = new Blob([arrayBuffer], {type: 'audio/mpeg'});
// Prefer the standard URL API and fall back to the prefixed webkitURL only
// where URL is missing; a bare webkitURL reference throws in browsers that do
// not define it. Declared with var so no implicit global is created.
var url = (window.URL || window.webkitURL).createObjectURL(blob);
Mobile: The above must be true, plus playback needs to be initiated by a user touch event. Note: Older iOS versions seem to require that playback be initiated in the same thread as the touch event, so a touch event that initiates a promise chain that eventually calls audio.play() will fail. Later iOS versions seem to be smarter about this.
Using the Web Audio API:
// Request the speech; result.AudioStream holds the encoded audio bytes.
// (await requires this to run inside an async function or a module.)
const result = await polly.synthesizeSpeech(params).promise();
const aContext = new AudioContext();
const source = aContext.createBufferSource();
// Decode a copy of exactly the bytes the view covers: a typed array's .buffer
// may be larger than the view itself, and decodeAudioData detaches the
// ArrayBuffer it is given, which would otherwise invalidate result.AudioStream.
const audioBytes = result.AudioStream;
const encodedAudio = audioBytes.buffer.slice(
  audioBytes.byteOffset,
  audioBytes.byteOffset + audioBytes.byteLength
);
source.buffer = await aContext.decodeAudioData(encodedAudio);
source.connect(aContext.destination);
source.start();
Docs:
AudioContext
Decode ArrayBuffer
Related
Using media recorder, I am able to upload and append the video blobs on azure. But combined video is not seekable on download with following code -
// Recorded blobs collected so far.
var chunks = [];
// MediaRecorder's second argument is an options dictionary; the MIME type
// belongs in its mimeType member rather than being passed as a bare string.
var mediaRecorder = new MediaRecorder(stream, { mimeType: 'video/x-matroska;codecs=vp8,opus' });
mediaRecorder.ondataavailable = function(event) {
  // Ignore events that carry no data.
  if (event.data && event.data.size > 0) {
    chunks.push(event.data);
    appendBlockToAzure(chunks);
  }
};
// Request a dataavailable event for every 10000 ms of recording.
mediaRecorder.start(10000);
I tried using EBML.js: with the following code I do get a seekable video file. However, this approach needs the whole file to be processed at the end, so the final file could be 1 GB in size, which would take a very long time to upload.
// Recorded blobs collected so far.
var chunks = [];
// MediaRecorder's second argument is an options dictionary; the MIME type
// belongs in its mimeType member rather than being passed as a bare string.
var mediaRecorder = new MediaRecorder(stream, { mimeType: 'video/x-matroska;codecs=vp8,opus' });
mediaRecorder.ondataavailable = function(event) {
  // Ignore events that carry no data.
  if (event.data && event.data.size > 0) {
    chunks.push(event.data);
    // Once the recorder has stopped, merge everything and make it seekable
    // before uploading the single combined file.
    if (mediaRecorder.state === "inactive") {
      var combined = new Blob(chunks, { type: event.data.type });
      getSeekableBlob(combined, function (seekableBlob) {
        saveCombinedVideoToAzure(seekableBlob);
      });
    }
  }
};
// Request a dataavailable event for every 10000 ms of recording.
mediaRecorder.start(10000);
That's the reason I want to upload simultaneously to the azure. If I use the following code, then it logs unknown tag warnings and then length error. Also, the video file is not playable.
// Per-chunk results of getSeekableBlob, in arrival order.
var seekablechunks = [];
// MediaRecorder's second argument is an options dictionary; the MIME type
// belongs in its mimeType member rather than being passed as a bare string.
var mediaRecorder = new MediaRecorder(stream, { mimeType: 'video/x-matroska;codecs=vp8,opus' });
mediaRecorder.ondataavailable = function(event) {
  // Ignore events that carry no data.
  if (event.data && event.data.size > 0) {
    // Each chunk is converted on its own and the growing list is uploaded.
    getSeekableBlob(event.data, function (seekableBlob) {
      seekablechunks.push(seekableBlob);
      saveCombinedVideoToAzure(seekablechunks);
    });
  }
};
// Request a dataavailable event for every 10000 ms of recording.
mediaRecorder.start(10000);
Function 'getSeekableBlob':
/**
 * Rebuilds the metadata of a recorded blob so the result is seekable.
 *
 * The blob is read into memory, decoded with EBML.js, and its metadata section
 * is replaced by the output of EBML.tools.makeMetadataSeekable.
 *
 * @param {Blob} inputBlob - Recording to process.
 * @param {function(Blob): void} callback - Receives the rebuilt blob, typed
 *   'video/webm'.
 * @throws {Error} If the global EBML library has not been loaded.
 */
function getSeekableBlob(inputBlob, callback) {
  // EBML.js copyrights goes to: https://github.com/legokichi/ts-ebml
  if (typeof EBML === 'undefined') {
    throw new Error('Please link: https://www.webrtc-experiment.com/EBML.js');
  }

  var reader = new EBML.Reader();
  var decoder = new EBML.Decoder();
  var tools = EBML.tools;

  var fileReader = new FileReader();
  fileReader.onload = function () {
    var recorded = fileReader.result;
    decoder.decode(recorded).forEach(function (element) {
      // Elements the decoder cannot identify make makeMetadataSeekable fail
      // with "No schema entry found for unknown", so they are not fed to the
      // reader.
      if (element.type !== 'unknown') {
        reader.read(element);
      }
    });
    reader.stop();

    var refinedMetadataBuf = tools.makeMetadataSeekable(reader.metadatas, reader.duration, reader.cues);
    // Everything after the original metadata is kept unchanged.
    var body = recorded.slice(reader.metadataSize);
    callback(new Blob([refinedMetadataBuf, body], {
      type: 'video/webm'
    }));
  };
  fileReader.readAsArrayBuffer(inputBlob);
}
Is there a way to get seekable blobs and upload them to azure?
It's a challenge for an open-ended streaming source for media (for example MediaRecorder) to create a file with SeekHead elements in it. The Seek elements in a SeekHead element contain byte offsets to elements in the file.
MediaRecorder doesn't create segments or SeekHead elements as you have discovered. To do so it would need to be able to see the future to know how big future compressed video and audio elements will be in the file.
A good way for you to handle this problem might be to post-process your uploaded files on a server. You can use ts-ebml to do this in a streaming fashion on a server when a file is completely uploaded.
It's possible, I suppose, to create Javascript software in your browser that can transform the stream of data emitted by MediaRecorder so it's seekable, on the fly. To make your stream seekable you'd need to insert SeekHead elements every so often. You'd buffer up multiple seconds of the stream, then locate the Cluster elements in each buffer, then write a SeekHead element pointing to some of them. (Chrome's MediaRecorder outputs Clusters beginning with video key frames.) If you succeed in doing this you'll know a lot about Matroska / webm.
Suddenly, our Face on camera web-cam recorder component stopped saving webm blob.
In the console there were warnings about {EBML_ID: "55b0", type: "unknown", ...} during reader.read(element) and then
"Uncaught (in promise) Error: No schema entry found for unknown" in EBMLEncoder.js" at tools.makeMetadataSeekable(...) call.
Ignoring unknown elements from the decoder worked around the issue:
...
// Decode the file, then hand the reader only the elements whose type the
// decoder could identify.
var ebmlElms = decoder.decode(this.result);
ebmlElms
  .filter(function (candidate) {
    return candidate.type !== 'unknown';
  })
  .forEach(function (known) {
    reader.read(known);
  });
reader.stop();
...
Related issue on ts-ebml npm package https://github.com/legokichi/ts-ebml/issues/33 with similar workaround
I am trying to develop a canva-like Insta story creator using Canvas and MediaRecorder
The app is working perfectly on a desktop browser - I am able to download the file, and play it on desktop. However, when I send that file to my mobile, it doesn't play(even on Insta). I figure this is an issue with codecs - but don't know how to solve the same.
This is the function that handles the mediaRecorderAPI
Is there any mime type that I can use, that is universal and can play for any device?
initRecorder () {
var dl = document.querySelector("#dl")
let videoStream = this.canvas.captureStream(60);
if(this.isAudioPresent) {
videoStream.addTrack(this.audioStream.getAudioTracks()[0])
}
let mediaRecorder = new MediaRecorder(videoStream, {
videoBitsPerSecond : 2500000,
mime: 'video/webm'
});
let chunks = [];
mediaRecorder.onstop = function(e) {
var blob = new Blob(chunks, { 'type' : 'video/webm' });
chunks = [];
var videoURL = URL.createObjectURL(blob);
dl.href = videoURL;
};
mediaRecorder.ondataavailable = function(e) {
e.data.size && chunks.push(e.data);
};
mediaRecorder.start();
setTimeout(function (){ mediaRecorder.stop(); },this.storytime);
}
```
Figured this out: Different browsers use different transcoding. Insta only accepts MP4 transcoding. Hence, you need to use either a transcoder on the frontend(ffmpeg.js or wasm version of ffmpeg) or send your data to backend and handle there(which I ended up doing)
I am using following javascript to record audio and send it to a websocket server:
/**
 * Sets up a recorder on the user's audio input that also forwards every
 * recorded chunk to the global `webSocket`.
 *
 * Written as an async function (instead of `new Promise(async ...)`) so that a
 * failing getUserMedia call or MediaRecorder constructor rejects the returned
 * promise rather than leaving it pending forever.
 *
 * @returns {Promise<{start: function(): void, stop: function(): Promise<{audioBlob: Blob, audioUrl: string, play: function(): *}>}>}
 *   Controls for the recording; `stop` resolves with the combined audio.
 */
const recordAudio = async () => {
  const constraints = {
    audio: {
      sampleSize: 16,
      channelCount: 1,
      sampleRate: 8000
    },
    video: false
  };
  const stream = await navigator.mediaDevices.getUserMedia(constraints);

  const options = {
    audioBitsPerSecond: 128000,
    mimeType: 'audio/webm;codecs=pcm'
  };
  const mediaRecorder = new MediaRecorder(stream, options);

  const audioChunks = [];
  mediaRecorder.addEventListener("dataavailable", event => {
    // Keep a local copy for playback and stream the chunk out immediately.
    audioChunks.push(event.data);
    webSocket.send(event.data);
  });

  // Ask for a chunk every 30 ms.
  const start = () => mediaRecorder.start(30);

  const stop = () =>
    new Promise(resolve => {
      mediaRecorder.addEventListener("stop", () => {
        // Carry the recorder's MIME type so the blob is playable in browsers
        // that need a content type.
        const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType });
        const audioUrl = URL.createObjectURL(audioBlob);
        const audio = new Audio(audioUrl);
        const play = () => audio.play();
        resolve({
          audioBlob,
          audioUrl,
          play
        });
      }, { once: true }); // avoid stacking listeners when stop() is called again
      mediaRecorder.stop();
    });

  return {
    start,
    stop
  };
};
This is for realtime STT, and the websocket server refused to send any response. I checked by debugging that the sampleRate is not changing to 8 kHz. Upon researching, I found out that this is a known bug in both Chrome and Firefox. I found some other resources like stackoverflow1 and IBM_STT, but I have no idea how to adapt them to my code.
The above helpful resources refers to buffer but all i have is mediaStream(stream) and event.data(blob) in my code.
I am new to both javascript and Audio Api, so please pardon me if i did something wrong.
If this helps, I have an equivalent code of python to send data from mic to websocket server which works. Library used = Pyaudio. Code :
p = pyaudio.PyAudio()
# Open a mono, 8000 Hz input stream. The format must be the pyaudio.paInt16
# constant itself; passing its name as a string is not a valid sample format.
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=8000,
                input=True,
                frames_per_buffer=10)
print("* recording, please speak")
packet_size = int((30/1000)*8000)  # normally 240 packets or 480 bytes
frames = []
# while True:
for i in range(0, 1000):
    # Read one 30 ms packet and forward it as a binary websocket message.
    packet = stream.read(packet_size)
    ws.send(packet, binary=True)
To do realtime downsampling follow these steps:
First get stream instance using this:
// Resolve the MediaStream for the given constraints; because of the await this
// must run inside an async function. `constraints` is not defined here.
const stream = await navigator.mediaDevices.getUserMedia(constraints);
Create media stream source from this stream.
// NOTE(review): audioContext is not created anywhere in this snippet;
// presumably it is an existing AudioContext instance. Confirm before use.
var input = audioContext.createMediaStreamSource(stream);
Create script Processor so that you can play with buffers. I am going to create a script processor which takes 4096 samples from the stream at a time, continuously, has 1 input channel and 1 output channel.
// Arguments: 4096 samples per callback, 1 input channel, 1 output channel.
var scriptNode = audioContext.createScriptProcessor(4096, 1, 1);
Connect your input with scriptNode. You can connect script Node to the destination as per your requirement.
// Route the media stream source into the script processor.
input.connect(scriptNode);
// Route the script processor on to the audio context's destination.
scriptNode.connect(audioContext.destination);
Now there is a function onaudioprocess in scriptProcessor where you can do whatever you want with 4096 samples. var downsample will contain (1/sampling ratio) number of packets. floatTo16BitPCM will convert that to your required format since the original data is in 32 bit float format.
// audioProcessingEvent only exists as the argument of the script processor's
// onaudioprocess callback, so the processing code is installed as that handler.
scriptNode.onaudioprocess = function (audioProcessingEvent) {
  var inputBuffer = audioProcessingEvent.inputBuffer;
  // The output buffer is only consulted for its channel count; nothing is
  // written to it here.
  var outputBuffer = audioProcessingEvent.outputBuffer;
  // Loop through the output channels (in this case there is only one)
  for (var channel = 0; channel < outputBuffer.numberOfChannels; channel++) {
    var inputData = inputBuffer.getChannelData(channel);
    // Reduce the sample rate, then convert the 32-bit float samples to 16-bit PCM.
    var downsampled = downsample(inputData);
    var sixteenBitBuffer = floatTo16BitPCM(downsampled);
  }
};
Your sixteenBitBuffer will contain the data you require.
Functions for downsampling and floatTo16BitPCM are explained in this link of Watson API:IBM Watson Speech to Text Api
You won't need MediaRecorder instance. Watson API is opensource and you can look for a better streamline approach on how they implemented it for their use case. You should be able to salvage important functions from their code.
I'm trying to broadcast a video from my webcam in javascript. I'm using MediaStream to get the video from my webcam, MediaRecorder to record such video in chunks (which would be transmitted to the server), and MediaSource to assemble these chunks and play them seamlessly in a video container called watchVideo on the source below.
It all works perfectly when i'm capturing only video, i.e. constraints = { video: true } ; but if I add audio, the watchVideo doesn't display anything, and the console shows me the following error:
Uncaught DOMException: Failed to execute 'appendBuffer' on 'SourceBuffer': This SourceBuffer has been removed from the parent media source.
This is the relevant part of the code:
// MIME type shared by the recorder and the playback SourceBuffer.
var mime = 'video/webm; codecs=vp8';

// Ask for camera and microphone when the mediaDevices API is present.
if (navigator.mediaDevices) {
  constraints = { video: true, audio: true };
  navigator.mediaDevices
    .getUserMedia(constraints)
    .then(handleUserMedia)
    .catch(function (err) {
      console.log("ERROR: " + err);
    });
}

// Wires the captured stream to a recorder and prepares the MediaSource that
// feeds the watchVideo element.
function handleUserMedia(stream) {
  source = new MediaSource();
  watchVideo.src = window.URL.createObjectURL(source);
  source.onsourceopen = function () {
    buffer = source.addSourceBuffer(mime);
  };

  mediaRecorder = new MediaRecorder(stream, { mimeType: mime });
  mediaRecorder.ondataavailable = handleDataAvailable;
}

// Reads a recorded chunk as an ArrayBuffer and appends it to the SourceBuffer.
function handleDataAvailable(evt) {
  var chunkReader = new FileReader();
  chunkReader.onload = function () {
    buffer.appendBuffer(chunkReader.result);
  };
  chunkReader.readAsArrayBuffer(evt.data);
}
I came across the question and it actually helped me more than many answers related to this topic. I don't know if you are still interested in the answer but I have tried
// Name both a video and an audio codec; the inner double quotes are part of the
// MIME string, so the literal is wrapped in single quotes.
var mime = 'video/webm; codecs="vp9,opus"';
and it worked fine with audio and video, I hope this answer will help you
I'm developing a XUL based Firefox extension. I'm trying to create an inline Web Worker using BLOB. The code used to work in Firefox 33 but after update to Firefox 35 I get an error. Here is a code sample:
// Fragment of a larger function (note the return): builds a worker from an
// in-memory script and logs any failure instead of propagating it.
try {
// Worker source wrapped in a Blob typed as JavaScript.
var blob = new Blob(["function f(){}"], {type: "application/javascript"});
var url = window.URL.createObjectURL(blob); //blob:null/371e34bd-1fbf-4f66-89cc-24d0c1c7bad5
return new Worker(url);
} catch(e) {
console.error(e);
}
And I get a following error:
Failed to load script (nsresult = 0x805303f4)
I'm aware that this error appears when Web Worker tries to load a script from a different domain but I cannot figure out why this is happening in my case. The url I get from createObjectURL() function appears to be invalid. It contains "null/" prefix.
Does anybody have an explanation what is going on? What is the possible fix here?
This example works for me; tested on Firefox 37 through 39.0a2.
// URL.createObjectURL (use the prefixed implementation where needed)
window.URL = window.URL || window.webkitURL;

// "Server response", used in all examples
var response = "self.onmessage=function(e){postMessage('Worker: '+e.data);}";

// Wrap the script text in a Blob, preferring the Blob constructor.
var blob;
try {
  blob = new Blob([response], {type: 'application/javascript'});
} catch (e) {
  // Backwards-compatibility: fall back to whichever BlobBuilder exists.
  window.BlobBuilder = window.BlobBuilder || window.WebKitBlobBuilder || window.MozBlobBuilder;
  blob = new BlobBuilder();
  blob.append(response);
  blob = blob.getBlob();
}

// Start the worker from an object URL pointing at the Blob.
var worker = new Worker(URL.createObjectURL(blob));

// Test, used in all examples: show whatever the worker posts back.
worker.onmessage = function (e) {
  alert('Response: ' + e.data);
};
worker.postMessage('Test');