I get a WAV file from a user upload (basically a file input) and have to do some manipulation on it:
Validate that it's a valid .wav file
If the user uploaded a stereo file, extract a single channel (the left one)
Add a fade out at the end (the last 50 samples of the file)
My first thought was: hey, there's an API for that (Web Audio), so I did something similar to:
const source = audioContext.createBufferSource();
const splitter = audioContext.createChannelSplitter(audioBuffer.numberOfChannels);
const gainNode = audioContext.createGain();
source.buffer = audioBuffer;
source.connect(splitter);
gainNode.gain.linearRampToValueAtTime(0, audioBuffer.duration);
splitter.connect(gainNode, 0);
Which, in my thinking, takes the first channel out of the source and adds a linear fade out (not really on the last 50 samples, but that's not the point for now).
But...
How do I extract the output of that into a file? I know how to play the manipulated sound on the frontend side, but am I able to turn it back into a file?
At some point I assumed there's no way to do that, so I came up with a different solution that uses low-level file manipulation and goes as follows:
const audioContext = new AudioContext();
// const arrayBuffer = await toArrayBuffer(file);
const audioBuffer = await decodeAudio(audioContext, arrayBuffer);
const channels = 1;
const duration = audioBuffer.duration;
const rate = audioBuffer.sampleRate;
const length = Math.ceil(duration * rate * channels * 2 + 44);
const buffer = new ArrayBuffer(length);
const view = new DataView(buffer);
let position = 0;
let offset = 0;
const setUint16 = (data) => {
view.setUint16(position, data, true);
position += 2;
};
const setUint32 = (data) => {
view.setUint32(position, data, true);
position += 4;
};
setUint32(0x46464952); // "RIFF"
setUint32(length - 8); // RIFF chunk size (file length - 8)
setUint32(0x45564157); // "WAVE"
setUint32(0x20746d66); // "fmt " chunk
setUint32(16); // fmt chunk size (16 for PCM)
setUint16(1); // audio format: PCM
setUint16(channels); // number of channels
setUint32(rate); // sample rate
setUint32(rate * channels * 2); // byte rate = sampleRate * blockAlign
setUint16(channels * 2); // block align (bytes per sample frame)
setUint16(16); // bits per sample
setUint32(0x61746164); // "data"
setUint32(length - position - 4); // data chunk size
const leftChannel = audioBuffer.getChannelData(0);
let sample;
console.log('left', leftChannel);
console.log('length', length);
while (position < length) {
sample = leftChannel[offset];
sample = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
view.setInt16(position, sample, true);
position += 2;
offset++;
}
console.log('buffer', buffer);
const blob = new Blob([buffer], { type: file.type });
but it seems to have a lot of flaws; the output is distorted / has a different sample rate and so on...
My question then would be:
How do I extract a file out of the Web Audio API, if that's even possible? Because that would be the best way to do it, imho.
If (1) is not possible, what am I doing wrong on the second approach?
Web Audio has no way of saving audio to a file. You can use MediaRecorder as one way, but I don't think it's required to support WAV files. Or you can do it by hand, as you show above. At a glance I don't see anything wrong with what you've done. It might be helpful to look at what Chrome does to save files in its test suite; it does basically what you do.
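If you do go the MediaRecorder route, a minimal sketch (assuming the audioContext, source, and gainNode from the question) is to route the processed graph into a MediaStreamAudioDestinationNode and record that stream; note the result will be in whatever container the browser supports (often WebM/Opus), not necessarily WAV:
// Sketch only: capture the processed graph with MediaRecorder.
const streamDestination = audioContext.createMediaStreamDestination();
gainNode.connect(streamDestination);
const recorder = new MediaRecorder(streamDestination.stream);
const chunks = [];
recorder.ondataavailable = (e) => chunks.push(e.data);
recorder.onstop = () => {
// Container/codec is browser-dependent (often audio/webm), not raw WAV.
const blob = new Blob(chunks, { type: recorder.mimeType });
// ...download or upload the blob from here.
};
recorder.start();
source.start();
source.onended = () => recorder.stop();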
I'm attempting to use opencv.js to align images to a baseline image. I'm following some basic Python guidance that I've seen work (example: https://alexanderpacha.com/2018/01/29/aligning-images-an-engineers-solution/),
but I'm getting tripped up by an error that I don't quite understand. The error is "opencv.js:30 Uncaught TypeError: Cannot use 'in' operator to search for 'type' in 1e-10" and it seems to be caused by the "criteria" variable passed to cv.findTransformECC(); see here.
Any guidance as to what I'm doing wrong here?
function Align_img(){
let image_baseline = cv.imread(imgElement_Baseline);
let image = cv.imread('imageChangeup');
let im1_gray = new cv.Mat();
let im2_gray = new cv.Mat();
let im2_aligned = new cv.Mat();
//get size of baseline image
width1 = image_baseline.cols;
height1 = image_baseline.rows;
//resize image to baseline image
let dim1 = new cv.Size(width1, height1);
cv.resize(image, image, dim1, cv.INTER_AREA);
// Convert images to grayscale
cv.cvtColor(image_baseline, im1_gray, cv.COLOR_BGR2GRAY);
cv.cvtColor(image, im2_gray, cv.COLOR_BGR2GRAY);
// Find size of image1
let dsize = new cv.Size(image_baseline.rows, image_baseline.cols);
// Define the motion model
warp_mode = cv.MOTION_HOMOGRAPHY;
// Define 3x3 matrix and initialize the matrix to identity
let warp_matrix = cv.Mat.eye(3, 3, cv.CV_8U);
// Specify the number of iterations.
number_of_iterations = 5000;
// Specify the threshold of the increment in the correlation coefficient between two iterations
termination_eps = 0.0000000001; //1e-10;
// Define termination criteria
criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps);
//Run the ECC algorithm. The results are stored in warp_matrix.
cv.findTransformECC(im1_gray, im2_gray, warp_matrix, warp_mode, criteria, null, 5);
// Use warpPerspective for Homography
cv.warpPerspective (image, im2_aligned, warp_matrix, dsize, cv.INTER_LINEAR + cv.WARP_INVERSE_MAP);
cv.imshow('imageChangeup', im2_aligned);
im1_gray.delete();
im2_gray.delete();
im2_aligned.delete();
};
UPDATE: Two things. 1. I found an easy fix for the error (code below), and 2. there looks to be a bug in the findTransformECC opencv.js API that keeps this method from working. Here is the current code.
The API has two optional parameters (inputMask and gaussFiltSize), but if you don't include them you get an error ("function findTransformECC called with 5 arguments, expected 7 args!").
The issue is what to use for inputMask: null does not work, there doesn't seem to be support for cv.noArray(), and I can't find a mask that doesn't lead to an 'uncaught exception' error.
I'll update again once I find a workaround. Let me know if anyone sees one.
function Align_img(){
let image_baseline = cv.imread(imgElement_Baseline);
let image = cv.imread('imageChangeup');
let im1_gray = new cv.Mat();
let im2_gray = new cv.Mat();
let im2_aligned = new cv.Mat();
//get size of baseline image
var width1 = image_baseline.cols;
var height1 = image_baseline.rows;
//resize image to baseline image
let dim1 = new cv.Size(width1, height1);
cv.resize(image, image, dim1, cv.INTER_AREA);
// Convert images to grayscale
cv.cvtColor(image_baseline, im1_gray, cv.COLOR_BGR2GRAY);
cv.cvtColor(image, im2_gray, cv.COLOR_BGR2GRAY);
// Find size of image1
let dsize = new cv.Size(image_baseline.rows, image_baseline.cols);
// Define the motion model
const warp_mode = cv.MOTION_HOMOGRAPHY;
// Define 3x3 matrix and initialize the matrix to identity
let warp_matrix = cv.Mat.eye(3, 3, cv.CV_8U);
// Specify the number of iterations.
const number_of_iterations = 5000;
// Specify the threshold of the increment in the correlation coefficient between two iterations
const termination_eps = 0.0000000001; //1e-10;
// Define termination criteria
//const criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps);
let criteria = new cv.TermCriteria(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps);
//Run the ECC algorithm. The results are stored in warp_matrix.
//let inputMask = new cv.Mat.ones(im1_gray.size(), cv.CV_8U); //uint8
cv.findTransformECC(im1_gray, im2_gray, warp_matrix, warp_mode, criteria, null, 5);
// Use warpPerspective for Homography
cv.warpPerspective (image, im2_aligned, warp_matrix, dsize, cv.INTER_LINEAR + cv.WARP_INVERSE_MAP);
getMatStats(im2_aligned, 1); //0 = baseline (srcMat), 1 = image (srcMat_compare)
cv.imshow('imageChangeup', im2_aligned);
im1_gray.delete();
im2_gray.delete();
im2_aligned.delete();
};
UPDATE 2: I verified the code works fine in Python (code below). The issue at hand now is simply: how do you do this in JavaScript: inputMask=None?
Python:
# Read the images to be aligned
im1 = cv2.imread(r"C:\temp\tcoin\69.jpg");
im2 = cv2.imread(r"C:\temp\tcoin\pic96_crop.jpg");
#resize image to compare
width1 = int(im1.shape[1])
height1 = int(im1.shape[0])
dim1 = (width1, height1)
im2 = cv2.resize(im2, dim1, interpolation = cv2.INTER_AREA)
# Convert images to grayscale
im1_gray = cv2.cvtColor(im1,cv2.COLOR_BGR2GRAY)
im2_gray = cv2.cvtColor(im2,cv2.COLOR_BGR2GRAY)
# Find size of image1
sz = im1.shape
# Define the motion model
warp_mode = cv2.MOTION_HOMOGRAPHY
# Define 2x3 or 3x3 matrices and initialize the matrix to identity
warp_matrix = np.eye(3, 3, dtype=np.float32)
# Specify the number of iterations.
number_of_iterations = 5000;
# Specify the threshold of the increment
# in the correlation coefficient between two iterations
termination_eps = 1e-10;
# Define termination criteria
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
# Run the ECC algorithm. The results are stored in warp_matrix.
(cc, warp_matrix) = cv2.findTransformECC (im1_gray,im2_gray,warp_matrix, warp_mode, criteria, inputMask=None, gaussFiltSize=1)
# Use warpPerspective for Homography
im2_aligned = cv2.warpPerspective (im2, warp_matrix, (sz[1],sz[0]), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
# Show final results
cv2.imshow("Aligned Image 2", im2_aligned)
cv2.imwrite(r"c:\temp\tcoin\output\pic96_cropB.jpg", im2_aligned)
cv2.waitKey(0)
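For what it's worth, the closest JavaScript equivalent to inputMask=None that comes to mind is an explicit all-ones CV_8U mask the size of the grayscale images. This is only a sketch and, per the notes above, it may still hit the opencv.js exception, so treat it as an experiment rather than a confirmed fix:
// Experimental: pass a full all-ones mask instead of null for the inputMask parameter.
let inputMask = cv.Mat.ones(im1_gray.rows, im1_gray.cols, cv.CV_8U);
try {
cv.findTransformECC(im1_gray, im2_gray, warp_matrix, warp_mode, criteria, inputMask, 5);
} finally {
inputMask.delete(); // opencv.js Mats must be freed manually
}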
I'm trying to extract frequency information from a simple 16-bit PCM WAV file containing only one pure tone (750 Hz, created with Audacity) in JavaScript with Node.js.
I'm using:
node-wav for reading the WAV file
fft.js to perform the FFT operation
nodeplotlib to finally plot the results
Here is the code I came up with:
const wav = require('node-wav');
const FFT = require('fft.js');
const fs = require('fs');
const plotlib = require('nodeplotlib');
let size = 4096; //fft size
let fft = new FFT(size); //create fft object
let realOutput = new Array(size); // to store final result
let complexOutput = fft.createComplexArray(); // to store fft output
let buffer = fs.readFileSync('750hz.wav'); // open a 1s wav file(mono 16bit pcm file at 32000hz) containing only a 750hz sinusoidal tone
let result = wav.decode(buffer); // read wav file data
let audioData = Array.prototype.slice.call( result.channelData[0]); // convert Float32Array to normal array
let realInput = audioData.slice(0, size); // use only 4096 samples from the buffer
fft.realTransform(complexOutput, realInput); // compute fft
// fft.completeSpectrum(complexOutput);
fft.fromComplexArray(complexOutput,realOutput); // get rid of the complex value and keep only real
let x =[];
for(let i=0;i<size;i++) x.push(i); //create a simple dumb x axis for the fft plot
plotlib.plot( // plotting the input data
[{
x: x,
y: realInput,
type: 'line',
name:'input'
}]
);
plotlib.plot( // plotting the fft output
[{
x: x,
y: realOutput,
type: 'line',
name:'output'
}]
);
Plotting the input data from the code above gives me:
And the output looks like this:
While I was expecting to get only one spike, something like this:
Is there something wrong with my code, or is the output supposed to look like that and I missed something about how to generate a frequency spectrum out of an FFT?
Can the FFT work with float values right away, or do they need to be converted to integers first?
Do I need the complex part of the FFT output in my case?
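For reference, the single spike is usually obtained from the magnitude of each complex bin rather than from the real parts alone. A minimal sketch, assuming fft.js's interleaved [re, im] output layout and the 32000 Hz sample rate mentioned above:
// Magnitude spectrum for bins 0..size/2 (the upper half mirrors it for real input).
const sampleRate = 32000; // from the question: mono 16-bit PCM at 32000 Hz
const freqs = [];
const magnitudes = [];
for (let k = 0; k < size / 2; k++) {
const re = complexOutput[2 * k];
const im = complexOutput[2 * k + 1];
magnitudes.push(Math.sqrt(re * re + im * im)); // bin magnitude
freqs.push(k * sampleRate / size); // bin k in Hz; 750 Hz should land near bin 96
}
// plotlib.plot([{ x: freqs, y: magnitudes, type: 'line', name: 'magnitude' }]);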
Scenario: as a user scrubs an audio file, I want to display a new image in the background of the audio player div. Audio files have different images, some have 8 images, some have 42, and so on. Audio files also have different lengths.
We have our length: 30000 ms.
We have our image count for the audio file: 13.
We have our URL: http://www.foostorage.com/api/image_1.jpg
const duration = 30000
const imageCount = 13
const url = 'http://www.foostorage.com/api/image_1.png'
function renderImage(percentage) {
const ab = Math.round((percentage / imageCount) * 100);
const fooImage = url.replace(/poster.*.png/, `poster_${ab}.jpg`);
return fooImage;
}
The percentage is coming from the scrub position, e.g. 0.232 would be 23% out of 100.
Hopefully I explained this OK; feel free to let me know if I haven't.
Try this:
const duration = 30000
const imageCount = 13
const url = 'http://www.foostorage.com/api/image_1.png'
function renderImage(percentage) {
const ab = Math.round((percentage * imageCount));
const fooImage = url.replace(/image.*.png/, `poster_${ab}.jpg`);
return fooImage;
}
// Assuming scrub scale 0-1
console.log(renderImage(0.1325));
console.log(renderImage(0.20));
console.log(renderImage(0.336));
console.log(renderImage(0.85));
console.log(renderImage(1));
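One detail to watch: Math.round(percentage * imageCount) can return 0 at the very start of the scrub. If the images are numbered 1 through imageCount, you may want to clamp the index; a small variation under that assumption (the helper name is just for illustration):
function renderImageClamped(percentage) {
// Map the 0-1 scrub position onto 1..imageCount (assumes 1-based image names).
const index = Math.min(imageCount, Math.max(1, Math.ceil(percentage * imageCount)));
return url.replace(/image.*\.png/, `poster_${index}.jpg`);
}
console.log(renderImageClamped(0)); // first image instead of a non-existent image 0
console.log(renderImageClamped(1)); // last image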
I'm trying to listen to 2-channel audio in both headphones (the same audio in the left and right headphone).
Current situation:
Slider is centered - works perfectly well (both channels in both headphones).
Slider is on the right - works perfectly well (right channel in both headphones).
Slider is on the left - doesn't work (left channel only in left headphone).
const splitter = wavesurfer.backend.ac.createChannelSplitter(2);
const merger = wavesurfer.backend.ac.createChannelMerger(2);
const leftGain = wavesurfer.backend.ac.createGain();
const rightGain = wavesurfer.backend.ac.createGain();
const panner = wavesurfer.backend.ac.createPanner();
splitter.connect(leftGain, 0);
splitter.connect(rightGain, 1);
leftGain.connect(merger, 0, 0);
rightGain.connect(merger, 0, 1);
merger.connect(panner);
let slider = document.querySelector('#Slider');
$(slider).change(function () {
rightGain.gain.value = Number(slider.value);
leftGain.gain.value = 1 - Number(slider.value);
});
wavesurfer.backend.setFilters([splitter, leftGain, rightGain, merger]);
When slider is on the left I want to hear only left channel in both headphones.
Can someone help me?
Problem
When calling setFilters(), wavesurfer.js will connect all provided nodes in a simple chain. In your case that means it will create additional connections like this:
splitter.connect(leftGain);
leftGain.connect(rightGain);
rightGain.connect(merger);
This is probably not what you want. But it is possible to make use of that behavior. I modified your example a bit.
const input = wavesurfer.backend.ac.createGain();
const splitter = wavesurfer.backend.ac.createChannelSplitter(2);
const merger = wavesurfer.backend.ac.createChannelMerger(2);
const leftGain = wavesurfer.backend.ac.createGain();
const rightGain = wavesurfer.backend.ac.createGain();
// This will make sure that a mono signal gets upmixed to stereo.
// If you always have stereo sound you can remove it.
input.channelCountMode = 'explicit';
// It is only necessary to connect the right channel
// because this is the one which needs optional parameters.
splitter.connect(rightGain, 1);
rightGain.connect(merger);
rightGain.connect(merger, 0, 1);
// Only the one connection which needs an optional parameter
// needs to be done for the left channel
leftGain.connect(merger, 0, 1);
// wavesufer.js will connect everything else.
wavesurfer.backend.setFilters([ input, splitter, leftGain, merger ]);
I also added another GainNode as the first node to make sure the signal is upmixed to stereo in case it is mono. And I removed the PannerNode as it wasn't used in your example.
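The slider wiring from the question can stay as it was, since leftGain and rightGain are still the nodes being controlled. For completeness, a sketch assuming the same 0 to 1 slider range:
// Slider at 0 = left channel only in both ears, 1 = right channel only, 0.5 = both.
const slider = document.querySelector('#Slider');
slider.addEventListener('change', () => {
rightGain.gain.value = Number(slider.value);
leftGain.gain.value = 1 - Number(slider.value);
});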
How do I use getUserMedia to access the microphone in Chrome and then stream it to get raw audio? I need to get the audio in linear16.
Unfortunately, the MediaRecorder doesn't support raw PCM capture. (A sad oversight, in my opinion.) Therefore, you'll need to get the raw samples and buffer/save them yourself.
You can do this with the ScriptProcessorNode. Normally, this Node is used to modify the audio data programmatically, for custom effects and what not. But, there's no reason you can't just use it as a capture point. Untested, but try something like this code:
const captureNode = audioContext.createScriptProcessor(8192, 1, 1);
// The node has to be wired into the graph (source -> captureNode -> destination)
// for 'audioprocess' events to fire.
captureNode.addEventListener('audioprocess', (e) => {
const rawLeftChannelData = e.inputBuffer.getChannelData(0);
// rawLeftChannelData is now a typed array with floating point samples
});
(You can find a more complete example on MDN.)
Those floating point samples are centered on zero and will ideally be bounded by -1 and 1. When converting to an integer range, you'll want to clamp values to this range, clipping anything beyond it. (The values can sometimes exceed -1 and 1 in the event loud sounds are mixed together in-browser. In theory, the browser can also record float32 samples from an external sound device which may also exceed that range, but I don't know of any browser/platform that does this.)
When converting to integer, it matters if the values are signed or unsigned. If signed, for 16-bit, the range is -32768 to 32767. For unsigned, it's 0 to 65535. Figure out what format you want to use and scale the -1 to 1 values up to that range.
One final note on this conversion... endianness can matter. See also: https://stackoverflow.com/a/7870190/362536
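As a concrete example of the clamp-and-scale step described above, here is a minimal sketch for the signed 16-bit little-endian case (the helper name is just for illustration):
// Convert Float32 samples (nominally -1..1) to signed 16-bit little-endian PCM.
function floatTo16BitPCM(float32Samples) {
const view = new DataView(new ArrayBuffer(float32Samples.length * 2));
for (let i = 0; i < float32Samples.length; i++) {
// Clamp first: in-browser mixing can push values beyond the nominal range.
const s = Math.max(-1, Math.min(1, float32Samples[i]));
view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true); // true = little-endian
}
return view.buffer;
}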
The only two examples I've found that are clear and make sense are the following:
AWS Labs: https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js
The AWS resource is very good. It shows you how to export your recorded audio to "WAV format encoded as PCM". Amazon Lex, a transcription service offered by AWS, requires the audio to be PCM encoded and wrapped in a WAV container. You can merely adapt some of the code to make it work for you! AWS has some additional features, such as "downsampling", which allows you to change the sample rate without affecting the recording.
RecordRTC: https://github.com/muaz-khan/RecordRTC/blob/master/simple-demos/raw-pcm.html
RecordRTC is a complete library. You can, once again, adapt their code or find the snippet of code that encodes the audio to raw PCM. You could also implement their library and use the code as-is. Using the "desiredSampleRate" option for audio config with this library negatively affects the recording.
They are both excellent resources and you'll definitely be able to solve your question.
You should look into the MediaTrackConstraints.sampleSize property for the MediaDevices.getUserMedia() API. Using the sampleSize constraint, if your audio hardware permits, you can set the sample size to 16 bits.
As far as the implementation goes, well, that's what the links and Google are for...
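For example, a hedged sketch of asking for 16-bit samples via constraints; whether the browser and hardware actually honor the request varies:
// Request 16-bit samples; treat this as a hint, since support varies by browser/device.
navigator.mediaDevices.getUserMedia({ audio: { sampleSize: 16 } })
.then((stream) => {
const settings = stream.getAudioTracks()[0].getSettings();
console.log('actual sampleSize:', settings.sampleSize); // may be undefined if unsupported
})
.catch((err) => console.error('getUserMedia failed:', err));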
Here is some Web Audio API code that uses the microphone to capture and play back raw audio (turn down your volume before running this page). To see snippets of raw audio in PCM format, view the browser console. For kicks, it also sends this PCM into an FFT call to obtain the frequency domain as well as the time domain of the audio curve.
<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>capture microphone then show time & frequency domain output</title>
<script type="text/javascript">
var webaudio_tooling_obj = function () {
var audioContext = new AudioContext();
console.log("audio is starting up ...");
var BUFF_SIZE_RENDERER = 16384;
var SIZE_SHOW = 3; // number of array elements to show in console output
var audioInput = null,
microphone_stream = null,
gain_node = null,
script_processor_node = null,
script_processor_analysis_node = null,
analyser_node = null;
if (!navigator.getUserMedia)
navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia || navigator.msGetUserMedia;
if (navigator.getUserMedia){
navigator.getUserMedia({audio:true},
function(stream) {
start_microphone(stream);
},
function(e) {
alert('Error capturing audio.');
}
);
} else { alert('getUserMedia not supported in this browser.'); }
// ---
function show_some_data(given_typed_array, num_row_to_display, label) {
var size_buffer = given_typed_array.length;
var index = 0;
console.log("__________ " + label);
if (label === "time") {
for (; index < num_row_to_display && index < size_buffer; index += 1) {
var curr_value_time = (given_typed_array[index] / 128) - 1.0;
console.log(curr_value_time);
}
} else if (label === "frequency") {
for (; index < num_row_to_display && index < size_buffer; index += 1) {
console.log(given_typed_array[index]);
}
} else {
throw new Error("ERROR - must pass time or frequency");
}
}
function process_microphone_buffer(event) {
var i, N, inp, microphone_output_buffer;
// not needed for basic feature set
// microphone_output_buffer = event.inputBuffer.getChannelData(0); // just mono - 1 channel for now
}
function start_microphone(stream){
gain_node = audioContext.createGain();
gain_node.connect( audioContext.destination );
microphone_stream = audioContext.createMediaStreamSource(stream);
microphone_stream.connect(gain_node);
script_processor_node = audioContext.createScriptProcessor(BUFF_SIZE_RENDERER, 1, 1);
script_processor_node.onaudioprocess = process_microphone_buffer;
microphone_stream.connect(script_processor_node);
// --- enable volume control for output speakers
document.getElementById('volume').addEventListener('change', function() {
var curr_volume = this.value;
gain_node.gain.value = curr_volume;
console.log("curr_volume ", curr_volume);
});
// --- setup FFT
script_processor_analysis_node = audioContext.createScriptProcessor(2048, 1, 1);
script_processor_analysis_node.connect(gain_node);
analyser_node = audioContext.createAnalyser();
analyser_node.smoothingTimeConstant = 0;
analyser_node.fftSize = 2048;
microphone_stream.connect(analyser_node);
analyser_node.connect(script_processor_analysis_node);
var buffer_length = analyser_node.frequencyBinCount;
var array_freq_domain = new Uint8Array(buffer_length);
var array_time_domain = new Uint8Array(buffer_length);
console.log("buffer_length " + buffer_length);
script_processor_analysis_node.onaudioprocess = function() {
// copy the latest frequency and time domain data from the analyser
analyser_node.getByteFrequencyData(array_freq_domain);
analyser_node.getByteTimeDomainData(array_time_domain);
// log a few values from each domain (a real app might draw a spectrogram here)
if (microphone_stream.playbackState == microphone_stream.PLAYING_STATE) {
show_some_data(array_freq_domain, SIZE_SHOW, "frequency");
show_some_data(array_time_domain, SIZE_SHOW, "time"); // store this to record to aggregate buffer/file
}
};
}
}; // webaudio_tooling_obj = function() -- invoked by the "start audio" button below
</script>
</head>
<body>
<p>Volume</p>
<input id="volume" type="range" min="0" max="1" step="0.1" value="0.0"/>
<p> </p>
<button onclick="webaudio_tooling_obj()">start audio</button>
</body>
</html>
NOTICE - before running the above in your browser, first turn down your volume, as the code both listens to your microphone and sends real-time output to the speakers, so naturally you will hear feedback --- as in Jimi Hendrix feedback.