- Create an empty directory named SpeechTextApp, then open a terminal at the project root and initialize the Node project with the command below.
Note: Node.js must be installed on your system before you proceed.
npm init
- Fill out the required information to initialize the project.
- Create a file named index.js; this file will hold the main code for our application.
- After initializing the Node app, install the required packages for this project with the commands below.
npm install microsoft-cognitiveservices-speech-sdk@1.14.0 --save
npm install express@4.17.1 --save
npm install request@2.88.2 --save
npm install ejs@3.1.5 --save
npm install consolidate@0.16.0 --save
npm install https-proxy-agent@3.0.0 --save
npm install multer@1.4.2 --save
npm install mustache@4.0.1 --save
npm install readline@1.3.0 --save
npm install wavefile@11.0.0 --save
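After these installs, the dependencies section of the generated package.json should look roughly like this (the rest of the file comes from your npm init answers):
"dependencies": {
    "consolidate": "^0.16.0",
    "ejs": "^3.1.5",
    "express": "^4.17.1",
    "https-proxy-agent": "^3.0.0",
    "microsoft-cognitiveservices-speech-sdk": "^1.14.0",
    "multer": "^1.4.2",
    "mustache": "^4.0.1",
    "readline": "^1.3.0",
    "request": "^2.88.2",
    "wavefile": "^11.0.0"
}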
We will start with the HTML views for a single page. Create a new folder named "views" at the root of the project, then create the "index.html" and "404.html" files inside it and add the page markup to each.
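The full markup is not reproduced here, but as a minimal sketch, index.html has to line up with what the server code below expects: a multipart form with a file input named userFile, a language field named langsel, and an EJS block for the data object that the POST handler passes to res.render. The language options and styling here are assumptions:
<!DOCTYPE html>
<html>
<head>
    <link rel="stylesheet" href="project_styles.css">
</head>
<body>
    <!-- Field names must match upload.single('userFile') and req.body.langsel in index.js -->
    <form action="/" method="post" enctype="multipart/form-data">
        <select name="langsel">
            <option value="en-US">English (US)</option>
            <option value="hi-IN">Hindi (India)</option>
        </select>
        <input type="file" name="userFile" accept=".wav">
        <button type="submit">Convert</button>
    </form>
    <!-- Populated when the POST handler calls res.render with { data: ... } -->
    <% if (typeof data !== 'undefined' && data.status === 'completed') { %>
        <p><%= data.contents %></p>
    <% } %>
</body>
</html>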
Next, add a "public" folder at the root of the project. Inside it, add the app stylesheet named "project_styles.css" and the "Bootstrap" CSS file downloaded from GitHub. Then create "js" and "uploads" folders inside the public directory, and place the minified jQuery file inside the js folder.
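At this point the project layout should look roughly like this (the Bootstrap and jQuery file names are placeholders for whichever versions you downloaded):
SpeechTextApp/
├── index.js
├── package.json
├── public/
│   ├── project_styles.css
│   ├── bootstrap.min.css
│   ├── js/
│   │   └── jquery.min.js
│   └── uploads/
└── views/
    ├── index.html
    └── 404.html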
Go back to the root directory of the project and open the "index.js" file we created earlier; it is the heart and soul of our web app. Build it up with the steps below.
Import the required modules:-
// Web framework, JSON body parsing, and file-upload handling
var express = require("express");
var bodyParser = require('body-parser');
var multer = require('multer');
var path = require('path');
var fs = require("fs");
// Template-engine consolidation layer
var engine = require('consolidate');
// WAV parsing/resampling and the Azure Speech SDK
const WaveFile = require('wavefile').WaveFile;
var sdk = require("microsoft-cognitiveservices-speech-sdk");
Define the App Variables & Configuration:-
var app = express();
var router = express.Router();
const port = process.env.PORT || 8081;

// Parse JSON bodies and serve static assets from ./public
app.use(bodyParser.json());
app.use(express.static('public'));

// Render .html views with EJS
app.engine('html', require('ejs').renderFile);
app.set('view engine', 'html');
app.set('views', __dirname + '/views');
var path1 = __dirname + '/views/';

// Pass-through middleware; extend here for logging or auth if needed
router.use(function (req, res, next) {
    next();
});
app.use("/", router);
Add the Cognitive Services Key and Region:-
Set the "subscriptionKey" and "serviceRegion" values, which you already saved during the initial setup of the Speech resource.
var subscriptionKey = "*******************************";
var serviceRegion = "*******";
var selectedlang; // language code chosen by the user on the form
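Hard-coding the key is fine for a local demo, but as a safer sketch you could read both values from environment variables instead; the names SPEECH_KEY and SPEECH_REGION here are assumptions, not part of the original project:
// Assumed environment variable names, e.g. set with:
//   export SPEECH_KEY=<your key>
//   export SPEECH_REGION=<your region>
var subscriptionKey = process.env.SPEECH_KEY;
var serviceRegion = process.env.SPEECH_REGION;
if (!subscriptionKey || !serviceRegion) {
    throw new Error("Set SPEECH_KEY and SPEECH_REGION before starting the app.");
}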
Declare the storage path:-
// Every upload is written to ./public/uploads and overwrites the
// previous one, since the file name is fixed to audio.wav
var storage = multer.diskStorage({
    destination: function (req, file, callback) {
        callback(null, './public/uploads');
    },
    filename: function (req, file, callback) {
        callback(null, "audio.wav");
    }
});
var upload = multer({ storage: storage });
Define the Routes:-
// GET method: serve the main page
app.get("/", function (req, res) {
    res.sendFile(path1 + "index.html");
});
// POST method: receive the uploaded audio, transcribe it, and return the text
app.post('/', upload.single('userFile'), function (req, res) {
    var htmlBody = req.body;
    selectedlang = htmlBody.langsel; // language code chosen on the form, e.g. "en-US"
    // Resample the uploaded WAV to 16000 Hz, the rate the Speech SDK expects
    let wav = new WaveFile(fs.readFileSync("./public/uploads/audio.wav"));
    wav.toSampleRate(16000);
    fs.writeFileSync("./public/uploads/16000Hz-file.wav", wav.toBuffer());
    var filename = "./public/uploads/16000Hz-file.wav"; // 16000 Hz, Mono
    // Pipe the resampled file into the SDK's push stream
    var pushStream = sdk.AudioInputStream.createPushStream();
    fs.createReadStream(filename).on('data', function (arrayBuffer) {
        pushStream.write(arrayBuffer.slice());
    }).on('end', function () {
        pushStream.close();
    });
    // Configure the recognizer with the audio stream, subscription, and language
    var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
    var speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
    speechConfig.enableDictation();
    speechConfig.speechRecognitionLanguage = selectedlang;
    var recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
    // Log progress dots while partial results arrive
    var processmsg = ".";
    recognizer.recognizing = (s, e) => {
        processmsg = processmsg + ".";
        console.log("processing ." + processmsg);
    };
    // Accumulate finalized phrases as they are recognized
    var outputmsg = "";
    recognizer.recognized = (s, e) => {
        if (e.result.reason == sdk.ResultReason.RecognizedSpeech) {
            outputmsg = outputmsg + e.result.text;
            console.log(`RECOGNIZED: Text=${e.result.text}`);
        }
        else if (e.result.reason == sdk.ResultReason.NoMatch) {
            console.log("NOMATCH: Speech could not be recognized.");
        }
    };

    // When the session stops, save the transcript and send it back to the page
    recognizer.sessionStopped = (s, e) => {
        console.log("\nSession stopped event.");
        console.log("Final Output : ", outputmsg);
        // Note: this writes plain text into a file with a .docx extension
        fs.writeFileSync("./public/uploads/output.docx", outputmsg);
        console.log("write file");
        res.render(__dirname + "/views/index.html", { data: { status: "completed", contents: outputmsg } });
        console.log("Send status into html");
        recognizer.stopContinuousRecognitionAsync();
    };

    recognizer.startContinuousRecognitionAsync();
});
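Once the server is running, you can exercise this route from the command line as a quick check; sample.wav here stands in for any WAV file you have locally:
curl -F "userFile=@sample.wav" -F "langsel=en-US" http://localhost:8081/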
For more details about the Speech to Text SDK, follow the links below:-
Server Activation:-
Finally, we have to activate the server.
app.use("*", function (req, res) {
res.sendFile(path1 + "404.html");
});
app.listen(port, function () {
console.log(`Example app listening on port localhost: ${port} `);
});
The full code of "index.js" is given below:-
Now the app is ready to run. Open a terminal at the root of the project and run the command below:
node index.js
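If everything is wired up correctly, the terminal prints the listen message from app.listen:
Example app listening at http://localhost:8081
Open that URL in a browser, choose a language, upload a WAV file, and the transcribed text will be rendered back on the page.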