Let’s complete our application
Before we begin the second and final part, I hope you have read part 1 of the article. If not, please read the first part first, and we will meet again here!
Application Logic 💡
Step 2 – Adding Audio Generator Open AI API
- Now that we are able to translate the text, we will use this translated text as input for our audio generator and get the audio file on our web page.
- To do that let’s go through the Open AI documentation for Text-To-Speech (TTS) API and get our data.
- In order to use this API, you need to sign up on Open AI (free) and generate an Open AI API key for yourself.
- Since I have already signed up, I have my secret key to make the API call.
- Now this is the API configuration –
API Endpoint : https://api.openai.com/v1/audio/speech
API Method: POST
Request Body:
{
"model": "tts-1",
"input": "The quick brown fox jumped over the lazy dog.",
"voice": "onyx"
}
- Let’s integrate this API in our application file – index.js
// import express and axios
const express = require('express');
const axios = require('axios');
const app = express();
// .... previous code

// adding OPEN AI API
// Endpoint of the Open AI Text-To-Speech (TTS) API.
const openAIEndpoint = 'https://api.openai.com/v1/audio/speech';
// enter your open ai secret key as a string (replace the placeholder below)
const openAISecretKey = {ENTER_YOUR_OPEN_AI_SECRET_KEY};
// Request body parameters shared by every TTS call.
const textToSpeechAPIRequestBodyParams = {
  model: 'tts-1',
};
// axios request options for the TTS call.
const openAItextToSpeechOptions = {
  method: 'POST',
  url: openAIEndpoint,
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + openAISecretKey
  },
  data: textToSpeechAPIRequestBodyParams,
  // we will keep the responseType as stream to pipe the result on our web page
  responseType: 'stream'
};

app.post("/", async function (req, res) {
  try {
    // ... remaining code
  } catch (err) {
    // Never swallow the error silently: log it and answer the client,
    // otherwise the request would hang without a response.
    console.error("Error occurred:", err);
    res.status(500).json({ error: "Internal Error: " + err.message });
  }
});
- Now let’s call the API with the translated text
// import express and axios
const express = require('express');
const axios = require('axios');
const app = express();
// .... previous code

// adding OPEN AI API
// Endpoint of the Open AI Text-To-Speech (TTS) API.
const openAIEndpoint = 'https://api.openai.com/v1/audio/speech';
// enter your open ai secret key as a string (replace the placeholder below)
const openAISecretKey = {ENTER_YOUR_OPEN_AI_SECRET_KEY};
// Request body parameters shared by every TTS call; this object is never
// mutated, the per-request fields are added on a copy inside the handler.
const textToSpeechAPIRequestBodyParams = {
  model: 'tts-1',
};
// axios request options for the TTS call.
const openAItextToSpeechOptions = {
  method: 'POST',
  url: openAIEndpoint,
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + openAISecretKey
  },
  data: textToSpeechAPIRequestBodyParams,
  // we will keep the responseType as stream to pipe the result on our web page
  responseType: 'stream'
};

app.post("/", async function (req, res) {
  try {
    // ... previous code: `translatedText` holds the text returned by the translation step

    // a missing or empty translation cannot be turned into speech
    if (typeof translatedText !== 'string' || translatedText.length === 0) {
      return res.status(500).json({ error: 'Internal Error: Translated text is empty.' });
    }
    // log the translated text in console.
    console.log('Translated Text in Target language = ', translatedText);

    // Build the request body per request. Mutating the shared module-level
    // object would let concurrent requests overwrite each other's input
    // while this handler is awaiting the API.
    const textToSpeechRequestBody = {
      ...textToSpeechAPIRequestBodyParams,
      input: translatedText,
      voice: 'onyx',
    };
    console.log('request body for text-to-speech = ', textToSpeechRequestBody);

    const responseSpeech = await axios.request({
      ...openAItextToSpeechOptions,
      data: textToSpeechRequestBody
    });

    // set the header for content-type as audio file
    res.setHeader('Content-Type', 'audio/mpeg');
    // pipe() does not forward errors of the source stream, so handle them
    // here instead of leaving an unhandled 'error' event.
    responseSpeech.data.on('error', (streamErr) => {
      console.error('Error while streaming audio:', streamErr);
      res.destroy(streamErr);
    });
    // pipe the output to the response of the API.
    responseSpeech.data.pipe(res);
  } catch (err) {
    console.error("Error occurred:", err);
    res.status(500).json({ error: "Internal Error: " + err.message });
  }
});
- See the complete file index.js
Show File
// import express, axios and cors
const express = require('express');
const axios = require('axios');
const cors = require('cors');

const app = express();

// use the port number set up as the PORT environment variable,
// or fall back to the local port 8000
const PORT = process.env.PORT || 8000;

// parse JSON request bodies and allow cross-origin calls from the web page
app.use(express.json());
app.use(cors());

// open the listener to our PORT. Any activity at PORT will now be detected
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});

// text translate POST API call options. [from Rapid API]
const textTranslatorTranslateOptions = {
  method: 'POST',
  url: 'https://text-translator2.p.rapidapi.com/translate',
  headers: {
    'content-type': 'application/x-www-form-urlencoded',
    // Your rapid API key will automatically be populated below.
    'X-RapidAPI-Key': {YOUR_RAPID_API_KEY},
    'X-RapidAPI-Host': 'text-translator2.p.rapidapi.com'
  }
};
// map to store POST API request body parameters as key-value pair
const textTranslatorTranslateAPIRequestBodyParams = {};

// adding OPEN AI API
// Endpoint of the Open AI Text-To-Speech (TTS) API.
const openAIEndpoint = 'https://api.openai.com/v1/audio/speech';
// enter your open ai secret key as a string (replace the placeholder below)
const openAISecretKey = {ENTER_YOUR_OPEN_AI_SECRET_KEY};
// Request body parameters shared by every TTS call.
const textToSpeechAPIRequestBodyParams = {
  model: 'tts-1',
};
// axios request options for the TTS call.
const openAItextToSpeechOptions = {
  method: 'POST',
  url: openAIEndpoint,
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + openAISecretKey
  },
  data: textToSpeechAPIRequestBodyParams,
  // we will keep the responseType as stream to pipe the result on our web page
  responseType: 'stream'
};
// POST API call which will take input parameters from the form created
// in index.html
//
// Expects a JSON body { text, source, target }, translates `text` with the
// Text Translator API, converts the translation to speech with the Open AI
// TTS API and streams the audio back as 'audio/mpeg'.
// Responds with 400 and { error } when a field is missing or blank, and with
// 500 and { error } when translation or speech generation fails.
app.post("/", async function (req, res) {
  try {
    // Log only the parsed body: dumping the whole `req` object floods the
    // console and prints every request header.
    const reqBody = req.body || {};
    console.log("Req body : ", reqBody);

    // read the input from the user input
    const text = reqBody.text;
    const sourceLanguage = reqBody.source;
    const targetLanguage = reqBody.target;
    const voice = 'onyx';

    // handling error case when fields are missing, not strings, or blank
    const isBlank = (value) => typeof value !== 'string' || value.trim() === '';
    if (isBlank(text) || isBlank(sourceLanguage) || isBlank(targetLanguage)) {
      return res.status(400).json({
        error: "Bad Request: Text, Source Language, and Target Language are required" });
    }

    // Build the POST parameters per request. Mutating the shared
    // module-level object would let concurrent requests overwrite each
    // other's values while this handler is awaiting the API.
    const translateRequestBody = {
      ...textTranslatorTranslateAPIRequestBodyParams,
      text: text,
      source_language: sourceLanguage,
      target_language: targetLanguage,
    };

    // call the Text Translator API
    const responseTranslate = await axios.request({
      ...textTranslatorTranslateOptions,
      data: translateRequestBody
    });
    // log the API response in console.
    console.log('API Response', responseTranslate.data);

    // guard every level so an unexpected response shape yields a clear error
    const translatePayload = responseTranslate.data && responseTranslate.data.data;
    const translatedText = translatePayload && translatePayload.translatedText;
    if (typeof translatedText !== 'string' || translatedText.length === 0) {
      return res.status(500).json({ error: 'Internal Error: Translated text is empty.' });
    }
    // log the translated text in console.
    console.log('Translated Text in Target language = ', translatedText);

    // per-request TTS body, for the same reason as the translation body above
    const textToSpeechRequestBody = {
      ...textToSpeechAPIRequestBodyParams,
      input: translatedText,
      voice: voice,
    };
    console.log('request body for TTS = ', textToSpeechRequestBody);

    const responseSpeech = await axios.request({
      ...openAItextToSpeechOptions,
      data: textToSpeechRequestBody
    });

    // set the audio content type only once the audio stream is available
    res.setHeader('Content-Type', 'audio/mpeg');
    // pipe() does not forward errors of the source stream, so handle them
    // here instead of leaving an unhandled 'error' event.
    responseSpeech.data.on('error', (streamErr) => {
      console.error('Error while streaming audio:', streamErr);
      res.destroy(streamErr);
    });
    responseSpeech.data.pipe(res);
  } catch (err) {
    // log the axios error if occurred
    console.error("Error occurred:", err);
    res.status(500).json({ error: "Internal Error: " + err.message });
  }
});
- That’s it, we have integrated the Open AI API into our application. Our final step is to call our application from the HTML web page on the button click.
Step 3 – Final Change – Connect our backend application from frontend
Let’s make our frontend web page talk to our application.
- First, we will create an audio player in our HTML page just below the closing </form> tag, like this –
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!-- a document title is required for a valid HTML page -->
  <title>Techshshila Translator</title>
  <link rel="stylesheet" href="./check.css">
</head>
<body>
  <div id="mainContent">
    <div id="card">
      <h1>Techshshila Translator</h1>
      <!-- Create an input form -->
      <form id="translationForm">
        <!-- Create an input field to take input - Text -->
        <!-- ... previous code (textarea, drop-downs and button) -->
      </form>
      <!-- Add a result audio container; the player gets its source once the audio is received -->
      <div id="audioContainer">
        <audio id="audioPlayer" controls></audio>
      </div>
    </div>
  </div>
</body>
</html>
- Now we will write a JavaScript function and add it to our HTML page inside a <script> tag. This is our script function –
<script>
// this function is called when 'Generate Translated Audio' button is clicked
/**
 * Sends the entered text and the selected languages to the backend and
 * loads the returned audio into the audio player.
 *
 * While the request is running the button shows 'Loading...' and the form
 * carries the 'loading' class; both are always reset afterwards, whether the
 * request succeeded, was rejected by the server, or failed on the network.
 *
 * @returns {Promise<void>} settles once the page has been updated; request
 *   failures are reported on the page (or the console) instead of rejecting.
 */
async function translateText() {
  const idleButtonText = 'Generate Translated Audio';
  const form = document.getElementById('translationForm');
  const translateButton = document.getElementById('translateButton');
  const audioContainer = document.getElementById('audioContainer');
  const audioPlayer = document.getElementById('audioPlayer');

  // Reports a failure to the user. The error elements are looked up lazily
  // and guarded, so a page without them logs to the console instead of
  // throwing a TypeError.
  const showError = (message) => {
    const errorCard = document.getElementById('errorCard');
    const errorMessage = document.getElementById('error-message');
    if (errorCard) {
      errorCard.style.display = 'block';
    }
    if (errorMessage) {
      errorMessage.innerText = message;
    } else {
      console.error(message);
    }
  };

  // first, we will hide the audio container as we are waiting for the response
  audioContainer.style.display = 'none';
  audioPlayer.style.display = 'none';
  // release the previous audio Blob URL so it does not stay in memory
  const previousAudioUrl = audioPlayer.src;
  if (previousAudioUrl && previousAudioUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousAudioUrl);
  }
  // assigning null to `src` would make the player load the URL "null";
  // removing the attribute really clears the source
  audioPlayer.removeAttribute('src');

  // set the button text to 'Loading...' and add loading class to the form
  translateButton.innerText = 'Loading...';
  form.classList.add('loading');

  // log a message before the fetch request
  console.log('Starting translation request...');
  // prepare JSON data with the trimmed user text and the language codes
  const jsonData = {
    text: document.getElementById('textInput').value.trim(),
    source: document.getElementById('sourceSelect').value,
    target: document.getElementById('targetSelect').value
  };
  // log the request data
  console.log('Request Data:', jsonData);

  // since our app is running in local at PORT 8000, we will make a request to localhost.
  const localHost = 'http://localhost:8000/';

  try {
    // make an asynchronous POST request to localhost:8000
    const response = await fetch(localHost, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(jsonData)
    });

    if (response.status === 429) {
      showError("Request limit exceeded !");
      return;
    }

    if (!response.ok) {
      // the backend answers failures with JSON of the form { error: '...' };
      // such a body must not be handed to the audio player
      let message = 'Request failed with status ' + response.status;
      try {
        const errorBody = await response.json();
        if (errorBody && errorBody.error) {
          message = errorBody.error;
        }
      } catch (parseError) {
        // body was not JSON: keep the generic status message
      }
      showError(message);
      return;
    }

    // create a Blob URL for the audio file and set it as the player's source
    audioPlayer.src = URL.createObjectURL(await response.blob());
    // display the audio container and the audio element again
    audioContainer.style.display = 'block';
    audioPlayer.style.display = 'inline';
  } catch (error) {
    // network failure or similar; Error objects expose `message`, not `text`
    showError(error.message);
  } finally {
    // reset the button text and remove the loading class on every path
    translateButton.innerText = idleButtonText;
    form.classList.remove('loading');
  }
}
</script>
- Now let’s add the above function to our html page. See below –
Show File
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!-- a document title is required for a valid HTML page -->
  <title>Techshshila Translator</title>
  <link rel="stylesheet" href="./check.css">
</head>
<body>
<div id="mainContent">
<div id="card">
<h1>Techshshila Translator</h1>
<script>
// this function is called when 'Generate Translated Audio' button is clicked
/**
 * Sends the entered text and the selected languages to the backend and
 * loads the returned audio into the audio player.
 *
 * While the request is running the button shows 'Loading...' and the form
 * carries the 'loading' class; both are always reset afterwards, whether the
 * request succeeded, was rejected by the server, or failed on the network.
 *
 * @returns {Promise<void>} settles once the page has been updated; request
 *   failures are reported on the page (or the console) instead of rejecting.
 */
async function translateText() {
  const idleButtonText = 'Generate Translated Audio';
  const form = document.getElementById('translationForm');
  const translateButton = document.getElementById('translateButton');
  const audioContainer = document.getElementById('audioContainer');
  const audioPlayer = document.getElementById('audioPlayer');

  // Reports a failure to the user. The error elements are looked up lazily
  // and guarded, so a page without them logs to the console instead of
  // throwing a TypeError.
  const showError = (message) => {
    const errorCard = document.getElementById('errorCard');
    const errorMessage = document.getElementById('error-message');
    if (errorCard) {
      errorCard.style.display = 'block';
    }
    if (errorMessage) {
      errorMessage.innerText = message;
    } else {
      console.error(message);
    }
  };

  // first, we will hide the audio container as we are waiting for the response
  audioContainer.style.display = 'none';
  audioPlayer.style.display = 'none';
  // release the previous audio Blob URL so it does not stay in memory
  const previousAudioUrl = audioPlayer.src;
  if (previousAudioUrl && previousAudioUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousAudioUrl);
  }
  // assigning null to `src` would make the player load the URL "null";
  // removing the attribute really clears the source
  audioPlayer.removeAttribute('src');

  // set the button text to 'Loading...' and add loading class to the form
  translateButton.innerText = 'Loading...';
  form.classList.add('loading');

  // log a message before the fetch request
  console.log('Starting translation request...');
  // prepare JSON data with the trimmed user text and the language codes
  const jsonData = {
    text: document.getElementById('textInput').value.trim(),
    source: document.getElementById('sourceSelect').value,
    target: document.getElementById('targetSelect').value
  };
  // log the request data
  console.log('Request Data:', jsonData);

  // since our app is running in local at PORT 8000, we will make a request to localhost.
  const localHost = 'http://localhost:8000/';

  try {
    // make an asynchronous POST request to localhost:8000
    const response = await fetch(localHost, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(jsonData)
    });

    if (response.status === 429) {
      showError("Request limit exceeded !");
      return;
    }

    if (!response.ok) {
      // the backend answers failures with JSON of the form { error: '...' };
      // such a body must not be handed to the audio player
      let message = 'Request failed with status ' + response.status;
      try {
        const errorBody = await response.json();
        if (errorBody && errorBody.error) {
          message = errorBody.error;
        }
      } catch (parseError) {
        // body was not JSON: keep the generic status message
      }
      showError(message);
      return;
    }

    // create a Blob URL for the audio file and set it as the player's source
    audioPlayer.src = URL.createObjectURL(await response.blob());
    // display the audio container and the audio element again
    audioContainer.style.display = 'block';
    audioPlayer.style.display = 'inline';
  } catch (error) {
    // network failure or similar; Error objects expose `message`, not `text`
    showError(error.message);
  } finally {
    // reset the button text and remove the loading class on every path
    translateButton.innerText = idleButtonText;
    form.classList.remove('loading');
  }
}
</script>
<!-- Create an input form -->
<form id="translationForm">
  <!-- Create an input field to take input - Text -->
  <label for="textInput">Text:</label>
  <div id="textInputContainer">
    <!-- NOTE(review): onInput() is not defined in the scripts shown in this file - confirm it is provided elsewhere -->
    <textarea id="textInput" name="text" oninput="onInput()" placeholder="Enter Text To Translate"></textarea>
  </div>
  <!-- Create an empty drop-down field to select Source Language -->
  <label for="sourceSelect">Source:</label>
  <select id="sourceSelect" name="source" required>
  </select>
  <!-- Create an empty drop-down field to select Target Language -->
  <label for="targetSelect">Target:</label>
  <select id="targetSelect" name="target" required>
  </select>
  <script type="module">
    // Fill both language drop-downs from the shared language list
    import { languageData } from "./constants.js";

    const sourceSelect = document.getElementById("sourceSelect");
    const targetSelect = document.getElementById("targetSelect");

    // Builds one <option> for a language entry (value = code, label = name)
    const createLanguageOption = ({ code, name }) => {
      const option = document.createElement("option");
      option.value = code;
      option.text = name;
      return option;
    };

    // Each select needs its own option element, so one is created per drop-down
    for (const language of languageData.data.languages) {
      sourceSelect.appendChild(createLanguageOption(language));
      targetSelect.appendChild(createLanguageOption(language));
    }
  </script>
  <!-- Add a button to click to generate audio file -->
  <button id="translateButton" type="button"
          onclick="translateText()">Generate Translated Audio</button>
</form>
<!-- Result audio container -->
<div id="audioContainer">
<audio id="audioPlayer" controls></audio>
</div>
</div>
</div>
</body>
</html>
Time to celebrate 🥳! We have built our own Text to Speech application!