In this article, I have attempted to create a web page to talk to Chat GPT with speech-to-text and text-to-speech browser capabilities.
Introduction
This application is my attempt to create the smallest possible client app to talk to Chat GPT in JavaScript. My goal is to demonstrate how to use the Chat GPT API together with the browser's speech-to-text and text-to-speech capabilities. This means that you can talk to your browser and your browser will talk back to you.
Using the Code
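- Get OPENAI_API_KEY from https://beta.openai.com/account/api-keys.
- Open ChatGPT.js and add the API key to the first line. (Please note that in a real app, this key will need to be protected, e.g., encrypted or kept on a server, because anything in a client-side script is visible to every user of the page.)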
Here is the code. Basically, it uses XMLHttpRequest to post JSON to OpenAI. Endpoint: https://api.openai.com/v1/completions (or https://api.openai.com/v1/chat/completions for the gpt-3.5-turbo models).
Code for ChatGPT.js
// OpenAI API key, sent by Send() as the "Authorization: Bearer" token.
// Paste your key between the quotes. NOTE: everything in this script is
// readable by anyone who can load the page.
var OPENAI_API_KEY = "";
// Set to true by OnLoad() when the browser exposes window.speechSynthesis.
var bTextToSpeechSupported = false;
// NOTE(review): not read or written by any function visible in this file.
var bSpeechInProgress = false;
// webkitSpeechRecognition instance; created on first use by SpeechToText().
var oSpeechRecognizer = null;
// The most recent utterance handed to speechSynthesis.speak() by TextToSpeech().
var oSpeechSynthesisUtterance = null;
// Result of speechSynthesis.getVoices(); selVoices option values index into it.
var oVoices = null;
/**
 * Page-load handler (wired to <body onload>).
 *
 * Hides the "Listen" checkbox when the browser has no webkitSpeechRecognition,
 * and, when speechSynthesis is available, flags text-to-speech as supported
 * and fills the selVoices drop-down with the available voices.
 */
function OnLoad() {
    // Speech-to-text depends on the prefixed webkitSpeechRecognition constructor.
    if (!("webkitSpeechRecognition" in window)) {
        lblSpeak.style.display = "none";
    }

    if (!("speechSynthesis" in window)) {
        return;
    }
    bTextToSpeechSupported = true;

    // Rebuilds the voice list from scratch so that repeated "voiceschanged"
    // events do not append duplicate options.
    function FillVoices() {
        var sSelected = selVoices.value;
        oVoices = window.speechSynthesis.getVoices();
        selVoices.length = 0;
        for (var i = 0; i < oVoices.length; i++) {
            selVoices[selVoices.length] = new Option(oVoices[i].name, i);
        }
        // Keep the user's choice when that index still exists in the new list.
        if (sSelected != null && sSelected !== "" &&
            parseInt(sSelected, 10) < oVoices.length) {
            selVoices.value = sSelected;
        }
    }

    // Some browsers return the voices synchronously and never fire
    // "voiceschanged"; others return an empty list until the event fires.
    // Doing both covers either behavior.
    FillVoices();
    window.speechSynthesis.onvoiceschanged = FillVoices;
}
/**
 * onchange handler of the selLang drop-down: pushes the newly selected
 * language code to the speech recognizer, if one has been created.
 *
 * @param {HTMLSelectElement} o - The element that fired the event (unused;
 *     the value is read from selLang directly).
 */
function ChangeLang(o) {
    // Nothing to update until SpeechToText() has created the recognizer.
    if (!oSpeechRecognizer) {
        return;
    }
    oSpeechRecognizer.lang = selLang.value;
}
/**
 * Sends the text in txtMsg to the OpenAI API and appends the exchange to
 * txtOutput.
 *
 * Chat models (gpt-3.5-turbo*, gpt-4*) are posted to /v1/chat/completions as
 * a single user message; every other model is posted to /v1/completions as a
 * plain prompt. When the response arrives, the reply (or the API error
 * message) is appended to txtOutput and a non-empty reply is passed to
 * TextToSpeech(). An empty reply is shown as "No response".
 */
function Send() {
    var sQuestion = txtMsg.value;
    if (sQuestion == "") {
        alert("Type in your question!");
        txtMsg.focus();
        return;
    }

    spMsg.innerHTML = "Chat GPT is thinking...";

    var sModel = selModel.value;
    // Decide once which endpoint and payload shape the model needs.
    var bChatModel = sModel.indexOf("gpt-3.5-turbo") != -1 ||
        sModel.indexOf("gpt-4") != -1;
    var sUrl = bChatModel
        ? "https://api.openai.com/v1/chat/completions"
        : "https://api.openai.com/v1/completions";

    var oHttp = new XMLHttpRequest();
    oHttp.open("POST", sUrl);
    oHttp.setRequestHeader("Accept", "application/json");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("Authorization", "Bearer " + OPENAI_API_KEY);

    oHttp.onreadystatechange = function () {
        // 4 == DONE; earlier states carry no complete body.
        if (oHttp.readyState !== 4) {
            return;
        }

        spMsg.innerHTML = "";
        if (txtOutput.value != "") txtOutput.value += "\n";

        var oJson = {};
        try {
            // "|| {}" covers a body that is the JSON literal null.
            oJson = JSON.parse(oHttp.responseText) || {};
        } catch (ex) {
            txtOutput.value += "Error: " + ex.message;
            return;
        }

        if (oJson.error && oJson.error.message) {
            txtOutput.value += "Error: " + oJson.error.message;
            return;
        }
        if (!oJson.choices) {
            return;
        }

        // /v1/completions answers in choices[0].text, the chat endpoint in
        // choices[0].message.content. Guard against an empty choices array
        // and a missing or null content.
        var oChoice = oJson.choices[0] || {};
        var s = "";
        if (oChoice.text) {
            s = oChoice.text;
        } else if (oChoice.message && oChoice.message.content) {
            s = oChoice.message.content;
        }

        // For non-English languages, a reply of the form "<text>?\n<answer>"
        // is reduced to the part after the "?\n".
        if (selLang.value != "en-US") {
            var a = s.split("?\n");
            if (a.length == 2) {
                s = a[1];
            }
        }

        if (s == "") {
            // Show the fallback instead of silently printing nothing.
            txtOutput.value += "Chat GPT: No response";
            return;
        }
        txtOutput.value += "Chat GPT: " + s;
        TextToSpeech(s);
    };

    var data;
    if (bChatModel) {
        data = {
            "model": sModel,
            "messages": [
                {
                    "role": "user",
                    "content": sQuestion
                }
            ]
        };
    } else {
        var iMaxTokens = 2048;
        var sUserId = "1";
        var dTemperature = 0.5;
        data = {
            model: sModel,
            prompt: sQuestion,
            max_tokens: iMaxTokens,
            user: sUserId,
            temperature: dTemperature,
            frequency_penalty: 0.0,
            presence_penalty: 0.0,
            // Generation stops at the first "#" or ";" in the completion.
            stop: ["#", ";"]
        };
    }

    oHttp.send(JSON.stringify(data));

    // Echo the question into the transcript and clear the input box.
    if (txtOutput.value != "") txtOutput.value += "\n";
    txtOutput.value += "Me: " + sQuestion;
    txtMsg.value = "";
}
/**
 * Speaks the given text with the browser's speech synthesizer.
 *
 * Does nothing when text-to-speech is unsupported or the Mute box is checked.
 * While "Listen" is checked, the speech recognizer is stopped before speaking
 * and restarted when the utterance finishes or fails.
 *
 * @param {string} s - Text to speak.
 */
function TextToSpeech(s) {
    if (bTextToSpeechSupported == false) return;
    if (chkMute.checked) return;

    oSpeechSynthesisUtterance = new SpeechSynthesisUtterance();

    if (oVoices) {
        var sVoice = selVoices.value;
        if (sVoice != "") {
            // Option values are indexes into oVoices.
            var oVoice = oVoices[parseInt(sVoice, 10)];
            if (oVoice) {
                oSpeechSynthesisUtterance.voice = oVoice;
            }
        }
    }

    // Restarts recognition once speaking is over.
    function ResumeListening() {
        if (oSpeechRecognizer && chkSpeak.checked) {
            oSpeechRecognizer.start();
        }
    }
    oSpeechSynthesisUtterance.onend = ResumeListening;
    // A failed utterance fires "error" instead of "end"; without this the
    // recognizer stopped below would never be restarted.
    oSpeechSynthesisUtterance.onerror = ResumeListening;

    // Stop listening while the page is talking.
    if (oSpeechRecognizer && chkSpeak.checked) {
        oSpeechRecognizer.stop();
    }

    oSpeechSynthesisUtterance.lang = selLang.value;
    oSpeechSynthesisUtterance.text = s;
    window.speechSynthesis.speak(oSpeechSynthesisUtterance);
}
/**
 * onclick handler of the Mute checkbox: hides the voice selector while muted
 * and shows it again otherwise.
 *
 * @param {boolean} b - Current checked state of the Mute checkbox.
 */
function Mute(b) {
    // An empty string restores the element's default display value.
    selVoices.style.display = b ? "none" : "";
}
/**
 * onclick handler of the "Listen" checkbox.
 *
 * On first use, creates a continuous webkitSpeechRecognition in the language
 * selected in selLang and starts it. On later calls, starts or stops the
 * existing recognizer according to the checkbox state.
 *
 * Interim transcripts are shown greyed-out in the idText element; each final
 * transcript is copied into txtMsg and submitted with Send().
 */
function SpeechToText() {
    if (oSpeechRecognizer) {
        if (chkSpeak.checked) {
            oSpeechRecognizer.start();
        } else {
            oSpeechRecognizer.stop();
        }
        return;
    }

    // Escapes text for safe insertion into innerHTML.
    function EscapeHtml(sText) {
        return sText
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;");
    }

    oSpeechRecognizer = new webkitSpeechRecognition();
    oSpeechRecognizer.continuous = true;
    oSpeechRecognizer.interimResults = true;
    oSpeechRecognizer.lang = selLang.value;

    oSpeechRecognizer.onresult = function (event) {
        var interimTranscripts = "";
        // Only results from resultIndex onward changed in this event.
        for (var i = event.resultIndex; i < event.results.length; i++) {
            var transcript = event.results[i][0].transcript;
            if (event.results[i].isFinal) {
                txtMsg.value = transcript;
                Send();
            } else {
                // String.replace returns a new string, so the result has to
                // be used; every newline becomes a line break.
                interimTranscripts += EscapeHtml(transcript).replace(/\n/g, "<br>");
            }
        }
        var oDiv = document.getElementById("idText");
        oDiv.innerHTML = '<span style="color: #999;">' +
            interimTranscripts + '</span>';
    };

    oSpeechRecognizer.onerror = function (event) {
        // Surface the failure (e.g. "not-allowed", "no-speech") instead of
        // dropping it silently.
        console.warn("Speech recognition error: " + event.error);
    };

    // Attach the handlers before starting so no early event is missed.
    oSpeechRecognizer.start();
}
Code for the HTML Page ChatGPT.html
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Declare the encoding explicitly; the page displays text in several languages. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Chat GPT</title>
    <script src="ChatGPT.js?v=15"></script>
</head>
<!-- OnLoad() hides unsupported speech controls and fills the voice list. -->
<body onload="OnLoad()">
    <div id="idContainer">
        <!-- Conversation transcript, appended to by Send(). -->
        <textarea id="txtOutput" rows="10" style="margin-top: 10px; width: 100%;"
            placeholder="Output" aria-label="Output"></textarea>
        <div>
            <button type="button" onclick="Send()" id="btnSend">Send</button>
            <label id="lblSpeak"><input id="chkSpeak" type="checkbox"
                onclick="SpeechToText()" />Listen</label>
            <label id="lblMute"><input id="chkMute" type="checkbox"
                onclick="Mute(this.checked)" />Mute</label>
            <!-- Model name sent to the API; gpt-3.5-turbo* uses the chat endpoint. -->
            <select id="selModel" aria-label="Model">
                <option value="text-davinci-003">text-davinci-003</option>
                <option value="text-davinci-002">text-davinci-002</option>
                <option value="code-davinci-002">code-davinci-002</option>
                <option value="gpt-3.5-turbo">gpt-3.5-turbo</option>
                <option value="gpt-3.5-turbo-0301">gpt-3.5-turbo-0301</option>
            </select>
            <!-- Language used for both speech recognition and speech synthesis. -->
            <select id="selLang" onchange="ChangeLang(this)" aria-label="Language">
                <option value="en-US">English (United States)</option>
                <option value="fr-FR">French (France)</option>
                <option value="ru-RU">Russian (Russia)</option>
                <option value="pt-BR">Portuguese (Brazil)</option>
                <option value="es-ES">Spanish (Spain)</option>
                <option value="de-DE">German (Germany)</option>
                <option value="it-IT">Italian (Italy)</option>
                <option value="pl-PL">Polish (Poland)</option>
                <option value="nl-NL">Dutch (Netherlands)</option>
            </select>
            <!-- Filled at runtime with the browser's speech-synthesis voices. -->
            <select id="selVoices" aria-label="Voice"></select>
            <!-- Status text ("Chat GPT is thinking..."). -->
            <span id="spMsg"></span>
        </div>
        <textarea id="txtMsg" rows="5" wrap="soft"
            style="width: 98%; margin-left: 3px; margin-top: 6px"
            placeholder="Input Text" aria-label="Input Text"></textarea>
        <!-- Interim speech-recognition transcript is rendered here. -->
        <div id="idText"></div>
    </div>
</body>
</html>
Points of Interest
Not all browsers support speech-to-text and text-to-speech. Chrome and Edge seem to support both, while Firefox seems to support only text-to-speech.
History
- 25th December, 2022: Version 1 created
- 27th December, 2022: Version 2 (added Model selector)
- 24th May, 2023: Version 3 (added Chat GPT4 support)