
// URL that textToSpeech() sends its POST request to.
const TTS_API_ENDPOINT = "https://api.cartesia.ai/tts/bytes"
// API key sent in the "X-API-Key" request header; undefined when the
// environment variable is not set.
// NOTE(review): REACT_APP_-prefixed variables are presumably inlined into the
// client bundle at build time — confirm this key is meant to be browser-visible.
const API_KEY = process.env.REACT_APP_CARTESIA_API_KEY

/**
 * Sends text to the TTS endpoint and resolves with the HTTP response.
 *
 * The text is first passed through sanitizeSpokenText. The request asks for
 * raw 32-bit float little-endian PCM at 24000 Hz. The response status is NOT
 * inspected here; callers must check it and read the body themselves.
 *
 * @param {string} finalResponse - Text to be spoken.
 * @param {string} [voiceId] - Identifier of the voice to use.
 * @returns {Promise<Response>} The fetch Response, returned as-is.
 */
export async function textToSpeech(finalResponse, voiceId = "79a125e8-cd45-4c13-8a67-188112f4dd22") {
  const transcript = sanitizeSpokenText(finalResponse);

  const requestHeaders = {
    "Cartesia-Version": "2024-06-30",
    "Content-Type": "application/json",
    "X-API-Key": API_KEY,
  };

  const payload = {
    model_id: "sonic-english",
    transcript,
    voice: { mode: "id", id: voiceId },
    output_format: {
      container: "raw",
      encoding: "pcm_f32le",
      sample_rate: 24000,
    },
  };

  const response = await fetch(TTS_API_ENDPOINT, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });

  return response;
}

/**
 * Encodes mono float samples as a 16-bit PCM WAV file.
 *
 * Writes the 44-byte RIFF/WAVE header followed by the samples converted by
 * floatTo16BitPCM (clamped to [-1, 1], little-endian signed 16-bit).
 *
 * @param {ArrayLike<number>} samples - Mono audio samples.
 * @param {number} [sampleRate=16000] - Sample rate in Hz written to the
 *   header. It must match the rate the samples were produced at, otherwise
 *   players will use the wrong speed (textToSpeech in this file requests
 *   24000 Hz audio, so pass 24000 for that data).
 * @returns {Blob} Blob of type "audio/wav" containing header and sample data.
 */
export function encodeWAV(samples, sampleRate = 16000) {
  const HEADER_BYTES = 44;
  const CHANNEL_COUNT = 1;
  const BYTES_PER_SAMPLE = 2;
  const blockAlign = CHANNEL_COUNT * BYTES_PER_SAMPLE;
  const dataBytes = samples.length * BYTES_PER_SAMPLE;

  const buffer = new ArrayBuffer(HEADER_BYTES + dataBytes);
  const view = new DataView(buffer);

  /* RIFF identifier */
  writeString(view, 0, 'RIFF');
  /* RIFF chunk length: everything after this 8-byte chunk header */
  view.setUint32(4, HEADER_BYTES - 8 + dataBytes, true);
  /* RIFF type */
  writeString(view, 8, 'WAVE');
  /* format chunk identifier */
  writeString(view, 12, 'fmt ');
  /* format chunk length */
  view.setUint32(16, 16, true);
  /* sample format (1 = uncompressed PCM) */
  view.setUint16(20, 1, true);
  /* channel count */
  view.setUint16(22, CHANNEL_COUNT, true);
  /* sample rate */
  view.setUint32(24, sampleRate, true);
  /* byte rate (sample rate * block align) */
  view.setUint32(28, sampleRate * blockAlign, true);
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, blockAlign, true);
  /* bits per sample */
  view.setUint16(34, BYTES_PER_SAMPLE * 8, true);
  /* data chunk identifier */
  writeString(view, 36, 'data');
  /* data chunk length */
  view.setUint32(40, dataBytes, true);

  floatTo16BitPCM(view, HEADER_BYTES, samples);

  return new Blob([view], { type: 'audio/wav' });
}

/**
 * Strips markdown-style markup from text before it is sent for speech
 * synthesis.
 *
 * Removes asterisks, numbered-list and bullet markers at line starts, and a
 * set of symbols that should not be read aloud, then collapses whitespace.
 * Whitespace is normalised AFTER the symbols are stripped so that removing a
 * symbol cannot leave a leading or doubled space behind.
 *
 * @param {string} text - Raw text; any falsy value yields ''.
 * @returns {string} Cleaned text. Each '!', '?' or '.' that is followed by
 *   whitespace or the end of the string gets one extra trailing space.
 */
function sanitizeSpokenText(text) {
  if (!text) {
    return '';
  }

  const cleaned = text
    // Remove asterisks
    .replace(/\*+/g, '')
    // Remove numbered list markers (e.g., "1. ", "2. "); needs the original
    // line breaks, so it must run before whitespace is collapsed
    .replace(/^\s*\d+\.\s*/gm, '')
    // Remove bullet points (also line-anchored)
    .replace(/^\s*[•·-]\s*/gm, '')
    // Remove other special characters that shouldn't be spoken
    .replace(/[_~`#@$%^&+=|<>{}[\]]/g, '')
    // Collapse whitespace last so gaps left by the removals above disappear
    .replace(/\s+/g, ' ')
    .trim();

  // Add an extra space after sentence punctuation as a natural pause
  return cleaned.replace(/[!?.](?=\s|$)/g, '$& ');
}

/**
 * Writes a string into a DataView, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte position of the first character.
 * @param {string} string - Text to write; each char code is stored via setUint8.
 */
function writeString(view, offset, string) {
  string.split('').forEach((character, position) => {
    view.setUint8(offset + position, character.charCodeAt(0));
  });
}

/**
 * Converts float samples to little-endian signed 16-bit integers and writes
 * them consecutively into a DataView.
 *
 * Each sample is clamped to [-1, 1]; negatives are scaled by 0x8000 and
 * non-negatives by 0x7FFF.
 *
 * @param {DataView} output - Destination view.
 * @param {number} offset - Byte position of the first sample.
 * @param {ArrayLike<number>} input - Samples to convert.
 */
function floatTo16BitPCM(output, offset, input) {
  const BYTES_PER_SAMPLE = 2;
  for (let index = 0; index < input.length; index += 1) {
    const clamped = Math.max(-1, Math.min(1, input[index]));
    let scaled;
    if (clamped < 0) {
      scaled = clamped * 0x8000;
    } else {
      scaled = clamped * 0x7FFF;
    }
    output.setInt16(offset + index * BYTES_PER_SAMPLE, scaled, true);
  }
}