import axios from 'axios';
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';

// Azure credentials and endpoints, read from environment variables when this module is evaluated.
// NOTE(review): the REACT_APP_ prefix suggests a Create React App build, which would inline
// these values into the client bundle — confirm the keys are not exposed to end users.
const AZURE_SPEECH_KEY = process.env.REACT_APP_AZURE_SPEECH_KEY;
const AZURE_SPEECH_REGION = process.env.REACT_APP_AZURE_SPEECH_REGION;
const AZURE_OPENAI_API_KEY = process.env.REACT_APP_AZURE_OPENAI_API_KEY;
const AZURE_OPENAI_ENDPOINT = process.env.REACT_APP_AZURE_OPENAI_ENDPOINT;

// Shared speech configuration, built once at module load from the speech key and region
// and reused by speechToText and textToSpeech. The recognition language is fixed to 'en-US'.
const speechConfig = sdk.SpeechConfig.fromSubscription(AZURE_SPEECH_KEY, AZURE_SPEECH_REGION);
speechConfig.speechRecognitionLanguage = 'en-US';

/**
 * Runs one `recognizeOnceAsync` pass on the default microphone input, using the
 * module-level `speechConfig`.
 *
 * The recognizer is closed after the promise has been settled, on both the
 * result and the error path.
 *
 * @returns {Promise<string>} Resolves with `result.text` when the result reason is
 *   `RecognizedSpeech`. Rejects with the string 'Speech not recognized' for any other
 *   reason, or with whatever value the SDK passes to its error callback.
 */
export const speechToText = () =>
  new Promise((resolve, reject) => {
    const microphoneInput = sdk.AudioConfig.fromDefaultMicrophoneInput();
    const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, microphoneInput);

    const handleResult = (recognition) => {
      const wasRecognized = recognition.reason === sdk.ResultReason.RecognizedSpeech;
      if (!wasRecognized) {
        reject('Speech not recognized');
      } else {
        resolve(recognition.text);
      }
      speechRecognizer.close();
    };

    const handleFailure = (failure) => {
      reject(failure);
      speechRecognizer.close();
    };

    speechRecognizer.recognizeOnceAsync(handleResult, handleFailure);
  });

/**
 * Synthesizes `text` with a new `SpeechSynthesizer` built from the module-level
 * `speechConfig` (kept for backward compatibility).
 *
 * The synthesizer is closed after the promise has been settled, on both the
 * result and the error path.
 *
 * @param {string} text - Text handed to `speakTextAsync`.
 * @returns {Promise<*>} Resolves with `result.audioData` when the result reason is
 *   `SynthesizingAudioCompleted`. Rejects with the string 'Speech synthesis canceled'
 *   for any other reason, or with the value passed to the SDK error callback.
 */
export const textToSpeech = async (text) => {
  const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);

  return new Promise((resolve, reject) => {
    const handleOutcome = (outcome) => {
      const completed = outcome.reason === sdk.ResultReason.SynthesizingAudioCompleted;
      if (!completed) {
        reject('Speech synthesis canceled');
      } else {
        resolve(outcome.audioData);
      }
      speechSynthesizer.close();
    };

    const handleFailure = (failure) => {
      reject(failure);
      speechSynthesizer.close();
    };

    speechSynthesizer.speakTextAsync(text, handleOutcome, handleFailure);
  });
};

/**
 * Sends a chat-completion request to the configured Azure OpenAI endpoint and
 * returns the first choice's message.
 *
 * @param {Array<object>} messages - Chat messages sent as the `messages` field.
 * @param {Array<object>} [functions] - Function definitions the model may call.
 *   When omitted or empty, neither `functions` nor `function_call` is sent, because
 *   the API rejects `function_call` without accompanying `functions`.
 * @param {string|object} [function_call='auto'] - Function-calling mode; only sent
 *   together with a non-empty `functions` array.
 * @returns {Promise<object>} The `message` object of the first returned choice.
 * @throws {Error} If the API key or endpoint is not configured, if the HTTP request
 *   fails (the axios error is logged and rethrown), or if the response has no choices.
 */
export const callOpenAI = async (messages, functions, function_call = 'auto') => {
  if (!AZURE_OPENAI_API_KEY) {
    throw new Error('Azure OpenAI API key is not set in environment variables');
  }
  if (!AZURE_OPENAI_ENDPOINT) {
    // Without this guard axios would post to an undefined URL.
    throw new Error('Azure OpenAI endpoint is not set in environment variables');
  }

  const payload = {
    messages,
    max_tokens: 300,
  };
  if (Array.isArray(functions) && functions.length > 0) {
    payload.functions = functions;
    payload.function_call = function_call;
  }

  try {
    const response = await axios.post(
      AZURE_OPENAI_ENDPOINT,
      payload,
      {
        headers: {
          'api-key': AZURE_OPENAI_API_KEY,
          'Content-Type': 'application/json',
        },
      }
    );

    const choices = response.data && response.data.choices;
    if (!Array.isArray(choices) || choices.length === 0) {
      throw new Error('Azure OpenAI API returned no choices');
    }
    return choices[0].message;
  } catch (error) {
    console.error('Error calling Azure OpenAI API:', error.response ? error.response.data : error.message);
    throw error;
  }
};

/**
 * Draws the current frame of a video element onto an off-screen canvas and
 * returns it as a JPEG data URL (quality 0.8).
 *
 * @param {HTMLVideoElement} videoElement - Video element to snapshot; the canvas is
 *   sized from its `videoWidth` / `videoHeight`.
 * @returns {Promise<string>} Resolves with the `FileReader` result for the JPEG blob.
 *   Rejects with the string 'No video element provided' when `videoElement` is falsy,
 *   with an `Error` when the canvas produces no blob, or with the `FileReader` error
 *   event when reading the blob fails.
 */
export const captureImage = (videoElement) => {
  return new Promise((resolve, reject) => {
    if (!videoElement) {
      reject('No video element provided');
      return;
    }

    const canvas = document.createElement('canvas');
    canvas.width = videoElement.videoWidth;
    canvas.height = videoElement.videoHeight;
    canvas.getContext('2d').drawImage(videoElement, 0, 0);

    // Convert to blob, then to a data URL.
    canvas.toBlob((blob) => {
      // toBlob may pass null when the image cannot be created. Reject here so
      // the promise settles instead of readAsDataURL throwing inside this
      // callback, where the promise could never observe the failure.
      if (!blob) {
        reject(new Error('Failed to capture image: the canvas produced no image data'));
        return;
      }

      const reader = new FileReader();
      reader.onloadend = () => {
        resolve(reader.result);
      };
      reader.onerror = reject;
      reader.readAsDataURL(blob);
    }, 'image/jpeg', 0.8);  // JPEG format with 0.8 quality
  });
};

/**
 * Attaches a camera stream to a video element and starts playback.
 *
 * Any stream already set on `videoElement.srcObject` has all of its tracks stopped
 * before a new stream is requested. Failures from `getUserMedia` or `play()` are
 * logged with `console.error` and not rethrown.
 *
 * @param {HTMLVideoElement} videoElement - Target element; nothing happens when falsy.
 * @param {string} [facingMode='user'] - Value used for the `facingMode` video constraint.
 * @returns {Promise<void>}
 */
export const setupCamera = async (videoElement, facingMode = 'user') => {
  if (!videoElement) {
    return;
  }

  const previousStream = videoElement.srcObject;
  if (previousStream) {
    for (const track of previousStream.getTracks()) {
      track.stop();
    }
  }

  const constraints = { video: { facingMode } };
  try {
    videoElement.srcObject = await navigator.mediaDevices.getUserMedia(constraints);
    await videoElement.play();
  } catch (cameraError) {
    console.error('Error accessing the camera:', cameraError);
  }
};

/**
 * Builds an `AvatarSynthesizer` from the configured speech key and region.
 *
 * The speech config uses the voice "en-US-JennyMultilingualNeural"; the avatar config
 * uses character 'lisa', style 'casual-sitting', a default `AvatarVideoFormat` and the
 * background colour '#FFFFFFFF'. Progress is logged to the console.
 *
 * @returns {Promise<object>} The newly created avatar synthesizer.
 * @throws Rethrows (after logging) any error raised while building the synthesizer.
 */
export const setupAvatar = async () => {
  try {
    console.log('Setting up avatar...');

    const synthesisConfig = sdk.SpeechConfig.fromSubscription(AZURE_SPEECH_KEY, AZURE_SPEECH_REGION);
    synthesisConfig.speechSynthesisVoiceName = "en-US-JennyMultilingualNeural";

    const lisaConfig = new sdk.AvatarConfig('lisa', 'casual-sitting', new sdk.AvatarVideoFormat());
    lisaConfig.backgroundColor = '#FFFFFFFF';

    const synthesizer = new sdk.AvatarSynthesizer(synthesisConfig, lisaConfig);
    console.log('Avatar synthesizer created successfully');
    return synthesizer;
  } catch (setupError) {
    console.error('Error in setupAvatar:', setupError);
    throw setupError;
  }
};

/**
 * Fetches avatar relay credentials from the regional speech endpoint and uses them
 * to open the avatar's WebRTC connection via `setupWebRTC`.
 *
 * Only the first entry of the returned `Urls` list is used as the ICE server URL.
 *
 * @param {object} synthesizer - Avatar synthesizer forwarded to `setupWebRTC`.
 * @returns {Promise<*>} Whatever `setupWebRTC` resolves with.
 * @throws Rethrows (after logging) any error from the token request or from `setupWebRTC`.
 */
export const startAvatarSession = async (synthesizer) => {
  try {
    console.log('Starting avatar session...');

    const relayTokenUrl = `https://${AZURE_SPEECH_REGION}.tts.speech.microsoft.com/cognitiveservices/avatar/relay/token/v1`;
    const requestOptions = {
      headers: {
        'Ocp-Apim-Subscription-Key': AZURE_SPEECH_KEY
      }
    };
    const { data: relay } = await axios.get(relayTokenUrl, requestOptions);
    console.log('Token fetched successfully');

    const [firstIceUrl] = [relay.Urls[0]];
    return await setupWebRTC(synthesizer, firstIceUrl, relay.Username, relay.Password);
  } catch (sessionError) {
    console.error('Error in startAvatarSession:', sessionError);
    throw sessionError;
  }
};

/**
 * Creates an RTCPeerConnection that uses the given ICE server, adds send/receive
 * video and audio transceivers, and starts the avatar on it.
 *
 * The connection is created here and only handed to the caller on success, so it is
 * closed on every failure path; otherwise nobody would hold a reference to release it.
 *
 * @param {object} synthesizer - Avatar synthesizer; its `startAvatarAsync` is awaited.
 * @param {string} iceServerUrl - ICE server URL.
 * @param {string} iceServerUsername - ICE server username.
 * @param {string} iceServerCredential - ICE server credential.
 * @returns {Promise<RTCPeerConnection>} The connection once the avatar has started.
 * @throws {Error} If `startAvatarAsync` rejects, or resolves with a reason other than
 *   `SynthesizingAudioCompleted` (the message includes the reason and error details).
 */
const setupWebRTC = async (synthesizer, iceServerUrl, iceServerUsername, iceServerCredential) => {
  let peerConnection = null;
  try {
    console.log('Setting up WebRTC...');
    peerConnection = new RTCPeerConnection({
      iceServers: [{
        urls: [iceServerUrl],
        username: iceServerUsername,
        credential: iceServerCredential
      }]
    });

    peerConnection.addTransceiver('video', { direction: 'sendrecv' });
    peerConnection.addTransceiver('audio', { direction: 'sendrecv' });

    console.log('Starting avatar...');
    const result = await synthesizer.startAvatarAsync(peerConnection);
    if (result.reason !== sdk.ResultReason.SynthesizingAudioCompleted) {
      console.error('Failed to start avatar. Reason:', result.reason);
      console.error('Error details:', result.errorDetails);
      throw new Error(`Failed to start avatar. Reason: ${result.reason}, Details: ${result.errorDetails}`);
    }

    console.log('Avatar started successfully');
    return peerConnection;
  } catch (error) {
    console.error('Error in setupWebRTC:', error);
    if (peerConnection) {
      // Release the connection the caller will never receive.
      peerConnection.close();
    }
    throw error;
  }
};

/**
 * Runs one voice turn: listens for speech, asks the model for a reply (capturing a
 * camera frame if the model requests it) and optionally speaks the reply through
 * the avatar.
 *
 * @param {object} userProfile - Serialized into the system prompt.
 * @param {*} memories - Serialized into the system prompt.
 * @param {Array<object>} previousMessages - Earlier chat messages, placed between the
 *   system prompt and the new user message.
 * @param {boolean} audioEnabled - When truthy, responses are spoken via `speakWithAvatar`.
 * @param {HTMLVideoElement} videoElement - Passed to `captureImage` when the model
 *   calls `capture_image`.
 * @param {*} audioContext - Accepted for interface compatibility; not used here.
 * @param {object} avatarSynthesizer - Passed to `speakWithAvatar`.
 * @param {*} peerConnection - Accepted for interface compatibility; not used here.
 * @returns {Promise<{text: string, defaultResponse: string}>} The final reply text and
 *   the filler line spoken while capturing an image ('' when no image was captured).
 * @throws Rethrows (after logging) any error raised during the turn.
 */
export const handleVoiceInteraction = async (userProfile, memories, previousMessages, audioEnabled, videoElement, audioContext, avatarSynthesizer, peerConnection) => {
  try {
    const userInput = await speechToText();

    const messages = [
      {
        role: 'system',
        content: `You are an AI assistant named Ava. You are AI fashion and Skincare expert. Provide short answer as you are voice mode, only opt for longer answer if required. Do not add markdown or even add : this signs or any other in your responses. You can see images if the user asks you to look at something, you have function calling to capture images so when user ask anything visual capture image, don't ask user to upload image as this is live visual mode, user ask anything about visuals call the function for capturing image right away, we don't need delay, don't use numbers etc in responses because it's voice mode not text mode. 
          User Profile: ${JSON.stringify(userProfile)}
          User Memories: ${JSON.stringify(memories)}`
      },
      ...previousMessages,
      { role: 'user', content: userInput }
    ];

    let defaultResponse = "";

    const functions = [
      {
        name: 'capture_image',
        description: 'Capture an image using the device camera',
        parameters: { type: 'object', properties: {} }
      }
    ];

    const aiResponse = await callOpenAI(messages, functions);

    let finalResponse;

    if (aiResponse.function_call) {
      if (aiResponse.function_call.name === 'capture_image') {
        // Speak a filler line so the user is not left in silence during capture.
        defaultResponse = getRandomDefaultResponse();
        if (audioEnabled) {
          await speakWithAvatar(defaultResponse, avatarSynthesizer);
        }
        const imageData = await captureImage(videoElement);
        messages.push({
          role: 'function',
          name: 'capture_image',
          content: JSON.stringify({ image_url: imageData })
        });
        // Only one function round is handled per turn, so require a text answer
        // here. With 'auto' the model could request another function call and
        // return no content, which would then be spoken and returned as-is.
        const secondResponse = await callOpenAI(messages, functions, 'none');
        finalResponse = secondResponse.content;
      } else {
        finalResponse = "I'm sorry, I don't know how to perform that function.";
      }
    } else {
      finalResponse = aiResponse.content;
    }

    if (audioEnabled) {
      await speakWithAvatar(finalResponse, avatarSynthesizer);
    }

    return { text: finalResponse, defaultResponse };
  } catch (error) {
    console.error('Error in voice interaction:', error);
    throw error;
  }
};
// Escapes the XML-reserved characters so arbitrary text can be embedded in SSML.
const escapeSsmlText = (value) =>
  String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');

/**
 * Speaks `text` through the avatar synthesizer using the 'en-US-SaraNeural' voice.
 *
 * The text is XML-escaped before being placed in the SSML document, so characters
 * such as `&` or `<` in model output cannot produce malformed SSML.
 *
 * @param {string} text - Plain text to speak.
 * @param {object} avatarSynthesizer - Object whose `speakSsmlAsync(ssml)` returns a promise.
 * @returns {Promise<void>} Resolves when the result reason is `SynthesizingAudioCompleted`.
 * @throws {Error} 'Speech synthesis failed' for any other result reason; errors from
 *   `speakSsmlAsync` itself propagate unchanged.
 */
const speakWithAvatar = async (text, avatarSynthesizer) => {
  const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts' xml:lang='en-US'>
    <voice name='en-US-SaraNeural'>
      <mstts:leadingsilence-exact value='0'/>
      <mstts:trailingsilence-exact value='0'/>
      <mstts:express-as style="auto-predict">
        ${escapeSsmlText(text)}
      </mstts:express-as>
    </voice>
  </speak>`;

  const result = await avatarSynthesizer.speakSsmlAsync(ssml);
  if (result.reason !== sdk.ResultReason.SynthesizingAudioCompleted) {
    throw new Error('Speech synthesis failed');
  }
};
/**
 * Picks, uniformly at random via `Math.random`, one of ten filler lines telling the
 * user that the camera view is being looked at.
 *
 * @returns {string} One of the canned responses.
 */
function getRandomDefaultResponse() {
  const fillerLines = [
    "I'm looking at what you're showing me right now.",
    "I can see your camera feed. What would you like me to focus on?",
    "I'm analyzing the live image from your camera.",
    "I'm examining what's in front of your camera at the moment.",
    "I'm observing the current scene through your device's camera.",
    "I'm taking in the visual information from your camera feed.",
    "I'm processing the live video input from your device.",
    "I'm focusing on what your camera is pointing at right now.",
    "I'm interpreting the real-time visual data from your camera.",
    "I'm assessing the current view from your device's camera."
  ];

  const chosenIndex = Math.floor(Math.random() * fillerLines.length);
  return fillerLines[chosenIndex];
}
