[
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_F034.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There is also a noticeable emphasis on certain words, suggesting strong feelings. Additionally, there may be some trembling in the voice, although it's not prominent, which further supports the inference of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_F031.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a weight of sadness, evident from the slow pace and low pitch of her voice. The tears in her eyes suggest a deep emotional pain, and the sniffle indicates she is trying to hold back her emotions. Additionally, the use of the phrase 'for heaven's sake' implies a sense of desperation or pleading, further enhancing the mood of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M030.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow speech rate, and repeated sighs of 'ah.' There's also a noticeable pause before stating 'and it gave me an idea,' which indicates contemplation and sorrow. The emotional delivery suggests a deep sense of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F001.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The context may not be clear from the provided text alone, but these vocal cues suggest a surprised mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M006.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through an emphatic and upbeat tone, engaging manner of speaking, and a cheerful demeanor while discussing personal experiences. The use of 'um', 'ah', and positive words like 'camping' suggest a relaxed and joyful attitude. There are no signs of sadness or distress in the vocal expressions or pauses."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_M026.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits a range of excitement through their vocal expressions and body language. The heightened pitch and quicker pace of speech indicate excitement. Additionally, there are instances of emphatic pauses and louder volume, further emphasizing their excitement. Furthermore, the presence of crying or sobbing sounds suggests an intense emotional state, contributing to the overall excitement conveyed by the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_F016.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and tears in her voice. The emotional delivery indicates she is trying to avoid remembering or hoping for something, which resonates with feelings of sorrow or loss."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F000.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional cues indicative of fear:\n\n1. Crying: The presence of tears in the speaker's voice suggests distress or fear.\n2. Changes in tone: There is a noticeable shift in the speaker's tone from a normal speaking pace to one that likely indicates anxiety or fear.\n3. Speech rate: The speaker's speech rate may increase, reflecting a heightened state of fear or panic.\n4. Pauses: Short hesitations or pauses in the speech can indicate uncertainty or fear.\n5. Emphasis: The speaker places more emphasis on certain words, possibly indicating areas of concern or fear.\n6. Stress: There may be an increased pitch and volume in the speaker's voice, suggesting stress and fear.\n7. Voice trembling: A trembling voice is a common physical reaction to fear or anxiety.\n\nThese combined auditory elements suggest that the speaker is experiencing fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M001.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating sadness. There might be a hint of a tear in their voice or a subtle change in pitch and volume that usually reflects sadness. Additionally, the speaker might take a moment to pause before speaking, suggesting contemplation or sorrow. The emotional delivery can also be slow-paced, reflecting a more somber demeanor."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_M006.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their struggle with financial constraints, as indicated by the expression 'I can't afford to go back to school.' This statement conveys a sense of helplessness or distress due to inability to financially support educational pursuits."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M015.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their emotional tone, which likely includes a heightened pitch, faster pace, and possibly some hesitations or stuttering, indicating they are upset or agitated about the situation discussed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_M015.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their loud and emphatic speech, including a raised voice and a longer pause at the beginning of the sentence. Additionally, there's a noticeable tremble in their voice, suggesting irritation or agitation. The emotional intensity and loud speaking style further support this interpretation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_F008.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone likely reflects frustration due to the context of not being able to fill out a form, as indicated by the emotion-related words 'frustrated' and 'angry'. Additionally, there might be a sense of helplessness or annoyance conveyed through the tone, especially if the speaker sounds irritated about having to ask someone who is busy."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M031.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating increased vocal intensity and a lack of control over their voice. There's also a noticeable tremble in the voice, suggesting a heightened emotional state. Additionally, the short, choppy manner of speaking and the quick pace can further emphasize feelings of anger and frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M005.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent emotional expressions like crying or laughter. The pace and volume of the speech remain consistent, indicating a lack of emotional fluctuations. There are no noticeable pauses or hesitations, suggesting the delivery is smooth and composed. The steady rhythm and pitch suggest a calm and neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_F013.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker expresses frustration through their tone, which likely sounds irritated or angry, especially when they mention the fish dying. The heightened pitch and quicker pace of speech can indicate an increase in frustration. Additionally, there may be instances of pauses or hesitation, suggesting that the speaker is struggling to articulate their thoughts on the subject."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_F014.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and volume of speech, lack of heavy breathing or sighing, and a consistent tone without any strong emotional fluctuations. There might be subtle variations in pitch due to normal speech patterns but no distinct signs of crying, laughter, or other emotional expressions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_M032.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and emphatic pronunciation. There's also a noticeable lack of pauses and an energetic tone, reflecting an excited mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_M009.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow pace, and low pitch. The emotional delivery includes pauses and a sigh, indicating a sense of longing or disappointment. There's also an emphasis on certain words ('you know', 'it'), suggesting deep contemplation or frustration. Additionally, the mention of not coming back and the belief that the person might still have hope ('we believed with them') adds layers of complexity and emotional depth to the sadness expressed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M033.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no discernible changes in pitch or intensity. There are no emotional cues such as crying or laughter, and the pace of speech is steady. The use of filler words like 'um' indicates a normal speaking pattern without any signs of distress or discomfort."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M039.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no noticeable changes in pitch or volume. There are no emotional cues such as crying or laughter, and the pace of speech is steady, indicating a calm and composed demeanor."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F036.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through a forceful and rapid speech pace, loud and aggressive tone, and by emphasizing certain words with heavy stress. There's also noticeable trembling in the voice, indicating strong emotions. Additionally, there's an audible display of frustration through interrupted speech and crying out."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F028.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's sadness is indicated through a soft voice, slow speech rate, and tears in her eyes while speaking. The emotional delivery suggests a sense of sorrow or grief."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F053.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a smile in her voice. There's an absence of any signs of distress or sadness, indicating a positive emotional state. The brief and frequent pauses suggest she is comfortable and at ease while speaking. Furthermore, the lightness in her voice and the softening of her speech indicate contentment and joy."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F008.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and soft vocal expressions. The sniffle indicates a possible emotional response of distress or sorrow. There's also an increase in volume towards the end of the first sentence, suggesting heightened emotions. Additionally, the use of filler words like 'I don't know' suggests uncertainty and distress, contributing to the overall sad mood of the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_F000.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong positive or negative emotions such as laughter or crying. The tone is even and there are no significant changes in pitch or stress. Overall, the speaker maintains a calm demeanor throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_2_F010.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through a stern and displeased tone, indicating anger or annoyance towards the situation being discussed. The harsh choice of words and the quick pace of speech further emphasize this emotion. There are also instances of pauses and raised voices, which suggest irritation or exasperation. Additionally, the speaker's decision to pretend something is happening when it's not, along with her dismissive attitude towards others' opinions, indicates a strong sense of frustration or irritation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_F015.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits a range of emotional cues that suggest frustration:\n\n1. Crying sound: The presence of tears indicates an emotional state of distress or frustration.\n2. Laughter: The laughter heard towards the end of the clip may indicate a moment of release from tension or frustration.\n3. Changes in tone: There's a noticeable shift from a neutral to a slightly irritated tone as the speech progresses.\n4. Speech rate: The speed of speech might pick up, reflecting an escalation of frustration or agitation.\n5. Pauses: The occasional pauses could imply a struggle to maintain composure or think clearly under stress.\n6. Emphasis and stress: The heightened pitch and volume of certain words suggest areas of particular frustration or anger.\n7. Voice trembling: A quivering voice can be an indicator of inner turmoil and emotional distress, often associated with frustration or anger.\n\nOverall, these auditory indicators combine to paint a picture of a person experiencing frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F046.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain explicit indicators of happiness such as laughter or upbeat tempo; however, there's a sense of resolution and acceptance in the speaker’s voice, possibly suggesting a positive turn of events or making peace with a situation. The fact that she speaks softly ('low pitched') might indicate contemplation or calmness, while the sigh at the end could imply a release from tension or sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro06_M017.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low tone, and emotional delivery. The sigh indicates a sense of weariness or disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_F015.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tense and quickened pace, indicating they might be upset or agitated about the situation. The fact that they're male and in the 16-25 age range suggests typical traits of frustration in young men. Additionally, there's a noticeable instance of them sighing, which often conveys feelings of annoyance or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_F013.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion can be observed through their steady pace and normal volume. There are no signs of agitation or distress; the speech is slow-paced and deliberate, indicating a calm demeanor. The speaker maintains a level tone throughout, without any noticeable variations in pitch or intensity. Additionally, there are no discernible emotional cues such as sighs, laughter, or crying sounds, further supporting the perception of a neutral mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M025.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their emotional tone, which likely includes a heightened pitch and quicker pace, possibly indicating anger or agitation. Additionally, there may be instances of pauses or hesitations, suggesting indecision or frustration. The speaker's voice may also tremble, further amplifying the sense of distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M006.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, with no noticeable changes in pitch or volume. There are no discernible emotional cues such as crying, laughter, or voice trembling. The pace of speech is steady, indicating a calm and composed demeanor. Pauses are occasionally present, but they do not add any particular emotional emphasis to the speech. Overall, the neutral tone suggests a calm and balanced attitude."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_F005.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and quickened pace, indicating irritation or agitation. The emotionally heightened state is evident from the loud and emphatic speech, along with the crying sound that suggests a strong feeling of being overwhelmed or upset. The changes in tone and the speed at which they speak indicate a lack of patience and a heightened emotional state. Additionally, the pauses and the way the speaker emphasizes certain words suggest a struggle to maintain composure."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M002.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely sounds tense and possibly irritated. The situation described involves an unexpected event (a window breaking) that occurred at a specific time (four o'clock), which could be causing stress or annoyance. Additionally, there might be a sense of urgency or frustration in the speaker's voice due to the immediate reaction they had upon waking up and seeing the aftermath."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F011.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is persistent and unyielding, which often indicates frustration or irritation. There is also a noticeable lack of variation in pitch and a slightly elevated volume, suggesting an increase in agitation. Furthermore, the presence of crying sounds and a sniffle indicates that the emotion is poignant and charged with frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F014.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone with noticeable pauses and a slower speech rate. There are instances of sniffing, indicating sadness or distress. The emotional delivery seems subdued and perhaps resigned, reflecting the overall sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_M010.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent emotional expressions like crying or laughter. The pace and volume of the speech remain consistent, indicating no significant changes in mood. There are no discernible pauses or hesitations, suggesting smooth and composed delivery. The emphasis and stress are evenly distributed, further supporting the neutral emotion conveyed. Additionally, there's no indication of voice trembling or other physical signs of distress, reinforcing the perception of a neutral attitude."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F005.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a variety of vocal and non-verbal cues. The sigh indicates a sense of weariness or disappointment. Additionally, the slow pace and low pitch of the voice convey a feeling of melancholy. There's also a noticeable hesitation before speaking, which might suggest contemplation or distress. Furthermore, the emotional tone wavers slightly, contributing to the overall feeling of sadness. The softness and possible tremble in the voice suggest a depth of sadness and emotional vulnerability."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M043.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some energetic hand gestures or body movements. The heightened pitch and quicker pace indicate excitement. Also, there might be a brief hesitation before speaking which could suggest anticipation or excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_M013.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There is also a noticeable tremble in her voice, which further supports the emotion of anger. Additionally, the context of the sentence suggests an intense or frustrating situation, contributing to the overall angry mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F021.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some vocalizations like 'uh' and 'oh.' There's also a noticeable increase in pitch and volume towards the end of the sentence 'you make it uh you know right oh.' Additionally, the use of exclamation marks ('!') suggests strong feelings of excitement or surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_F008.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is elevated with a sense of urgency and frustration, indicating they are emotionally charged. There is an evident pause before the speech which might suggest contemplation or hesitation. Additionally, the emphatic way of saying 'there is no point' suggests strong feelings of disappointment or anger. The fact that the speaker is female and her English is not perfect also adds layers of complexity and authenticity to her emotional expression."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F023.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some vocal trembling or fluctuation in pitch. The use of exclamations like 'Oh' and 'horrible' also conveys a sense of astonishment or thrill, contributing to the overall excited mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_F026.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, indicating emotional distress or sorrow. The prolonged pauses between words suggest hesitation or deep thought, enhancing the sense of sadness. Additionally, there might be a hint of crying or sobbing, although these specific vocalizations are not directly mentioned, they are implied by the emotional state conveyed through the voice."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M030.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely sounds tense or irritated. Additionally, there may be instances of pauses or hesitation, suggesting they are struggling to articulate their thoughts clearly. The emotional state of the speaker could also be inferred through vocal indicators like voice trembling or changes in pitch and volume."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_M018.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating they may be upset or agitated about the situation being discussed. The presence of crying sounds suggests an emotional response that contributes to the overall sense of frustration. Additionally, the quick pace and choppy manner of speaking further emphasize the speaker's distress and frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_M004.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's sadness can be inferred from their slow pace and low tone, indicating a lack of energy and possibly a feeling of resignation or disappointment. The deliberate slowing down of speech suggests a desire to convey a sense of sorrow or disheartenment. Additionally, there might be a hint of struggle in maintaining a steady pace, further emphasizing the emotional weight of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M020.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some vocal trembles indicating heightened emotions. There's also a noticeable lack of pauses between words, suggesting an eagerness to communicate."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M027.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice trembling, slower pace, and low tone. The emotional delivery is slow-paced and heavy, indicating a sad mood. There's also an audible sniffle, reinforcing the sadness conveyed through the vocal expressions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M032.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, indicating emotional distress or sorrow. The slow pace and low pitch of the voice further emphasize the sad mood. Additionally, there's a noticeable hesitation before speaking, which could suggest contemplation or sadness. Furthermore, the content of the speech mentions being with someone else, which might imply a longing or absence of companionship contributing to the overall feeling of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M020.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional cues indicative of fear. Firstly, there is an instance of crying, which is a common response to distress or fear. Additionally, the speaker's voice may sound shaky or uncertain, reflecting a lack of confidence or fearfulness. There might be hesitations or pauses in her speech, suggesting she is unsure or scared. Furthermore, the pitch and volume of her voice could fluctuate, possibly indicating anxiety or panic. Lastly, the emotional context provided does not suggest a positive sentiment, reinforcing the idea that fear is present."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_M036.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration through their vocal expressions and body language. The sigh indicates a sense of weariness or disappointment, while the harsh tone and loud speaking volume suggest anger or frustration. Additionally, the rapid pace and possibly interrupted speech pattern further emphasize feelings of urgency or agitation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_F015.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio does not contain explicit indicators of excitement such as crying, laughter, or vocal changes typical for excited states. However, the energetic delivery and slightly quickened pace of the speech may convey a sense of excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F018.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in pitch or loudness. There are no discernible crying sounds, laughter, or other emotional indicators. The tone remains calm and composed throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M002.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The speaker's voice may sound shaky or unsure, reflecting the state of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M012.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is elevated with a sense of urgency and frustration, indicated by the quick pace and loudness of their speech. There are also instances of pauses and repeated phrases, suggesting irritation or difficulty in conveying their thoughts. The emotional delivery includes elements like sighing and crying, which further emphasize feelings of annoyance and distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M035.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their slow pace and low tone, indicating a lack of energy and possibly disappointment or grief. The prolonged pause before speaking ('Umm') further emphasizes their emotional state. Additionally, there's a hint of voice trembling which could suggest distress or vulnerability."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M000.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, indicating emotional distress or sorrow. The sigh at the beginning of the speech further emphasizes this emotion. There's also a noticeable pause before the speaker reveals the news about going to Iraq, suggesting contemplation or sadness associated with this revelation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F009.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a noticeable sadness, evident from the slow pace and low pitch of her speech. There is an audible sniffle, indicating she might be upset or emotional. Additionally, the speaker's tone appears to be subdued and perhaps melancholic, reflecting her sad mood. The presence of these vocal indicators suggests that she is feeling sad."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_M027.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits intense excitement through their passionate and rapid tone, loud volume, and emphatic speech. The crying sound indicates a deep emotional response, while the laughter suggests amusement or joy. There's also a noticeable pause before the laughter, adding to the dramatic effect. The overall energy and modulation of the voice convey a sense of eagerness and excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F013.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, with a clear elevation in pitch and possibly some vocalizations like sighs or laughter. There may also be instances of heightened volume and a more animated delivery."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M036.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised volume, rapid and forceful speech, and a tense tone. There's also an indication of irritation and dissatisfaction conveyed through their words."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M003.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's sadness is evident through their low and slow tone, the use of filler words like 'no' and 'of course,' and the sigh at the end of the sentence. The prolonged pause before the sigh also indicates a sense of sorrow or disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F000.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio does not contain explicit indicators of excitement such as crying or laughter. However, there is an increase in pitch and a faster speaking rate, which usually indicate excitement or agitation. Additionally, the use of words like 'okay' and 'news' suggests that the speaker might be sharing some exciting or important information."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M007.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice trembling, slow pace, and low tone. The sigh indicates a sense of weariness or disappointment. There's also an emotional pause before they start speaking, suggesting contemplation or grief."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M030.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, lacking any discernible changes in pitch or intensity. There are no emotional cues such as crying or laughter, and the pace of speech is steady, indicating a calm and composed demeanor."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M021.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid increase in pitch at the word 'perfect'. This change in tone indicates strong feelings of excitement or elation. Additionally, there might be a subtle tremble in the voice, suggesting a surge of emotions during the spoken phrase."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_F032.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement. Firstly, there is a joyful and energetic tone from the start until the end of the speech, reflecting a sense of elation. Additionally, the pace of the speech is relatively fast, with a speaking rate of 148.0 bpm, indicating heightened excitement or agitation. Furthermore, there are instances of laughter, which can be heard intermittently throughout the speech, contributing to an overall atmosphere of mirth and exuberance. The speaker also exhibits vocalizations like sighs and laughter, which often accompany feelings of excitement or joy. Moreover, the voice trembles slightly during certain parts of the speech, adding a layer of emotional depth and sincerity to the excitement conveyed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F033.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a heightened pitch, faster pace, and a forceful delivery. There's also an indication of irritation and agitation in her voice, reflecting a heightened emotional state consistent with anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_F003.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their emotional state, including crying and a change in tone towards the end of the speech, suggesting an escalation of emotions. The mention of 'life will start' in a hopeful or optimistic light may indicate a contrast with the frustration experienced earlier."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F009.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some vocal flourishes or modulation in pitch. There might be a noticeable increase in volume and possibly some hesitations or pauses that suggest excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_F024.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong positive or negative emotions like happiness or sadness. The tone is even and there are no discernible inflections indicating stress or relief. Also, there are no audible cues such as sighs, laughter, or crying sounds that could indicate emotional fluctuations."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M006.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several key emotional indicators that suggest excitement:\n\n1. Laughter: The speaker's laughter indicates amusement or joy.\n2. High-pitched voice: A higher pitch often conveys excitement or agitation.\n3. Speech rate: The quickened pace of speech suggests excitement or enthusiasm.\n4. Emphasis and stress: The heightened pitch and quicker pace of speech suggest an excited state.\n5. Voice trembling: Although subtle, the trembling voice may indicate nervousness or excitement.\n\nThese elements combined create an atmosphere of excitement within the audio."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M037.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain explicit indicators of happiness such as laughter or upbeat tempo; however, the tone is calm and neutral, suggesting a composed demeanor. There's no particular emphasis or stress on specific words, indicating a relaxed delivery. The pace of speech is slow but steady, contributing to an overall sense of tranquility. Since there are no discernible negative emotions or crying sounds, we can infer that the speaker might be expressing contentment or satisfaction."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_F016.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement. Firstly, there is a rapid increase in pitch and a heightened volume at the beginning of the speech, suggesting an excited or passionate state. Additionally, the use of exclamation marks like 'Oh' and 'Wow' indicates strong feelings. Furthermore, the energetic delivery and possibly overlapping words suggest excitement. Lastly, the short duration of the exclamation marks might indicate urgency or excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F022.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating her emotional state. The delivery is slow-paced and the voice may tremble slightly, suggesting distress or sorrow. There might be a hint of unintentional pauses or hesitations in her speech, further emphasizing her sadness. Additionally, if there were any audible tears or sniffles, it would support the argument that she is feeling sad."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M028.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some energetic hand gestures or facial expressions. The mention of an unexpected encounter in Africa could also contribute to the excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F017.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and tears in her voice. The emotional delivery indicates she is upset or sorrowful."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_F031.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The use of expletives further emphasizes the intensity of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_F021.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There's also a noticeable emphasis on certain words, suggesting strong feelings. The shortness and quick pace of the speech further contribute to the angry mood. Additionally, there might be some instances of pauses or hesitation, which could be indicative of frustration or irritation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M015.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tone, which likely includes a heightened pitch and possibly a faster speaking rate, indicating irritation or agitation. Additionally, there may be instances of pauses or hesitations, suggesting they are struggling to maintain composure or find the right words. The emotional tone, possibly tinged with sadness or annoyance, aligns with feelings of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_F017.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried speech, indicating they are likely upset or agitated about a past event. The repetition of 'you' suggests an emotional connection with the listener, possibly blaming them for something that happened. Additionally, there's a mention of not seeing a 'grunion,' which might be a minor detail but adds to the overall sense of frustration by introducing an unexpected element into the conversation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_F009.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and an emphatic 'Oh' when mentioning someone likes something, indicating a joyful or pleased demeanor. There's also a noticeable lack of tension or strain in the voice, suggesting comfort and positivity. Additionally, the light-hearted laughter indicates amusement or contentment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M029.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised volume, faster pace, and a forceful tone. There's also an indication of irritation and agitation in her voice."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_F014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, as indicated by the loud and emphatic speech, crying out, and a sharp increase in pitch at the end of the sentence ('this isn't rocket science this is just fish'). The heightened emotional state is conveyed through a combination of vocal expressions like crying and shouting, as well as non-verbal cues such as the change in tone and pitch."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_F014.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio indicates that the speaker is excited through various vocal expressions like a fast speech rate, loudness, and a rising pitch at the end of the sentence 'Sure why not.' Additionally, there's a noticeable hesitation before the answer, which could suggest contemplation or excitement. The use of contractions ('can't') also adds a sense of urgency or eagerness to the question."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_F010.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, lacking any prominent signs of emotion such as crying or laughter. The pace and volume of her speech remain consistent, indicating no significant changes in mood. There's no discernible stress on particular words or phrases, supporting the idea of a neutral emotional state. Furthermore, the lack of vocal tics or hesitations suggests she is trying to maintain composure."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their emotional tone, which likely includes a heightened pitch, faster pace, and possibly some hesitations or pauses. The mention of not wanting the other person to give up or move away suggests an emotional attachment that's being tested or strained. Additionally, there might be a hint of desperation or pleading in the speaker’s voice, further indicating feelings of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_F000.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong positive or negative emotions like happiness or sadness. The tone is even and there are no noticeable pauses or hesitations. However, the presence of a sniffle indicates a subtle emotional response, possibly indicating sadness or vulnerability under the neutral exterior."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F028.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice trembling, slower pace, and low tone. The emotional delivery is slow-paced and heavy, indicating sadness. There's also an instance of laughter, which might suggest a complex emotional state mixing with sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M028.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tense and hurried tone, along with instances of sighing and a rushed speech pattern. There's also an indication of frustration through the use of filler words like 'I don't know,' reflecting a sense of uncertainty or trouble. The emotional state of the speaker seems to be one of distress or annoyance."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M003.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud manner of speaking, indicating they are emotionally charged. The presence of crying or sobbing suggests an intense feeling of distress or anger. Laughter, although not continuous, indicates a momentary release of tension or frustration. The quickened pace and hesitations in speech suggest a sense of urgency or agitation. Additionally, the emphasis on certain words and the overall loudness of the voice further amplify the feeling of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F035.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating an increase in pitch and volume. There's also a noticeable pause before she speaks, suggesting contemplation or frustration. The emotional intensity may be further inferred from the expression 'you could go into the computer and you know,' where the repetition and emphasis on 'you know' can indicate irritation or annoyance. Additionally, the potential presence of crying sounds or a change in pitch and volume later in the speech could further support the interpretation of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M016.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, lacking any prominent signs of joy or distress. There are no audible cues of laughter or crying, and the speech rate is steady, indicating a calm and composed demeanor. The pause between the first and second phrases does not suggest any particular emotion but rather indicates a natural pause in speech delivery. Emphasis and stress are evenly distributed, contributing to an overall sense of neutrality. Additionally, there's no vocal tremble or other physical indicators of strong emotions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_F038.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's expression of anger is through an elevated volume, a faster speaking rate, and a forceful tone. There are also instances of loud sobbing and shouting, which indicate strong feelings of anger and frustration. The continuous and emphatic speech further emphasizes her anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_F019.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a noticeable sadness, evident from the slow pace and low pitch of her speech. There is an audible sniffle, indicating she might be crying, which is a strong indicator of sadness. Additionally, the speaker's tone appears subdued and melancholic, reflecting her sad mood. The prolonged pause before she speaks further emphasizes the sorrowful atmosphere."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F023.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, with no discernible changes in pitch or pace. There are no emotional cues such as crying or laughter, and the voice does not tremble or show any other signs of distress. The calm and composed manner of speaking indicates a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M019.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their raised and forceful tone, indicating anger or annoyance. The prolonged pause before speaking suggests hesitation or difficulty in managing emotions. Additionally, there's a noticeable increase in pitch and volume, which further emphasizes feelings of irritation or agitation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M005.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of happiness or sadness. There are no discernible crying sounds or laughter, indicating emotional stability. The pace and rhythm of the speech are regular, without any noticeable speeding up or slowing down. Slight variations in pitch can be perceived, contributing to the subtlety of the neutral mood. There are no pauses or hesitations in the speech, suggesting smooth and composed delivery. Overall, these auditory cues suggest that the speaker's emotions remain neutral."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F028.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio contains several indicators of happiness, including:\n\n1. Laughter: The speaker's laughter indicates amusement and joy.\n2. Changes in tone: There is a noticeable shift from a neutral to a happy tone when the speaker begins speaking about their brother-in-law.\n3. Speech rate: The speaker's speech rate increases slightly, suggesting excitement or happiness.\n4. Pauses: The speaker takes a brief pause before mentioning their brother-in-law, which could indicate they are thinking happily about the topic.\n5. Emphasis: The speaker places extra emphasis on the word 'brother-in-law,' possibly indicating affection or pride for him.\n6. Stress: There is no noticeable stress in the speaker's voice, contributing to an overall happy demeanor.\n\nOverall, these audio features suggest that the speaker is experiencing happiness while discussing their brother-in-law."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M000.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their emotional tone, which likely includes a heightened pitch, faster pace, and possibly some vocal disruptions like sighs or hiccups. There may also be a temporary pause before they continue speaking, indicating a moment of internal conflict or frustration. Additionally, the speaker's choice of words and phrasing suggests an intent to blame or accuse, further amplifying the sense of frustration conveyed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_F033.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low tone, and tears in her voice. The emotional delivery indicates she is upset or sorrowful."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M007.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried tone, indicating they are eager or agitated. The use of filler words like 'of course' and 'naturally' suggests impatience and annoyance. Additionally, there's a mention of crying sounds, which could be an emotional release linked to frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_F008.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no noticeable changes in pitch or speech rate. There are no emotional cues such as crying, laughter, or voice trembling. The pauses between words are consistent, indicating normal speech patterns without any signs of distress. Emphasis is on the content being discussed rather than emotional expression."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_F014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their raised volume, faster pace, and harsher tone, indicating irritation or anger about the subject being discussed. The emotional state is further indicated by the presence of crying sounds, which suggests a strong emotional response."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M000.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear:\n\n1. Voice trembling: A quivering or shaky voice indicates nervousness or fear.\n2. Changes in tone: The speaker's voice may fluctuate, rising or falling in pitch, reflecting anxiety or panic.\n3. Speed variations: The pace at which the speaker speaks can be irregular, suggesting they might be rushed or scared.\n4. Pauses: Incomplete sentences or hesitations ('Umm') suggest uncertainty or fear.\n5. Emphasis: Stressing certain words or phrases ('saw it') can reveal fear or concern.\n6. Laughter: Although not explicitly mentioned, laughter could imply nervousness or discomfort under fear.\n\nThese elements combined give an impression of the speaker being fearful."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F038.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a variety of vocal and non-verbal cues. The sniffle indicates a possible emotional response, often associated with sadness or distress. Additionally, the sigh at the end of the sentence 'I'm gonna miss you so much' conveys a sense of longing or sorrow. The tone of voice can also be perceived as subdued or melancholic, reflecting the emotional state of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M021.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional cues that indicate fear:\n\n1. Crying sound: There is a noticeable tearing up or sobbing sound in the speaker's voice, which is often associated with distress or fear.\n2. Changes in tone: The speaker's voice may fluctuate, possibly indicating anxiety or fearfulness.\n3. Speech rate: The speaker might speak quickly or hesitantly, reflecting their emotional state.\n4. Pauses: The presence of pauses in speech can suggest hesitation or fear.\n5. Emphasis and stress: The speaker may place extra emphasis on certain words, indicating worry or concern.\n6. Voice trembling: A trembling voice is often a sign of fear or nervousness.\n7. Other emotional characteristics: Other non-verbal indicators of fear could include changes in body language, increased heart rate, or rapid breathing.\n\nOverall, these features combined suggest that the speaker is experiencing fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_M002.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear:\n\n1. Changes in pitch and volume: The speaker's voice may fluctuate, rising or falling in pitch, indicating distress or anxiety.\n\n2. Speed variations: The pace at which the speaker speaks can be quick, reflecting a sense of urgency or fear.\n\n3. Tense vocal cords: There might be involuntary tensing of the vocal cords, leading to a strained or harsh voice quality.\n\n4. Exaggerated articulation: The speaker may have difficulty enunciating words clearly, possibly due to fear or nervousness.\n\n5. Pleading tone: The speaker's voice may carry a pleading quality, suggesting they are seeking understanding or help from others.\n\n6. Emotional cues: The presence of crying or sobbing indicates strong emotions of distress or fear.\n\n7. Stress on certain syllables: The speaker may place extra stress on certain syllables, reflecting their fear or anxiety about a situation.\n\n8. Pauses and hesitations: The speaker may pause frequently or hesitate before speaking, which could indicate uncertainty or fear.\n\n9. Voice trembling: A trembling voice suggests that the speaker is experiencing intense fear or nervousness.\n\nOverall, these auditory cues combined suggest that the speaker is experiencing fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_F020.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating emotional distress. The slow pace and low pitch of the voice suggest a sense of sorrow or disappointment. Additionally, there may be instances of pauses or hesitations, which often accompany sadness in speech. Furthermore, the speaker's choice of words and the overall content of what they say may convey a feeling of heartache or disillusionment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F006.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is elevated with a sense of urgency and frustration. There is an evident pause before she speaks, indicating contemplation or frustration. The heightened pitch and quicker pace of her speech convey a feeling of being overwhelmed or impatient about not being able to find a job. Additionally, there is a noticeable tremble in her voice, further amplifying the sense of distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M024.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and emphatic pronunciation. There's also a noticeable lack of pauses and a continuous flow of speech, which contributes to the overall sense of eagerness. The energetic delivery and enthusiastic tone further support this interpretation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M039.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, accelerated speech rate, and a relaxed pause at the beginning of the speech. Additionally, there's a light-hearted laughter indicated by the 'laughter' tag and a joyful emotion conveyed through the energetic delivery."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F008.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, lacking any prominent emotional expressions like crying or laughter. There's no change in pitch or speed; the voice remains steady, indicating a calm and composed demeanor. Pauses are few and short, suggesting an efficient speaking style without any emotional hesitations. The choice of words and phrases suggests a professional attitude, aiming to convey factual information rather than displaying personal emotions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M014.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised volume, faster pace, and a forceful tone. There's also an indication of frustration, as indicated by the use of expletives and the phrase 'I don't understand why'. The emotional intensity is further heightened by the presence of crying sounds and a sharp, questioning intonation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M029.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some vocal flourishes or modulation in pitch. There might be a temporary increase in volume or intensity, indicating heightened emotions. Additionally, any physical signs of agitation or increased heart rate could further support this assumption."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M037.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a smile in his voice. There's an audible lightness in his voice, suggesting joy or contentment. Additionally, the lack of any signs of distress or frustration in his delivery further supports the inference of happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M051.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, lacking any prominent signs of strong emotions like happiness or sadness. There are no discernible crying sounds or laughter, and the pace and rhythm of speech are steady, indicating a calm and composed demeanor. The lack of vocal expressions like sighs or hiccups further supports the idea of a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_F006.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any discernible emotional fluctuations or cues. There are no instances of crying, laughter, or other emotional expressions mentioned. The pace and rhythm of the speech are standard, without any noticeable speeding up or slowing down. Slight variations in pitch may indicate normal speech patterns, but no distinct emotional stress or strain is present in the vocal delivery."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M033.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is elevated with a sense of urgency and frustration, indicating they are emotionally charged. There's a noticeable increase in speaking rate, which often reflects heightened emotions. Additionally, there are instances of pauses and hesitations, suggesting indecision or anxiety. The emotional state is further indicated by the presence of crying sounds, which usually indicate distress or sorrow."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud manner of speaking, indicating anger or agitation. The fact that they're crying and raise their voice further emphasizes this emotion. There's also a noticeable pause before they continue speaking, suggesting contemplation or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M024.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a faster speaking rate. There's also a noticeable hesitation before the word 'Really?' which indicates uncertainty or surprise. Additionally, the speaker's voice may sound tense or shaky, contributing to the overall sense of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M023.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, lacking any discernible emotional cues such as crying or laughter. The pace and volume of her speech remain consistent, indicating no significant changes in mood or intensity. There are no noticeable pauses or hesitations, suggesting she is speaking deliberately and coherently. The articulation is clear, with no signs of strain or struggle in enunciation. These factors suggest that the speaker's emotional state remains neutral throughout the conversation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F041.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a series of vocal expressions including a fast speech rate, loud and forceful tone, and frequent pauses indicating irritation or agitation. Additionally, there is an emotional display through crying, which serves as a clear indicator of anger. The emphasis on certain words and the shaking voice further support this interpretation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F003.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several key elements that suggest excitement:\n\n1. Crying sound: The presence of a crying sound indicates strong emotions, often associated with excitement or joy.\n2. Laughter: The laughter heard in the audio is another clear indicator of excitement or amusement.\n3. Changes in tone: There's an increase in pitch and volume towards the end of the sentence 'Yeah!' which suggests a heightened state of excitement.\n4. Speech rate: The quickened pace of speech towards the end of the sentence 'Yeah!' also contributes to the excitement conveyed.\n5. Pauses: The brief pause before the word 'Yeah!' might indicate hesitation or anticipation leading up to the exciting moment.\n6. Emphasis and stress: The emphasis on the word 'Yeah!' and the stress placed on the syllables 'yeah' and 'eh' further emphasize the excitement expressed.\n7. Voice trembling: Although not explicitly audible, the trembling in the voice could be inferred from the context, adding to the perception of excitement.\n\nOverall, these audio features combine to create a lively and enthusiastic atmosphere, strongly suggesting that the speaker is feeling excited."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F012.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration through their harsh, irritated tone, rapid and choppy speech pattern, and the use of dismissive and offensive language. The emotional turmoil is further indicated by instances of crying (sobbing), which suggests a deep level of distress or anger. Additionally, there's a noticeable increase in pace and volume towards the end, indicating an escalation of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F000.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear:\n\n1. Changes in pitch and volume: The speaker's voice may fluctuate, rising or falling in pitch, indicating distress or anxiety.\n\n2. Increased heart rate: The physical symptoms of an elevated heart rate can be reflected through vocal changes, such as rapid breathing or shallow breathing.\n\n3. Tense speech patterns: The speaker may experience difficulty in enunciating words clearly, using shorter or rushed phrases, and hesitating before speaking.\n\n4. Crying or sobbing: These are strong indicators of fear or distress, often reflecting intense emotions that are difficult to contain verbally.\n\n5. Voice trembling: A quivering or shaky voice suggests that the speaker is experiencing fear or nervousness.\n\n6. Changes in tone: The speaker's tone may become high-pitched, tense, or brittle, reflecting their emotional state.\n\n7. Pauses and hesitations: Fearful individuals often hesitate or pause before speaking, indicating uncertainty or anxiety about what they are saying.\n\n8. Emotional exhaustion: If the speaker seems drained or fatigued, it could indicate that they have been struggling with fear for a prolonged period.\n\n9. Body language: While not directly observable in the audio, changes in body language, such as fidgeting or hunching forward, can often accompany feelings of fear.\n\nBy analyzing these elements together, we can infer that the speaker is likely experiencing fear or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M005.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some hand gestures or physical movements. The heightened pitch and quicker pace indicate excitement. Additionally, there might be a sudden change in tone or a dramatic pause that contributes to the excitement conveyed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M001.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some energetic hand gestures or body movements. The heightened pitch and quicker pace indicate excitement. Additionally, there might be a sense of urgency or agitation in the speaker's voice, further supporting the idea of excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F035.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain explicit indicators of happiness such as laughter or upbeat tempo; however, the soft and gentle voice suggests a calm and peaceful demeanor, which can be associated with a happy or content state. The choice of words and the lack of any harsh or negative language also indicates a positive emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M025.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice trembling, slow pace, low tone, and the use of filler words like 'I know I know.' Additionally, there's an emotional pause before the phrase 'you can't worry about me,' indicating concern or distress. The context of taking care of children also underscores a sense of responsibility and possibly sadness or burden."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_F004.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a heightened pitch, faster pace, and loud, forceful delivery of the speech. There is also an indication of irritation and frustration, as reflected by the emotional state of the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M019.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The context where this phrase is said suggests an unexpected revelation or surprising information, contributing further to the perception of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M017.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of happiness or sadness. The pace and volume remain consistent, indicating a calm and composed delivery. There are no discernible emotional cues such as crying or laughter, and the speech does not fluctuate in pitch or intensity. Pauses are few and short, suggesting the speaker has thought through their words carefully."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M026.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio reflects excitement through an increased speech rate, louder volume, and a more animated tone. There are instances of emphatic pauses and a rise in pitch, which are common in moments of excitement. Additionally, there may be subtle vocalizations like sighs or gasps that could indicate excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_F019.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some vocalizations like sighs or laughter. The heightened pitch and quicker pace indicate excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_M013.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating they may be upset or agitated about the situation being discussed. The presence of crying sounds suggests an emotional response, and the fact that the speech is interrupted by a sigh further emphasizes feelings of annoyance or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_F018.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely sounds tense and possibly irritated. Additionally, there may be instances of sighing or a change in pitch, reflecting an increase in frustration. The emotional delivery seems to convey a sense of annoyance or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F029.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and emotional drooping in the voice, indicating a tired or sad demeanor. The pauses between words suggest contemplation or sorrow. Additionally, there's a subtle hint of crying or sobbing based on the vocal quality."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M032.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised volume, faster pace, and a tense tone. There's also an instance of him shouting which indicates strong emotions of anger. Additionally, his voice may sound shaky or unsure, reflecting the turmoil of his feelings."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_F016.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and tears in her voice. The emotional delivery indicates she is struggling to maintain composure while speaking, which aligns with feelings of sorrow or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_F018.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating an increase in pitch and volume. There's also a noticeable pause before speaking, suggesting irritation or annoyance. The emotional intensity can be inferred from the rapid pace and loud delivery of the speech, further supporting the argument of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M011.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through their aggressive tone, loud voicing, and fast pace. The presence of crying and shouting indicates strong emotions, while the emphasis on certain words and the overall loud manner of speaking further amplify this sentiment. Additionally, there's a noticeable lack of pauses, suggesting a rush to convey frustration or anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M007.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating anger or annoyance. The emotional delivery includes a raised volume and possibly harsher articulation, reflecting an inability to control emotions. There may also be instances of interrupting or speaking quickly, further emphasizing feelings of agitation. Additionally, the presence of crying or sobbing suggests a deep level of distress or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_F000.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio does not contain explicit indicators of excitement such as yelling, laughter, or rapid speech; however, the tone of voice can be considered as slightly elevated, suggesting a heightened emotional state. Additionally, there's a subtle hesitation before the word 'them,' which might indicate contemplation or anticipation, further supporting the idea of excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F012.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a lower and slower speech rate, increased pauses between words, and a soft, possibly subdued voice. There may also be instances of throat clearing or sniffing, which could indicate distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F004.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and emphatic pronunciation. There's also a noticeable increase in vocal intensity and a slight tremble in the voice, which usually indicate excitement or agitation. Additionally, the emotional delivery seems to be charged with energy and enthusiasm, reflecting a sense of eagerness or anticipation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M023.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through their firm, loud voice, which likely betrays a sense of desperation or urgency. The emphasis on not letting go and the willingness to make any sacrifice or change indicates an inability to control the situation, further amplifying the emotion of anger. Additionally, there may be signs of frustration, as indicated by the repetition of phrases like 'I can't let you go' and the hurried pace of speech, which together with the crying sound, suggest a deep-seated emotional turmoil."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_2_M009.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, as indicated by the loud, emphatic speech, crying out, and heavy breathing. The urgency in the voice suggests a heightened emotional state, possibly bordering on anger or agitation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M027.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of happiness such as laughter or upbeat tempo; however, the use of the word 'like' typically indicates a casual or informal speech style, which can be perceived as friendly and positive. The absence of negative words or strong emotional expressions also contributes to a neutral or happy mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M001.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, lacking any prominent emotional expressions like crying or laughter. The pace and volume of her speech remain consistent, indicating no significant changes in mood or intensity. There are no discernible pauses or hesitations, suggesting smooth and composed delivery. She does not emphasize certain words or phrases, reinforcing the perception of neutrality. Overall, the audio lacks any discernible emotional cues that would suggest the speaker is feeling anything but neutral."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_F002.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried tone, indicating they are likely upset or agitated about being in line. The fact that they apologize suggests a level of distress or annoyance, while the emotional delivery may include instances of sighing, which often conveys feelings of frustration or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_F023.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and quickened pace, indicating irritation or annoyance. The heightened pitch and volume suggest an emotional state of agitation. Additionally, there's a noticeable pause before the speaker continues, which might indicate they're struggling to maintain composure."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M021.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tone, which likely sounds tense and possibly raised. There may be instances of hesitation or stuttering, indicating difficulty or irritation. Additionally, if there are crying sounds present, it could further emphasize the speaker's emotional state of distress or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_2_F015.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, particularly through their loud and emphatic speech style. The heightened pitch and urgency in their voice suggest a state of agitation or anger. Additionally, there's a noticeable wail at the end of the sentence, which amplifies the sense of distress. Furthermore, the long pause before the wail indicates a moment of heightened emotion before it is unleashed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating they are upset or agitated about the situation being discussed. The use of filler words like 'really' and 'messed up' emphasizes their dissatisfaction. Additionally, there's a noticeable pause before the speaker continues speaking, suggesting they are struggling to find the right words or taking a moment to compose themselves."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_F019.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits a range of emotional cues that indicate excitement. There's an emphatic increase in pitch and volume, suggesting heightened intensity and passion. The quick pace and possibly irregular breathing pattern (shorter inhales and exhales) align with excitement or agitation. Additionally, the use of exclamation marks ('Oh!' and 'Yeah!') reinforces the sense of excitement. Furthermore, the emotional state may be further inferred from the context where this speech was recorded; if it was during a celebratory event or a thrilling moment, these elements would amplify the excitement conveyed by the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_F018.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and soft vocal expressions. The tears in her voice indicate emotional distress, and the subdued manner of speaking suggests a lack of energy and cheerfulness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_F029.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a smile in their voice. There's an absence of any signs of distress or frustration, indicating a positive emotional state. The use of words like 'ah' and 'we're' convey a sense of unity and contentment, further enhancing the feelings of happiness conveyed by the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F005.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a likely increase in pitch and volume at the word 'Really?' indicating surprise or excitement. There's also a noticeable absence of any negative emotions or sighs, supporting the idea of the speaker being happy."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_F016.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried tone, indicating they are upset or agitated about the topic being discussed. The use of filler words like 'obviously' suggests irritation or annoyance. Additionally, there's a noticeable increase in speaking rate, which usually occurs when someone is anxious or angry. Furthermore, the emotional state of the speaker is likely indicated by a raised volume and possibly some vocal disruptions like sighs or hiccups, all of which contribute to a sense of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M050.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio does not contain any explicit indicators of excitement such as yelling, screaming, or laughter. However, the rapid pace and slightly upbeat intonation of the speech suggest a sense of enthusiasm or eagerness. Additionally, there's a slight hesitation before the word 'together,' which might indicate contemplation followed by excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_M019.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits signs of anger through their raised volume, rapid pace, and forceful delivery. The intensity and sharpness in their voice suggest irritation or fury. Additionally, there's a noticeable narrowing of the eyes mentioned, which often accompanies anger in social interactions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M013.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid increase in pitch at the word 'Oh.' This indicates heightened emotionality. Additionally, there's a noticeable pause before the word 'I,' which could suggest contemplation or anticipation leading up to the expression of excitement. Furthermore, the use of 'I know' in a confident tone might emphasize the speaker's current state of excitement or knowledge about the subject being discussed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M038.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid increase in pitch at the word 'home'. This modulation in voice intensity and speed suggests a heightened level of enthusiasm or anticipation regarding going home."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F021.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tone, which likely sounds tense and possibly irritated. There may be instances of pauses or hesitation, suggesting they are struggling to maintain composure or think clearly. Additionally, there might be a noticeable change in pitch or volume, which could indicate feelings of agitation or annoyance."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M036.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in pitch or volume. There are no discernible crying sounds, laughter, or other emotional indicators. The tone remains calm and composed throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_F034.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful tone, faster speaking rate, and a relaxed pace. There's an absence of harshness or strain in her voice, indicating she's comfortable and content. The smiling while speaking further emphasizes her happy disposition."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_F003.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement:\n\n1. Crying sound: There's a noticeable crying sound at the beginning of the audio, which often indicates strong emotions such as joy or excitement.\n2. Laughter: The laughter heard later in the audio contributes to an atmosphere of excitement and happiness.\n3. Changes in tone: The speaker's tone starts neutral but rises to a higher pitch, conveying feelings of excitement and elation.\n4. Speech rate: The faster pace of speech towards the end of the audio also reflects heightened excitement.\n5. Pauses: The brief pause before the laughter indicates a moment of anticipation or build-up to the exciting moment.\n6. Emphasis and stress: The heightened pitch and quicker pace of speech suggest that the words are being emphasized and stressed, contributing to the overall sense of excitement.\n7. Voice trembling: Although subtle, the slight tremble in the voice can be detected, which usually occurs when someone is experiencing intense emotions like excitement.\n8. Other emotional characteristics: The combination of these emotional features creates a holistic picture of excitement throughout the audio.\n\nOverall, the audio demonstrates a range of emotional responses that contribute to an atmosphere of excitement and happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F030.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The intonation likely rises, suggesting an element of astonishment or amazement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M031.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio exhibits several features that indicate excitement. Firstly, there's an increase in the pitch and volume of the voice, suggesting heightened energy or agitation. Additionally, there are instances of laughter, which is often associated with joy or excitement. Furthermore, the brief silence between the words 'yeah' and 'and now you're getting married' could imply anticipation or eagerness for the upcoming event. Lastly, the rapid pace and slightly rushed manner of speaking might also suggest excitement or nervousness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_F009.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio contains several indicators of happiness such as a joyful tone, light-hearted laughter, and a quickened speech rate towards the end. There's also an audible smile in the speaker’s voice. Additionally, the brief pause before the laughter indicates a moment of contemplation or anticipation leading to the happy expression."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F047.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of happiness such as laughter or upbeat tempo; however, the tone is light-hearted and positive, suggesting a relaxed and cheerful demeanor. The soft and gentle voice further supports this perception of a happy mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F018.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no noticeable changes in pitch or speech rate. There are no crying sounds or laughter detected. The delivery is straightforward without any pauses or hesitation, indicating a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F033.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is elevated with a noticeable undercurrent of complaint and frustration. There is an evident pause before she speaks, indicating contemplation or hesitation. The repetition of 'I always think so' suggests a recurring thought pattern, possibly reflecting ongoing frustration. Additionally, the sigh at the end of her speech conveys a sense of weariness or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M004.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio reflects excitement through an increase in speech rate, louder and more forceful delivery, and elongated vowels indicating heightened emotions. There's also a noticeable pause before the speaker begins talking, suggesting anticipation or build-up. Additionally, the use of exclamation marks ('Oh!') and the word 'good' in an upbeat tone further emphasizes the excitement conveyed by the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F008.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a faster speaking rate. There's also an instance of crying, which indicates strong emotions. The intonation likely rises, suggesting an element of astonishment or amazement. Additionally, there may be hesitations or pauses in the speech, which could further emphasize the sense of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M011.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely includes a heightened pitch and possibly a faster speaking rate, indicating irritation or agitation. Additionally, there may be instances of pauses or hesitations, suggesting they are struggling to maintain composure or find the right words. The emotional state of the speaker may also be conveyed through vocal expressions like sighing or shouting, although these are not explicitly mentioned in the transcription provided."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M011.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, which includes pauses and a questioning tone suggesting curiosity or eagerness. The heightened pitch and volume of the voice also indicate excitement. Additionally, there are instances of laughter, further emphasizing the speaker's excited mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_M031.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some vocal flourishes or modulation in pitch. There might be instances of heightened volume or intonation during key points, reflecting a sense of eagerness or agitation. Additionally, any signs of physical reactions such as increased heart rate or sweating could further indicate excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F023.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The audio contains several indicators of sadness:\n\n1. Crying: The presence of tears in the voice indicates a sad mood.\n2. Slow speech rate: A slower pace of speech often conveys sadness or melancholy.\n3. Emphasis on certain words: The repetition of 'Why?' and the sigh at the end suggest distress or sorrow.\n4. Changes in pitch and volume: The fluctuation in pitch and volume, especially the low pitch and soft volume towards the end, can evoke feelings of sadness.\n5. Use of filler words: The use of filler words like 'umm' and elongated 'ahs' indicates discomfort or sadness.\n\nOverall, these elements combined create a sad emotional tone throughout the speech segment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M029.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone likely reflects frustration due to the context of discussing financial responsibility for lost luggage, which may be a stressful or overwhelming topic for the individual. The use of filler words like 'um' and sighs ('exhale') could also indicate a sense of weariness or frustration. Additionally, the mention of a specific dollar amount for the reimbursement limit might suggest that the speaker feels limited or frustrated by the compensation offered."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F016.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their emotional state, evident from the crying sound and the tone of voice which likely reflects irritation or anger. The heightened pitch and quicker pace of speech also suggest a sense of urgency or agitation. Additionally, the emotional turmoil might be indicated by instances of pauses or hesitations, which could imply that the speaker is struggling to articulate their thoughts."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M002.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a faster speaking rate. There's also a noticeable hesitation before the word 'Why?' which indicates uncertainty or surprise. Additionally, the speaker's voice may sound tense or shaky, contributing to the overall sense of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M058.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a smile in her voice. There's an absence of any signs of distress or sadness, indicating a positive emotional state. The brief and frequent affirmations like 'definitely not' contribute to a sense of confidence and positivity. Additionally, the light-hearted manner of speaking suggests she is happy."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M016.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice's low tone, slow pace, and instances of pauses. There is also an evident emotional struggle, indicated by the strained quality of voice and perhaps some signs of crying or emotional agitation. The sigh at the end further emphasizes the sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F002.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration through their vocal expressions and body language. The sigh indicates a sense of weariness or emotional exhaustion, while the repetition of 'one' suggests a level of frustration or agitation, possibly counting down or emphasizing a point. Additionally, the soft and quiet voice further conveys a feeling of sadness or frustration, as it might be struggling to maintain composure or loudness amidst strong emotions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F003.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating emotional distress. The slow pace and low pitch of the voice suggest sadness. Additionally, there might be instances of pauses or hesitations, which often accompany feelings of sadness. Furthermore, the speaker's gender could influence how the sadness is perceived; for example, a female voice may convey sadness more subtly than a male voice."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F003.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and harsh tone, interrupted speech pattern, and the use of negative words like 'jealous imagination'. The crying sound indicates an emotional outburst, reinforcing the sense of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F010.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their emotional state, including crying and a change in their tone from a normal speaking pace to one that conveys distress or anger, particularly evident during the sigh at the end of the sentence 'on- oh come on.' The emotional turmoil is also indicated by the hesitation, as represented by the fillers 'uh' and 'um', and the emotional charge in the choice of words like 'frustration' and the sigh."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F017.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of joy or sorrow. There are no discernible crying sounds or laughter, indicating emotional stability. The pace and volume of the speech remain consistent, suggesting no strong feelings underlying the words spoken. Emphasis is evenly distributed, further supporting the idea of a neutral emotional state. Additionally, there's no evidence of voice trembling or other physical indicators of distress, reinforcing the perception of a neutral mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_F028.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of happiness such as laughter or upbeat tempo; however, the use of 'thank you' typically conveys gratitude and positivity, which can be inferred as an expression of happiness. The tone is neutral but has a subtle undertone of satisfaction or contentment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M012.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with laughter indicating amusement and joy. There's an evident lightness in the voice, suggesting elation. Additionally, the pace of speech is brisk, further enhancing the sense of cheerfulness. The laughter and rapid speech rate suggest that the speaker is genuinely pleased. Furthermore, there are no signs of distress or sadness; rather, the overall mood conveyed is one of happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_F027.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, lacking any prominent emotional expressions or variations in pitch. There are no discernible signs of laughter, crying, or other emotional responses that could indicate a non-neutral mood. The pace and delivery of the speech are regular and consistent, suggesting a level of composure and neutrality."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M012.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker's surprise can be inferred from their sudden and unexpected question, indicated by the quick pace and slightly elevated pitch of their voice. There may also be a temporary increase in vocal intensity and possible hesitations or pauses before the question is asked, suggesting they were caught off-guard. Additionally, the use of an open-ended question like 'Why didn't you?' implies curiosity and surprise about the lack of action taken by the person being addressed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M035.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There's also a noticeable emphasis on certain words, suggesting heightened emotional intensity. Moreover, the short and quick speech pattern along with occasional pauses can be observed, which aligns with angry mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M033.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. The intonation is likely to be high-pitched and there may be some hesitations or stuttering in the speech. Additionally, there might be a sudden widening of the eyes or a sudden change in body posture indicating surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_M031.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and a sense of urgency in their voice. There's also a noticeable lack of hesitation, indicating a direct and passionate delivery. The emotional intensity may lead to vocal strain, as evidenced by the speaker's voice trembling towards the end of the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M022.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear. Firstly, there is a noticeable increase in the pitch and volume of the voice, which usually indicates distress or anxiety. Additionally, the presence of crying or sobbing suggests an emotional state of distress or sorrow. Furthermore, there are hesitations ('Umm') and pauses ('I-I-I-I')."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_2_F016.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through their aggressive tone, rapid speech rate, and loud voicing. There's also evidence of crying, indicating strong emotions. The way they raise their voice and emphasize certain words suggests deep-seated frustration or irritation. Additionally, the presence of a sniffle indicates that the speaker might be struggling to contain their anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_F028.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a smile in her voice. There's an evident lightness in her delivery, suggesting she's pleased or content. Additionally, the brief and casual manner of the interaction indicates a friendly and joyful atmosphere."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M030.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and emphatic pronunciation. There's also a noticeable lack of pauses and an energetic tone, which contribute to the overall sense of excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M036.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some vocal flourishes or modulation in pitch. There might also be instances of sighing, which can indicate excitement or relief."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M005.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised volume, faster pace, and a forceful tone. There's also an instance of yelling which indicates strong emotions. Additionally, the speaker's face might be flushed or tense, reflecting physical signs of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M017.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tense and hurried tone, along with instances of sighing and a raised voice, indicating heightened emotional distress or annoyance."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_F014.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow pace, and low pitch. The emotional delivery includes pauses and sniffles, indicating distress. The tone suggests a sense of longing or disappointment, reflecting the overall sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F020.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the audio, lacking any prominent changes in pitch or intensity. There are no discernible crying sounds, laughter, or pauses that could indicate a shift in emotion. The steady pace and regular rhythm suggest a level of composure and neutrality. However, without auditory cues like sighs or changes in volume, it's challenging to confirm absolute neutrality."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F006.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in pitch or volume. There are no discernible crying sounds, laughter, or other emotional indicators. The tone remains calm and composed throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M022.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's voice carries a sense of frustration, primarily indicated by their struggle to find words, as evidenced by stuttering ('Umm, umm') and hesitation ('I-I-I-I')."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M026.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful tone, faster speaking rate, and a relaxed manner. There are no signs of crying or laughter, but the overall light-heartedness in the voice indicates a happy mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M019.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits several key emotional indicators of frustration:\n\n1. Increased volume and speaking rate: The speaker increases their volume and speaking pace, which usually indicates an escalation of emotions.\n2. Interrupting the other person: The speaker interrupts the other person, which can be a sign of irritation or anger.\n3. Angry tone: The speaker uses a tone that conveys anger or annoyance.\n4. Emphasizing key points: The speaker repeats and emphasizes certain phrases, suggesting they are frustrated or passionate about the topic.\n5. Wide, glaring eyes: This physical characteristic may indicate that the speaker is angry or frustrated.\n\nOverall, these elements combined suggest that the speaker is experiencing frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M007.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some vocalizations like sighs or screams that indicate intense feelings. The modulation of the voice, including changes in pitch and volume, also suggests excitement. Additionally, there might be some hesitations or pauses that do not disrupt the overall exuberance of the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_F020.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and rapid tone, along with loud and emphatic speech. The emotional distress is evident from the crying sound and the harsh, strained quality of voice. There's also a noticeable pause before the speaker continues, which further emphasizes the feeling of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M017.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is elevated with a heightened pitch and quicker pace, indicating frustration. There are also instances of sighing, which further emphasizes their emotional state of distress. Additionally, the use of expletives such as 'fucking' suggests strong feelings of annoyance or anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M039.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating sadness. There is also a noticeable slowing down of speech pace, which usually indicates sadness or disheartenment. Additionally, there might be a hint of unintentional pauses or hesitations, further supporting the inference of sadness. The emotional delivery seems subdued and perhaps melancholic, which aligns with feelings of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M033.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone can be described as harsh and irritated, indicating anger. There is also a noticeable increase in the pitch and volume, which further emphasizes their angry mood. Additionally, the pace of speech is quick, suggesting impatience and frustration. Furthermore, the speaker's choice of words ('very late', 'disturb the people upstairs') implies a confrontational attitude towards the situation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_F029.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio does not contain explicit indicators of excitement such as crying, laughter, or vocal changes that are traditionally associated with excitement. However, there is an element of surprise or astonishment in the speaker's voice, which could be interpreted as excitement under certain contexts."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_2_F001.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent emotional expressions like crying or laughter. There's no particular change in pace or volume; it's steady and consistent. The speech is delivered without any noticeable pauses or hesitations, suggesting a calm and composed delivery. Emphasis is evenly distributed throughout the sentence, contributing to the overall neutral mood. Stress and tension are minimal, indicating a calm and relaxed emotional state. Furthermore, there are no physical signs of emotions such as voice trembling, supporting the idea of a neutral emotional tone."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F010.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow speech rate, low pitch, and tears in her voice. The pauses she takes while speaking also indicate a struggle to maintain composure."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro06_M002.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a weight of sadness, evident from the slow pace and low pitch of his speech. The emotional delivery includes pauses and a hesitating tone, suggesting grief or sorrow. Additionally, there is a noticeable tremble in his voice, which further amplifies the sense of sadness conveyed through his words."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F042.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits a range of excitement through their vocal expressions and body language. The following features indicate excitement:\n\n1. High-pitched and rapid speech: The speaker's voice is likely to be higher in pitch and faster in pace, reflecting their state of excitement.\n\n2. Enthusiastic tone: The overall tone of the speech suggests enthusiasm, often characterized by a heightened volume, a more animated manner of speaking, and possibly a smile or other facial expression that conveys joy or excitement.\n\n3. Use of exclamation marks: The frequent use of exclamation marks, such as 'Oh!' and 'Wow!', indicates strong feelings of excitement or astonishment.\n\n4. Emotional delivery: The way the speaker delivers the speech can also convey excitement, including through changes in pitch, volume, and speed.\n\n5. Energy levels: Physical signs like increased heart rate, heightened energy levels, and possibly even a trembling voice can indicate that the speaker is excited.\n\n6. Body language: Non-verbal cues like clapping hands, jumping up and down, or gesturing wildly can also indicate excitement.\n\n7. Laughter: If the speaker includes laughter in their speech, it can serve as an additional indicator of excitement or amusement.\n\n8. Repetition: Repeating words or phrases, especially if they are done with enthusiasm, can also suggest excitement.\n\n9. Personal context: Understanding the personal context of the situation where this speech was delivered can help us interpret the level of excitement conveyed by the speaker more accurately.\n\nOverall, the combination of these features creates a vivid picture of a speaker who is experiencing excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_F004.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their repeated sighing, indicating irritation or annoyance about the situation being discussed. The sighs help convey a sense of weariness or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F019.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits a variety of emotional cues indicating fear. These include crying, which can be heard at multiple intervals (0.38-2.97, 4.65-6.33, 6.76-7.70), heavy breathing (2.97-4.64), a trembling voice (7.69-10.00), and a high-pitched voice (7.70-10.00). Furthermore, there's a noticeable pause between the start of her speech and the first instance of crying (0.00-0.38), suggesting initial hesitation or fear before the emotional release. The overall modulation of her voice, including changes in pitch and volume, also contributes to the perception of fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M001.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tense and quickened pace, indicating they might be upset or agitated about the situation being discussed. The fact that the speaker's voice trembles slightly further emphasizes this emotion. Additionally, there are instances of them sighing, which often indicates a sense of weariness, exasperation, or disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro06_M013.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker expresses frustration through a consistent increase in volume and a hurried pace towards the end of the speech, indicating an escalation of annoyance or anger. There's also a noticeable wobble in his voice, possibly due to stress or emotional turmoil, which aligns with feelings of frustration. Additionally, the repetition of the phrase 'it just gets worse' reinforces this emotion by highlighting a persistent negative situation that contributes to the speaker's increasing frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F016.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low tone, and emotional delivery. The use of exclamation marks indicates an increase in volume and intensity, often associated with distress or sorrow. Additionally, there's a noticeable hesitation before the speech, suggesting contemplation or sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F004.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their heavy, strained voice, indicating they might be upset or agitated. The emotional tone suggests a sense of anger or annoyance, which aligns with feelings of frustration. Additionally, there's a noticeable increase in the pitch and volume of the voice, further emphasizing the heightened emotional state of the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M002.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no discernible changes in pitch or volume. There are no emotional cues such as crying or laughter, and the pace of speech is steady, indicating a calm and composed demeanor. The consistent rhythm and enunciation further support the perception of a neutral emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M040.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio contains several indicators of the speaker's happiness:\n\n1. Laughter: The speaker's laughter indicates amusement or joy.\n2. Changes in tone: There are moments when the speaker's tone lightens up, suggesting they are pleased or happy.\n3. Speech rate: The speaker's slightly fast-paced speech can be an indicator of excitement or happiness.\n4. Pauses: The occasional pause might indicate thoughtfulness or happiness, as it allows for moments of reflection.\n5. Emphasis: When the speaker emphasizes certain words, it often suggests happiness or positivity.\n\nOverall, these auditory cues suggest that the speaker is experiencing happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F038.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through a series of vocal expressions such as loud and aggressive speaking, rapid speech rate, and a forceful delivery. The heightened pitch and volume indicate strong feelings of anger. Additionally, there are instances of interrupted speech and raised eyebrows, further emphasizing the speaker's angry mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M020.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some energetic gestures. The modulation in pitch and volume indicates heightened emotions. There might be instances of pauses or hesitations that suggest anticipation or eagerness. Additionally, if there are any crying sounds or laughter, it could further support the argument that the speaker is excited."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F020.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their interrupted speech pattern, rushed pace, and emotional tone. There's an evident sense of urgency and agitation in the way they speak, which aligns with feelings of frustration. Additionally, the speaker's voice may tremble slightly, indicating a heightened emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F014.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, lacking any discernible changes in pitch or intensity. There are no emotional cues such as crying or laughter, and the pace of speech is steady, indicating a calm and composed demeanor. The consistent rhythm and enunciation suggest minimal stress or emotional fluctuations."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M017.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their labored breathing, sighing, and emotional tone, indicating they are struggling or overwhelmed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M016.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some hand gestures or physical movements. The intonation likely rises, reflecting heightened anticipation or enthusiasm. There may be occasional hesitations or pauses that add to the sense of eagerness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M025.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The speaker's voice may sound shaky or unsure, reflecting their state of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M039.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. The 'Umm' sound indicates hesitation or uncertainty before the surprising statement. Additionally, there's a short silence following the statement, which further emphasizes the element of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M001.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some vocalizations like sighs or shouts. The energetic delivery and modulation in pitch and volume suggest excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M009.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and upbeat tone, speaking louder and with more energy than usual. There's a noticeable smile in their voice, indicated by a soft and warm timbre. The pace of speech is also faster, reflecting a sense of eagerness or enthusiasm. Additionally, the brief pauses between phrases suggest a natural flow of excitement rather than forced speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F015.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some hand gestures or facial expressions. The modulation in pitch and volume indicates heightened emotions. There might also be a brief hesitation before the speech, reflecting contemplation or anticipation of what's to come."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F019.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating an irritated or angry mood. There's also a noticeable pause before she speaks, suggesting irritation or frustration. Additionally, her use of words like 'why can't' and questioning the actions of others contributes to the overall sense of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M022.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their emotional tone, which likely includes a heightened pitch, faster pace, and possibly some vocal disruptions like sighs or sniffles. The context of the question 'you want to get married again?' may also evoke a sense of urgency or agitation in their voice, contributing to the overall feeling of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_F012.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their emotional tone, which likely includes a heightened pitch and possibly a faster speaking rate, indicating irritation or agitation. Additionally, there may be instances of pauses or hesitations, suggesting indecision or annoyance. The presence of crying or sobbing also indicates strong feelings of distress or anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M008.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful tone, relaxed pace, and laughter heard towards the end of the speech. The light-hearted manner in which the speaker mentions climbing and taking pictures suggests a joyful experience. Additionally, there's a subtle hint of amusement or happiness when mentioning about 'climbing it' and wanting someone else to do the same."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M042.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's expression of anger is through an intense tone, loud voicing, and a rapid speaking rate. There are also instances of pauses and repeated words which emphasize their feelings. The overall loudness and forceful manner of speaking convey the anger effectively."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M035.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, with noticeable pauses between phrases. There's also a mention of an upcoming event (going to school), which contributes to the overall sense of eagerness and anticipation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M015.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone with noticeable sadness in the eyes and possibly in the vocal delivery. There might be instances of pauses or hesitations, indicating distress or uncertainty about the situation. The emotional delivery could be slow-paced, reflecting a more subdued or melancholic demeanor. Additionally, there may be instances where the speaker's voice trembles, further emphasizing their sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F025.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate without any noticeable variations. There are no signs of laughter or crying, and the tone remains calm and composed throughout the speech. The stress on the words is minimal, indicating a lack of strong emotional expression. Voice trembling or other physical signs of distress are also absent, supporting the perception of a neutral mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F038.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong emotion such as crying or laughter; the tone is consistent throughout the speech. The pausing indicates normal speech pattern without any rush. The emphasis on 'scholarships' suggests a straightforward delivery without any particular emotional emphasis. Overall, the audio reflects a calm and neutral demeanor of the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M012.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tone, which likely sounds tense and possibly irritated. There may be a raised pitch and quicker pace to their speech, reflecting an increase in urgency or agitation. Additionally, any emotional cues such as sighs or huffs could indicate feelings of annoyance or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M023.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits several key emotional indicators that suggest excitement:\n\n1. Increased speech rate: The speaker talks faster, indicating a heightened level of energy or excitement.\n\n2. Rising and falling pitch and intonation: The speaker's pitch goes up and down rapidly, reflecting an excited or passionate state.\n\n3. Emphasis and stress: There are moments where the speaker emphasizes certain words, suggesting they are important or exciting points.\n\n4. Eye twitching: The speaker has occasional instances of eye blinking rapidly (which can be perceived as 'rolling eyes'), often associated with excitement or being emotionally charged.\n\n5. Breathing sounds: The speaker takes short, rapid breaths, possibly due to excitement or anxiety.\n\n6. Laughter: Although brief, there is a moment of laughter from the speaker, adding to the overall sense of excitement.\n\n7. Voice trembling: There are slight instances of voice trembling, especially during the higher-pitched sections of the speech, which usually indicate excitement or nervousness.\n\nOverall, these elements combined create a picture of an excited or passionate speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_F003.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried manner of speaking, indicating they are eager or agitated. The rushed delivery and loud speaking volume suggest irritation or annoyance. Additionally, there's a noticeable pause before the speaker continues, which might indicate them trying to maintain composure amidst their frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M024.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone can be considered as one of anger, especially when they mention 'Peter Burdon' with a sense of disdain or hatred. There might also be a noticeable increase in the pitch and volume of their voice, indicating heightened emotions during the speech. Additionally, the use of forceful language and possibly interrupted speech patterns (such as stuttering or hesitations) further support the inference of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M035.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their heavy tone, sighing, and emotional distress indicated by crying, all contributing to a sense of weariness and disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M002.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a quickened pace of speech, indicating a moment of unexpected revelation or startling information. There may also be a temporary increase in vocal intensity and possible hesitations or stuttering, further emphasizing the element of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M048.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement:\n\n1. Changes in pitch and volume: The speaker's voice may fluctuate, rising or falling in pitch, indicating excitement.\n2. Increased heart rate: If the audio includes a sound of a heartbeat, it might be faster than normal, reflecting an excited state.\n3. Exaggerated intonations: The speaker may speak louder or with more force, using a higher pitch, which can convey excitement.\n4. Prolonged pauses: Short hesitation before speaking can indicate excitement or anticipation.\n5. Smiling or laughing: If the audio includes any sounds of smiling or laughing, these could suggest excitement.\n6. Emotional vocal expressions: Sighs or other vocal expressions that convey strong emotions can also indicate excitement.\n\nHowever, without visual or non-verbal cues from the speaker, it's challenging to confirm these assumptions with certainty."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F001.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a quicker pace of speech, indicating a sudden realization or unexpected information. There's also a noticeable hesitation before speaking, which further emphasizes the element of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_F010.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and tears in her voice. The emotional delivery indicates she is struggling to hold back tears while speaking."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M018.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone can be perceived as irritated and hurried, possibly indicating frustration. There's also a noticeable increase in the pace of speech towards the end, suggesting a heightened state of annoyance or exasperation. Additionally, the speaker's voice may tremble slightly, further amplifying the sense of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_F002.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear:\n\n1. Crying or sobbing: The presence of crying indicates strong emotions, often associated with distress or fear.\n2. Changes in tone: The speaker's voice may fluctuate, rising or falling rapidly, reflecting anxiety or panic.\n3. Speech rate: A faster speech rate can suggest nervousness or fear.\n4. Pauses: Long, hesitation-filled pauses may imply uncertainty or fear about what to say next.\n5. Emphasis and stress: Stronger emphasis on certain words or phrases suggests areas of concern or fear.\n6. Voice trembling: If the voice trembles, it is an obvious sign of fear or anxiety.\n7. Other vocal expressions: Shaking head, stuttering, or other vocal tics may also indicate fear.\n\nConsidering these elements, the overall emotional state of the speaker seems to be one of fear or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M009.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no noticeable changes in pitch or volume. There are no discernible emotional cues such as crying or laughter, and the pace and rhythm of the speech suggest a calm and composed demeanor. The consistent breath control indicates stability and emotional balance."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M036.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There is also a noticeable emphasis on certain words, suggesting strong feelings. Additionally, there may be a temporary change in pitch or a 'crack' in the voice, which are typical indicators of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F019.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. The intonation likely rises, indicating a sudden realization or astonishment. There may also be a temporary pause before continuing, which further emphasizes the element of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_F010.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, evident from the loud and emphatic speech, along with a rapid speech rate and signs of agitation such as yelling or shouting. The heightened pitch and volume indicate anger or frustration. Additionally, there's a noticeable pause before the speaker starts speaking again, which might suggest a moment of consideration or an attempt to regulate their emotions. Furthermore, the presence of crying or sobbing sounds conveys a deep sense of distress or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_F013.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, without any noticeable changes in pitch or volume. There are no signs of laughter, crying, or other emotional displays. The tone remains calm and composed throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F039.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating strong disapproval or frustration. The loud and emphatic manner in which the speaker says 'Oh, you're not going like this!' suggests an intense emotional state of anger or agitation. Additionally, there's a noticeable increase in the pitch and volume of the speech, which further amplifies the sense of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_M014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tone, which likely sounds irritated or annoyed. There may be instances of them raising their voice or speeding up their speech, indicating an increase in frustration. Additionally, the presence of crying sounds suggests a heightened emotional state, further amplifying the sense of frustration. The use of expletives and the phrase 'you can't just take' implies an intolerance towards unfair treatment, which is often associated with feelings of annoyance and frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M038.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional cues indicative of fear:\n\n1. Changes in pitch and volume: The speaker's voice may fluctuate, possibly indicating anxiety or fear.\n2. Speed variations: The pace at which the speaker speaks can be quick, reflecting a sense of urgency or distress.\n3. Tense vocal cords: There might be a strain or tightness in the vocal cords, contributing to a fearful mood.\n4. Crying or sobbing: These are clear indicators of distress or fear.\n5. Pounding heart: A racing heartbeat is a common physical reaction to fear or anxiety.\n\nThese elements combined suggest that the speaker is experiencing fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_F034.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating strong disapproval or frustration. The loud and emphatic manner in which the speaker says 'Well do try to control yourself, darling!' suggests irritation or agitation. Additionally, there's a noticeable increase in the pitch and volume, which are common indicators of anger. Furthermore, the use of the word 'darling' in an angry context might imply a sense of provocation or sarcasm."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_M032.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and louder volume of speech. There's also a noticeable emphasis on certain words, indicating strong feelings. The use of exclamation marks and the phrase 'it's giving me goosebumps' further emphasizes the excitement. Additionally, the physical reaction of standing up from the chair suggests an intense level of excitement or anticipation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_F020.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through their forceful and rapid speech, which includes elements like shouting and raised volume. There's also a noticeable lack of control over emotions, indicated by crying and loud speaking. The pace and intensity of the speech suggest a heightened emotional state. Furthermore, the emphasis on certain words ('They stink!') and the harsh delivery indicate anger. Additionally, the voice trembling could be another indicator of anger or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_M006.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an increased speech rate, louder volume, and a more animated tone. There are also instances of emphatic pauses and repeated words, which further emphasize the speaker's excitement. The heightened pitch and quicker pace of speech convey a sense of eagerness or anticipation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M036.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful tone, upbeat pace, and a smiling or laughing expression, as indicated by the description of their voice. There's also an absence of any negative emotions such as sadness or anger, and a noticeable elevation in pitch suggesting elation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M000.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker expresses frustration through their loud and emphatic speech, indicating increased vocal intensity and a quicker pace, often associated with anger or annoyance. There's also a noticeable wail at the end, reflecting an emotional outburst. The use of profanity and the phrase 'freaking human being' underscores the speaker’s frustration and irritation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M001.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and a sense of urgency in their voice. There's also a noticeable lack of pauses and a more direct, forceful delivery which contributes to the overall excited mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F020.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of happiness or sadness. The pace and volume of her speech remain consistent, indicating a calm and composed delivery. There are no discernible crying sounds or laughter, and the speaker's voice does not tremble or show any other signs of strong emotions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_M040.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely sounds strained or tense, reflecting their emotional state. There may be instances of sighing or hesitation, suggesting irritation or annoyance. Additionally, any emotional responses like crying or laughter could indicate heightened feelings of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F016.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker's voice carries a sense of fear, evident from the trembling in her voice and the rapid pace of her speech. There are frequent pauses and instances of stress, indicating she may be struggling to maintain composure or articulate her thoughts clearly. Additionally, there is an underlying tone of distress, which can be heard through her crying and sighing."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_F006.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone with noticeable pauses and a slower speech rate. There might be instances of throat clearing or sniffing, indicating sadness or distress. The emotional delivery seems subdued and possibly melancholic, reflecting the overall feeling of sadness conveyed through the vocal expressions and body language."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M029.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio does not contain explicit indicators of excitement such as crying or laughter. However, there is an increase in the pitch and volume of the speaker's voice towards the end, which may suggest excitement or agitation. Additionally, the pace of speech slightly accelerates towards the end, further contributing to the perception of excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M038.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their emotional tone, which likely includes a heightened pitch and possibly a faster speaking rate, indicating irritation or agitation. Additionally, there may be instances of pauses or hesitations, suggesting difficulty or annoyance. The emotional intensity could also lead to voice trembling or changes in volume, further amplifying feelings of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F026.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried tone, indicating they may be upset or agitated about the topic being discussed. The use of filler words like 'umm' suggests hesitancy or difficulty in expressing their thoughts. Additionally, there's a noticeable increase in pace and volume towards the end, which further amplifies the sense of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F000.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear. Firstly, there is an audible sniffle, which often indicates distress or sadness. Additionally, the voice may sound shaky or unsure, reflecting a lack of confidence or fearfulness. The pace of speech can be slow, suggesting hesitation or anxiety. There might also be a noticeable pause before the speech starts, contributing to an overall sense of unease. Furthermore, the speaker's choice of words and phrasing may convey a feeling of being overwhelmed or fearful. Finally, the emotional tone of the voice carries a sense of apprehension or concern."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M015.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some hand movements indicating increased energy and enthusiasm. The use of 'tonnes' and 'too' also conveys a sense of eagerness and overture towards meeting many people."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_M027.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and rapid tone, along with a strained voice and crying sound, indicating strong emotions of anger or annoyance. The emotional turmoil is further emphasized by the sudden deepening of voice, suggesting an intense emotional response."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F017.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume without any noticeable changes in tone or pitch. There are no signs of laughter, crying, or other strong emotional expressions. The speech is delivered in a straightforward manner with no particular emphasis or stress on certain words. Additionally, there are no instances of voice trembling or other physical indicators of emotions during the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F005.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone is elevated with a sense of urgency and frustration, indicated by the modulation of pitch and speed. There are also noticeable pauses and changes in volume, suggesting an emotional struggle. Additionally, the presence of crying or sobbing indicates a high level of distress and frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M006.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong feelings such as happiness or sadness; the tone is even and calm throughout the speech. Crying sounds or laughter are not present, indicating a lack of intense emotions. The speech does not have any significant pauses or hesitations, contributing to the overall neutral demeanor. Emphasis and stress are minimal, further supporting the perception of a neutral mood. There is no noticeable trembling in the voice, maintaining composure and balance."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M040.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There is also a noticeable emphasis on certain words, suggesting irritation or frustration. Additionally, the shortness and quick pace of the speech further convey a sense of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F002.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their labored breathing, sighing, and emotional distress indicated by crying. The repetition of 'I don't know' suggests uncertainty and frustration. Additionally, the tone likely reflects a sense of weariness or emotional exhaustion due to ongoing challenges or failures."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M022.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong feelings such as happiness or sadness; the tone is consistent throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M027.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tense and quickened pace, indicating they might be upset or agitated about the situation being discussed. The fact that they are crying also suggests strong emotions, often associated with frustration or anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M065.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio reflects happiness through various vocal expressions like a light-hearted tone, quicker pace, and an upbeat manner of speaking. There's also a noticeable absence of negative emotions such as sadness or anger, indicating overall positivity."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M008.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement:\n\n1. High-pitched and speeding up speech: The speaker's voice reflects a sense of urgency and excitement, indicated by the rapid increase in pitch and speed.\n\n2. Crying sound: There is an audible crying sound from the speaker, which often conveys intense emotions such as joy or excitement.\n\n3. Laughter: Following the crying sound, there is a burst of laughter, which amplifies the excitement conveyed by the speaker.\n\n4. Emphasis and stress: The repetition of 'Oh my gosh' with emphasis and stress indicates strong feelings of excitement or astonishment.\n\n5. Voice trembling: Although subtle, the trembling in the speaker's voice suggests a level of inner excitement or nervousness.\n\n6. Changes in tone: The shift from a normal speaking pace to a faster and higher-pitched tone contributes to the overall excitement conveyed.\n\n7. Pauses: The brief pause between the statements also adds to the dramatic effect, emphasizing the excitement.\n\nOverall, these features combined create a vivid picture of a speaker experiencing intense excitement or shock."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F043.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion can be observed through their steady pace and regular rhythm in speaking, lacking any prominent changes in pitch or intensity. There are no signs of laughter, crying, or other emotional displays, indicating a calm and composed demeanor. The voice remains steady throughout, suggesting an unwavering neutral attitude."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried manner of speaking, indicating they are likely rushed or eager to get away. The use of sighs and the repetition of phrases like 'no problem' suggest a sense of annoyance or exasperation. Additionally, there's a mention of an incident involving 'the Gurney's running tonight,' which could be causing further distress or urgency."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro01_M023.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their raised and forceful tone, indicating anger or annoyance. The emphatic statement 'There's nothing I can do for you' suggests helplessness and frustration. Additionally, the emotional state is further conveyed by the presence of crying sounds, which indicates an intense feeling of distress or disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_F015.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits a range of emotional cues that suggest frustration:\n\n1. Crying: The presence of tears indicates strong emotions, often associated with frustration or distress.\n2. Laughter: Although not continuous, the laughter heard intermittently conveys a sense of sarcasm or exasperation.\n3. Changes in tone: The sharp increase in pitch and volume towards the end of the speech segment suggests an escalation of frustration or anger.\n4. Speech rate: The quickened pace of speech indicates a heightened state of urgency or frustration.\n5. Pauses: The frequent pauses between words or phrases imply contemplation or frustration in constructing thoughts.\n6. Emphasis and stress: The speaker places heavy emphasis on certain words, indicating feelings of annoyance or frustration.\n7. Voice trembling: A quivering voice can be an indicator of inner turmoil and emotional distress, commonly experienced when frustrated.\n8. Other emotional characteristics: The speaker's sigh at the beginning of the speech also reflects feelings of frustration or weariness.\n\nOverall, these audio features combine to create a picture of a person experiencing intense frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F030.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through a tense voice, rapid speech rate, and a sharp increase in volume towards the end of the sentence 'Your father thinks Larry's coming back.' This indicates that the speaker may be upset or agitated about the situation mentioned."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F022.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through a raised and forceful tone, indicating anger or annoyance. The use of expletives such as 'Go away, all of you' strongly conveys a sense of irritation. Additionally, there's a noticeable increase in speaking rate and a lack of pauses, which further emphasizes the speaker's emotional state of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M006.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and quickened pace, indicating irritation or annoyance. The heightened pitch and volume suggest an increase in emotional intensity. Additionally, there may be instances of pauses or hesitations, which often accompany feelings of frustration when trying to articulate one's thoughts clearly under pressure."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M047.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone likely reflects frustration through a heightened pitch and possibly a quicker pace, indicating irritation or anger. Additionally, there may be instances of pauses or hesitations, which could suggest indecision or annoyance. The emotional state of the speaker might also be conveyed through vocalizations like sighs or raised voices."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M010.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional cues indicative of fear. Firstly, there is an instance of crying, which is a common response to distress or fear. Additionally, the speaker's voice may sound shaky or uncertain, reflecting a lack of confidence or fearfulness. There might be hesitations or pauses in speech, suggesting anxiety or fear. Furthermore, the speaker's choice of words and phrasing may convey a sense of urgency or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M022.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some hand gestures or physical movements. The heightened pitch and quicker pace indicate excitement. Additionally, there might be a sense of urgency or agitation in the speaker's voice, further supporting the idea of excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_M026.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried tone, indicating they are eager or agitated. The use of filler words like 'umm' and 'ah' suggests a lack of preparation or difficulty in finding the right words. Additionally, there's a noticeable increase in pace and volume towards the end, which can be associated with anger or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M032.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low tone, and sniffle, indicating they are trying to hold back tears. The fact that the speaker's voice trembles slightly further supports this emotion. Additionally, there is a noticeable pause before the speaker says 'Okay,' which may suggest contemplation or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M008.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio contains several indicators of happiness such as:\n\n1. The speaker's tone is likely to be upbeat and cheerful.\n2. There may be a noticeable increase in pitch and speed while speaking, which often indicates elation or excitement.\n3. The use of exclamations like 'Oh' and 'Wow' suggests strong positive emotions.\n4. Any laughter heard during the speech would also suggest amusement or joy.\n5. If the speaker's voice trembles slightly, it could indicate they are emotionally moved by happiness.\n\nThese elements combined give an impression that the speaker is expressing great happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M037.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional indicators of fear including:\n\n1. Crying or sobbing: There is an instance of crying or sobbing at (3.28,4.07).\n2. Changes in tone: The speaker's tone likely fluctuates due to fear, potentially becoming shaky or unsure.\n3. Speech rate: The speaker may speak more quickly under conditions of distress, indicating a higher pitch and faster pace.\n4. Pauses: Fear can cause hesitation, leading to longer pauses between words or phrases.\n5. Emphasis and stress: The speaker might place greater emphasis on certain words, reflecting increased stress or anxiety.\n6. Voice trembling: Fear can lead to physical reactions like shaking, which could be audible in the speaker's voice.\n\nThese elements combined suggest that the speaker is experiencing fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F029.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely sounds tense and possibly irritated. Additionally, there may be instances of pauses or hesitation, suggesting they are struggling to maintain composure or think clearly under stress. The emotional state of the speaker can also be inferred through vocal indicators like voice trembling or changes in pitch and volume."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F011.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low tone, and emotional delivery. The sniffle indicates a possible emotional response, and the sigh emphasizes a sense of sorrow or disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M064.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of crying or laughter. However, there is a notable increase in pitch and a lighter, possibly happier tone towards the end of the sentence 'Well you bear my children.' This suggests a positive emotion, potentially happiness, although without additional context it cannot be confirmed with certainty."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_M018.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate without any noticeable changes in tone or pitch. There are no signs of laughter, crying, or other emotional displays. The voice remains clear and steady throughout the speech, indicating a calm and composed demeanor."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F015.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits a variety of fear-related vocal indicators including a high-pitched voice, crying, and a trembling voice which suggests a state of distress or fear. The quick pace and hesitations ('Umm') in her speech also indicate anxiety or fear. Additionally, the content of the speech implies a sense of uncertainty or confusion about the situation being discussed, contributing further to the perception of fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M043.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through their harsh and commanding tone, loud voicing, and rapid speech rate. The emphasis on certain words and the overall forceful manner of speaking convey feelings of anger. Additionally, there's a noticeable trembling in the voice, suggesting a high level of emotional arousal."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M013.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some vocal trembling. The heightened pitch and quicker pace indicate excitement. Additionally, there might be intermittent pauses or hesitations that further emphasize the excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M042.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of happiness such as laughter or upbeat tempo. However, the tone is assertive and possibly hopeful, considering the content of what's being said ('I'm going to make you a fortune!'). The confidence in the speaker's voice might suggest optimism or determination."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M020.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow pace, and low pitch. The emotional delivery is filled with sorrow, and there's a noticeable hesitation and struggle in the speech, indicating deep distress. Additionally, the sigh at the end intensifies the sense of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M036.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and volume of speech, lacking any prominent emotional cues such as crying or laughter. The consistent rhythm and non-inflected tone indicate a calm and composed demeanor. There are no noticeable pauses or hesitations, suggesting an attempt to maintain composure. However, the repetition of 'there's nothing else I can do' might carry a hint of resignation or helplessness, which could be perceived as a subtle variation from neutrality."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F006.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional cues indicative of fear:\n\n1. Crying: There is an instance of crying, which is often associated with distress or fear.\n2. Changes in tone: The speaker's voice may fluctuate, possibly indicating anxiety or fear.\n3. Speech rate: A faster speech rate can be an indicator of fear or nervousness.\n4. Pauses: Short hesitation or pause before speaking can suggest fear or uncertainty.\n5. Emphasis: Stronger emphasis on certain words might suggest that they are being emphasized due to fear or anxiety.\n6. Stress: Tense vocal cords and changes in pitch can indicate stress, which is often related to fear.\n7. Voice trembling: If the voice trembles while speaking, it can be a clear sign of fear.\n\nThese features combined suggest that the speaker is experiencing fear during the speech segment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_F022.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating anger or annoyance. The emotional delivery includes a raised volume and possibly harsher speech patterns, reflecting an inability to control emotions. There may also be instances of pauses or hesitations, suggesting irritation or difficulty in communicating effectively. Additionally, crying or sobbing sounds indicate a deep level of distress or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F028.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in tone or pitch. There are no discernible crying sounds, laughter, or other emotional indicators. The voice remains steady and firm throughout the speech, indicating a lack of emotional fluctuation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F000.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low tone, and tears in her voice. The emotional delivery indicates she is upset or sorrowful."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_M006.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their slow pace and low tone, indicating a lack of energy and possibly disappointment or grief. The deliberate slowing down of speech suggests a struggle to convey emotions, often a sign of distress. Additionally, there's a noticeable tremble in the voice, which further amplifies the sense of sadness and vulnerability."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F008.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's expression of frustration can be noted through their heavy sigh at the beginning of the speech (0.32-1.67), indicating a sense of weariness or annoyance. Additionally, the use of the phrase 'I'm getting very bored with this conversation' explicitly conveys the speaker's frustration. The repetition of the word 'very' emphasizes their boredom and irritation. Furthermore, the speaker's decision to pause before continuing could suggest contemplation or an inability to continue without taking a moment to process their feelings."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F022.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating an increase in pitch and volume. There's also a noticeable pause before speaking, suggesting irritation or anger. Additionally, the expression 'I'm paying so that I can take myself and my belongings with me' conveys a sense of frustration or resentment towards the situation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_F001.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their emotional tone, which likely includes a heightened pitch and quicker pace, possibly indicating anger or annoyance. Additionally, there may be instances of vocalizations like sighs or exclamations that further convey feelings of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F001.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio does not contain any explicit indicators of excitement such as loud volume or rapid speech; however, the tone of the voice may suggest a sense of urgency or concealment, which could be related to excitement or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M025.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a weight of sadness, evident from the slow pace and low pitch of his speech. There is a noticeable tremble in his voice, indicating inner turmoil and distress. The emotional delivery is slow and heavy, reflecting a profound sense of sorrow or grief. Additionally, there are pauses in his speech, suggesting contemplation or deep emotion. The choice of words like 'lost' implies a sense of loss or failure, further amplifying the overall sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F015.mp4",
    "ground_truth": "Other",
    "audio_clue": "The speaker's expression of distress is evident through their crying and sighing, indicating a sense of sorrow or discomfort. The use of a sigh helps convey a feeling of resignation or disappointment about the situation they are discussing. Additionally, the hesitations ('Umm') and the soft pace of speech ('I-I-I...') further reflect this distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M027.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tense and possibly harsh tone, indicating they are upset or agitated about the situation being discussed. The emotional delivery suggests a sense of desperation or frustration, as if they can't understand why someone wouldn't want something from them."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_M005.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some energetic hand gestures or body movements. The heightened pitch and quicker pace indicate excitement. Additionally, there might be a sudden change in tone or a momentary pause before continuing, contributing to the overall sense of eagerness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F030.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains a segment where the speaker's voice rises in pitch and intensity, suggesting excitement or agitation. There is also a noticeable speeding up of the speech rate during this part, which further amplifies the sense of excitement. Additionally, the emotional tone seems to be charged with energy and possibly a sense of urgency or enthusiasm. These elements combined indicate that the speaker is likely feeling excited."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_M008.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low tone, and heavy breathing, indicating a struggle to contain emotions. There's also a noticeable hesitation before speaking, which suggests contemplation or distress. Additionally, the use of the phrase 'it's paying off now' in a sad mood might imply a sense of disappointment or irony about the situation, further enhancing the melancholic atmosphere."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_F021.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a faster speaking rate. There's also a noticeable hesitation before the word 'Really?' which indicates uncertainty or surprise. Additionally, the speaker's voice may sound tense or shaky, contributing to the overall sense of surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F032.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and tears in her voice. The emotional delivery suggests she is experiencing feelings of sorrow or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_F002.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely sounds tense and irritated. Additionally, there may be instances of pauses or hesitation, suggesting they are struggling to maintain composure or find the right words. The emotional state could also manifest through crying sounds or changes in pitch, indicating a heightened sense of distress or anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F009.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in tone or pitch. There are no signs of laughter, crying, or other emotional displays, indicating a calm and composed demeanor. The voice remains steady throughout, suggesting an unwavering neutral attitude."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F023.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating an irritated or angry mood. There's also a noticeable increase in pace and possibly a choppy or hurried manner of speaking, suggesting frustration. Additionally, the emotional intensity may be conveyed through loud voicing and a lack of control over breathing, which are typical physical reactions to anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M037.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in tone or pitch. There are no signs of laughter, crying, or other emotional displays, and the voice remains steady and calm throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_2_M007.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating they are upset or agitated about someone waiting. The prolonged silence before speaking also emphasizes their emotional state. Additionally, there's a noticeable tremble in their voice, further amplifying the sense of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M004.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating sadness. There is also a noticeable pause before the speaker begins speaking, suggesting contemplation or distress. Furthermore, the choice of words like 'left him alone' implies a sense of isolation or giving up on someone, contributing to the overall feeling of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F014.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some vocal flourishes or modulation in pitch. There might also be instances of sighing, laughing (which could be interpreted as an excited reaction), or heightened intonations indicating excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F021.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow pace, and low pitch. The emotional delivery includes pauses and a sniffle, indicating distress or sorrow. The tone is also slightly wavered, contributing to the overall feeling of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M008.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no noticeable changes in pitch or volume. There are no discernible emotional cues such as crying, laughter, or sighs. The pace of speech is steady, indicating a calm and composed demeanor. Additionally, there are no instances of stammering or hesitations, supporting the idea of a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F029.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The neutral emotion in the audio is indicated by a steady pace and normal volume of speech without any noticeable changes in tone or pitch. There are no signs of laughter, crying, or other strong emotional expressions. The pauses between words are regular, indicating a calm and composed delivery. Voice trembling or other physical indicators of distress are also absent, supporting the idea of a neutral mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M031.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid increase in pitch at the beginning of the speech, indicated by a sharp rise in voice intensity. This change in pitch is coupled with a brief pause before continuing, suggesting hesitation or anticipation. There's also a noticeable speeding up of the speech rate towards the end, which contributes to the overall sense of eagerness. Additionally, the use of louder volume and possibly a more animated tone further emphasizes the speaker's excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M001.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their labored breathing, harsh tone, and emotional agitation, as indicated by the crying sound and loud speaking. The rushed speech and hesitations ('Umm') suggest a sense of urgency and distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M015.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent changes in pitch or intensity. There are no discernible emotional cues such as crying or laughter, and the pace and rhythm of the speech suggest a calm and composed delivery. The consistent non-emotional delivery indicates a neutral mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M027.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker's tone can be described as tense and irritated, indicating feelings of frustration. There is also a noticeable increase in the pitch and volume, suggesting an escalation of emotions. Additionally, the presence of crying or sobbing sounds suggests a deep level of distress and frustration. The pauses between words indicate a struggle to maintain composure, while the emphasis on certain syllables highlights key points of frustration. Furthermore, the trembling voice adds a layer of emotional vulnerability and intensity to the speech, reinforcing the overall sense of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M023.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow pace, and low pitch. The emotional delivery includes pauses and a sniffle, indicating distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M016.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating they feel strongly about the subject being discussed. The presence of crying or sobbing sounds suggests an emotional depth of distress or anger. Pauses in speech and changes in pitch further emphasize the intensity of their feelings."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F031.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits several key emotional indicators that suggest excitement:\n\n1. High-pitched and rapid speech rate: The speaker's voice reflects a quickened pace and an elevated pitch, typical of excitement or agitation.\n\n2. Emphasis and stress on certain words: There is a noticeable emphasis and stress placed on certain words, indicating that the speaker is highlighting these points with enthusiasm or eagerness.\n\n3. Crying sounds: Although not continuous, the presence of crying sounds indicates that the speaker may be experiencing strong emotions, including excitement.\n\n4. Laughter: Brief laughter can be heard intermittently, which further supports the idea that the speaker is in a state of excitement or amusement.\n\n5. Voice trembling: Although subtle, there is a noticeable tremble in the speaker's voice, which often accompanies excitement or anxiety.\n\n6. Changes in tone: The speaker's tone fluctuates between periods of intensity and calmness, reflecting the ebb and flow of excitement.\n\n7. Pauses: Sudden pauses in the speech can indicate moments of heightened emotion or anticipation.\n\nOverall, these features combined create a picture of an excited speaker who is likely engaged or passionate about the subject being discussed."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M018.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice trembling, slow pace, low pitch, and emotional delivery. The sigh indicates a sense of weariness or disappointment. Additionally, the context of the conversation might suggest an ongoing struggle or burden that contributes to the overall sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_F031.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker's happiness can be observed through their light-hearted tone, cheerful manner of speaking, and the use of laughter. There's an evident smile in their voice, indicated by the softness and warmth of their speech. Additionally, the quick pace and upbeat rhythm of their speech further emphasize their happy mood. There are no signs of distress or sadness; rather, the energy and joy in their voice are unmistakable indicators of happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_F016.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong positive or negative emotions like happiness or sadness. The tone is even and there are no particular inflections or emphasized words. The pauses are subtle and do not add any dramatic effect to the statement. Overall, the speech maintains a calm and balanced demeanor throughout."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F003.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating sadness. There is also a noticeable pause before the speech begins, suggesting contemplation or distress. Additionally, the speaker's voice may tremble slightly, further supporting the emotion of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M038.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tone, which likely sounds irritated or angry. There may be instances of loud speaking or shouting, indicating an increase in volume, possibly reflecting frustration. Additionally, there might be a rapid pace or hurried speech, suggesting urgency or agitation. Furthermore, the speaker's voice may tremble or fluctuate, which can be an indicator of emotional distress or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M001.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, evident from the loud and emphatic speech, the rapid pace, and the strained or tense quality of voice. There are also instances of stuttering, where the words are not clearly articulated, indicating heightened emotional arousal. Furthermore, there's an audible display of frustration through sighs and crying, contributing to the overall sense of desperation and annoyance."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_M006.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits several key indicators of anger in their speech. Firstly, there is a noticeable increase in the pitch and volume of their voice, which often indicates anger or frustration. Additionally, the pace at which they speak suggests a sense of urgency or agitation. There are also instances where the speaker may pause momentarily before continuing, which can further emphasize feelings of anger or annoyance. Furthermore, the tone of the speaker seems to be harsh and commanding, which are typical characteristics of angry communication. Lastly, the presence of crying sounds mixed with the anger in their voice could indicate a heightened emotional state. Overall, these auditory cues suggest that the speaker is expressing anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M016.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, evident from the loud and emphatic speech, heavy breathing, and crying out which indicates a high level of distress or anger. The rushed speech, louder volume, and faster pace further support this interpretation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M021.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some energetic gestures or vocal modulation. The context where they mention not going to school but still being productive might suggest a sense of eagerness and enthusiasm about their work commitments."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_F011.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and tears in her voice. The emotional delivery indicates she is upset or sorrowful."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_M022.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, with no discernible changes in pitch or speech rate. There are no crying sounds or laughter; however, there is a slight hesitation indicated by a pause before continuing the sentence ('but you know'). The overall delivery is calm and composed, lacking any signs of strong emotion or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F045.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement. Firstly, there is a rapid speech rate, which usually suggests excitement or anxiety. Additionally, there are instances of loud and emphatic speaking, which further emphasizes the excitement. Furthermore, the presence of crying sounds indicates an intense emotional state, often linked with excitement or happiness under certain circumstances. Lastly, the short pauses between words suggest a hurried or excited manner of speaking. Overall, these auditory cues combine to suggest that the speaker is feeling excited."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_F025.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, evident from their harsh, loud, and fast-paced speech. The heightened pitch and volume indicate strong feelings of anger or annoyance. Moreover, there's a noticeable trembling in the voice, suggesting a high level of emotional arousal. The use of forceful language and the repetition of certain words like 'Oh no' emphasize the severity of the emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_2_M013.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and rapid tone, indicating they might be upset or agitated about the situation. The heightened pitch and quicker pace of speech suggest an emotional state of distress or annoyance. Additionally, there may be instances of stuttering or hesitations, further emphasizing the speaker's frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_M032.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some energetic gestures. The modulation in pitch and volume indicates heightened emotions. Additionally, there might be instances of laughter or sniffles that further emphasize the speaker's excited state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F006.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of happiness or sadness. The pace and volume of her voice remain consistent, indicating a calm and composed demeanor. There are no discernible emotional cues such as laughter or crying sounds, further supporting the idea of a neutral emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M000.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and a sense of urgency in their voice. There's also a noticeable lack of pauses which indicates a smooth flow of energy and enthusiasm. The intonation rises, suggesting a heightened emotional state. Additionally, the use of exclamation marks at the end of sentences often indicates strong feelings such as excitement or surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M023.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits a range of emotional cues that suggest frustration:\n\n1. Crying: The presence of tears indicates an emotional state of distress or frustration.\n2. Laughter: The intermittent laughter indicates a release of tension or frustration.\n3. Changes in tone: There's a shift from a normal speaking pace to a faster, more animated tone, reflecting an escalation of frustration.\n4. Speech rate: The quickened pace of speech suggests a heightened level of frustration or agitation.\n5. Pauses: The frequent pauses indicate a struggle to maintain composure or express emotions effectively.\n6. Emphasis and stress: The heightened pitch and volume of speech suggest increased emphasis and stress, typical of someone experiencing frustration.\n7. Voice trembling: The trembling voice indicates emotional arousal and distress, often associated with frustration.\n8. Body language: Non-verbal cues like fidgeting or hugging oneself can also indicate frustration.\n\nOverall, these auditory indicators combine to convey a sense of frustration in the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M001.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio exhibits several key emotional indicators that suggest excitement. Firstly, there's an increase in the pitch and volume of the voice, indicating heightened energy and passion. Additionally, there are instances of laughter, which is often a sign of joy or excitement. Furthermore, the brief silence followed by an emphatic statement 'I'm getting married' suggests a moment of revelation or personal triumph that could be driving the excitement. Lastly, the presence of crying sounds might indicate a mix of emotions, but it coexists with other exciting elements like laughter and the declaration of an engagement, thus amplifying the overall sense of excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_F033.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their struggle to maintain composure while speaking, as indicated by hesitation, stuttering, and a voice that cracks towards the end. The sigh indicates a sense of weariness or emotional exhaustion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F044.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, without any noticeable changes in tone or pitch. There are no signs of laughter, crying, or other emotional displays, indicating a calm and composed demeanor. The voice remains steady, without trembling or other indicators of stress or discomfort. Overall, the lack of distinct emotional cues contributes to the perception of a neutral mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F014.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their heavy tone, slow speech rate, and increased vocal intensity towards the end of the sentence ('What's the story?'). There's also a noticeable pause before they begin speaking, indicating contemplation or annoyance. The emotional delivery suggests a sense of irritation or exasperation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_M041.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone with noticeable pauses and a slower speech rate. There might be instances of sighing or a soft voice, indicative of sadness. Additionally, the emotional delivery might include underlining words or a softening of the voice at key points, further emphasizing the sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_M000.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits intense distress or fear, as indicated by the description of a loud, deep voice that may be associated with crying or shouting. The use of an exclamation like 'Oh God!' suggests a sudden or intense feeling of shock, anxiety, or desperation. Additionally, the presence of a man speaking in English with a fearful mood further supports this interpretation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F004.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a variety of vocal and non-verbal cues. The sigh indicates a sense of weariness or disappointment. Additionally, there's a hint of crying or sobbing, which is a clear indicator of sadness. The slow pace and low tone of the speech convey a sense of grief or sorrow. Furthermore, the emphasis on certain words suggests a deep emotional burden. There's also a noticeable pause before the speaker continues, which might indicate contemplation or distress. Lastly, the voice trembling adds a layer of vulnerability and emotional distress to the speaker's delivery."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F013.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear:\n\n1. Changes in tone: The speaker's voice likely has a higher pitch and faster pace, which are typical responses to fear or anxiety.\n\n2. Crying sounds: The presence of crying suggests intense distress or fear.\n\n3. Voice trembling: A trembling voice indicates that the speaker is experiencing physical reactions associated with fear, such as shaking.\n\n4. Pauses: Short, hesitation-filled pauses may indicate that the speaker is struggling to find words or is uncertain about how to express their feelings.\n\n5. Emphasis and stress: The speaker may place extra emphasis on certain words, indicating that they are trying to convey urgency or distress.\n\n6. Laughter: Although not typically expected in situations of fear, the presence of laughter could indicate a coping mechanism or disbelief at the situation.\n\n7. Speech rate: An increased speech rate can be another indicator of fear, as it often reflects a heightened state of alertness or panic.\n\nOverall, these combined emotional features paint a picture of a speaker who is likely feeling intense fear or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_M002.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several key emotional indicators of fear:\n\n1. Changes in tone: The speaker's voice likely has a higher pitch and faster pace, which are typical physical reactions to fear.\n\n2. Voice trembling: A trembling voice can be heard during the speech, which is often associated with fear or anxiety.\n\n3. Pauses: There may be instances where the speaker hesitates or takes longer than usual to speak, which could indicate they are experiencing fear or uncertainty.\n\n4. Emphasis and stress: The speaker places a great deal of emphasis on certain words, suggesting they are worried about a particular aspect of the situation.\n\n5. Crying sounds: Although not explicitly mentioned, crying is sometimes a physical response to fear and distress, so it's possible that the speaker experiences some level of emotional turmoil.\n\n6. Laughter: Laughter, especially if it's forced or unnatural, can be an indicator of fear or nervousness.\n\n7. Stress on specific syllables: The speaker seems to place extra stress on certain syllables, indicating worry or anxiety about those subjects.\n\nOverall, these audio features suggest that the speaker is experiencing fear or anxiety."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M023.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a smile in their voice. There's an absence of any signs of distress or sadness, indicating a positive emotional state. The brief and frequent pauses suggest comfort and ease. Additionally, the lightness in the voice and the slightly tremulous manner of speaking further support the perception of the speaker being happy."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_F005.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their heavy tone, slow speech rate, and a strained delivery. The sigh indicates a sense of weariness or emotional exhaustion. There's also a noticeable pause before the speaker begins speaking, which might suggest hesitation or difficulty in expressing their feelings."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F002.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent emotional expressions like crying or laughter. The pace and volume of her speech are consistent, indicating no significant changes in mood or intensity. There are no discernible pauses or hesitations, suggesting smooth and composed delivery. The articulation is clear, with no noticeable struggles or errors, supporting the idea of a neutral emotional state. Overall, these auditory cues suggest that the speaker's emotions remain even and unvaried throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M003.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, evident from the loud and emphatic speech delivery. The heightened pitch and urgency in the voice suggest a state of agitation and annoyance. Additionally, there's a noticeable increase in the pace and intensity of speech, contributing to an overall sense of exasperation. Furthermore, the emotional turmoil might be indicated by instances of vocal disruptions like sighs or hiccups, which could indicate feelings of stress or irritation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M032.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice trembling, slow pace, low tone, and the use of filler words like 'I don't know.' The sigh indicates a sense of weariness or disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M019.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through an elevated pitch, faster speaking rate, and a forceful tone, indicating irritation or annoyance. There's also a noticeable increase in volume and possibly some shouting elements. Additionally, the emotional context suggests that the speaker might be upset about someone else's actions affecting them negatively, leading to their angry outburst."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_M022.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud tone, indicating they may be upset or agitated about the topic being discussed. The presence of crying sounds suggests an emotional depth that aligns with feelings of frustration. Additionally, the hurried pace and possibly choppy breathing further emphasize the speaker's emotional state of distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M008.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The context where these vocal expressions occur can suggest the nature of the surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M025.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, with no discernible changes in pitch or intensity. There are no audible cues of laughter or crying, and the pace of speech is steady, indicating a level head."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_M022.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through their voice trembling, slow pace, low tone, and emotional delivery. The sigh indicates a sense of weariness or disappointment."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_1_F031.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate without any noticeable variations or emotional cues. There are no signs of laughter, crying, or other strong emotional responses. The tone remains calm and composed throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M032.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any discernible emotional fluctuations or cues. There are no instances of crying, laughter, or other emotional expressions. The pace and volume of the speech remain consistent, indicating a lack of emotional intensity."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M007.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speaking rate without any noticeable variations or emotional cues. There are no signs of laughter, crying, or other strong emotional responses. The tone remains calm and composed throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_F029.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, lacking any prominent emotional expressions like crying or laughter. The pace and volume of her speech remain consistent, indicating no significant changes in mood or intensity. There are no discernible pauses or hesitations, suggesting she speaks with ease and composure. The articulation is clear, without any noticeable strain on the vocal cords, supporting the idea of a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F050.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement:\n\n1. Changes in pitch and volume: The speaker's voice rises towards the end, indicating an increase in excitement or passion.\n\n2. Speed and rhythm: The pace at which the speaker speaks suggests a sense of urgency or eagerness, contributing to the excitement conveyed.\n\n3. Emphasis and stress: There is a noticeable emphasis on certain words, suggesting that they are key points being emphasized, which can often be associated with excitement or enthusiasm.\n\n4. Energy and dynamics: The overall energy level and dynamic variations within the speech suggest excitement, especially considering the modulation of voice and the presence of vocal expressions like sighs or laughter.\n\n5. Emotional cues: Sighs and laughter are common emotional cues in excitement or joy, which are present in the audio.\n\n6. Cultural context: Certain cultural expressions or idioms used by the speaker may also convey excitement or enthusiasm, providing additional context for understanding their emotional state.\n\n7. Body language: While not directly observed, it's possible that the speaker's body language aligns with excitement, such as through gestures or posture.\n\nConsidering these various aspects, it can be inferred that the speaker is indeed feeling excited."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_M001.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in tone or pitch. There are no signs of laughter, crying, or other emotional displays, and the voice remains steady without any noticeable trembling or stress. The pauses are subtle and contribute to the overall calm and neutral demeanor of the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_M019.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their tense and quickened pace, indicating they might be upset or agitated about having to take off their shoes. The sigh at the beginning of the sentence conveys a sense of weariness or exasperation. Additionally, there's a noticeable increase in the pitch and volume of their voice towards the end, which can further emphasize feelings of annoyance or irritation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_F015.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in pitch or loudness. There are no discernible crying sounds, laughter, or emotional vocalizations. The tone remains calm and composed throughout the speech. Pauses are occasional and brief, indicating a straightforward delivery with no particular emotional emphasis. Stress and tension are minimal, contributing to the overall neutral mood of the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F039.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in pitch or volume. There are no discernible crying sounds, laughter, or emotional trembles in the voice. The tone remains calm and composed throughout the speech. Pauses are occasional and not excessive. Emphasis is evenly distributed, indicating a balanced and neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_2_F034.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful tone, relaxed pace, and positive vocabulary. Phrases like 'does look really beautiful' convey a sense of pleasure and contentment. Additionally, the soft laughter heard towards the end further emphasizes the happy mood. There are no signs of distress or sadness in the vocal expressions or delivery."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F017.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits signs of anger through their raised tone, fast pace, and loud volume. There's also an instance of yelling which indicates strong emotions. Additionally, the speaker's face might be flushed or tense, further supporting the presence of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_F026.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent changes in pitch or intensity. There are no discernible crying sounds or laughter; however, there is a slight hesitation in the beginning of the speech, indicated by a pause before the first word 'Well.' This hesitation may suggest a moment of contemplation or uncertainty before starting the speech. The pace of speech is steady, indicating a regular flow of words without any rapid speech or hesitations. Emphasis is evenly distributed across the sentence, with no particular word or phrase standing out for added stress or importance. Lastly, there's no audible tremble in the voice, supporting the idea of a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F011.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio exhibits several key features that indicate excitement. Firstly, there's an increase in the pitch and volume of the speech, suggesting heightened energy or enthusiasm. Additionally, the use of exclamation marks like 'Oh my God' indicates strong feelings. Furthermore, the quick pace and possibly rushed manner of speaking further support the idea of excitement. Lastly, the presence of crying sounds mixed with laughter suggests a complex blend of emotions, with excitement being one of them."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro07_F021.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an upbeat and energetic tone, speeding up their speech and having a light, airy quality to their voice. There's also a noticeable smile in their voice, indicating happiness and enthusiasm. The casual manner of speaking with possible laughter suggests they are relaxed and joyful."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_F020.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone with noticeable sadness in her eyes and possibly in her voice pitch and volume. There might be hesitations or pauses indicating sorrow or grief. The emotional delivery seems subdued, perhaps lacking energy or enthusiasm, reflecting a sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M024.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone can be described as elevated with a raised pitch, indicating anger. There is also a noticeable pause before the speaker continues speaking, which might suggest a moment of consideration or emotion. Additionally, the emphatic way the speaker says 'wouldn't it?' suggests irritation or frustration. The presence of crying sounds further amplifies this emotion, indicating that the speaker is experiencing strong feelings of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_M000.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits signs of frustration through their labored breathing, sighing, and rushed speech pattern. The emotional tone suggests a struggle or annoyance, indicating that they might be upset or agitated about the topic being discussed, particularly related to the context of getting a new ID."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F009.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement. Firstly, there is an increase in the pitch and volume of the speaker's voice, suggesting heightened emotions. Additionally, there are instances of laughter, which often indicates amusement or joy. Furthermore, the use of exclamation marks ('!\"') in the speech suggests strong feelings or reactions. Lastly, the speaker's age being '29' might indicate that the event being referred to is particularly significant or exciting for her at this stage in life."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_F022.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, with a clear increase in pitch and loudness towards the end of the phrase 'That's right!' This kind of modulation in vocal expressions indicates excitement. Additionally, there's a slight hesitation before the word 'right,' which could suggest anticipation or excitement building up to the confirmation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M018.mp4",
    "ground_truth": "Other",
    "audio_clue": "The speaker exhibits a variety of emotional cues that suggest a sense of 'Other.' These include:\n\n1. Crying sounds: The presence of tears indicates distress or sorrow.\n2. Laughter: Although not prominent, the sporadic laughter suggests a lighter, possibly ironic or sarcastic tone to the speech.\n3. Changes in tone: The speaker's tone fluctuates between a flat, resigned manner and moments of lighter, possibly sarcastic delivery.\n4. Speech rate: There is an increase in speech rate towards the end of the sentence, which may indicate frustration or urgency.\n5. Pauses: The frequent pauses suggest hesitancy or difficulty in articulating their thoughts.\n6. Emphasis and stress: The heightened pitch and emphasis on certain words ('I can't do this') indicate feelings of anxiety or insecurity.\n7. Voice trembling: A subtle tremble in the voice may suggest nervousness or fear.\n8. Lack of knowledge: The speaker explicitly admits they don't know anything about the task at hand, which aligns with an 'Other' emotional category.\n\nOverall, these features combine to create a picture of a person experiencing distress or uncertainty, aligning them with the 'Other' emotional category."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_F009.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is indicated by a steady pace and normal speech rate, without any noticeable changes in pitch or volume. There are no signs of crying, laughter, or other emotional expressions that would suggest a different mood. The lack of vocal indicators such as trembles, pauses, or changes in stress also contributes to the perception of a neutral emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M014.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating strong disapproval or frustration. The loud and aggressive manner in which the speaker speaks suggests an intense emotional state of anger. Additionally, there's a noticeable increase in pace and possibly a harsher delivery, further amplifying the sense of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M011.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion can be inferred from their steady pace and normal speaking volume without any noticeable variations or emotional cues. There are no signs of laughter, crying, or other strong emotions; the delivery is calm and composed. The stress on the words 'I think I'll have a little' does not indicate distress or excitement but rather a neutral statement of intent."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M007.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating an irritated or angry mood. There's also a noticeable pause before speaking, suggesting irritation or annoyance. The repetition of the word 'it' and the强调 on 'charged me a late fee' further support the inference of anger. Additionally, there might be a slight trembling in the voice, which could be an indicator of anger or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_F000.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is indicated by a steady pace and normal speech rate without any noticeable changes in pitch or volume. There are no signs of crying, laughter, or other emotional expressions that would suggest a different mood. The lack of vocal indicators such as trembles, pauses, or changes in stress also contributes to the perception of a neutral tone."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_F027.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio contains several indicators of happiness, including a joyful tone, a smiling or laughing expression, and a light-hearted manner of speaking. The speed and rhythm of the speech suggest a sense of cheerfulness and ease. Additionally, there are no signs of distress or frustration, such as crying, sighing, or raised voices, which further support the conclusion that the speaker is happy."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F056.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of happiness such as laughter or upbeat tempo; however, the tone is generally positive and reassuring, which can be perceived as a form of happiness. The choice of words like 'you won't have trouble' implies a positive outcome or resolution, contributing to an uplifting atmosphere."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M040.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, lacking any prominent signs of happiness or sadness. There are no discernible crying sounds or laughter, indicating emotional stability. The pace and rhythm of the speech are regular, without any noticeable speeding up or slowing down. Slight variations in pitch can be perceived, contributing to the subtlety of the neutral mood. Pauses are occasionally used to emphasize certain points but do not disrupt the overall neutral tone. There's no evidence of stress, voice trembling, or other emotional indicators typically associated with strong positive or negative emotions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_F036.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised and forceful tone, indicating strong disapproval or frustration. The loud and emphatic manner in which the speaker says 'Don't think that way!' suggests an intense emotional state of anger or agitation. Additionally, there's a noticeable pause before the speaker continues, which might indicate they're struggling to maintain composure. Furthermore, the use of capital letters in 'Hear Me?' emphasizes the urgency and intensity of the speaker’s emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_F018.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and loud manner of speaking, indicating they are emotionally charged. The emphasis on certain words ('I ignore') and the raised volume suggest irritation or anger. Additionally, there's a noticeable pause before the speaker continues, which might indicate they're struggling to maintain composure. Furthermore, the crying sound towards the end of the sentence (if present) could serve as an auditory cue for the listener to pick up on the speaker’s emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M007.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful tone, engaging manner of speaking, and a smiling or light-hearted demeanor while talking about enjoying rock climbing and a pool. The use of positive words like 'great' also conveys a sense of joy. There are no signs of distress or sadness in the speaker's voice."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M019.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio reflects excitement through an emphatic and rapid speech rate, along with loud and clear vocal expressions. The speaker's tone rises, indicating excitement or agitation, and there are occasional pauses that add to the dramatic effect. Additionally, the use of exclamation marks ('!!') suggests strong feelings of excitement or surprise."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M035.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral mood is reflected through a steady pace and normal speech rate, lacking any prominent emotional indicators such as crying or laughter. The tone is even and there is no particular emphasis or stress on any words. Overall, the audio conveys a calm and balanced emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_F011.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their heavy tone, loud and rapid speech, along with interrupted speech patterns like stuttering, which indicates a heightened state of annoyance or irritation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F043.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits several key indicators of anger in their speech. Firstly, there is a noticeable increase in the pitch and volume of their voice, which often indicates anger or frustration. Additionally, the presence of crying or sobbing sounds suggests an emotional outburst, which can be a strong indicator of anger. Furthermore, the irregular pace and hesitations in the speaker's speech, such as stuttering or hesitation, also convey feelings of anger or agitation. The speaker's tense and strained voice, along with the emphasis on certain words, further support the inference of anger. Lastly, the emotional state of the speaker being described as angry in the initial information adds another layer of evidence for this emotion in their speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M020.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone can be described as harsh and irritated, indicating anger. There is also a noticeable increase in the pitch and volume, which further emphasizes their angry mood. Additionally, the presence of crying sounds suggests an emotional outburst, contributing to the overall sense of anger. The long pauses between words suggest irritation and frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_M005.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some energetic gestures. The use of 'really surprised' and 'really happy' indicates a positive emotional response, which contributes to the overall sense of excitement. Additionally, there might be a subtle tremble in the voice, suggesting a heightened emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M028.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a rapid and forceful speech rate, loud and aggressive tone, and a strained or tense voice. There may also be signs of irritation, such as interrupting or raising their voice during the speech. The emotional delivery includes elements like shouting or raised volume, indicating strong feelings of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_M015.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some energetic hand gestures or body movements. The heightened pitch and quicker pace indicate excitement. Additionally, there might be a brief hesitation before the speech starts, suggesting anticipation or excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_F016.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of happiness or sadness. There are no discernible crying sounds or laughter, indicating emotional stability. The pace and rhythm of the speech are regular, without any noticeable speeding up or slowing down. The use of filler words like 'um' suggests a casual speaking style rather than one filled with intense emotions. Additionally, there's no evidence of voice trembling or other physical signs of distress, supporting the neutral mood conveyed through the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_M025.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through their forceful and rapid speech, which includes elements like shouting and raising their voice. There's also a noticeable emphasis on certain words, indicating strong feelings. Furthermore, the speaker's face might be stern or harsh, reflecting their angry mood. Crying or sobbing sounds could also suggest an emotional outburst related to anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F054.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain explicit indicators of happiness such as laughter or upbeat tempo but does carry a sense of compassion, possibly through the tone and delivery. The use of 'yeah' in a gentle and soft voice may convey a feeling of empathy or understanding towards someone's situation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M042.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a heightened pitch, faster pace, and a forceful delivery. There's also an indication of irritation and annoyance conveyed through their vocal expressions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_F003.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in tone or pitch. There are no signs of laughter, crying, or other emotional displays, indicating a calm and composed demeanor. The voice remains steady throughout, suggesting an absence of tension or stress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F039.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio contains several indicators of happiness, including:\n\n1. Laughter: The speaker's laughter indicates amusement or joy.\n2. Changes in tone: There are moments when the speaker's tone lightens up, suggesting a positive turn of events or emotions.\n3. Speech rate: The speaker's slightly quickened pace can be perceived as an indication of excitement or happiness.\n4. Pauses: The occasional pause followed by laughter suggests the speaker may be taking a moment to enjoy the situation before continuing.\n5. Emphasis: The speaker places a greater emphasis on certain words, which might indicate they are particularly pleased about something.\n\nOverall, these auditory cues suggest that the speaker is experiencing happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_M004.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating sadness. There is also a noticeable pause before he speaks, suggesting contemplation or distress. The choice of words like 'no' and 'best' further emphasizes his sad mood. Additionally, there might be a softening of the voice at the end of 'left him alone,' which could indicate a sense of resignation or sorrow."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_M003.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow pace, and low pitch. The emotional delivery includes pauses and a sniffle, indicating distress or sorrow."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F034.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried tone, as indicated by their quickened pace and possibly shaky voice, suggesting irritation or annoyance. The emotional delivery also includes a sniffle, indicating they might be upset or emotional."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_F036.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no discernible changes in pitch or volume. There are no emotional cues such as crying or laughter, and the pace of speech is steady, indicating a calm and composed demeanor. The consistent rhythm and enunciation further support the idea of a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M020.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone can be described as raised and forceful, indicating anger. There is also a noticeable emphasis on certain words, suggesting heightened emotional agitation. Furthermore, the speaker's voice may tremble slightly, which is a common physical reaction to anger or frustration. Additionally, there might be a temporary pause before the speaker continues, which could further emphasize their angry mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M009.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits intense frustration, particularly through their harsh, loud tone, crying out, and aggressive manner of speaking. The repetition of 'shouldn't' and the questioning of whether they should contradict him or not suggests a deep-seated anger or frustration. Additionally, the emotional state is further indicated by the presence of crying sounds and a change in pitch and volume, which are often associated with strong emotions like anger or distress."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro06_F006.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating sadness. There's a noticeable slowing down of speech rate, which often occurs when one is sad. Additionally, there might be a hint of voice trembling or changes in pitch and volume, which further support the inference of sadness. The presence of any emotional cues like sighing or sniffing could also indicate sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_M015.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent emotional cues such as crying or laughter. The consistent tone and lack of vocal strain suggest a calm and composed demeanor. There might be subtle variations in pitch due to natural speech patterns, but overall, the emotion remains neutral."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M013.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent emotional expressions like crying or laughter. The pace and volume of the speech are steady, indicating no significant changes in mood. There are no discernible pauses or hesitations, suggesting smooth and composed delivery. The stress distribution is consistent, further supporting a neutral emotional state. Overall, the audio does not convey any particular emotion except for a neutral attitude."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F010.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow pace of speech, low pitch, and soft vocal expressions. The emotional delivery is heavy, indicating a profound sense of sorrow or disappointment. There's also an audible sniffle, suggesting that the speaker is trying to hold back tears while speaking."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_M006.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits fear through their trembling voice, rapid pace, and emotional distress indicated by crying. The emotional delivery suggests anxiety or fearfulness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_M032.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their emotional state, including crying and a change in tone from happy to sad. The sigh indicates a sense of resignation or disappointment. Additionally, the use of phrases like 'but if that can't happen here' and 'I'll just have to get out' suggests an inability to tolerate the current situation, further amplifying feelings of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_F015.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal volume. There are no signs of strong positive or negative emotions like happiness or sadness. The tone is even and there are no noticeable pauses or hesitations. Crying sounds are absent, indicating that the speaker maintains composure. Laughter is not present either, supporting the idea of a neutral mood. Emphasis and stress are subtle, if present at all, contributing to the overall calmness of the speech. Voice trembling is not detected, further supporting the perception of a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_2_M041.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone can be described as elevated with a raised pitch and a quicker pace, indicating anger. There is also a noticeable emphasis on certain words, suggesting heightened agitation. Additionally, there may be some vocal disruptions like pauses or hesitations, which further support the inference of anger. Furthermore, the speaker's voice might tremble slightly, which is a common physical reaction to anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M004.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker expresses anger through a raised and forceful tone, indicating strong disapproval or frustration towards automated systems. The use of expletives such as 'ridiculous' and 'never have ever worked for me' emphasizes their dissatisfaction. Additionally, there's a noticeable increase in pace and intensity in speech, suggesting irritation or agitation. Crying sounds might suggest an emotional overwhelmed state, and the overall loud and aggressive manner of speaking further amplifies this emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_2_M001.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of joy or sorrow. There are no discernible crying sounds or laughter; however, there's a subtle undercurrent of sadness in the speaker's voice, which might indicate an underlying emotional state. The pace of speech is slow and steady, suggesting contemplation rather than intense emotions. Pauses are occasionally used to emphasize certain points, indicating careful consideration of the words being spoken. There's also a slight wobble in the voice towards the end, which could further imply a touch of melancholy."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M009.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an elevated pitch, quicker pace, and louder volume. There's also a noticeable increase inbreathlessness which often indicates excitement or anxiety. The use of exclamation marks ('!!') further emphasizes the speaker's excitement. Additionally, the mention of wanting to be in a different place and the desire for novelty suggest excitement about new beginnings or experiences."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M018.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and volume of speech, lack of heavy breathing or sighing, and a consistent tone without any noticeable emotional fluctuations."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1b_M022.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There is also a noticeable emphasis on certain words, suggesting strong feelings. Moreover, the speaker's voice may tremble slightly, which is often a physical manifestation of anger or frustration. The speed of speech can also be rapid and choppy, reflecting a heightened emotional state. Crying sounds or sobbing, although not present, could further support the idea of anger. Laughter, if present, would likely be harsh and uncontrolled, another indication of anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F031.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a slow speech rate, low pitch, and tears in her eyes while speaking, indicating a emotional struggle."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_M063.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of happiness such as laughter or upbeat tempo; however, the soft and gentle voice of the speaker may convey a sense of calmness and positivity. The choice of words like 'polite' suggests a gentle or restrained demeanor, which can be perceived as a form of politeness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M005.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker's tone is raised and forceful, indicating anger. There is also a noticeable pause before speaking, which emphasizes the emotion. The loud and emphatic manner of speaking suggests irritation or anger."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_F002.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent signs of happiness or sadness. The regular pace and volume indicate a calm and composed delivery. There are no discernible pauses or hesitations, suggesting the speaker has thought through their response carefully. The lack of emotional cues suggests a neutral emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M015.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate without any noticeable changes in pitch or volume. There are no signs of laughter, crying, or other emotional displays. The tone remains calm and composed throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_F015.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a variety of vocal and non-verbal cues. The sigh indicates a sense of weariness or disappointment. Additionally, there's a noticeable slowing down of the speech rate, which often accompany feelings of sadness. The voice may also sound strained or tense, and there might be instances of pauses or hesitations, further emphasizing the sad mood. Crying sounds could also be present, contributing to the overall sorrowful atmosphere of the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M023.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, lacking any prominent changes in pitch or intensity. There are no discernible emotional cues such as crying or laughter, and the pace and rhythm of the speech suggest a calm and composed delivery. The steady pace and normal volume indicate that the speaker is trying to convey a neutral emotion."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_F011.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no discernible changes in pitch or volume. There are no emotional cues such as crying or laughter, and the pace of speech is steady, indicating a calm and composed demeanor."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_M022.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, using standard English language without any noticeable regional variations or accents. There are no discernible emotional cues such as crying, laughter, or changes in pitch; the speech rate is steady, with equal emphasis on each syllable, indicating a calm and composed demeanor. The occasional sighs suggest a hint of frustration or weariness but do not disrupt the overall neutral attitude."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F022.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the audio, lacking any prominent emotional expressions like crying or laughter. The pace and volume of her speech remain consistent, indicating no significant changes in mood or intensity. There are no discernible pauses or hesitations, suggesting she speaks with a straightforward and composed manner. Her voice does not tremble, supporting the idea of a neutral emotional state. Overall, these auditory cues suggest that the speaker's emotion remains calm and steady."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_F025.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slower than normal speech rate, and crying or sobbing sounds. There's also an emphasis on the importance of being included, suggesting feelings of exclusion or loneliness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F015.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some hand movements or gestures. The heightened pitch and quicker pace indicate excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_M002.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker expresses frustration through their tone, which likely sounds irritated and possibly elevated. The repetition of typing numbers and the phrase 'it won't listen to me' suggests an interaction with a device or system that isn’t responding as expected, contributing to the speaker’s frustration. Additionally, the sigh at the end might indicate a sense of resignation or exasperation about the situation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro02_F002.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and hurried tone, indicating they are eager or agitated. The fact that they raise their voice and emphasize certain words suggests an inability to control their emotions. There's also a noticeable tremble in their voice, which often accompany feelings of anger or frustration. Additionally, the presence of crying sounds indicates a strong emotional response, further amplifying the sense of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M017.mp4",
    "ground_truth": "Other",
    "audio_clue": "The speaker's crying indicates a strong emotional response, often linked to sadness or distress. The mention of something being 'six months old' might suggest a reference to a recent event or situation related to the speaker's feelings. Also, the quick pace and possible hesitation ('Umm') in the speech further imply a sense of urgency or emotional turmoil."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script02_1_F009.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tense and rapid tone, along with loud and emphatic speech. The emotional distress is evident from the crying sound and the harsh manner of speaking, which together create a sense of anger or frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_1_M023.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some vocal颤抖. The heightened pitch and quicker pace indicate excitement. Additionally, there might be intermittent pauses or hesitations that further emphasize the passionate and animated nature of the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_M045.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio reflects excitement through an elevated pitch, faster speaking rate, emphatic pronunciation, and possibly some vocal颤抖. There's also a noticeable pause before the speaker begins talking, which could indicate anticipation or excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_2_F017.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, indicating emotional distress or sorrow. The prolonged pause before speaking suggests hesitation or difficulty in expressing emotions. Additionally, there's a noticeable increase in the pitch and volume of her voice towards the end, which often indicates an escalation of emotions during times of distress. Furthermore, the tears falling from her eyes are a clear physical manifestation of sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_F003.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their tone, which likely sounds tense and possibly irritated. There may be instances of pauses or hesitation, suggesting uncertainty or annoyance about the situation being discussed. Additionally, there might be a change in pitch or volume, contributing to an overall sense of unease or irritation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro04_M041.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, lacking any prominent signs of joy or distress. The pace and volume of the speech remain consistent, indicating a level head. There are no discernible pauses or hesitations, suggesting smooth and composed delivery. Emphasis is evenly distributed, further supporting the neutral mood conveyed by the speaker."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro06_M007.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's voice carries a sad tone, indicating sadness. There might be a hint of melancholy in the voice, possibly due to a slow speech rate, low pitch, and softening of the voice at the end of words as suggested by the description 'His voice softened'. Additionally, the sigh 'Umm' at the beginning of the speech further emphasizes the sad mood."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_3_M007.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their heavy tone, sighing, and the use of filler words like 'umm.' There's also a noticeable increase in speaking rate and a shift from calm to agitated speech patterns towards the end, indicating rising frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_F001.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the speech, with no discernible changes in pitch or emotional intensity. There are no audible cues of laughter or crying; the pace of speech is steady, indicating a calm and composed delivery. Slight hesitations, such as stuttering 'um', suggest a natural, rather than emotionally charged, delivery."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_3_M018.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker's surprised mood can be inferred from their sudden and strong emotional response, indicated by the following auditory cues:\n\n1. High-pitched and rapid breathing: This suggests a sense of urgency and surprise.\n2. Wide eyes: The wide-eyed expression often conveys astonishment or surprise.\n3. Sudden speech: The speaker starts talking abruptly, indicating they were not expecting the event that led to their surprise.\n4. Changes in pitch and volume: There may be an abrupt shift in the speaker's tone, possibly upwards, reflecting an element of shock or surprise.\n\nThese auditory indicators combined give us a vivid picture of the speaker's surprised emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro02_M003.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional cues that indicate they are feeling fear:\n\n1. Crying: The presence of tears in the speaker's voice suggests distress or fear.\n2. Changes in tone: There is a noticeable drop in pitch and an increase in volume, which are often associated with fear or anxiety.\n3. Speech rate: The speaker speaks quickly, which can be a sign of fear or panic.\n4. Pauses: The frequent pauses in the speech indicate uncertainty or fear.\n5. Emphasis: The speaker places a strong emphasis on certain words, suggesting they are worried about a particular aspect of the situation.\n6. Stress: The speaker's voice carries a tense and strained quality, further supporting the idea of fear.\n\nOverall, these emotional indicators combined suggest that the speaker is experiencing fear."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_F001.mp4",
    "ground_truth": "Surprise",
    "audio_clue": "The speaker exhibits surprise through an abrupt change in pitch and a rushed speech pattern. There's also an instance of crying, which indicates strong emotions. The context where these vocal expressions occur suggests a surprising event or revelation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro08_M017.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the interaction, with no noticeable changes in pitch or speech rate. There are no emotional cues such as crying, laughter, or voice trembling. Slight pauses may occur during the spoken content, but they do not contribute to an emotional expression. The overall demeanor is neutral, without any prominent emotional features."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro05_F031.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits anger through a raised volume, faster pace, and a forceful delivery. There's also an indication of irritation and displeasure conveyed through their vocal expressions."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro03_F033.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The speaker exhibits happiness through a cheerful and upbeat tone, with a relaxed pace and a smile in her voice. There's an absence of any signs of distress or sadness, indicating a positive emotional state. The brief and light-hearted manner of speaking contributes to this perception of happiness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script03_2_M023.mp4",
    "ground_truth": "Anger",
    "audio_clue": "The speaker exhibits intense anger through a rapid and forceful speech pace, loud and aggressive vocal expressions, and a strained or tense voice. There's also an emphasis on certain words indicating frustration and irritation. Additionally, there may be signs of physical tension, such as shaking hands or body language that conveys aggression."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_impro08_F012.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in tone or pitch. There are no discernible crying sounds, laughter, or pauses; however, the speaker does tend to emphasize certain words, indicating a subtle stress on the spoken content. Additionally, there's a slight wobble in the voice towards the end of the phrase 'before my due date,' which hints at a subtle emotional state."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_M007.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio reflects excitement through an emphatic and rapid speech rate, loud and clear vocal expressions, and possibly some energetic hand gestures or body movements. The heightened pitch and quicker pace indicate excitement."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_1_F024.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion is reflected through a steady pace and normal speech rate, lacking any prominent changes in tone or pitch. There are no signs of laughter, crying, or other emotional displays, and the voice remains steady and unwavering throughout the speech."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro05_F006.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker maintains a neutral tone throughout the conversation, with no noticeable changes in pitch or speech rate. There are no emotional cues such as crying or laughter, and the voice is steady without any signs of trembling or stress. The pace of speech is regular, indicating a calm and composed demeanor."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_M003.mp4",
    "ground_truth": "Fear",
    "audio_clue": "The speaker exhibits several emotional indicators of fear. Firstly, there is an audible sniffle, suggesting they might be trying to hold back tears. Additionally, the tone of voice can be perceived as shaky or unsure, indicating anxiety. There's also a noticeable increase in the speaking rate, possibly reflecting a sense of urgency or distress. Furthermore, the use of filler words like 'whatever' suggests a lack of confidence or concern about the situation being discussed. Lastly, the context of the statement implies an attempt to reassure someone else ('darling'), which could be a cover for the speaker’s own fears."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script01_1_F023.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their distressed tone, heavy sighs, and crying, indicating a sense of helplessness or desperation. The emotional turmoil is further emphasized by the use of filler words like 'I don't know,' suggesting a lack of control or understanding in the situation. Additionally, the pauses and hesitations in speech ('郑英伟，你先冷静一下好吗？') imply a struggle to maintain composure."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script03_1_F002.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The audio contains several indicators of excitement:\n\n1. High-pitched and fast-paced speech: The speaker's voice reflects a quick and energetic delivery, typical for moments of excitement.\n2. Emphasis and loudness: There is an increase in the volume and intensity of the speech, suggesting heightened emotions.\n3. Crying sound: A brief cry indicates strong feelings, often associated with excitement or joy.\n4. Laughter: The presence of laughter suggests amusement or happiness, contributing to the overall excitement.\n5. Changes in tone: The speaker's tone fluctuates between periods of intensity and relaxation, reflecting the dynamic nature of excitement.\n\nThese elements combined create a vivid picture of an excited individual, possibly reacting to good news or a thrilling event."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_script01_2_F007.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker's sadness is indicated through a heavy, emotional tone, slower speech rate, and a soft voice. There may be instances of pauses or hesitations, and the emotional delivery might show signs of distress or sorrow."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_F032.mp4",
    "ground_truth": "Sadness",
    "audio_clue": "The speaker exhibits sadness through a heavy, strained voice, slow speech rate, and crying sounds. The sigh indicates a sense of weariness or emotional burden. The reference to someone else wishing they were with someone else suggests feelings of loneliness or longing. Additionally, the description of a person who did not take things hard and knew how to enjoy herself contrasts sharply with the speaker's current state, further emphasizing their sadness."
  },
  {
    "video_id": "IEMOCAP/video/Ses05F_script02_2_F015.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio contains several indicators of the speaker's happiness, including a joyful tone, a smiling voice, a light-hearted manner of speaking, and a quick pace. There are no signs of sadness or distress; rather, the overall mood conveyed through vocal expressions and articulation is one of elation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro04_F002.mp4",
    "ground_truth": "Neutral",
    "audio_clue": "The speaker's neutral emotion can be observed through their steady pace and normal volume. There are no signs of strong positive or negative emotions like happiness or sadness. The tone is even and there are no noticeable pauses or hesitations. However, the presence of a sniffle indicates a subtle emotional response, possibly indicating vulnerability or sensitivity under the neutral exterior."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro01_F020.mp4",
    "ground_truth": "Frustration",
    "audio_clue": "The speaker exhibits frustration through their raised tone, sighing, and the use of the phrase 'I'm not leaving.' The emphasis on 'not leaving' suggests a strong determination and frustration about not being able to leave. Additionally, there's a hint of distress in the voice, possibly due to the emotion of frustration."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro03_F023.mp4",
    "ground_truth": "Excited",
    "audio_clue": "The speaker exhibits excitement through an emphatic and rapid speech rate, loud and clear voice, and possibly some vocal flourishes or modulation in pitch. There might be a noticeable increase in volume and a quicker pace, reflecting heightened enthusiasm or anticipation."
  },
  {
    "video_id": "IEMOCAP/video/Ses05M_impro07_F008.mp4",
    "ground_truth": "Happiness",
    "audio_clue": "The audio does not contain any explicit indicators of crying or laughter. However, the use of the word 'awesome' typically conveys a positive sentiment, suggesting that the speaker may be experiencing happiness or contentment. Additionally, the relatively upbeat tone and normal pace of speech further support this interpretation."
  }
]