import stable_whisper

# ------------------------------
# Load the "medium" model on the GPU (device="cuda")
# ------------------------------
model = stable_whisper.load_model("medium", device="cuda")

# ------------------------------
# Load your LRC lyrics as plain text (ignore timestamps)
# ------------------------------
def _strip_lrc_tags(line):
    """Return the lyric text of one LRC line with all leading ``[...]`` tags removed.

    Handles lines that carry several timestamps (``[00:12.00][00:45.10]text``)
    and tag-only metadata lines such as ``[ar:Artist]``, which yield ``""``.
    Brackets that appear later inside the lyric text are left untouched.
    """
    text = line.strip()
    while text.startswith("[") and "]" in text:
        # Drop one leading tag, then any spacing between it and what follows.
        text = text.partition("]")[2].lstrip()
    return text


# "utf-8-sig" also reads plain UTF-8, but additionally drops a leading BOM so
# the first line's tag is still recognised as starting with "[".
with open("Enemies.lrc", "r", encoding="utf-8-sig") as f:
    # Keep only tagged lines (as before) and skip those with no lyric text, so
    # the hint is a clean, single-space-separated string without newlines.
    lyrics_hint = " ".join(
        text
        for text in (_strip_lrc_tags(line) for line in f if "]" in line)
        if text
    )

# ------------------------------
# Transcribe with word-level timestamps
# ------------------------------
# Transcribe the audio file. word_timestamps=True is requested because the
# export step further down reads per-word start/end times from segment.words.
result = model.transcribe(
    "Enemies.ogg",
    word_timestamps=True,
    language="en",  # language is fixed to "en" rather than left unspecified
    # NOTE: the lyrics prompt is currently disabled, so lyrics_hint is unused.
    # initial_prompt=lyrics_hint
)

# ------------------------------
# Function to format seconds as LRC timestamp
# ------------------------------
def format_time(t: float) -> str:
    """Format a time offset in seconds as an LRC ``mm:ss.xx`` timestamp.

    Args:
        t: Time offset in seconds (expected to be non-negative).

    Returns:
        Zero-padded minutes, seconds and hundredths, e.g. ``"01:05.25"``.
        Minutes are not wrapped at 60, so one hour renders as ``"60:00.00"``.
    """
    # Round to whole centiseconds *before* splitting into minutes/seconds.
    # Formatting the seconds remainder directly lets values such as 59.996
    # round up to "60.00" and produce the invalid stamp "00:60.00".
    total_centis = int(round(t * 100))
    minutes, rest = divmod(total_centis, 6000)
    seconds, hundredths = divmod(rest, 100)
    return f"{minutes:02d}:{seconds:02d}.{hundredths:02d}"

# ------------------------------
# Save word-level LRC file with start and end times
# ------------------------------
with open("Enemies.lrc.HR", "w", encoding="utf-8") as f:
    for segment in result.segments:
        words = segment.words
        last_index = len(words) - 1
        for i, word in enumerate(words):
            text = word.word.strip()
            # Whitespace-only tokens produce no output line.
            if not text:
                continue
            start_time = word.start
            # A word lasts until the next word in the same segment begins;
            # the final word of a segment keeps its own end time.
            if i < last_index:
                end_time = words[i + 1].start
            else:
                end_time = word.end
            begin_stamp = format_time(start_time)
            finish_stamp = format_time(end_time)
            f.write(f"[{begin_stamp}|{finish_stamp}]{text}\n")