# vmixtts/generate-accent.py
import os
import re
import subprocess
from llama_index import (
LLMPredictor,
GPTVectorStoreIndex,
SimpleDirectoryReader,
PromptHelper
)
from llama_index.node_parser import SimpleNodeParser
from llama_index.prompts import PromptTemplate
from llama_index import LLMPredictor
import ollama
def compile_phoneme_file(filename):
    """Compile a phoneme file by invoking ``make <filename>``.

    Args:
        filename: make target (the phoneme file) to build.

    Raises:
        RuntimeError: if make exits non-zero; the message carries the
            captured stderr.  (Was a bare ``Exception`` — RuntimeError is
            more specific and is still caught by the callers'
            ``except Exception`` handlers.)
    """
    result = subprocess.run(["make", filename], capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(result.stderr)
def generate_safe_filename(language, scenario):
    """Build a filesystem-safe name of the form ``<language>_<scenario>.txt``.

    Any character of *scenario* that is not alphanumeric or an underscore
    is replaced with an underscore.
    """
    sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', scenario)
    return f"{language}_{sanitized}.txt"
def identify_base_language(scenario, llm_predictor):
    """Ask the LLM for the ISO code of the scenario's base language.

    Returns the predictor's raw reply (expected to be a language code).
    """
    query_text = f"Based on the scenario '{scenario}', identify the base language that the speaker is using. Provide the corresponding language code, ensuring that the language exists in ISO language codes."
    return llm_predictor.predict(query_text)
def get_base_and_secondary_phoneme_files(base_language_code, secondary_language_code):
    """Read the phoneme files for a base and a secondary language.

    Files are expected in the current working directory, named
    ``ph_<code>.txt``.

    Returns:
        tuple[str, str]: (base file content, secondary file content).

    Raises:
        Exception: if either file is missing; the original FileNotFoundError
            is chained as ``__cause__`` (the old code discarded it).
    """
    base_phoneme_file = f"ph_{base_language_code}.txt"
    secondary_phoneme_file = f"ph_{secondary_language_code}.txt"
    try:
        with open(base_phoneme_file, 'r') as base_file:
            base_content = base_file.read()
        with open(secondary_phoneme_file, 'r') as sec_file:
            secondary_content = sec_file.read()
    except FileNotFoundError as e:
        # Preserve the underlying error for debugging instead of swallowing it.
        raise Exception(f"Phoneme file not found: {e}") from e
    return base_content, secondary_content
def generate_phoneme_modifications_with_llm(scenario, base_file, secondary_file, llm_predictor):
    """Ask the LLM to draft accent modifications for the base phoneme file.

    NOTE(review): *base_file* and *secondary_file* are accepted but never
    embedded in the prompt — the LLM only sees the scenario text; confirm
    this is intended before relying on the output.
    """
    request = f"Based on the scenario '{scenario}', please create modifications to the base phoneme file using characteristics derived from the secondary language phoneme file. Make changes only in the base phoneme file, such as adjusting vowels or consonants, to reflect the influence of the secondary language."
    return llm_predictor.predict(request)
def compile_with_all_rules(rules, base_file):
    """Write *base_file* plus all *rules* to a temp file and try to compile.

    Args:
        rules: list of rule strings to append to the base content.
        base_file: the base phoneme file content (string).

    Returns:
        bool: True on success, False on failure.  The previous version
        returned a ``(False, error)`` tuple on failure, which is TRUTHY,
        so callers testing ``if not compile_success`` never saw the
        failure; a plain bool fixes that.
    """
    combined_rules = "\n".join(rules)
    temp_file = "temp_combined_phoneme_file.txt"
    with open(temp_file, 'w') as file:
        file.write(base_file + "\n" + combined_rules)
    try:
        compile_phoneme_file(temp_file)
    except Exception as e:
        print(f"Compilation failed with error: {e}")
        return False
    print("All rules compiled successfully.")
    return True
def binary_search_for_faulty_rules(rules, base_file):
    """Partition *rules* into compilable and non-compilable rules.

    Recursively bisects the rule list: a batch that compiles together with
    the base content is accepted wholesale; a failing batch is split until
    individually faulty rules are isolated.

    Returns:
        tuple[list, list]: (valid_rules, faulty_rules).
    """
    valid_rules = []
    faulty_rules = []

    def _try_compile(batch):
        # Append the batch to the base file content and attempt a compile.
        scratch_path = "temp_batch_phoneme_file.txt"
        with open(scratch_path, 'w') as scratch:
            scratch.write(base_file + "\n" + "\n".join(batch))
        try:
            compile_phoneme_file(scratch_path)
        except Exception:
            return False
        return True

    def _bisect(batch):
        if len(batch) == 1:
            target = valid_rules if _try_compile(batch) else faulty_rules
            target.append(batch[0])
            return
        midpoint = len(batch) // 2
        for half in (batch[:midpoint], batch[midpoint:]):
            if _try_compile(half):
                valid_rules.extend(half)
            else:
                _bisect(half)

    _bisect(rules)
    return valid_rules, faulty_rules
def attempt_to_fix_rules(faulty_rules, base_file, llm_predictor):
    """Ask the LLM for a corrected version of each faulty rule.

    Replies that are empty after stripping whitespace are dropped.
    NOTE(review): *base_file* is mentioned in the prompt text but its
    content is not actually sent to the LLM.
    """
    repaired = []
    for broken_rule in faulty_rules:
        request = f"The following phoneme rule '{broken_rule}' failed to compile. Please suggest a corrected version based on the base phoneme file."
        suggestion = llm_predictor.predict(request).strip()
        if suggestion:
            repaired.append(suggestion)
    return repaired
def save_and_compile_final_phoneme_file(base_file, valid_rules, fixed_rules):
    """Concatenate the base content with all surviving rules, save, compile.

    Writes ``final_phoneme_file.txt`` and hands it to
    compile_final_phoneme_file for the final build attempt.
    """
    every_rule = valid_rules + fixed_rules
    output_filename = "final_phoneme_file.txt"
    with open(output_filename, 'w') as out:
        out.write(base_file + "\n" + "\n".join(every_rule))
    compile_final_phoneme_file(output_filename)
# Final compile step for the assembled phoneme file.
def compile_final_phoneme_file(filename):
    """Compile *filename*; on failure, report and append to the error log.

    Fix: the success and log messages were f-strings with no placeholder,
    printing the literal text "(unknown)" — they now interpolate *filename*.
    """
    try:
        compile_phoneme_file(filename)
        print(f"Successfully compiled {filename}")
    except Exception as e:
        print(f"Error during compilation: {e}")
        with open("compilation_errors.log", 'a') as log_file:
            log_file.write(f"Compilation error for {filename}: {str(e)}\n")
def parse_phoneme_file(file_path):
    """Extract phoneme definitions from an espeak phsource file.

    Scans for ``phoneme NAME // description ... endphoneme`` sections and
    returns a dict mapping each phoneme name to its stripped description
    and body text.
    """
    pattern = re.compile(r'phoneme\s+(\w+)\s*//\s*(.+?)\n(.*?)endphoneme', re.DOTALL)
    with open(file_path, 'r') as handle:
        text = handle.read()
    entries = {}
    for name, desc, body in pattern.findall(text):
        entries[name] = {'description': desc.strip(), 'body': body.strip()}
    return entries
def get_available_languages():
    """List language codes that have a ``ph_*`` file in $ESPEAK_PATH/phsource.

    Returns:
        list[str]: codes in directory-listing order (the ``ph_`` prefix
        stripped).

    Raises:
        ValueError: when the ESPEAK_PATH environment variable is unset.
    """
    espeak_path = os.environ.get('ESPEAK_PATH')
    if not espeak_path:
        raise ValueError("ESPEAK_PATH environment variable not set")
    phsource_dir = os.path.join(espeak_path, 'phsource')
    codes = []
    for entry in os.listdir(phsource_dir):
        if entry.startswith('ph_'):
            codes.append(entry[3:])
    return codes
def create_index_from_phonemes(phonemes):
    """Build a GPTVectorStoreIndex over phoneme descriptions and bodies.

    Each phoneme becomes one document of the form
    ``"<name>: <description>\\n<body>"``.
    """
    docs = [f"{name}: {info['description']}\n{info['body']}" for name, info in phonemes.items()]
    node_parser = SimpleNodeParser()
    return GPTVectorStoreIndex(node_parser.get_nodes_from_documents(docs))
def generate_accent(scenario):
    """End-to-end accent generation pipeline for a free-text scenario.

    Asks the LLM to pick a base/secondary language pair from the installed
    espeak phoneme files, parses both files, asks the LLM for accent
    modification rules, then compiles the rules (repairing faulty ones) and
    writes the final phoneme file.

    Returns:
        str: a sanitized output filename built from the base language and
        the scenario text.
    """
    # Initialize Ollama through LLMPredictor
    llm_predictor = LLMPredictor(llm=ollama.Ollama(model="llama2"))
    # NOTE(review): prompt_helper is constructed but never used below.
    prompt_helper = PromptHelper(max_input_size=4096, num_output=256, max_chunk_overlap=20)
    available_languages = get_available_languages()
    # Select languages
    language_selection_prompt = PromptTemplate(
        "Given the scenario: '{scenario}' and the following available languages: {languages}, "
        "please select the most appropriate base language and secondary language for accent generation. "
        "Respond in the format: 'Base: [language], Secondary: [language]'."
    )
    language_selection_query = language_selection_prompt.format(scenario=scenario, languages=', '.join(available_languages))
    # NOTE(review): other helpers in this file use llm_predictor.predict();
    # here .query() is used and the reply is indexed like a dict
    # (response['text']) — confirm which LLMPredictor API actually exists.
    language_selection_response = llm_predictor.query(language_selection_query)
    # Assumes the reply matches the requested format exactly; re.search
    # returns None (and .groups() raises AttributeError) otherwise.
    base_lang, secondary_lang = re.search(r'Base: (\w+), Secondary: (\w+)', language_selection_response['text']).groups()
    # Parse phoneme files
    # NOTE(review): no ESPEAK_PATH fallback here — os.path.join raises
    # TypeError if the variable is unset (get_available_languages would
    # already have raised ValueError earlier, so this is reachable only if
    # the env changes mid-run).
    espeak_path = os.environ.get('ESPEAK_PATH')
    base_phonemes = parse_phoneme_file(os.path.join(espeak_path, 'phsource', f'ph_{base_lang}'))
    secondary_phonemes = parse_phoneme_file(os.path.join(espeak_path, 'phsource', f'ph_{secondary_lang}'))
    # Create indices
    base_index = create_index_from_phonemes(base_phonemes)
    secondary_index = create_index_from_phonemes(secondary_phonemes)
    # Get relevant phonemes
    relevant_phonemes_prompt = PromptTemplate(
        "Given the phonemes for the base language ({base_lang}) and secondary language ({secondary_lang}), "
        "list the phonemes from the secondary language that are most likely to influence the accent when applied to the base language. "
        "Provide your response as a comma-separated list of phoneme names."
    )
    relevant_phonemes_query = relevant_phonemes_prompt.format(base_lang=base_lang, secondary_lang=secondary_lang)
    relevant_phonemes_response = llm_predictor.query(relevant_phonemes_query)
    relevant_phonemes = [p.strip() for p in relevant_phonemes_response['text'].split(',')]
    # Generate accent rules
    accent_rules_prompt = PromptTemplate(
        "Based on the relevant phonemes identified ({relevant_phonemes}), "
        "generate accent modification rules for the base language ({base_lang}). "
        "Use the following format for each rule:\n\n"
        "phoneme [name]\n"
        " [modification instructions]\n"
        "endphoneme\n\n"
        "Here are the relevant secondary language phonemes for reference:\n\n"
        "{secondary_phonemes}"
    )
    # NOTE(review): raises KeyError if the LLM names a phoneme that is not a
    # key of secondary_phonemes.
    secondary_phonemes_str = "\n\n".join([f"{p}:\n{secondary_phonemes[p]['body']}" for p in relevant_phonemes])
    accent_rules_query = accent_rules_prompt.format(
        relevant_phonemes=', '.join(relevant_phonemes),
        base_lang=base_lang,
        secondary_phonemes=secondary_phonemes_str
    )
    accent_rules_response = llm_predictor.query(accent_rules_query)
    accent_rules = accent_rules_response['text']
    # Compile and validate rules (using existing functions)
    rules = accent_rules.split('\n')
    # NOTE(review): base_phonemes is a dict (from parse_phoneme_file), but
    # compile_with_all_rules and save_and_compile_final_phoneme_file
    # concatenate their base_file argument with strings — this would raise
    # TypeError; the raw file content is probably what was intended.
    # Also: compile_with_all_rules historically returned a truthy
    # (False, error) tuple on failure, so `not compile_success` would never
    # trigger the repair branch — confirm against its current return type.
    compile_success = compile_with_all_rules(rules, base_phonemes)
    if not compile_success:
        valid_rules, faulty_rules = binary_search_for_faulty_rules(rules, base_phonemes)
        fixed_rules = attempt_to_fix_rules(faulty_rules, base_phonemes, llm_predictor)
        save_and_compile_final_phoneme_file(base_phonemes, valid_rules, fixed_rules)
    else:
        save_and_compile_final_phoneme_file(base_phonemes, rules, [])
    return generate_safe_filename(base_lang, scenario)