- nodes.py: Best-Practice Ranges (temperature 0.1-2.0, cfg_coef 0.5-10.0, top_k 10-500, extend_stride 5-28) - workflow_music.json: Parameter Guide als Note-Node, Default-Werte angepasst - workflow_sfx.json: SFX Parameter Guide als Note-Node hinzugefügt - main_menu.gd: Menü-Musik (intro7.mp3) mit Loop und Stop bei Spielstart - Diverse Godot-Projektdateien aktualisiert Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
243 lines
9.9 KiB
Python
243 lines
9.9 KiB
Python
import torch
|
|
import torchaudio
|
|
import os
|
|
import time
|
|
|
|
|
|
class MusicGenNode:
    """Text-to-music node: renders a single MusicGen clip and returns the wav path.

    Exposes the best-practice sampling ranges (temperature 0.1-2.0,
    cfg_coef 0.5-10.0, top_k 10-500, extend_stride 5-28) as node widgets.
    """

    @classmethod
    def INPUT_TYPES(cls):
        model_choices = [
            "facebook/musicgen-stereo-medium",
            "facebook/musicgen-stereo-large",
            "facebook/musicgen-medium",
            "facebook/musicgen-large",
            "facebook/musicgen-small",
        ]
        required = {
            "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
            "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}),
            "model": (model_choices,),
            "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
            "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
            "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
            "top_k": ("INT", {"default": 250, "min": 10, "max": 500}),
            "extend_stride": ("FLOAT", {"default": 18.0, "min": 5.0, "max": 28.0, "step": 1.0}),
        }
        return {"required": required}

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
        """Generate `duration` seconds of music for `prompt`; returns (wav_path,)."""
        # audiocraft is imported lazily so the node registry loads even when
        # the package is missing or slow to import.
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)

        print(f"[MusicGen] Loading model: {model}")
        music_model = MusicGen.get_pretrained(model)
        music_model.set_generation_params(
            duration=duration,
            use_sampling=True,
            temperature=temperature,
            cfg_coef=cfg_coef,
            top_k=top_k,
            top_p=0.0,  # 0.0 leaves nucleus sampling off; top_k governs sampling
            extend_stride=extend_stride,
        )

        print(f"[MusicGen] Generating: {prompt}")
        batch = music_model.generate([prompt])

        target_dir = "/app/ComfyUI/output/audio"
        os.makedirs(target_dir, exist_ok=True)
        stem = os.path.join(target_dir, f"musicgen_{int(time.time())}")
        # audio_write appends the ".wav" suffix itself.
        audio_write(stem, batch[0].cpu(), music_model.sample_rate, strategy="loudness")

        return (f"{stem}.wav",)
|
|
|
|
|
|
class MusicGenLongNode:
    """Generate long music in segments, using generate_continuation for seamless transitions.

    The first segment is generated from the prompt alone; each following
    segment is conditioned on the last `context_seconds` of the previous
    segment, and the pieces are joined with a linear crossfade over that
    overlap region.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
                "total_duration": ("FLOAT", {"default": 90.0, "min": 10.0, "max": 300.0, "step": 5.0}),
                "segment_duration": ("FLOAT", {"default": 30.0, "min": 10.0, "max": 30.0, "step": 5.0}),
                "context_seconds": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 250, "min": 10, "max": 500}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate_long"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate_long(self, prompt, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
        """Generate roughly `total_duration` seconds of music; returns (wav_path,).

        NOTE(review): each crossfade joint consumes ~context_seconds of
        overlap while `generated` counts each segment's full duration, so the
        final file is shorter than total_duration by about
        (num_segments - 1) * context_seconds — confirm whether exact length
        matters to callers.
        """
        # Heavy imports are deferred so the node registry loads without audiocraft.
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)
        import math
        total_segments = math.ceil(total_duration / segment_duration)
        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {total_segments} Segmenten à {segment_duration}s")

        mg = MusicGen.get_pretrained(model)
        sample_rate = mg.sample_rate
        # Number of samples carried over as continuation context (and used
        # again below as the crossfade width).
        context_samples = int(context_seconds * sample_rate)

        # First segment: plain text-to-music generation.
        gen_params = dict(use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, top_p=0.0)
        mg.set_generation_params(duration=segment_duration, **gen_params)
        print(f"[MusicGenLong] Segment 1 / {total_segments}")
        first = mg.generate([prompt])
        segments = [first[0].cpu()]
        generated = segment_duration

        # Remaining segments via continuation.
        seg_num = 2
        while generated < total_duration:
            remaining = total_duration - generated
            # Last segment may be shorter than segment_duration.
            next_dur = min(segment_duration, remaining)
            mg.set_generation_params(duration=next_dur, **gen_params)

            # Use the last context_seconds of the previous segment as the prompt audio.
            context = segments[-1][:, -context_samples:]
            print(f"[MusicGenLong] Segment {seg_num} ({next_dur}s)")
            # Assumes generate_continuation returns the context followed by new
            # audio, so each segment starts with material matching the previous
            # tail — TODO confirm against the audiocraft API.
            cont = mg.generate_continuation(context, sample_rate, [prompt])
            segments.append(cont[0].cpu())
            generated += next_dur
            seg_num += 1

        # Crossfade and concatenate (works for mono and stereo: fades are
        # broadcast across the channel dimension).
        crossfade_samples = context_samples
        result = segments[0]
        num_channels = result.shape[0]
        for seg in segments[1:]:
            # Linear ramps over the overlap region.
            fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
            fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
            # Mix the tail of the accumulated audio with the head of the new segment.
            overlap = result[:, -crossfade_samples:] * fade_out + seg[:, :crossfade_samples] * fade_in
            result = torch.cat([result[:, :-crossfade_samples], overlap, seg[:, crossfade_samples:]], dim=1)

        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        filename = f"musicgen_long_{int(time.time())}"
        out_path = os.path.join(output_dir, filename)
        # audio_write appends the ".wav" suffix itself.
        audio_write(out_path, result, sample_rate, strategy="loudness")
        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
        return (out_path + ".wav",)
|
|
|
|
|
|
class AudioGenNode:
    """Text-to-SFX node: synthesizes a sound effect with facebook/audiogen-medium."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
            "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
            "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
            "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
            "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
            "top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
        }
        return {"required": required}

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k):
        """Render `duration` seconds of SFX audio for `prompt`; returns (wav_path,)."""
        # Lazy import keeps node registration independent of audiocraft.
        from audiocraft.models import AudioGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)

        print("[AudioGen] Loading model...")
        sfx_model = AudioGen.get_pretrained("facebook/audiogen-medium")
        sampling_args = {
            "duration": duration,
            "use_sampling": True,
            "temperature": temperature,
            "cfg_coef": cfg_coef,
            "top_k": top_k,
        }
        sfx_model.set_generation_params(**sampling_args)

        print(f"[AudioGen] Generating: {prompt}")
        batch = sfx_model.generate([prompt])

        target_dir = "/app/ComfyUI/output/audio"
        os.makedirs(target_dir, exist_ok=True)
        stem = os.path.join(target_dir, f"audiogen_{int(time.time())}")
        # audio_write appends the ".wav" suffix itself.
        audio_write(stem, batch[0].cpu(), sfx_model.sample_rate, strategy="loudness")

        return (stem + ".wav",)
|
|
|
|
|
|
class AudioUpsampleNode:
    """Resample an audio file to a higher sample rate via torchaudio.

    Improves the usability of MusicGen/AudioGen output (16 kHz / 32 kHz)
    by resampling to 48 kHz or 44.1 kHz.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "audio_file": ("AUDIO_PATH",),
                "target_sr": ([48000, 44100], {"default": 48000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _output_path(audio_file: str, target_sr: int) -> str:
        """Build the output path by inserting a rate suffix before the extension.

        Fix: the previous `audio_file.replace(".wav", ...)` returned the INPUT
        path unchanged for non-.wav files (so torchaudio.save overwrote the
        source) and mangled paths containing ".wav" anywhere else, e.g. in a
        directory name. splitext only touches the final extension.
        """
        base, ext = os.path.splitext(audio_file)
        return f"{base}_{target_sr}hz{ext or '.wav'}"

    def upsample(self, audio_file, target_sr):
        """Load `audio_file`, resample to `target_sr` if needed, save a new file.

        Returns (output_path,); the input file is never overwritten.
        """
        wav, sr = torchaudio.load(audio_file)
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        out_path = self._output_path(audio_file, target_sr)
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)
|
|
|
|
|
|
class AudioPreviewNode:
    """Surfaces a generated audio file as a playable widget in the ComfyUI frontend."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"audio_file": ("AUDIO_PATH",)}}

    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def preview(self, audio_file):
        """Return a ComfyUI "ui" result describing where the frontend finds the file."""
        # The frontend resolves this relative to the output directory plus subfolder,
        # so only the basename is sent.
        entry = {
            "filename": os.path.basename(audio_file),
            "subfolder": "audio",
            "type": "output",
        }
        return {"ui": {"audio": [entry]}}
|
|
|
|
|
|
# Registry consumed by ComfyUI: internal node id -> implementing class.
NODE_CLASS_MAPPINGS = dict(
    MusicGen=MusicGenNode,
    MusicGenLong=MusicGenLongNode,
    AudioGen=AudioGenNode,
    AudioUpsample=AudioUpsampleNode,
    AudioPreview=AudioPreviewNode,
)
|
|
|
|
# Human-readable labels shown in the ComfyUI node picker, keyed by node id.
NODE_DISPLAY_NAME_MAPPINGS = dict(
    MusicGen="MusicGen (Musik)",
    MusicGenLong="MusicGen Long (Lange Musik)",
    AudioGen="AudioGen (Sound Effects)",
    AudioUpsample="Audio Upsample (Qualität)",
    AudioPreview="Audio Preview",
)
|