import torch
import torchaudio
import os
import time  # kept: present in original imports; may be used by external tooling
import glob as globmod
import re
import math


def _next_number(output_dir, base_name):
    """Return the next sequential number for files named ``{base_name}_NNN.wav``.

    Scans ``output_dir`` for existing matches of the pattern and returns
    ``max + 1`` (``1`` when no file matches yet).
    """
    pattern = os.path.join(output_dir, f"{base_name}_*.wav")
    existing = globmod.glob(pattern)
    max_num = 0
    for f in existing:
        match = re.search(rf"{re.escape(base_name)}_(\d+)", os.path.basename(f))
        if match:
            max_num = max(max_num, int(match.group(1)))
    return max_num + 1


def _prepare_output_path(name):
    """Create the audio output directory and return a unique numbered path.

    The returned path has NO extension (``audio_write`` appends ``.wav``).
    ``name`` is lower-cased and sanitized: every character that is not a word
    character or a dash becomes an underscore.
    """
    output_dir = "/app/ComfyUI/output/audio"
    os.makedirs(output_dir, exist_ok=True)
    base_name = re.sub(r'[^\w\-]', '_', name.strip().lower())
    num = _next_number(output_dir, base_name)
    return os.path.join(output_dir, f"{base_name}_{num:03d}")


class MusicGenNode:
    """ComfyUI node: generate a single music clip from a text prompt with MusicGen."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
                "name": ("STRING", {"default": "music"}),
                "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 250, "min": 10, "max": 500}),
                "extend_stride": ("FLOAT", {"default": 18.0, "min": 5.0, "max": 28.0, "step": 1.0}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate(self, prompt, name, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
        """Generate ``duration`` seconds of audio for ``prompt`` and save it as WAV.

        Returns a 1-tuple with the absolute path of the written ``.wav`` file.
        """
        # Imported lazily so the node set registers even if audiocraft is missing.
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)
        print(f"[MusicGen] Loading model: {model}")
        mg = MusicGen.get_pretrained(model)
        mg.set_generation_params(
            duration=duration,
            use_sampling=True,
            temperature=temperature,
            cfg_coef=cfg_coef,
            top_k=top_k,
            top_p=0.0,  # 0.0 disables nucleus sampling; top_k alone applies
            extend_stride=extend_stride,
        )
        print(f"[MusicGen] Generating: {prompt}")
        wav = mg.generate([prompt])

        out_path = _prepare_output_path(name)
        # audio_write appends the ".wav" extension itself.
        audio_write(out_path, wav[0].cpu(), mg.sample_rate, strategy="loudness")
        print(f"[MusicGen] Saved: {out_path}.wav")
        return (out_path + ".wav",)


class MusicGenLongNode:
    """Generates long music in segments, using generate_continuation for seamless transitions."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
                "name": ("STRING", {"default": "music_long"}),
                "total_duration": ("FLOAT", {"default": 90.0, "min": 10.0, "max": 300.0, "step": 5.0}),
                "segment_duration": ("FLOAT", {"default": 30.0, "min": 10.0, "max": 30.0, "step": 5.0}),
                "context_seconds": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 250, "min": 10, "max": 500}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate_long"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate_long(self, prompt, name, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
        """Generate ~``total_duration`` seconds of music by chaining continuations.

        Each new segment is conditioned on the last ``context_seconds`` of the
        previous one, and neighbouring segments are crossfaded over that same
        span when joined. Returns a 1-tuple with the path of the ``.wav`` file.
        """
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)
        total_segments = math.ceil(total_duration / segment_duration)
        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {total_segments} Segmenten à {segment_duration}s")
        mg = MusicGen.get_pretrained(model)
        sample_rate = mg.sample_rate
        context_samples = int(context_seconds * sample_rate)

        # First segment: plain text-to-music generation.
        gen_params = dict(use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, top_p=0.0)
        mg.set_generation_params(duration=segment_duration, **gen_params)
        print(f"[MusicGenLong] Segment 1 / {total_segments}")
        first = mg.generate([prompt])
        segments = [first[0].cpu()]
        generated = segment_duration

        # Remaining segments via continuation.
        seg_num = 2
        while generated < total_duration:
            remaining = total_duration - generated
            next_dur = min(segment_duration, remaining)
            mg.set_generation_params(duration=next_dur, **gen_params)
            # Use the last context_seconds of the previous segment as context.
            context = segments[-1][:, -context_samples:]
            print(f"[MusicGenLong] Segment {seg_num} ({next_dur}s)")
            cont = mg.generate_continuation(context, sample_rate, [prompt])
            segments.append(cont[0].cpu())
            generated += next_dur
            seg_num += 1

        # Crossfade and join (works for mono and stereo).
        # NOTE(review): every join overlaps context_seconds of audio, so the
        # final file comes out slightly shorter than total_duration — confirm
        # this is acceptable before changing the duration accounting above.
        crossfade_samples = context_samples
        result = segments[0]
        num_channels = result.shape[0]
        for seg in segments[1:]:
            fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
            fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
            overlap = result[:, -crossfade_samples:] * fade_out + seg[:, :crossfade_samples] * fade_in
            result = torch.cat([result[:, :-crossfade_samples], overlap, seg[:, crossfade_samples:]], dim=1)

        out_path = _prepare_output_path(name)
        audio_write(out_path, result, sample_rate, strategy="loudness")
        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
        return (out_path + ".wav",)


class AudioGenNode:
    """ComfyUI node: generate a sound effect from a text prompt with AudioGen."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
                "name": ("STRING", {"default": "sfx"}),
                "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate(self, prompt, name, duration, seed, temperature, cfg_coef, top_k):
        """Generate ``duration`` seconds of sound effect audio and save as WAV.

        Returns a 1-tuple with the absolute path of the written ``.wav`` file.
        """
        from audiocraft.models import AudioGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)
        print(f"[AudioGen] Loading model...")
        ag = AudioGen.get_pretrained("facebook/audiogen-medium")
        ag.set_generation_params(
            duration=duration,
            use_sampling=True,
            temperature=temperature,
            cfg_coef=cfg_coef,
            top_k=top_k,
        )
        print(f"[AudioGen] Generating: {prompt}")
        wav = ag.generate([prompt])

        out_path = _prepare_output_path(name)
        audio_write(out_path, wav[0].cpu(), ag.sample_rate, strategy="loudness")
        print(f"[AudioGen] Saved: {out_path}.wav")
        return (out_path + ".wav",)


class AudioUpsampleNode:
    """Upsampling via torchaudio - improves quality from 16kHz/32kHz to 48kHz."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "audio_file": ("AUDIO_PATH",),
                "target_sr": ([48000, 44100], {"default": 48000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def upsample(self, audio_file, target_sr):
        """Resample ``audio_file`` to ``target_sr`` and write it next to the input.

        The output name gets a ``_{target_sr}hz`` suffix; when the input is
        already at ``target_sr`` the audio is written unchanged.
        """
        wav, sr = torchaudio.load(audio_file)
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        out_path = audio_file.replace(".wav", f"_{target_sr}hz.wav")
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)


class AudioPreviewNode:
    """Shows an audio file in the ComfyUI UI."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"audio_file": ("AUDIO_PATH",)}}

    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def preview(self, audio_file):
        """Return the UI payload pointing at the file inside output/audio."""
        return {"ui": {"audio": [{"filename": os.path.basename(audio_file), "subfolder": "audio", "type": "output"}]}}


class AudioManagerNode:
    """Lists all generated audio files and can delete them."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "action": (["Dateien auflisten", "Alle loeschen", "Nur Original loeschen (behalte 48kHz)"],),
            }
        }

    RETURN_TYPES = ()
    FUNCTION = "manage"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def manage(self, action):
        """List or delete ``.wav`` files in the fixed output directory.

        Returns a UI payload with a status text and the surviving file list.
        """
        audio_dir = "/app/ComfyUI/output/audio"
        if not os.path.exists(audio_dir):
            return {"ui": {"text": [{"content": "Kein Audio-Ordner vorhanden."}], "files": []}}

        files = []
        total_size = 0
        for f in sorted(os.listdir(audio_dir)):
            if f.endswith(".wav"):
                fpath = os.path.join(audio_dir, f)
                size = os.path.getsize(fpath)
                total_size += size
                files.append({"name": f, "size_mb": round(size / 1024 / 1024, 2), "path": fpath})

        if action == "Alle loeschen":
            deleted = 0
            for f in files:
                os.remove(f["path"])
                deleted += 1
            print(f"[AudioManager] {deleted} Dateien geloescht ({total_size / 1024 / 1024:.1f} MB)")
            return {"ui": {"text": [{"content": f"{deleted} Dateien geloescht ({total_size / 1024 / 1024:.1f} MB frei)"}], "files": []}}

        elif action == "Nur Original loeschen (behalte 48kHz)":
            deleted = 0
            freed = 0
            keep = []
            # NOTE(review): only "_48000hz" files are kept — 44100 Hz upsamples
            # produced by AudioUpsampleNode would be deleted too; confirm intent.
            for f in files:
                if "_48000hz" not in f["name"]:
                    os.remove(f["path"])
                    deleted += 1
                    freed += f["size_mb"]
                else:
                    keep.append(f)
            print(f"[AudioManager] {deleted} Originale geloescht ({freed:.1f} MB frei)")
            return {"ui": {"text": [{"content": f"{deleted} Originale geloescht ({freed:.1f} MB frei), {len(keep)} 48kHz Dateien behalten"}], "files": keep}}

        # Listing only
        print(f"[AudioManager] {len(files)} Dateien, {total_size / 1024 / 1024:.1f} MB")
        return {"ui": {"text": [{"content": f"{len(files)} Dateien | {total_size / 1024 / 1024:.1f} MB"}], "files": files}}


NODE_CLASS_MAPPINGS = {
    "MusicGen": MusicGenNode,
    "MusicGenLong": MusicGenLongNode,
    "AudioGen": AudioGenNode,
    "AudioUpsample": AudioUpsampleNode,
    "AudioPreview": AudioPreviewNode,
    "AudioManager": AudioManagerNode,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "MusicGen": "MusicGen (Musik)",
    "MusicGenLong": "MusicGen Long (Lange Musik)",
    "AudioGen": "AudioGen (Sound Effects)",
    "AudioUpsample": "Audio Upsample (Qualität)",
    "AudioPreview": "Audio Preview",
    "AudioManager": "Audio Manager (Dateien)",
}