DungeonCrawler/comfyui-audio/comfyui_audiocraft/nodes.py
Andre 5b326e2de6 Name-Feld mit fortlaufender Nummer für alle Audio-Nodes
Alle Generierungs-Nodes (MusicGen, MusicGenLong, AudioGen)
haben jetzt ein 'name' Feld. Dateien werden als
name_001.wav, name_002.wav, etc. gespeichert statt mit
Timestamp. z.B. heavy_strike_001.wav

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 18:08:29 +01:00

326 lines
13 KiB
Python

import torch
import torchaudio
import os
import time
import glob as globmod
import re
class MusicGenNode:
    """ComfyUI node: generate a music clip from a text prompt with MusicGen.

    The clip is written to /app/ComfyUI/output/audio as <name>_<NNN>.wav,
    where NNN is the next free sequential number for that name (see
    _next_number). Returns a 1-tuple with the absolute path of the file.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
                "name": ("STRING", {"default": "music"}),
                "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 250, "min": 10, "max": 500}),
                "extend_stride": ("FLOAT", {"default": 18.0, "min": 5.0, "max": 28.0, "step": 1.0}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate(self, prompt, name, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
        """Generate one clip and return a 1-tuple with the .wav path."""
        # Imported lazily: audiocraft is heavy and only needed at execution time.
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)  # reproducible sampling for a given seed
        print(f"[MusicGen] Loading model: {model}")
        mg = MusicGen.get_pretrained(model)
        mg.set_generation_params(
            duration=duration,
            use_sampling=True,
            temperature=temperature,
            cfg_coef=cfg_coef,
            top_k=top_k,
            top_p=0.0,  # top_p disabled; sampling is steered by top_k instead
            extend_stride=extend_stride,
        )
        print(f"[MusicGen] Generating: {prompt}")
        wav = mg.generate([prompt])
        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        # Sanitize the user-supplied name into a safe file-name stem.
        base_name = re.sub(r'[^\w\-]', '_', name.strip().lower())
        num = _next_number(output_dir, base_name)
        filename = f"{base_name}_{num:03d}"
        out_path = os.path.join(output_dir, filename)
        # audio_write appends the ".wav" extension itself.
        audio_write(out_path, wav[0].cpu(), mg.sample_rate, strategy="loudness")
        # Bug fix: previously logged the literal "(unknown).wav" instead of the path.
        print(f"[MusicGen] Saved: {out_path}.wav")
        return (out_path + ".wav",)
class MusicGenLongNode:
    """Generate long music in segments, using generate_continuation for seamless transitions.

    The track is built as one normally generated segment followed by
    continuation segments conditioned on the tail of the previous one, then
    joined with a linear crossfade. Output is written to
    /app/ComfyUI/output/audio as <name>_<NNN>.wav.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
                "name": ("STRING", {"default": "music_long"}),
                "total_duration": ("FLOAT", {"default": 90.0, "min": 10.0, "max": 300.0, "step": 5.0}),
                "segment_duration": ("FLOAT", {"default": 30.0, "min": 10.0, "max": 30.0, "step": 5.0}),
                "context_seconds": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 250, "min": 10, "max": 500}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate_long"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate_long(self, prompt, name, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
        """Generate a long track in segments and return a 1-tuple with the .wav path."""
        # Imported lazily: audiocraft is heavy and only needed at execution time.
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write
        import math

        torch.manual_seed(seed)  # reproducible sampling for a given seed
        total_segments = math.ceil(total_duration / segment_duration)
        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {total_segments} Segmenten à {segment_duration}s")
        mg = MusicGen.get_pretrained(model)
        sample_rate = mg.sample_rate
        context_samples = int(context_seconds * sample_rate)
        # First segment: plain generation.
        gen_params = dict(use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, top_p=0.0)
        mg.set_generation_params(duration=segment_duration, **gen_params)
        print(f"[MusicGenLong] Segment 1 / {total_segments}")
        first = mg.generate([prompt])
        segments = [first[0].cpu()]
        generated = segment_duration
        # Further segments via continuation, each conditioned on the tail of
        # the previous segment.
        seg_num = 2
        while generated < total_duration:
            remaining = total_duration - generated
            next_dur = min(segment_duration, remaining)
            mg.set_generation_params(duration=next_dur, **gen_params)
            # Use the last context_seconds of the previous segment as prompt audio.
            context = segments[-1][:, -context_samples:]
            print(f"[MusicGenLong] Segment {seg_num} ({next_dur}s)")
            cont = mg.generate_continuation(context, sample_rate, [prompt])
            segments.append(cont[0].cpu())
            generated += next_dur
            seg_num += 1
        # Crossfade and concatenate (works for both mono and stereo).
        # NOTE(review): the crossfade length equals the continuation context,
        # so each join blends the original tail with the continuation's start.
        crossfade_samples = context_samples
        result = segments[0]
        num_channels = result.shape[0]
        for seg in segments[1:]:
            fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
            fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
            overlap = result[:, -crossfade_samples:] * fade_out + seg[:, :crossfade_samples] * fade_in
            result = torch.cat([result[:, :-crossfade_samples], overlap, seg[:, crossfade_samples:]], dim=1)
        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        # Sanitize the user-supplied name into a safe file-name stem.
        base_name = re.sub(r'[^\w\-]', '_', name.strip().lower())
        num = _next_number(output_dir, base_name)
        filename = f"{base_name}_{num:03d}"
        out_path = os.path.join(output_dir, filename)
        # audio_write appends the ".wav" extension itself.
        audio_write(out_path, result, sample_rate, strategy="loudness")
        # Bug fix: previously logged the literal "(unknown).wav" instead of the path.
        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
        return (out_path + ".wav",)
def _next_number(output_dir, base_name):
    """Return the next sequential file number for *base_name* in *output_dir*.

    Scans existing "<base_name>_*.wav" files and returns one past the
    highest number found after the base name (1 when none exist yet).
    """
    number_re = re.compile(rf"{re.escape(base_name)}_(\d+)")
    highest = 0
    for path in globmod.glob(os.path.join(output_dir, f"{base_name}_*.wav")):
        hit = number_re.search(os.path.basename(path))
        if hit is not None:
            highest = max(highest, int(hit.group(1)))
    return highest + 1
class AudioGenNode:
    """ComfyUI node: generate a sound effect from a text prompt with AudioGen.

    Uses the fixed facebook/audiogen-medium checkpoint. The clip is written
    to /app/ComfyUI/output/audio as <name>_<NNN>.wav with a sequential
    number (see _next_number). Returns a 1-tuple with the file path.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
                "name": ("STRING", {"default": "sfx"}),
                "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def generate(self, prompt, name, duration, seed, temperature, cfg_coef, top_k):
        """Generate one sound effect and return a 1-tuple with the .wav path."""
        # Imported lazily: audiocraft is heavy and only needed at execution time.
        from audiocraft.models import AudioGen
        from audiocraft.data.audio import audio_write

        torch.manual_seed(seed)  # reproducible sampling for a given seed
        print(f"[AudioGen] Loading model...")
        ag = AudioGen.get_pretrained("facebook/audiogen-medium")
        ag.set_generation_params(
            duration=duration,
            use_sampling=True,
            temperature=temperature,
            cfg_coef=cfg_coef,
            top_k=top_k,
        )
        print(f"[AudioGen] Generating: {prompt}")
        wav = ag.generate([prompt])
        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        # Sanitize the user-supplied name into a safe file-name stem.
        base_name = re.sub(r'[^\w\-]', '_', name.strip().lower())
        num = _next_number(output_dir, base_name)
        filename = f"{base_name}_{num:03d}"
        out_path = os.path.join(output_dir, filename)
        # audio_write appends the ".wav" extension itself.
        audio_write(out_path, wav[0].cpu(), ag.sample_rate, strategy="loudness")
        # Bug fix: previously logged the literal "(unknown).wav" instead of the path.
        print(f"[AudioGen] Saved: {out_path}.wav")
        return (out_path + ".wav",)
class AudioUpsampleNode:
    """Upsampling via torchaudio - improves quality from 16kHz/32kHz to 48kHz.

    Loads the given file, resamples it to target_sr when needed, and writes
    the result next to the input as <stem>_<target_sr>hz.wav.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "audio_file": ("AUDIO_PATH",),
                "target_sr": ([48000, 44100], {"default": 48000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _derive_out_path(audio_file, target_sr):
        """Build the output path "<stem>_<target_sr>hz.wav" for *audio_file*.

        Bug fix: the previous audio_file.replace(".wav", ...) rewrote every
        ".wav" occurrence in the path, and for non-.wav inputs returned the
        input path unchanged, overwriting the source file on save.
        """
        root, _ext = os.path.splitext(audio_file)
        return f"{root}_{target_sr}hz.wav"

    def upsample(self, audio_file, target_sr):
        """Resample audio_file to target_sr and return a 1-tuple with the new path."""
        wav, sr = torchaudio.load(audio_file)
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        out_path = self._derive_out_path(audio_file, target_sr)
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)
class AudioPreviewNode:
    """Shows an audio file in the ComfyUI front-end player."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"audio_file": ("AUDIO_PATH",)}}

    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def preview(self, audio_file):
        """Return a ComfyUI UI payload pointing the player at output/audio/<basename>."""
        entry = {
            "filename": os.path.basename(audio_file),
            "subfolder": "audio",
            "type": "output",
        }
        return {"ui": {"audio": [entry]}}
class AudioManagerNode:
    """Lists all generated audio files and can delete them."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "action": (["Dateien auflisten", "Alle loeschen", "Nur Original loeschen (behalte 48kHz)"],),
            }
        }

    RETURN_TYPES = ()
    FUNCTION = "manage"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    def manage(self, action):
        """List or delete the .wav files under the fixed output directory."""
        audio_dir = "/app/ComfyUI/output/audio"
        if not os.path.exists(audio_dir):
            return {"ui": {"text": [{"content": "Kein Audio-Ordner vorhanden."}], "files": []}}

        # Collect every .wav with its size; total_size stays in raw bytes.
        files = []
        total_size = 0
        for entry in sorted(os.listdir(audio_dir)):
            if not entry.endswith(".wav"):
                continue
            fpath = os.path.join(audio_dir, entry)
            size = os.path.getsize(fpath)
            total_size += size
            files.append({"name": entry, "size_mb": round(size / 1024 / 1024, 2), "path": fpath})
        total_mb = total_size / 1024 / 1024

        if action == "Alle loeschen":
            # Remove everything that was collected above.
            for info in files:
                os.remove(info["path"])
            deleted = len(files)
            print(f"[AudioManager] {deleted} Dateien geloescht ({total_mb:.1f} MB)")
            return {"ui": {"text": [{"content": f"{deleted} Dateien geloescht ({total_mb:.1f} MB frei)"}], "files": []}}

        if action == "Nur Original loeschen (behalte 48kHz)":
            # Keep upsampled copies (name contains "_48000hz"), drop the rest.
            keep = []
            deleted = 0
            freed = 0
            for info in files:
                if "_48000hz" in info["name"]:
                    keep.append(info)
                else:
                    os.remove(info["path"])
                    deleted += 1
                    freed += info["size_mb"]
            print(f"[AudioManager] {deleted} Originale geloescht ({freed:.1f} MB frei)")
            return {"ui": {"text": [{"content": f"{deleted} Originale geloescht ({freed:.1f} MB frei), {len(keep)} 48kHz Dateien behalten"}], "files": keep}}

        # Default action: just list what is there.
        print(f"[AudioManager] {len(files)} Dateien, {total_mb:.1f} MB")
        return {"ui": {"text": [{"content": f"{len(files)} Dateien | {total_mb:.1f} MB"}], "files": files}}
# ComfyUI registration: maps node type ids to their implementing classes.
NODE_CLASS_MAPPINGS = {
"MusicGen": MusicGenNode,
"MusicGenLong": MusicGenLongNode,
"AudioGen": AudioGenNode,
"AudioUpsample": AudioUpsampleNode,
"AudioPreview": AudioPreviewNode,
"AudioManager": AudioManagerNode,
}
# Human-readable labels shown in the ComfyUI node picker for each type id.
NODE_DISPLAY_NAME_MAPPINGS = {
"MusicGen": "MusicGen (Musik)",
"MusicGenLong": "MusicGen Long (Lange Musik)",
"AudioGen": "AudioGen (Sound Effects)",
"AudioUpsample": "Audio Upsample (Qualität)",
"AudioPreview": "Audio Preview",
"AudioManager": "Audio Manager (Dateien)",
}