ComfyUI: MusicGenLong-Node für nahtlose lange Musik

- MusicGenLong: generiert beliebig lange Musik via generate_continuation
- Segmente mit Crossfade zusammengefügt, kein Qualitätsverlust
- workflow_music.json: MusicGenLong oben (60-120s), MusicGen unten (10-30s)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Andre 2026-03-21 15:30:03 +01:00
parent a3c870127a
commit ee9636684d
2 changed files with 162 additions and 22 deletions

View file

@ -54,6 +54,81 @@ class MusicGenNode:
return (out_path + ".wav",)
class MusicGenLongNode:
    """ComfyUI node that renders a long music track as chained MusicGen segments.

    The first segment is generated from the text prompt alone. Every further
    segment is produced with ``generate_continuation``, primed with the last
    ``context_seconds`` of the audio rendered so far, and joined to it with a
    linear crossfade over that primed region.
    """

    # Smallest amount of *new* audio (seconds) requested from one continuation
    # call, so the model is never asked for a clip that is no longer than its
    # own prompt. Any overshoot is trimmed off at the end.
    _MIN_NEW_SECONDS = 1.0
    # Tolerance for float accumulation when checking whether the target
    # duration has been reached.
    _EPSILON = 1e-6

    @classmethod
    def INPUT_TYPES(cls):
        """Return the ComfyUI input specification (widget types, defaults, ranges)."""
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
                "total_duration": ("FLOAT", {"default": 90.0, "min": 10.0, "max": 300.0, "step": 5.0}),
                "segment_duration": ("FLOAT", {"default": 30.0, "min": 10.0, "max": 30.0, "step": 5.0}),
                "context_seconds": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 0.85, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 150, "min": 1, "max": 1000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate_long"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _plan_segment_durations(total_duration, segment_duration, context_seconds):
        """Return the duration (seconds) to request for each generation call.

        The first entry is the prompt-only segment. Every later entry is a
        continuation whose requested duration *includes* ``context_seconds``
        of priming audio, so it contributes only ``duration - context_seconds``
        of new material. The new material of all entries adds up to at least
        ``total_duration``.

        Raises:
            ValueError: if ``total_duration`` is not positive, or if
                ``context_seconds`` is not smaller than ``segment_duration``
                (a continuation could then never add new audio).
        """
        if total_duration <= 0:
            raise ValueError(f"total_duration ({total_duration}) muss groesser als 0 sein")
        if context_seconds >= segment_duration:
            raise ValueError(
                f"context_seconds ({context_seconds}) muss kleiner als "
                f"segment_duration ({segment_duration}) sein"
            )

        first = min(segment_duration, total_duration)
        durations = [first]
        covered = first
        max_new = segment_duration - context_seconds
        min_new = min(MusicGenLongNode._MIN_NEW_SECONDS, max_new)
        while total_duration - covered > MusicGenLongNode._EPSILON:
            new_audio = min(max_new, total_duration - covered)
            # Never request a sliver: very short continuations leave the model
            # no room beyond its own prompt.
            new_audio = max(new_audio, min_new)
            durations.append(context_seconds + new_audio)
            covered += new_audio
        return durations

    @staticmethod
    def _crossfade_append(head, tail, crossfade_samples):
        """Append ``tail`` to ``head`` (both ``[channels, samples]``) with a linear crossfade.

        The last ``crossfade_samples`` of ``head`` are blended with the first
        ``crossfade_samples`` of ``tail``; the overlap is clamped to the
        length of the shorter tensor so short segments cannot cause a shape
        mismatch.
        """
        overlap = min(crossfade_samples, head.shape[1], tail.shape[1])
        if overlap <= 0:
            return torch.cat([head, tail], dim=1)
        fade_out = torch.linspace(1.0, 0.0, overlap, dtype=head.dtype).unsqueeze(0)
        fade_in = torch.linspace(0.0, 1.0, overlap, dtype=head.dtype).unsqueeze(0)
        blended = head[:, -overlap:] * fade_out + tail[:, :overlap] * fade_in
        return torch.cat([head[:, :-overlap], blended, tail[:, overlap:]], dim=1)

    def generate_long(self, prompt, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
        """Generate ``total_duration`` seconds of music and write it as a WAV file.

        Args:
            prompt: text description passed to every generation call.
            total_duration: requested length of the final track in seconds.
            segment_duration: maximum length of a single generation call.
            context_seconds: length of the audio tail used to prime each
                continuation; also the crossfade length.
            model: pretrained MusicGen checkpoint name.
            seed: value passed to ``torch.manual_seed``.
            temperature, cfg_coef, top_k: sampling parameters forwarded to
                ``set_generation_params``.

        Returns:
            A 1-tuple with the path of the written ``.wav`` file.

        Raises:
            ValueError: if the durations cannot produce new audio (see
                ``_plan_segment_durations``).
        """
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        # Validate and plan before the (slow) model load.
        plan = self._plan_segment_durations(total_duration, segment_duration, context_seconds)

        torch.manual_seed(seed)
        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten")
        mg = MusicGen.get_pretrained(model)
        sample_rate = mg.sample_rate
        context_samples = int(context_seconds * sample_rate)

        # First segment: text prompt only.
        mg.set_generation_params(duration=plan[0], temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)
        print(f"[MusicGenLong] Segment 1 / {len(plan)}")
        result = mg.generate([prompt])[0].cpu()

        # Further segments: continue from the tail of what has been rendered.
        for seg_num, next_dur in enumerate(plan[1:], start=2):
            mg.set_generation_params(duration=next_dur, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)
            context = result[:, -context_samples:]
            print(f"[MusicGenLong] Segment {seg_num} ({next_dur}s)")
            continuation = mg.generate_continuation(context, sample_rate, [prompt])[0].cpu()
            # The continuation starts with the re-synthesised context, so it
            # is crossfaded over the matching tail instead of being appended.
            result = self._crossfade_append(result, continuation, context_samples)

        # Drop any overshoot introduced by the minimum continuation length.
        target_samples = int(round(total_duration * sample_rate))
        result = result[:, :target_samples]

        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        filename = f"musicgen_long_{int(time.time())}"
        out_path = os.path.join(output_dir, filename)
        audio_write(out_path, result, sample_rate, strategy="loudness")
        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
        return (out_path + ".wav",)
class AudioGenNode:
@classmethod
def INPUT_TYPES(cls):
@ -146,6 +221,7 @@ class AudioPreviewNode:
NODE_CLASS_MAPPINGS = {
"MusicGen": MusicGenNode,
"MusicGenLong": MusicGenLongNode,
"AudioGen": AudioGenNode,
"AudioUpsample": AudioUpsampleNode,
"AudioPreview": AudioPreviewNode,
@ -153,6 +229,7 @@ NODE_CLASS_MAPPINGS = {
NODE_DISPLAY_NAME_MAPPINGS = {
"MusicGen": "MusicGen (Musik)",
"MusicGenLong": "MusicGen Long (Lange Musik)",
"AudioGen": "AudioGen (Sound Effects)",
"AudioUpsample": "Audio Upsample (Qualität)",
"AudioPreview": "Audio Preview",

View file

@ -1,14 +1,71 @@
{
"last_node_id": 3,
"last_link_id": 2,
"last_node_id": 6,
"last_link_id": 4,
"nodes": [
{
"id": 4,
"type": "MusicGenLong",
"pos": [50, 100],
"size": {"0": 480, "1": 420},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
],
"properties": {"Node name for S&R": "MusicGenLong"},
"widgets_values": [
"dark fantasy orchestral music, deep cello strings, french horn melody, slow war drums, mysterious medieval atmosphere, epic cinematic, consistent dark tone",
90.0,
30.0,
5.0,
"facebook/musicgen-stereo-medium",
0,
0.85,
5.0,
150
]
},
{
"id": 5,
"type": "AudioUpsample",
"pos": [580, 100],
"size": {"0": 300, "1": 120},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
],
"properties": {"Node name for S&R": "AudioUpsample"},
"widgets_values": [48000]
},
{
"id": 6,
"type": "AudioPreview",
"pos": [930, 100],
"size": {"0": 250, "1": 80},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
],
"outputs": [],
"properties": {"Node name for S&R": "AudioPreview"},
"widgets_values": []
},
{
"id": 1,
"type": "MusicGen",
"pos": [50, 100],
"pos": [50, 620],
"size": {"0": 480, "1": 370},
"flags": {},
"order": 0,
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
@ -16,14 +73,14 @@
],
"properties": {"Node name for S&R": "MusicGen"},
"widgets_values": [
"epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
"dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious medieval atmosphere, epic cinematic",
30.0,
"facebook/musicgen-stereo-medium",
0,
0.9,
0.85,
5.0,
250,
18.0
150,
8.0
]
},
{
@ -61,19 +118,25 @@
],
"links": [
[1, 1, 0, 2, 0, "AUDIO_PATH"],
[2, 2, 0, 3, 0, "AUDIO_PATH"]
[2, 2, 0, 3, 0, "AUDIO_PATH"],
[3, 4, 0, 5, 0, "AUDIO_PATH"],
[4, 5, 0, 6, 0, "AUDIO_PATH"]
],
"groups": [
{
"title": "Musik Pipeline: MusicGen → Upsample 48kHz → Preview",
"bounding": [30, 65, 1180, 180],
"title": "Lange Musik (MusicGenLong) → empfohlen für 60-120s",
"bounding": [30, 65, 1180, 500],
"color": "#3d2a5a",
"font_size": 14
},
{
"title": "Kurze Musik (MusicGen) → für 10-30s Clips",
"bounding": [30, 585, 1180, 460],
"color": "#2a2a5a",
"font_size": 14
}
],
"config": {},
"extra": {
"notes": "Tipps:\n- temperature 0.8-0.9 = konsistenter Sound\n- cfg_coef 5.0 = mehr am Prompt orientiert\n- extend_stride 18 = keine Lücken bei langen Tracks\n- stereo-large für beste Qualität (6GB VRAM)"
},
"extra": {},
"version": 0.4
}