ComfyUI: MusicGenLong Node fuer nahtlose lange Musik
- MusicGenLong: generiert beliebig lange Musik via generate_continuation
- Segmente mit Crossfade zusammengefuegt, kein Qualitaetsverlust
- workflow_music.json: MusicGenLong oben (60-120s), MusicGen unten (10-30s)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a3c870127a
commit
ee9636684d
2 changed files with 162 additions and 22 deletions
|
|
@ -54,6 +54,81 @@ class MusicGenNode:
|
||||||
return (out_path + ".wav",)
|
return (out_path + ".wav",)
|
||||||
|
|
||||||
|
|
||||||
|
class MusicGenLongNode:
    """Generate long music in segments chained with ``generate_continuation``.

    The first segment is generated from the text prompt alone. Every further
    segment is generated as a continuation of the last ``context_seconds`` of
    the audio produced so far, and is blended onto it with a linear crossfade
    over that same window. The final track is trimmed to ``total_duration``.
    """

    # Smallest amount of genuinely new audio (seconds) requested from a
    # continuation, so the requested duration is always strictly longer than
    # the audio prompt that is fed in.
    _MIN_NEW_SECONDS = 1.0

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input widget declaration (names, types, defaults, ranges)."""
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
                "total_duration": ("FLOAT", {"default": 90.0, "min": 10.0, "max": 300.0, "step": 5.0}),
                "segment_duration": ("FLOAT", {"default": 30.0, "min": 10.0, "max": 30.0, "step": 5.0}),
                "context_seconds": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 0.85, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 150, "min": 1, "max": 1000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate_long"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _continuation_duration(remaining, context_seconds, segment_duration):
        """Return the duration (seconds) to request for one continuation.

        The requested duration covers the audio prompt plus the new audio, so
        the context length is added on top of what is still missing. At least
        ``_MIN_NEW_SECONDS`` of new audio is always requested; the result is
        capped at ``segment_duration``.
        """
        wanted_new = max(remaining, MusicGenLongNode._MIN_NEW_SECONDS)
        return min(segment_duration, wanted_new + context_seconds)

    @staticmethod
    def _crossfade_append(result, seg, crossfade_samples):
        """Append ``seg`` to ``result`` with a linear crossfade over the seam.

        Both tensors are indexed as (channels, samples). The last
        ``crossfade_samples`` of ``result`` are blended with the first
        ``crossfade_samples`` of ``seg``. The window is shrunk to whatever
        both tensors can actually provide, so a short segment cannot cause a
        shape mismatch.
        """
        n = min(crossfade_samples, result.shape[1], seg.shape[1])
        if n <= 0:
            return torch.cat([result, seg], dim=1)
        fade_out = torch.linspace(1.0, 0.0, n).unsqueeze(0)
        fade_in = torch.linspace(0.0, 1.0, n).unsqueeze(0)
        overlap = result[:, -n:] * fade_out + seg[:, :n] * fade_in
        return torch.cat([result[:, :-n], overlap, seg[:, n:]], dim=1)

    def generate_long(self, prompt, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
        """Generate a track of ``total_duration`` seconds and write it as WAV.

        Args:
            prompt: Text description passed to the model for every segment.
            total_duration: Requested length of the final track in seconds.
            segment_duration: Maximum length of one generation call in seconds.
            context_seconds: Length of the tail of the existing audio that is
                used as the audio prompt and as the crossfade window.
            model: Pretrained model name passed to ``MusicGen.get_pretrained``.
            seed: Seed for ``torch.manual_seed``.
            temperature, cfg_coef, top_k: Sampling parameters forwarded to
                ``set_generation_params``.

        Returns:
            A 1-tuple containing the path of the written ``.wav`` file.

        Raises:
            ValueError: If a duration argument is not positive.
        """
        import math

        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        if total_duration <= 0 or segment_duration <= 0 or context_seconds <= 0:
            raise ValueError("total_duration, segment_duration and context_seconds must be positive")

        torch.manual_seed(seed)
        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten")

        mg = MusicGen.get_pretrained(model)
        sample_rate = mg.sample_rate
        target_samples = int(total_duration * sample_rate)

        # A continuation must be longer than its audio prompt, otherwise no
        # new audio is produced. Limit the context to half a segment.
        max_context = segment_duration / 2.0
        if context_seconds > max_context:
            print(f"[MusicGenLong] context_seconds {context_seconds}s zu gross, verwende {max_context}s")
            context_seconds = max_context
        context_samples = int(context_seconds * sample_rate)

        # Each continuation only contributes (segment - context) seconds of
        # new audio, because its beginning overlaps the existing track.
        first_dur = min(segment_duration, total_duration)
        new_per_segment = segment_duration - context_seconds
        total_segments = 1 + max(0, math.ceil((total_duration - first_dur) / new_per_segment))

        # First segment: text prompt only.
        mg.set_generation_params(duration=first_dur, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)
        print(f"[MusicGenLong] Segment 1 / {total_segments}")
        result = mg.generate([prompt])[0].cpu()

        # Further segments via continuation, until the track is long enough.
        seg_num = 2
        while result.shape[1] < target_samples:
            remaining = (target_samples - result.shape[1]) / sample_rate
            next_dur = self._continuation_duration(remaining, context_seconds, segment_duration)
            mg.set_generation_params(duration=next_dur, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)

            # Tail of the audio generated so far serves as the audio prompt.
            ctx_len = max(1, min(context_samples, result.shape[1]))
            context = result[:, -ctx_len:]
            print(f"[MusicGenLong] Segment {seg_num} ({next_dur}s)")
            cont = mg.generate_continuation(context, sample_rate, [prompt])[0].cpu()

            # The returned audio is expected to start with the (re-encoded)
            # prompt, so its first ctx_len samples overlap the current tail.
            length_before = result.shape[1]
            result = self._crossfade_append(result, cont, ctx_len)
            if result.shape[1] <= length_before:
                # No progress was made; stop instead of looping forever.
                print("[MusicGenLong] Kein neues Audio erzeugt, Abbruch")
                break
            seg_num += 1

        # The last continuation may overshoot slightly; cut to the target.
        result = result[:, :target_samples]

        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        filename = f"musicgen_long_{int(time.time())}"
        out_path = os.path.join(output_dir, filename)
        audio_write(out_path, result, sample_rate, strategy="loudness")
        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
        return (out_path + ".wav",)
|
||||||
|
|
||||||
|
|
||||||
class AudioGenNode:
|
class AudioGenNode:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(cls):
|
def INPUT_TYPES(cls):
|
||||||
|
|
@ -145,15 +220,17 @@ class AudioPreviewNode:
|
||||||
|
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
NODE_CLASS_MAPPINGS = {
|
||||||
"MusicGen": MusicGenNode,
|
"MusicGen": MusicGenNode,
|
||||||
"AudioGen": AudioGenNode,
|
"MusicGenLong": MusicGenLongNode,
|
||||||
"AudioUpsample": AudioUpsampleNode,
|
"AudioGen": AudioGenNode,
|
||||||
"AudioPreview": AudioPreviewNode,
|
"AudioUpsample": AudioUpsampleNode,
|
||||||
|
"AudioPreview": AudioPreviewNode,
|
||||||
}
|
}
|
||||||
|
|
||||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||||
"MusicGen": "MusicGen (Musik)",
|
"MusicGen": "MusicGen (Musik)",
|
||||||
"AudioGen": "AudioGen (Sound Effects)",
|
"MusicGenLong": "MusicGen Long (Lange Musik)",
|
||||||
"AudioUpsample": "Audio Upsample (Qualität)",
|
"AudioGen": "AudioGen (Sound Effects)",
|
||||||
"AudioPreview": "Audio Preview",
|
"AudioUpsample": "Audio Upsample (Qualität)",
|
||||||
|
"AudioPreview": "Audio Preview",
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,71 @@
|
||||||
{
|
{
|
||||||
"last_node_id": 3,
|
"last_node_id": 6,
|
||||||
"last_link_id": 2,
|
"last_link_id": 4,
|
||||||
"nodes": [
|
"nodes": [
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"type": "MusicGenLong",
|
||||||
|
"pos": [50, 100],
|
||||||
|
"size": {"0": 480, "1": 420},
|
||||||
|
"flags": {},
|
||||||
|
"order": 0,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [
|
||||||
|
{"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
|
||||||
|
],
|
||||||
|
"properties": {"Node name for S&R": "MusicGenLong"},
|
||||||
|
"widgets_values": [
|
||||||
|
"dark fantasy orchestral music, deep cello strings, french horn melody, slow war drums, mysterious medieval atmosphere, epic cinematic, consistent dark tone",
|
||||||
|
90.0,
|
||||||
|
30.0,
|
||||||
|
5.0,
|
||||||
|
"facebook/musicgen-stereo-medium",
|
||||||
|
0,
|
||||||
|
0.85,
|
||||||
|
5.0,
|
||||||
|
150
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"type": "AudioUpsample",
|
||||||
|
"pos": [580, 100],
|
||||||
|
"size": {"0": 300, "1": 120},
|
||||||
|
"flags": {},
|
||||||
|
"order": 1,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
|
||||||
|
],
|
||||||
|
"properties": {"Node name for S&R": "AudioUpsample"},
|
||||||
|
"widgets_values": [48000]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"type": "AudioPreview",
|
||||||
|
"pos": [930, 100],
|
||||||
|
"size": {"0": 250, "1": 80},
|
||||||
|
"flags": {},
|
||||||
|
"order": 2,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
|
||||||
|
],
|
||||||
|
"outputs": [],
|
||||||
|
"properties": {"Node name for S&R": "AudioPreview"},
|
||||||
|
"widgets_values": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"type": "MusicGen",
|
"type": "MusicGen",
|
||||||
"pos": [50, 100],
|
"pos": [50, 620],
|
||||||
"size": {"0": 480, "1": 370},
|
"size": {"0": 480, "1": 370},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 0,
|
"order": 3,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [],
|
"inputs": [],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
|
@ -16,14 +73,14 @@
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "MusicGen"},
|
"properties": {"Node name for S&R": "MusicGen"},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
|
"dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious medieval atmosphere, epic cinematic",
|
||||||
30.0,
|
30.0,
|
||||||
"facebook/musicgen-stereo-medium",
|
"facebook/musicgen-stereo-medium",
|
||||||
0,
|
0,
|
||||||
0.9,
|
0.85,
|
||||||
5.0,
|
5.0,
|
||||||
250,
|
150,
|
||||||
18.0
|
8.0
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -61,19 +118,25 @@
|
||||||
],
|
],
|
||||||
"links": [
|
"links": [
|
||||||
[1, 1, 0, 2, 0, "AUDIO_PATH"],
|
[1, 1, 0, 2, 0, "AUDIO_PATH"],
|
||||||
[2, 2, 0, 3, 0, "AUDIO_PATH"]
|
[2, 2, 0, 3, 0, "AUDIO_PATH"],
|
||||||
|
[3, 4, 0, 5, 0, "AUDIO_PATH"],
|
||||||
|
[4, 5, 0, 6, 0, "AUDIO_PATH"]
|
||||||
],
|
],
|
||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"title": "Musik Pipeline: MusicGen → Upsample 48kHz → Preview",
|
"title": "Lange Musik (MusicGenLong) → empfohlen für 60-120s",
|
||||||
"bounding": [30, 65, 1180, 180],
|
"bounding": [30, 65, 1180, 500],
|
||||||
"color": "#3d2a5a",
|
"color": "#3d2a5a",
|
||||||
"font_size": 14
|
"font_size": 14
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Kurze Musik (MusicGen) → für 10-30s Clips",
|
||||||
|
"bounding": [30, 585, 1180, 460],
|
||||||
|
"color": "#2a2a5a",
|
||||||
|
"font_size": 14
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"config": {},
|
"config": {},
|
||||||
"extra": {
|
"extra": {},
|
||||||
"notes": "Tipps:\n- temperature 0.8-0.9 = konsistenter Sound\n- cfg_coef 5.0 = mehr am Prompt orientiert\n- extend_stride 18 = keine Lücken bei langen Tracks\n- stereo-large für beste Qualität (6GB VRAM)"
|
|
||||||
},
|
|
||||||
"version": 0.4
|
"version": 0.4
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue