From 4feff437581053756eeb518e75d6ac67b16f691f Mon Sep 17 00:00:00 2001 From: Andre Date: Sat, 21 Mar 2026 14:22:45 +0100 Subject: [PATCH] ComfyUI AudioCraft: verbesserte Nodes + Workflow Template - nodes.py: temperature, cfg_coef, top_k, extend_stride Parameter - AudioUpsample Node: 16kHz/32kHz -> 48kHz fuer bessere Qualitaet - AudioPreview Node: Vorschau direkt in ComfyUI UI - workflow_game_audio.json: Template mit Musik + SFX Pipeline - Standardmodell: musicgen-stereo-medium (besserer Sound) Co-Authored-By: Claude Sonnet 4.6 --- comfyui-audio/comfyui_audiocraft/nodes.py | 95 +++++++++++++--- comfyui-audio/workflow_game_audio.json | 131 ++++++++++++++-------- 2 files changed, 164 insertions(+), 62 deletions(-) diff --git a/comfyui-audio/comfyui_audiocraft/nodes.py b/comfyui-audio/comfyui_audiocraft/nodes.py index ae2e485..cfc0ec4 100644 --- a/comfyui-audio/comfyui_audiocraft/nodes.py +++ b/comfyui-audio/comfyui_audiocraft/nodes.py @@ -9,10 +9,14 @@ class MusicGenNode: def INPUT_TYPES(cls): return { "required": { - "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}), - "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 60.0, "step": 1.0}), - "model": (["facebook/musicgen-small", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-stereo-medium"],), - "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}), + "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}), + "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}), + "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],), + "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}), + "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}), + "cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}), + "top_k": ("INT", {"default": 250, "min": 1, "max": 1000}), + "extend_stride": ("FLOAT", {"default": 18.0, "min": 1.0, "max": 30.0, "step": 1.0}), } } @@ -22,7 +26,7 @@ class MusicGenNode: CATEGORY = "AudioCraft" OUTPUT_NODE = True - def generate(self, prompt, duration, model, seed): + def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride): from audiocraft.models import MusicGen from audiocraft.data.audio import audio_write @@ -30,7 +34,13 @@ class MusicGenNode: print(f"[MusicGen] Loading model: {model}") mg = MusicGen.get_pretrained(model) - mg.set_generation_params(duration=duration) + mg.set_generation_params( + duration=duration, + temperature=temperature, + cfg_coef=cfg_coef, + top_k=top_k, + extend_stride=extend_stride, + ) print(f"[MusicGen] Generating: {prompt}") wav = mg.generate([prompt]) @@ -49,9 +59,12 @@ class AudioGenNode: def INPUT_TYPES(cls): return { "required": { - "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}), - "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}), - "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}), + "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}), + "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}), + "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}), + "temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}), + "cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}), + "top_k": ("INT", {"default": 250, "min": 1, "max": 1000}), } } @@ -61,7 +74,7 @@ class AudioGenNode: CATEGORY = "AudioCraft" OUTPUT_NODE = True - def generate(self, prompt, duration, seed): + def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k): from audiocraft.models import AudioGen from audiocraft.data.audio import audio_write @@ -69,7 +82,12 @@ class AudioGenNode: print(f"[AudioGen] Loading model...") ag = AudioGen.get_pretrained("facebook/audiogen-medium") - ag.set_generation_params(duration=duration) + ag.set_generation_params( + duration=duration, + temperature=temperature, + cfg_coef=cfg_coef, + top_k=top_k, + ) print(f"[AudioGen] Generating: {prompt}") wav = ag.generate([prompt]) @@ -83,12 +101,59 @@ class AudioGenNode: return (out_path + ".wav",) +class AudioUpsampleNode: + """Upsampling via torchaudio - verbessert Qualität von 16kHz/32kHz auf 48kHz""" + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "audio_file": ("AUDIO_PATH",), + "target_sr": ([48000, 44100], {"default": 48000}), + } + } + + RETURN_TYPES = ("AUDIO_PATH",) + RETURN_NAMES = ("audio_file",) + FUNCTION = "upsample" + CATEGORY = "AudioCraft" + OUTPUT_NODE = True + + def upsample(self, audio_file, target_sr): + wav, sr = torchaudio.load(audio_file) + if sr != target_sr: + resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr) + wav = resampler(wav) + out_path = audio_file.replace(".wav", f"_{target_sr}hz.wav") + torchaudio.save(out_path, wav, target_sr) + print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}") + return (out_path,) + + +class AudioPreviewNode: + """Zeigt Audiodatei in der ComfyUI-UI an""" + @classmethod + def INPUT_TYPES(cls): + return {"required": {"audio_file": ("AUDIO_PATH",)}} + + RETURN_TYPES = () + FUNCTION = "preview" + CATEGORY = "AudioCraft" + OUTPUT_NODE = True + + def preview(self, audio_file): + return {"ui": {"audio": [{"filename": os.path.basename(audio_file), "subfolder": "audio", "type": "output"}]}} + + NODE_CLASS_MAPPINGS = { - "MusicGen": MusicGenNode, - "AudioGen": AudioGenNode, + "MusicGen": MusicGenNode, + "AudioGen": AudioGenNode, + "AudioUpsample": AudioUpsampleNode, + "AudioPreview": AudioPreviewNode, } NODE_DISPLAY_NAME_MAPPINGS = { - "MusicGen": "MusicGen (Musik)", - "AudioGen": "AudioGen (Sound Effects)", + "MusicGen": "MusicGen (Musik)", + "AudioGen": "AudioGen (Sound Effects)", + "AudioUpsample": "Audio Upsample (Qualität)", + "AudioPreview": "Audio Preview", } diff --git a/comfyui-audio/workflow_game_audio.json b/comfyui-audio/workflow_game_audio.json index 27ecf32..d9df565 100644 --- a/comfyui-audio/workflow_game_audio.json +++ b/comfyui-audio/workflow_game_audio.json @@ -1,98 +1,135 @@ { - "last_node_id": 4, - "last_link_id": 0, + "last_node_id": 6, + "last_link_id": 4, "nodes": [ { "id": 1, "type": "MusicGen", - "pos": [100, 100], - "size": {"0": 400, "1": 200}, + "pos": [50, 100], + "size": {"0": 420, "1": 370}, "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [ - {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0} + {"name": "audio_file", "type": "AUDIO_PATH", "links": [1], "slot_index": 0} ], "properties": {"Node name for S&R": "MusicGen"}, "widgets_values": [ - "dark dungeon ambience, slow, mysterious, atmospheric, medieval", - 10.0, - "facebook/musicgen-small", - 0 + "epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM", + 30.0, + "facebook/musicgen-stereo-medium", + 0, + 0.9, + 5.0, + 250, + 18.0 ] }, { "id": 2, - "type": "MusicGen", - "pos": [100, 380], - "size": {"0": 400, "1": 200}, + "type": "AudioUpsample", + "pos": [520, 100], + "size": {"0": 300, "1": 120}, "flags": {}, "order": 1, "mode": 0, - "inputs": [], - "outputs": [ - {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0} + "inputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "link": 1} ], - "properties": {"Node name for S&R": "MusicGen"}, - "widgets_values": [ - "epic battle music, drums, dark fantasy, intense, orchestral", - 15.0, - "facebook/musicgen-small", - 42 - ] + "outputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "links": [2], "slot_index": 0} + ], + "properties": {"Node name for S&R": "AudioUpsample"}, + "widgets_values": [48000] }, { "id": 3, - "type": "AudioGen", - "pos": [100, 660], - "size": {"0": 400, "1": 160}, + "type": "AudioPreview", + "pos": [870, 100], + "size": {"0": 250, "1": 80}, "flags": {}, "order": 2, "mode": 0, - "inputs": [], - "outputs": [ - {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0} + "inputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "link": 2} ], - "properties": {"Node name for S&R": "AudioGen"}, - "widgets_values": [ - "sword slash metal clang sound effect", - 3.0, - 0 - ] + "outputs": [], + "properties": {"Node name for S&R": "AudioPreview"}, + "widgets_values": [] }, { "id": 4, "type": "AudioGen", - "pos": [100, 880], - "size": {"0": 400, "1": 160}, + "pos": [50, 560], + "size": {"0": 420, "1": 330}, "flags": {}, "order": 3, "mode": 0, "inputs": [], "outputs": [ - {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0} + {"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0} ], "properties": {"Node name for S&R": "AudioGen"}, "widgets_values": [ - "footsteps on stone dungeon floor", - 3.0, - 1 + "fast sword swing whoosh attack sound effect combat", + 2.0, + 0, + 1.0, + 4.0, + 250 ] + }, + { + "id": 5, + "type": "AudioUpsample", + "pos": [520, 560], + "size": {"0": 300, "1": 120}, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "link": 3} + ], + "outputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0} + ], + "properties": {"Node name for S&R": "AudioUpsample"}, + "widgets_values": [48000] + }, + { + "id": 6, + "type": "AudioPreview", + "pos": [870, 560], + "size": {"0": 250, "1": 80}, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "link": 4} + ], + "outputs": [], + "properties": {"Node name for S&R": "AudioPreview"}, + "widgets_values": [] } ], - "links": [], + "links": [ + [1, 1, 0, 2, 0, "AUDIO_PATH"], + [2, 2, 0, 3, 0, "AUDIO_PATH"], + [3, 4, 0, 5, 0, "AUDIO_PATH"], + [4, 5, 0, 6, 0, "AUDIO_PATH"] + ], "groups": [ { - "title": "Musik", - "bounding": [80, 75, 440, 480], - "color": "#8B0000", + "title": "Musik (MusicGen → Upsample → Preview)", + "bounding": [30, 65, 1120, 420], + "color": "#3d2a5a", "font_size": 14 }, { - "title": "Sound Effects", - "bounding": [80, 635, 440, 430], - "color": "#1a3a4a", + "title": "Sound Effects (AudioGen → Upsample → Preview)", + "bounding": [30, 525, 1120, 390], + "color": "#1a3a2a", "font_size": 14 } ],