ComfyUI AudioCraft: verbesserte Nodes + Workflow Template

- nodes.py: temperature, cfg_coef, top_k, extend_stride Parameter
- AudioUpsample Node: 16kHz/32kHz -> 48kHz für bessere Qualität
- AudioPreview Node: Vorschau direkt in ComfyUI UI
- workflow_game_audio.json: Template mit Musik + SFX Pipeline
- Standardmodell: musicgen-stereo-medium (besserer Sound)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Andre 2026-03-21 14:22:45 +01:00
parent b28b4ddc85
commit 4feff43758
2 changed files with 164 additions and 62 deletions

View file

@ -9,10 +9,14 @@ class MusicGenNode:
def INPUT_TYPES(cls): def INPUT_TYPES(cls):
return { return {
"required": { "required": {
"prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}), "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
"duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 60.0, "step": 1.0}), "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}),
"model": (["facebook/musicgen-small", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-stereo-medium"],), "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}), "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
"extend_stride": ("FLOAT", {"default": 18.0, "min": 1.0, "max": 30.0, "step": 1.0}),
} }
} }
@ -22,7 +26,7 @@ class MusicGenNode:
CATEGORY = "AudioCraft" CATEGORY = "AudioCraft"
OUTPUT_NODE = True OUTPUT_NODE = True
def generate(self, prompt, duration, model, seed): def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
from audiocraft.models import MusicGen from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write from audiocraft.data.audio import audio_write
@ -30,7 +34,13 @@ class MusicGenNode:
print(f"[MusicGen] Loading model: {model}") print(f"[MusicGen] Loading model: {model}")
mg = MusicGen.get_pretrained(model) mg = MusicGen.get_pretrained(model)
mg.set_generation_params(duration=duration) mg.set_generation_params(
duration=duration,
temperature=temperature,
cfg_coef=cfg_coef,
top_k=top_k,
extend_stride=extend_stride,
)
print(f"[MusicGen] Generating: {prompt}") print(f"[MusicGen] Generating: {prompt}")
wav = mg.generate([prompt]) wav = mg.generate([prompt])
@ -49,9 +59,12 @@ class AudioGenNode:
def INPUT_TYPES(cls): def INPUT_TYPES(cls):
return { return {
"required": { "required": {
"prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}), "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
"duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}), "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}), "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
} }
} }
@ -61,7 +74,7 @@ class AudioGenNode:
CATEGORY = "AudioCraft" CATEGORY = "AudioCraft"
OUTPUT_NODE = True OUTPUT_NODE = True
def generate(self, prompt, duration, seed): def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k):
from audiocraft.models import AudioGen from audiocraft.models import AudioGen
from audiocraft.data.audio import audio_write from audiocraft.data.audio import audio_write
@ -69,7 +82,12 @@ class AudioGenNode:
print(f"[AudioGen] Loading model...") print(f"[AudioGen] Loading model...")
ag = AudioGen.get_pretrained("facebook/audiogen-medium") ag = AudioGen.get_pretrained("facebook/audiogen-medium")
ag.set_generation_params(duration=duration) ag.set_generation_params(
duration=duration,
temperature=temperature,
cfg_coef=cfg_coef,
top_k=top_k,
)
print(f"[AudioGen] Generating: {prompt}") print(f"[AudioGen] Generating: {prompt}")
wav = ag.generate([prompt]) wav = ag.generate([prompt])
@ -83,12 +101,59 @@ class AudioGenNode:
return (out_path + ".wav",) return (out_path + ".wav",)
class AudioUpsampleNode:
    """Resample an audio file to a higher sample rate using torchaudio.

    Typical use is bringing 16 kHz / 32 kHz files up to 48 kHz. The
    resampled audio is written to a new file next to the input; the input
    file itself is never modified.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: the source file path and the target rate."""
        return {
            "required": {
                "audio_file": ("AUDIO_PATH",),
                "target_sr": ([48000, 44100], {"default": 48000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _resampled_path(audio_file, target_sr):
        """Return the output path ``<stem>_<target_sr>hz<ext>`` for *audio_file*.

        Only the final extension is touched, so a ".wav" appearing in a
        directory name is left alone, and an input with a different
        extension never maps back onto itself (which would make the save
        step overwrite the source file). A path without any extension gets
        ".wav" appended.
        """
        import os

        stem, ext = os.path.splitext(audio_file)
        return f"{stem}_{target_sr}hz{ext or '.wav'}"

    def upsample(self, audio_file, target_sr):
        """Resample *audio_file* to *target_sr* and save it under a new name.

        Args:
            audio_file: Path of the audio file to load.
            target_sr: Desired sample rate in Hz.

        Returns:
            A one-element tuple holding the path of the written file.
        """
        # Imported lazily, like the audiocraft imports in the generator nodes.
        import torchaudio

        wav, sr = torchaudio.load(audio_file)
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        # A file is written even when the rate already matches, so the
        # returned path always exists for downstream nodes.
        out_path = self._resampled_path(audio_file, target_sr)
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)
class AudioPreviewNode:
    """Show an audio file in the ComfyUI UI."""

    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the single required input: the path of the file to preview."""
        required = {"audio_file": ("AUDIO_PATH",)}
        return {"required": required}

    def preview(self, audio_file):
        """Build the UI payload describing *audio_file*.

        Only the base name of the path is forwarded; the directory part is
        dropped and replaced by the fixed "audio" subfolder of type
        "output".
        """
        # NOTE(review): presumably the file is expected to live in the
        # output directory's "audio" subfolder - confirm against the nodes
        # that write it.
        entry = {
            "filename": os.path.basename(audio_file),
            "subfolder": "audio",
            "type": "output",
        }
        return {"ui": {"audio": [entry]}}
NODE_CLASS_MAPPINGS = { NODE_CLASS_MAPPINGS = {
"MusicGen": MusicGenNode, "MusicGen": MusicGenNode,
"AudioGen": AudioGenNode, "AudioGen": AudioGenNode,
"AudioUpsample": AudioUpsampleNode,
"AudioPreview": AudioPreviewNode,
} }
NODE_DISPLAY_NAME_MAPPINGS = { NODE_DISPLAY_NAME_MAPPINGS = {
"MusicGen": "MusicGen (Musik)", "MusicGen": "MusicGen (Musik)",
"AudioGen": "AudioGen (Sound Effects)", "AudioGen": "AudioGen (Sound Effects)",
"AudioUpsample": "Audio Upsample (Qualität)",
"AudioPreview": "Audio Preview",
} }

View file

@ -1,98 +1,135 @@
{ {
"last_node_id": 4, "last_node_id": 6,
"last_link_id": 0, "last_link_id": 4,
"nodes": [ "nodes": [
{ {
"id": 1, "id": 1,
"type": "MusicGen", "type": "MusicGen",
"pos": [100, 100], "pos": [50, 100],
"size": {"0": 400, "1": 200}, "size": {"0": 420, "1": 370},
"flags": {}, "flags": {},
"order": 0, "order": 0,
"mode": 0, "mode": 0,
"inputs": [], "inputs": [],
"outputs": [ "outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0} {"name": "audio_file", "type": "AUDIO_PATH", "links": [1], "slot_index": 0}
], ],
"properties": {"Node name for S&R": "MusicGen"}, "properties": {"Node name for S&R": "MusicGen"},
"widgets_values": [ "widgets_values": [
"dark dungeon ambience, slow, mysterious, atmospheric, medieval", "epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
10.0, 30.0,
"facebook/musicgen-small", "facebook/musicgen-stereo-medium",
0 0,
0.9,
5.0,
250,
18.0
] ]
}, },
{ {
"id": 2, "id": 2,
"type": "MusicGen", "type": "AudioUpsample",
"pos": [100, 380], "pos": [520, 100],
"size": {"0": 400, "1": 200}, "size": {"0": 300, "1": 120},
"flags": {}, "flags": {},
"order": 1, "order": 1,
"mode": 0, "mode": 0,
"inputs": [], "inputs": [
"outputs": [ {"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
], ],
"properties": {"Node name for S&R": "MusicGen"}, "outputs": [
"widgets_values": [ {"name": "audio_file", "type": "AUDIO_PATH", "links": [2], "slot_index": 0}
"epic battle music, drums, dark fantasy, intense, orchestral", ],
15.0, "properties": {"Node name for S&R": "AudioUpsample"},
"facebook/musicgen-small", "widgets_values": [48000]
42
]
}, },
{ {
"id": 3, "id": 3,
"type": "AudioGen", "type": "AudioPreview",
"pos": [100, 660], "pos": [870, 100],
"size": {"0": 400, "1": 160}, "size": {"0": 250, "1": 80},
"flags": {}, "flags": {},
"order": 2, "order": 2,
"mode": 0, "mode": 0,
"inputs": [], "inputs": [
"outputs": [ {"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
], ],
"properties": {"Node name for S&R": "AudioGen"}, "outputs": [],
"widgets_values": [ "properties": {"Node name for S&R": "AudioPreview"},
"sword slash metal clang sound effect", "widgets_values": []
3.0,
0
]
}, },
{ {
"id": 4, "id": 4,
"type": "AudioGen", "type": "AudioGen",
"pos": [100, 880], "pos": [50, 560],
"size": {"0": 400, "1": 160}, "size": {"0": 420, "1": 330},
"flags": {}, "flags": {},
"order": 3, "order": 3,
"mode": 0, "mode": 0,
"inputs": [], "inputs": [],
"outputs": [ "outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0} {"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
], ],
"properties": {"Node name for S&R": "AudioGen"}, "properties": {"Node name for S&R": "AudioGen"},
"widgets_values": [ "widgets_values": [
"footsteps on stone dungeon floor", "fast sword swing whoosh attack sound effect combat",
3.0, 2.0,
1 0,
1.0,
4.0,
250
] ]
},
{
"id": 5,
"type": "AudioUpsample",
"pos": [520, 560],
"size": {"0": 300, "1": 120},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
],
"properties": {"Node name for S&R": "AudioUpsample"},
"widgets_values": [48000]
},
{
"id": 6,
"type": "AudioPreview",
"pos": [870, 560],
"size": {"0": 250, "1": 80},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
],
"outputs": [],
"properties": {"Node name for S&R": "AudioPreview"},
"widgets_values": []
} }
], ],
"links": [], "links": [
[1, 1, 0, 2, 0, "AUDIO_PATH"],
[2, 2, 0, 3, 0, "AUDIO_PATH"],
[3, 4, 0, 5, 0, "AUDIO_PATH"],
[4, 5, 0, 6, 0, "AUDIO_PATH"]
],
"groups": [ "groups": [
{ {
"title": "Musik", "title": "Musik (MusicGen → Upsample → Preview)",
"bounding": [80, 75, 440, 480], "bounding": [30, 65, 1120, 420],
"color": "#8B0000", "color": "#3d2a5a",
"font_size": 14 "font_size": 14
}, },
{ {
"title": "Sound Effects", "title": "Sound Effects (AudioGen → Upsample → Preview)",
"bounding": [80, 635, 440, 430], "bounding": [30, 525, 1120, 390],
"color": "#1a3a4a", "color": "#1a3a2a",
"font_size": 14 "font_size": 14
} }
], ],