ComfyUI AudioCraft: verbesserte Nodes + Workflow Template
- nodes.py: Parameter temperature, cfg_coef, top_k, extend_stride
- AudioUpsample-Node: 16kHz/32kHz -> 48kHz für bessere Qualität
- AudioPreview-Node: Vorschau direkt in der ComfyUI-UI
- workflow_game_audio.json: Template mit Musik- und SFX-Pipeline
- Standardmodell: musicgen-stereo-medium (besserer Sound)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b28b4ddc85
commit
4feff43758
2 changed files with 164 additions and 62 deletions
|
|
@ -9,10 +9,14 @@ class MusicGenNode:
|
||||||
def INPUT_TYPES(cls):
|
def INPUT_TYPES(cls):
|
||||||
return {
|
return {
|
||||||
"required": {
|
"required": {
|
||||||
"prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
|
"prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
|
||||||
"duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 60.0, "step": 1.0}),
|
"duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}),
|
||||||
"model": (["facebook/musicgen-small", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-stereo-medium"],),
|
"model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
|
||||||
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
|
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
|
||||||
|
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
|
||||||
|
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
|
||||||
|
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
|
||||||
|
"extend_stride": ("FLOAT", {"default": 18.0, "min": 1.0, "max": 30.0, "step": 1.0}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -22,7 +26,7 @@ class MusicGenNode:
|
||||||
CATEGORY = "AudioCraft"
|
CATEGORY = "AudioCraft"
|
||||||
OUTPUT_NODE = True
|
OUTPUT_NODE = True
|
||||||
|
|
||||||
def generate(self, prompt, duration, model, seed):
|
def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
|
||||||
from audiocraft.models import MusicGen
|
from audiocraft.models import MusicGen
|
||||||
from audiocraft.data.audio import audio_write
|
from audiocraft.data.audio import audio_write
|
||||||
|
|
||||||
|
|
@ -30,7 +34,13 @@ class MusicGenNode:
|
||||||
|
|
||||||
print(f"[MusicGen] Loading model: {model}")
|
print(f"[MusicGen] Loading model: {model}")
|
||||||
mg = MusicGen.get_pretrained(model)
|
mg = MusicGen.get_pretrained(model)
|
||||||
mg.set_generation_params(duration=duration)
|
mg.set_generation_params(
|
||||||
|
duration=duration,
|
||||||
|
temperature=temperature,
|
||||||
|
cfg_coef=cfg_coef,
|
||||||
|
top_k=top_k,
|
||||||
|
extend_stride=extend_stride,
|
||||||
|
)
|
||||||
|
|
||||||
print(f"[MusicGen] Generating: {prompt}")
|
print(f"[MusicGen] Generating: {prompt}")
|
||||||
wav = mg.generate([prompt])
|
wav = mg.generate([prompt])
|
||||||
|
|
@ -49,9 +59,12 @@ class AudioGenNode:
|
||||||
def INPUT_TYPES(cls):
|
def INPUT_TYPES(cls):
|
||||||
return {
|
return {
|
||||||
"required": {
|
"required": {
|
||||||
"prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
|
"prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
|
||||||
"duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
|
"duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
|
||||||
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
|
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
|
||||||
|
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
|
||||||
|
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
|
||||||
|
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -61,7 +74,7 @@ class AudioGenNode:
|
||||||
CATEGORY = "AudioCraft"
|
CATEGORY = "AudioCraft"
|
||||||
OUTPUT_NODE = True
|
OUTPUT_NODE = True
|
||||||
|
|
||||||
def generate(self, prompt, duration, seed):
|
def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k):
|
||||||
from audiocraft.models import AudioGen
|
from audiocraft.models import AudioGen
|
||||||
from audiocraft.data.audio import audio_write
|
from audiocraft.data.audio import audio_write
|
||||||
|
|
||||||
|
|
@ -69,7 +82,12 @@ class AudioGenNode:
|
||||||
|
|
||||||
print(f"[AudioGen] Loading model...")
|
print(f"[AudioGen] Loading model...")
|
||||||
ag = AudioGen.get_pretrained("facebook/audiogen-medium")
|
ag = AudioGen.get_pretrained("facebook/audiogen-medium")
|
||||||
ag.set_generation_params(duration=duration)
|
ag.set_generation_params(
|
||||||
|
duration=duration,
|
||||||
|
temperature=temperature,
|
||||||
|
cfg_coef=cfg_coef,
|
||||||
|
top_k=top_k,
|
||||||
|
)
|
||||||
|
|
||||||
print(f"[AudioGen] Generating: {prompt}")
|
print(f"[AudioGen] Generating: {prompt}")
|
||||||
wav = ag.generate([prompt])
|
wav = ag.generate([prompt])
|
||||||
|
|
@ -83,12 +101,59 @@ class AudioGenNode:
|
||||||
return (out_path + ".wav",)
|
return (out_path + ".wav",)
|
||||||
|
|
||||||
|
|
||||||
|
class AudioUpsampleNode:
    """Resample an audio file to a target sample rate via torchaudio.

    The node loads the file at ``audio_file``, resamples it when its sample
    rate differs from ``target_sr``, writes the result next to the input
    under a new name, and returns the new path.

    NOTE(review): ``torchaudio`` is not imported inside this class; it is
    presumably imported at module level - confirm.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: the source path and the target rate."""
        return {
            "required": {
                "audio_file": ("AUDIO_PATH",),
                "target_sr": ([48000, 44100], {"default": 48000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _build_output_path(audio_file, target_sr):
        """Return the output path: ``<stem>_<target_sr>hz<ext>``.

        Only the final extension of the file name is touched, so a ".wav"
        that appears in a directory name is left alone. The result always
        differs from ``audio_file``, so the input is never overwritten.
        A path without an extension gets ".wav".
        """
        import os  # local import keeps this block self-contained

        root, ext = os.path.splitext(audio_file)
        return f"{root}_{target_sr}hz{ext or '.wav'}"

    def upsample(self, audio_file, target_sr):
        """Resample ``audio_file`` to ``target_sr`` and save it as a new file.

        Args:
            audio_file: Path of the audio file to load.
            target_sr: Sample rate in Hz for the written file.

        Returns:
            A one-element tuple holding the path of the written file.
        """
        wav, sr = torchaudio.load(audio_file)
        # Skip the resampler when the file already has the requested rate;
        # the file is still written out under the new name.
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        out_path = self._build_output_path(audio_file, target_sr)
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)
|
||||||
|
|
||||||
|
|
||||||
|
class AudioPreviewNode:
    """Show an audio file in the ComfyUI UI."""

    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the single required input: the audio file path."""
        required = {"audio_file": ("AUDIO_PATH",)}
        return {"required": required}

    def preview(self, audio_file):
        """Build the UI payload that describes ``audio_file``.

        Only the base name of the path is reported; the subfolder and type
        are fixed strings.
        NOTE(review): presumably the file is expected to live in the
        "audio" subfolder of the output directory - confirm.
        """
        entry = {
            "filename": os.path.basename(audio_file),
            "subfolder": "audio",
            "type": "output",
        }
        return {"ui": {"audio": [entry]}}
|
||||||
|
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
NODE_CLASS_MAPPINGS = {
|
||||||
"MusicGen": MusicGenNode,
|
"MusicGen": MusicGenNode,
|
||||||
"AudioGen": AudioGenNode,
|
"AudioGen": AudioGenNode,
|
||||||
|
"AudioUpsample": AudioUpsampleNode,
|
||||||
|
"AudioPreview": AudioPreviewNode,
|
||||||
}
|
}
|
||||||
|
|
||||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||||
"MusicGen": "MusicGen (Musik)",
|
"MusicGen": "MusicGen (Musik)",
|
||||||
"AudioGen": "AudioGen (Sound Effects)",
|
"AudioGen": "AudioGen (Sound Effects)",
|
||||||
|
"AudioUpsample": "Audio Upsample (Qualität)",
|
||||||
|
"AudioPreview": "Audio Preview",
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,98 +1,135 @@
|
||||||
{
|
{
|
||||||
"last_node_id": 4,
|
"last_node_id": 6,
|
||||||
"last_link_id": 0,
|
"last_link_id": 4,
|
||||||
"nodes": [
|
"nodes": [
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"type": "MusicGen",
|
"type": "MusicGen",
|
||||||
"pos": [100, 100],
|
"pos": [50, 100],
|
||||||
"size": {"0": 400, "1": 200},
|
"size": {"0": 420, "1": 370},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 0,
|
"order": 0,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [],
|
"inputs": [],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
{"name": "audio_file", "type": "AUDIO_PATH", "links": [1], "slot_index": 0}
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "MusicGen"},
|
"properties": {"Node name for S&R": "MusicGen"},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"dark dungeon ambience, slow, mysterious, atmospheric, medieval",
|
"epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
|
||||||
10.0,
|
30.0,
|
||||||
"facebook/musicgen-small",
|
"facebook/musicgen-stereo-medium",
|
||||||
0
|
0,
|
||||||
|
0.9,
|
||||||
|
5.0,
|
||||||
|
250,
|
||||||
|
18.0
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "MusicGen",
|
"type": "AudioUpsample",
|
||||||
"pos": [100, 380],
|
"pos": [520, 100],
|
||||||
"size": {"0": 400, "1": 200},
|
"size": {"0": 300, "1": 120},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 1,
|
"order": 1,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [],
|
"inputs": [
|
||||||
"outputs": [
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
|
||||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "MusicGen"},
|
"outputs": [
|
||||||
"widgets_values": [
|
{"name": "audio_file", "type": "AUDIO_PATH", "links": [2], "slot_index": 0}
|
||||||
"epic battle music, drums, dark fantasy, intense, orchestral",
|
],
|
||||||
15.0,
|
"properties": {"Node name for S&R": "AudioUpsample"},
|
||||||
"facebook/musicgen-small",
|
"widgets_values": [48000]
|
||||||
42
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3,
|
"id": 3,
|
||||||
"type": "AudioGen",
|
"type": "AudioPreview",
|
||||||
"pos": [100, 660],
|
"pos": [870, 100],
|
||||||
"size": {"0": 400, "1": 160},
|
"size": {"0": 250, "1": 80},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 2,
|
"order": 2,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [],
|
"inputs": [
|
||||||
"outputs": [
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
|
||||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "AudioGen"},
|
"outputs": [],
|
||||||
"widgets_values": [
|
"properties": {"Node name for S&R": "AudioPreview"},
|
||||||
"sword slash metal clang sound effect",
|
"widgets_values": []
|
||||||
3.0,
|
|
||||||
0
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 4,
|
"id": 4,
|
||||||
"type": "AudioGen",
|
"type": "AudioGen",
|
||||||
"pos": [100, 880],
|
"pos": [50, 560],
|
||||||
"size": {"0": 400, "1": 160},
|
"size": {"0": 420, "1": 330},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 3,
|
"order": 3,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [],
|
"inputs": [],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
{"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "AudioGen"},
|
"properties": {"Node name for S&R": "AudioGen"},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"footsteps on stone dungeon floor",
|
"fast sword swing whoosh attack sound effect combat",
|
||||||
3.0,
|
2.0,
|
||||||
1
|
0,
|
||||||
|
1.0,
|
||||||
|
4.0,
|
||||||
|
250
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"type": "AudioUpsample",
|
||||||
|
"pos": [520, 560],
|
||||||
|
"size": {"0": 300, "1": 120},
|
||||||
|
"flags": {},
|
||||||
|
"order": 4,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
|
||||||
|
],
|
||||||
|
"properties": {"Node name for S&R": "AudioUpsample"},
|
||||||
|
"widgets_values": [48000]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"type": "AudioPreview",
|
||||||
|
"pos": [870, 560],
|
||||||
|
"size": {"0": 250, "1": 80},
|
||||||
|
"flags": {},
|
||||||
|
"order": 5,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
|
||||||
|
],
|
||||||
|
"outputs": [],
|
||||||
|
"properties": {"Node name for S&R": "AudioPreview"},
|
||||||
|
"widgets_values": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"links": [],
|
"links": [
|
||||||
|
[1, 1, 0, 2, 0, "AUDIO_PATH"],
|
||||||
|
[2, 2, 0, 3, 0, "AUDIO_PATH"],
|
||||||
|
[3, 4, 0, 5, 0, "AUDIO_PATH"],
|
||||||
|
[4, 5, 0, 6, 0, "AUDIO_PATH"]
|
||||||
|
],
|
||||||
"groups": [
|
"groups": [
|
||||||
{
|
{
|
||||||
"title": "Musik",
|
"title": "Musik (MusicGen → Upsample → Preview)",
|
||||||
"bounding": [80, 75, 440, 480],
|
"bounding": [30, 65, 1120, 420],
|
||||||
"color": "#8B0000",
|
"color": "#3d2a5a",
|
||||||
"font_size": 14
|
"font_size": 14
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"title": "Sound Effects",
|
"title": "Sound Effects (AudioGen → Upsample → Preview)",
|
||||||
"bounding": [80, 635, 440, 430],
|
"bounding": [30, 525, 1120, 390],
|
||||||
"color": "#1a3a4a",
|
"color": "#1a3a2a",
|
||||||
"font_size": 14
|
"font_size": 14
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue