Fix: Stereo-Crossfade, AudioGen use_sampling, Workflow-Positionen

- MusicGenLong: Crossfade-expand für Stereo-Audio (2 Kanäle)
- MusicGenLong: Segmentzähler mit math.ceil statt int+1
- AudioGen: use_sampling=True hinzugefügt
- workflow_music.json: Node-Positionen der unteren Pipeline korrigiert

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Andre 2026-03-21 15:40:18 +01:00
parent f46de40be4
commit d10703b50d
2 changed files with 13 additions and 9 deletions

View file

@ -85,7 +85,9 @@ class MusicGenLongNode:
from audiocraft.data.audio import audio_write
torch.manual_seed(seed)
print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten")
import math
total_segments = math.ceil(total_duration / segment_duration)
print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {total_segments} Segmenten à {segment_duration}s")
mg = MusicGen.get_pretrained(model)
sample_rate = mg.sample_rate
@ -94,7 +96,7 @@ class MusicGenLongNode:
# Erstes Segment
gen_params = dict(use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, top_p=0.0)
mg.set_generation_params(duration=segment_duration, **gen_params)
print(f"[MusicGenLong] Segment 1 / {int(total_duration / segment_duration) + 1}")
print(f"[MusicGenLong] Segment 1 / {total_segments}")
first = mg.generate([prompt])
segments = [first[0].cpu()]
generated = segment_duration
@ -114,12 +116,13 @@ class MusicGenLongNode:
generated += next_dur
seg_num += 1
# Crossfade und zusammenfügen
# Crossfade und zusammenfügen (funktioniert für Mono und Stereo)
crossfade_samples = context_samples
result = segments[0]
num_channels = result.shape[0]
for seg in segments[1:]:
fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0)
fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0)
fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
overlap = result[:, -crossfade_samples:] * fade_out + seg[:, :crossfade_samples] * fade_in
result = torch.cat([result[:, :-crossfade_samples], overlap, seg[:, crossfade_samples:]], dim=1)
@ -162,6 +165,7 @@ class AudioGenNode:
ag = AudioGen.get_pretrained("facebook/audiogen-medium")
ag.set_generation_params(
duration=duration,
use_sampling=True,
temperature=temperature,
cfg_coef=cfg_coef,
top_k=top_k,

View file

@ -86,10 +86,10 @@
{
"id": 2,
"type": "AudioUpsample",
"pos": [580, 100],
"pos": [580, 620],
"size": {"0": 300, "1": 120},
"flags": {},
"order": 1,
"order": 4,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
@ -103,10 +103,10 @@
{
"id": 3,
"type": "AudioPreview",
"pos": [930, 100],
"pos": [930, 620],
"size": {"0": 250, "1": 80},
"flags": {},
"order": 2,
"order": 5,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 2}