Fix: Stereo-Crossfade, AudioGen use_sampling, Workflow-Positionen
- MusicGenLong: Crossfade-expand für Stereo-Audio (2 Kanäle)
- MusicGenLong: Segmentzähler mit math.ceil statt int+1
- AudioGen: use_sampling=True hinzugefügt
- workflow_music.json: Node-Positionen der unteren Pipeline korrigiert

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f46de40be4
commit
d10703b50d
2 changed files with 13 additions and 9 deletions
|
|
@ -85,7 +85,9 @@ class MusicGenLongNode:
|
||||||
from audiocraft.data.audio import audio_write
|
from audiocraft.data.audio import audio_write
|
||||||
|
|
||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten")
|
import math
|
||||||
|
total_segments = math.ceil(total_duration / segment_duration)
|
||||||
|
print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {total_segments} Segmenten à {segment_duration}s")
|
||||||
|
|
||||||
mg = MusicGen.get_pretrained(model)
|
mg = MusicGen.get_pretrained(model)
|
||||||
sample_rate = mg.sample_rate
|
sample_rate = mg.sample_rate
|
||||||
|
|
@ -94,7 +96,7 @@ class MusicGenLongNode:
|
||||||
# Erstes Segment
|
# Erstes Segment
|
||||||
gen_params = dict(use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, top_p=0.0)
|
gen_params = dict(use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, top_p=0.0)
|
||||||
mg.set_generation_params(duration=segment_duration, **gen_params)
|
mg.set_generation_params(duration=segment_duration, **gen_params)
|
||||||
print(f"[MusicGenLong] Segment 1 / {int(total_duration / segment_duration) + 1}")
|
print(f"[MusicGenLong] Segment 1 / {total_segments}")
|
||||||
first = mg.generate([prompt])
|
first = mg.generate([prompt])
|
||||||
segments = [first[0].cpu()]
|
segments = [first[0].cpu()]
|
||||||
generated = segment_duration
|
generated = segment_duration
|
||||||
|
|
@ -114,12 +116,13 @@ class MusicGenLongNode:
|
||||||
generated += next_dur
|
generated += next_dur
|
||||||
seg_num += 1
|
seg_num += 1
|
||||||
|
|
||||||
# Crossfade und zusammenfügen
|
# Crossfade und zusammenfügen (funktioniert für Mono und Stereo)
|
||||||
crossfade_samples = context_samples
|
crossfade_samples = context_samples
|
||||||
result = segments[0]
|
result = segments[0]
|
||||||
|
num_channels = result.shape[0]
|
||||||
for seg in segments[1:]:
|
for seg in segments[1:]:
|
||||||
fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0)
|
fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
|
||||||
fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0)
|
fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1)
|
||||||
overlap = result[:, -crossfade_samples:] * fade_out + seg[:, :crossfade_samples] * fade_in
|
overlap = result[:, -crossfade_samples:] * fade_out + seg[:, :crossfade_samples] * fade_in
|
||||||
result = torch.cat([result[:, :-crossfade_samples], overlap, seg[:, crossfade_samples:]], dim=1)
|
result = torch.cat([result[:, :-crossfade_samples], overlap, seg[:, crossfade_samples:]], dim=1)
|
||||||
|
|
||||||
|
|
@ -162,6 +165,7 @@ class AudioGenNode:
|
||||||
ag = AudioGen.get_pretrained("facebook/audiogen-medium")
|
ag = AudioGen.get_pretrained("facebook/audiogen-medium")
|
||||||
ag.set_generation_params(
|
ag.set_generation_params(
|
||||||
duration=duration,
|
duration=duration,
|
||||||
|
use_sampling=True,
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
cfg_coef=cfg_coef,
|
cfg_coef=cfg_coef,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
|
|
|
||||||
|
|
@ -86,10 +86,10 @@
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "AudioUpsample",
|
"type": "AudioUpsample",
|
||||||
"pos": [580, 100],
|
"pos": [580, 620],
|
||||||
"size": {"0": 300, "1": 120},
|
"size": {"0": 300, "1": 120},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 1,
|
"order": 4,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
|
||||||
|
|
@ -103,10 +103,10 @@
|
||||||
{
|
{
|
||||||
"id": 3,
|
"id": 3,
|
||||||
"type": "AudioPreview",
|
"type": "AudioPreview",
|
||||||
"pos": [930, 100],
|
"pos": [930, 620],
|
||||||
"size": {"0": 250, "1": 80},
|
"size": {"0": 250, "1": 80},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 2,
|
"order": 5,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
|
{"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue