gstscaletempo.c 29.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/*
 * GStreamer
 * Copyright (C) 2008 Rov Juvano <rovjuvano@users.sourceforge.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/**
 * SECTION:element-scaletempo
 *
 * Scale tempo while maintaining pitch
 * (WSOLA-like technique with cross correlation)
 * Inspired by SoundTouch library by Olli Parviainen
27
 *
28
 * Use Sceletempo to apply playback rates without the chipmunk effect.
29 30
 *
 * <refsect2>
31 32
 * <title>Example pipelines</title>
 * <para>
33
 * |[
34 35
 * filesrc location=media.ext ! decodebin name=d \
 *     d. ! queue ! audioconvert ! audioresample ! scaletempo ! audioconvert ! audioresample ! autoaudiosink \
36
 *     d. ! queue ! videoconvert ! autovideosink
37
 * ]|
38
 * OR
39
 * |[
40
 * playbin uri=... audio_sink="scaletempo ! audioconvert ! audioresample ! autoaudiosink"
41
 * ]|
42 43
 * When an application sends a seek event with rate != 1.0, Scaletempo applies
 * the rate change by scaling the tempo without scaling the pitch.
44 45 46 47 48
 *
 * Scaletempo works by producing audio in constant sized chunks
 * (#GstScaletempo:stride) but consuming chunks proportional to the playback
 * rate.
 *
49
 * Scaletempo then smooths the output by blending the end of one stride with
50 51
 * the next (#GstScaletempo:overlap).
 *
52
 * Scaletempo smooths the overlap further by searching within the input buffer
53 54 55 56
 * for the best overlap position.  Scaletempo uses a statistical cross
 * correlation (roughly a dot-product).  Scaletempo consumes most of its CPU
 * cycles here. One can use the #GstScaletempo:search propery to tune how far
 * the algoritm looks.
57 58 59 60 61 62 63 64 65 66 67 68 69 70
 * </para>
 * </refsect2>
 */

/*
 * Note: frame = audio key unit (i.e. one sample for each channel)
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <gst/gst.h>
#include <gst/base/gstbasetransform.h>
71
#include <gst/audio/audio.h>
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
#include <string.h>             /* for memset */

#include "gstscaletempo.h"

GST_DEBUG_CATEGORY_STATIC (gst_scaletempo_debug);
#define GST_CAT_DEFAULT gst_scaletempo_debug

/* Filter signals and args */
enum
{
  LAST_SIGNAL
};

enum
{
  PROP_0,
  PROP_RATE,
  PROP_STRIDE,
  PROP_OVERLAP,
  PROP_SEARCH,
};

#define SUPPORTED_CAPS \
GST_STATIC_CAPS ( \
96
    GST_AUDIO_CAPS_MAKE (GST_AUDIO_NE (F32)) "; " \
97
    GST_AUDIO_CAPS_MAKE (GST_AUDIO_NE (F64)) "; " \
98
    GST_AUDIO_CAPS_MAKE (GST_AUDIO_NE (S16)) \
99 100 101 102 103 104 105 106 107 108 109 110 111 112
)

static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    SUPPORTED_CAPS);

static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    SUPPORTED_CAPS);

#define DEBUG_INIT(bla) GST_DEBUG_CATEGORY_INIT (gst_scaletempo_debug, "scaletempo", 0, "scaletempo element");

113 114 115
#define gst_scaletempo_parent_class parent_class
G_DEFINE_TYPE_WITH_CODE (GstScaletempo, gst_scaletempo,
    GST_TYPE_BASE_TRANSFORM, DEBUG_INIT (0));
116

117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
#define CREATE_BEST_OVERLAP_OFFSET_FLOAT_FUNC(type) \
static guint \
best_overlap_offset_##type (GstScaletempo * st) \
{ \
  g##type *pw, *po, *ppc, *search_start; \
  g##type best_corr = G_MININT; \
  guint best_off = 0; \
  gint i, off; \
  \
  pw = st->table_window; \
  po = st->buf_overlap; \
  po += st->samples_per_frame; \
  ppc = st->buf_pre_corr; \
  for (i = st->samples_per_frame; i < st->samples_overlap; i++) { \
    *ppc++ = *pw++ * *po++; \
  } \
  \
  search_start = (g##type *) st->buf_queue + st->samples_per_frame; \
  for (off = 0; off < st->frames_search; off++) { \
    g##type corr = 0; \
    g##type *ps = search_start; \
    ppc = st->buf_pre_corr; \
    for (i = st->samples_per_frame; i < st->samples_overlap; i++) { \
      corr += *ppc++ * *ps++; \
    } \
    if (corr > best_corr) { \
      best_corr = corr; \
      best_off = off; \
    } \
    search_start += st->samples_per_frame; \
  } \
  \
  return best_off * st->bytes_per_frame; \
150 151
}

152 153 154
CREATE_BEST_OVERLAP_OFFSET_FLOAT_FUNC (float);
CREATE_BEST_OVERLAP_OFFSET_FLOAT_FUNC (double);

155 156 157
/* buffer padding for loop optimization: sizeof(gint32) * (loop_size - 1) */
#define UNROLL_PADDING (4*3)
static guint
158
best_overlap_offset_s16 (GstScaletempo * st)
159 160 161 162 163 164 165 166
{
  gint32 *pw, *ppc;
  gint16 *po, *search_start;
  gint64 best_corr = G_MININT64;
  guint best_off = 0;
  guint off;
  glong i;

167 168 169 170 171
  pw = st->table_window;
  po = st->buf_overlap;
  po += st->samples_per_frame;
  ppc = st->buf_pre_corr;
  for (i = st->samples_per_frame; i < st->samples_overlap; i++) {
172 173 174
    *ppc++ = (*pw++ * *po++) >> 15;
  }

175 176
  search_start = (gint16 *) st->buf_queue + st->samples_per_frame;
  for (off = 0; off < st->frames_search; off++) {
177 178
    gint64 corr = 0;
    gint16 *ps = search_start;
179 180 181 182
    ppc = st->buf_pre_corr;
    ppc += st->samples_overlap - st->samples_per_frame;
    ps += st->samples_overlap - st->samples_per_frame;
    i = -((glong) st->samples_overlap - (glong) st->samples_per_frame);
183 184 185 186 187 188 189 190 191 192 193
    do {
      corr += ppc[i + 0] * ps[i + 0];
      corr += ppc[i + 1] * ps[i + 1];
      corr += ppc[i + 2] * ps[i + 2];
      corr += ppc[i + 3] * ps[i + 3];
      i += 4;
    } while (i < 0);
    if (corr > best_corr) {
      best_corr = corr;
      best_off = off;
    }
194
    search_start += st->samples_per_frame;
195 196
  }

197
  return best_off * st->bytes_per_frame;
198 199
}

200 201 202 203 204 205 206 207 208 209 210 211 212
#define CREATE_OUTPUT_OVERLAP_FLOAT_FUNC(type) \
static void \
output_overlap_##type (GstScaletempo * st, gpointer buf_out, guint bytes_off) \
{ \
  g##type *pout = buf_out; \
  g##type *pb = st->table_blend; \
  g##type *po = st->buf_overlap; \
  g##type *pin = (g##type *) (st->buf_queue + bytes_off); \
  gint i; \
  for (i = 0; i < st->samples_overlap; i++) { \
    *pout++ = *po - *pb++ * (*po - *pin++); \
    po++; \
  } \
213 214
}

215 216 217
CREATE_OUTPUT_OVERLAP_FLOAT_FUNC (float);
CREATE_OUTPUT_OVERLAP_FLOAT_FUNC (double);

218
static void
219
output_overlap_s16 (GstScaletempo * st, gpointer buf_out, guint bytes_off)
220 221
{
  gint16 *pout = buf_out;
222 223 224
  gint32 *pb = st->table_blend;
  gint16 *po = st->buf_overlap;
  gint16 *pin = (gint16 *) (st->buf_queue + bytes_off);
225
  gint i;
226
  for (i = 0; i < st->samples_overlap; i++) {
227 228 229 230 231 232
    *pout++ = *po - ((*pb++ * (*po - *pin++)) >> 16);
    po++;
  }
}

static guint
233
fill_queue (GstScaletempo * st, GstBuffer * buf_in, guint offset)
234
{
235
  guint bytes_in = gst_buffer_get_size (buf_in) - offset;
236
  guint offset_unchanged = offset;
237
  GstMapInfo map;
238

239
  gst_buffer_map (buf_in, &map, GST_MAP_READ);
240 241 242 243 244 245 246
  if (st->bytes_to_slide > 0) {
    if (st->bytes_to_slide < st->bytes_queued) {
      guint bytes_in_move = st->bytes_queued - st->bytes_to_slide;
      memmove (st->buf_queue, st->buf_queue + st->bytes_to_slide,
          bytes_in_move);
      st->bytes_to_slide = 0;
      st->bytes_queued = bytes_in_move;
247 248
    } else {
      guint bytes_in_skip;
249 250 251 252
      st->bytes_to_slide -= st->bytes_queued;
      bytes_in_skip = MIN (st->bytes_to_slide, bytes_in);
      st->bytes_queued = 0;
      st->bytes_to_slide -= bytes_in_skip;
253 254 255 256 257 258
      offset += bytes_in_skip;
      bytes_in -= bytes_in_skip;
    }
  }

  if (bytes_in > 0) {
259 260 261 262
    guint bytes_in_copy =
        MIN (st->bytes_queue_max - st->bytes_queued, bytes_in);
    memcpy (st->buf_queue + st->bytes_queued, map.data + offset, bytes_in_copy);
    st->bytes_queued += bytes_in_copy;
263 264
    offset += bytes_in_copy;
  }
265
  gst_buffer_unmap (buf_in, &map);
266 267 268 269 270

  return offset - offset_unchanged;
}

static void
271
reinit_buffers (GstScaletempo * st)
272 273 274 275
{
  gint i, j;
  guint frames_overlap;
  guint new_size;
276
  GstClockTime latency;
277

278 279
  guint frames_stride = st->ms_stride * st->sample_rate / 1000.0;
  st->bytes_stride = frames_stride * st->bytes_per_frame;
280 281

  /* overlap */
282
  frames_overlap = frames_stride * st->percent_overlap;
283
  if (frames_overlap < 1) {     /* if no overlap */
284 285 286 287
    st->bytes_overlap = 0;
    st->bytes_standing = st->bytes_stride;
    st->samples_standing = st->bytes_standing / st->bytes_per_sample;
    st->output_overlap = NULL;
288
  } else {
289 290 291 292 293 294
    guint prev_overlap = st->bytes_overlap;
    st->bytes_overlap = frames_overlap * st->bytes_per_frame;
    st->samples_overlap = frames_overlap * st->samples_per_frame;
    st->bytes_standing = st->bytes_stride - st->bytes_overlap;
    st->samples_standing = st->bytes_standing / st->bytes_per_sample;
    st->buf_overlap = g_realloc (st->buf_overlap, st->bytes_overlap);
295
    /* S16 uses gint32 blend table, floats/doubles use their respective type */
296
    st->table_blend =
297 298 299
        g_realloc (st->table_blend,
        st->samples_overlap * (st->format ==
            GST_AUDIO_FORMAT_S16 ? 4 : st->bytes_per_sample));
300 301 302
    if (st->bytes_overlap > prev_overlap) {
      memset ((guint8 *) st->buf_overlap + prev_overlap, 0,
          st->bytes_overlap - prev_overlap);
303
    }
304
    if (st->format == GST_AUDIO_FORMAT_S16) {
305
      gint32 *pb = st->table_blend;
306 307 308
      gint64 blend = 0;
      for (i = 0; i < frames_overlap; i++) {
        gint32 v = blend / frames_overlap;
309
        for (j = 0; j < st->samples_per_frame; j++) {
310 311 312 313
          *pb++ = v;
        }
        blend += 65535;         /* 2^16 */
      }
314
      st->output_overlap = output_overlap_s16;
315
    } else if (st->format == GST_AUDIO_FORMAT_F32) {
316
      gfloat *pb = st->table_blend;
317 318 319
      gfloat t = (gfloat) frames_overlap;
      for (i = 0; i < frames_overlap; i++) {
        gfloat v = i / t;
320
        for (j = 0; j < st->samples_per_frame; j++) {
321 322 323
          *pb++ = v;
        }
      }
324
      st->output_overlap = output_overlap_float;
325 326 327 328 329 330 331 332 333 334
    } else {
      gdouble *pb = st->table_blend;
      gdouble t = (gdouble) frames_overlap;
      for (i = 0; i < frames_overlap; i++) {
        gdouble v = i / t;
        for (j = 0; j < st->samples_per_frame; j++) {
          *pb++ = v;
        }
      }
      st->output_overlap = output_overlap_double;
335 336 337 338
    }
  }

  /* best overlap */
339 340 341 342
  st->frames_search =
      (frames_overlap <= 1) ? 0 : st->ms_search * st->sample_rate / 1000.0;
  if (st->frames_search < 1) {  /* if no search */
    st->best_overlap_offset = NULL;
343
  } else {
344
    /* S16 uses gint32 buffer, floats/doubles use their respective type */
345
    guint bytes_pre_corr =
346 347
        (st->samples_overlap - st->samples_per_frame) * (st->format ==
        GST_AUDIO_FORMAT_S16 ? 4 : st->bytes_per_sample);
348 349 350
    st->buf_pre_corr =
        g_realloc (st->buf_pre_corr, bytes_pre_corr + UNROLL_PADDING);
    st->table_window = g_realloc (st->table_window, bytes_pre_corr);
351
    if (st->format == GST_AUDIO_FORMAT_S16) {
352 353 354 355
      gint64 t = frames_overlap;
      gint32 n = 8589934588LL / (t * t);        /* 4 * (2^31 - 1) / t^2 */
      gint32 *pw;

356 357
      memset ((guint8 *) st->buf_pre_corr + bytes_pre_corr, 0, UNROLL_PADDING);
      pw = st->table_window;
358 359
      for (i = 1; i < frames_overlap; i++) {
        gint32 v = (i * (t - i) * n) >> 15;
360
        for (j = 0; j < st->samples_per_frame; j++) {
361 362 363
          *pw++ = v;
        }
      }
364
      st->best_overlap_offset = best_overlap_offset_s16;
365
    } else if (st->format == GST_AUDIO_FORMAT_F32) {
366
      gfloat *pw = st->table_window;
367 368
      for (i = 1; i < frames_overlap; i++) {
        gfloat v = i * (frames_overlap - i);
369
        for (j = 0; j < st->samples_per_frame; j++) {
370 371 372
          *pw++ = v;
        }
      }
373
      st->best_overlap_offset = best_overlap_offset_float;
374 375 376 377 378 379 380 381 382
    } else {
      gdouble *pw = st->table_window;
      for (i = 1; i < frames_overlap; i++) {
        gdouble v = i * (frames_overlap - i);
        for (j = 0; j < st->samples_per_frame; j++) {
          *pw++ = v;
        }
      }
      st->best_overlap_offset = best_overlap_offset_double;
383 384 385 386
    }
  }

  new_size =
387 388 389 390 391 392
      (st->frames_search + frames_stride +
      frames_overlap) * st->bytes_per_frame;
  if (st->bytes_queued > new_size) {
    if (st->bytes_to_slide > st->bytes_queued) {
      st->bytes_to_slide -= st->bytes_queued;
      st->bytes_queued = 0;
393
    } else {
394 395 396 397 398
      guint new_queued = MIN (st->bytes_queued - st->bytes_to_slide, new_size);
      memmove (st->buf_queue,
          st->buf_queue + st->bytes_queued - new_queued, new_queued);
      st->bytes_to_slide = 0;
      st->bytes_queued = new_queued;
399 400
    }
  }
401

402 403
  st->bytes_queue_max = new_size;
  st->buf_queue = g_realloc (st->buf_queue, st->bytes_queue_max);
404

405
  latency =
406 407 408 409 410 411
      gst_util_uint64_scale (st->bytes_queue_max, GST_SECOND,
      st->bytes_per_frame * st->sample_rate);
  if (st->latency != latency) {
    st->latency = latency;
    gst_element_post_message (GST_ELEMENT (st),
        gst_message_new_latency (GST_OBJECT (st)));
412 413
  }

414 415
  st->bytes_stride_scaled = st->bytes_stride * st->scale;
  st->frames_stride_scaled = st->bytes_stride_scaled / st->bytes_per_frame;
416 417 418

  GST_DEBUG
      ("%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode",
419 420 421 422 423
      st->scale, st->frames_stride_scaled,
      (gint) (st->bytes_stride / st->bytes_per_frame),
      (gint) (st->bytes_standing / st->bytes_per_frame),
      (gint) (st->bytes_overlap / st->bytes_per_frame), st->frames_search,
      (gint) (st->bytes_queue_max / st->bytes_per_frame),
424
      gst_audio_format_to_string (st->format));
425 426

  st->reinit_buffers = FALSE;
427 428
}

429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469
static GstBuffer *
reverse_buffer (GstScaletempo * st, GstBuffer * inbuf)
{
  GstBuffer *outbuf;
  GstMapInfo imap, omap;

  gst_buffer_map (inbuf, &imap, GST_MAP_READ);
  outbuf = gst_buffer_new_and_alloc (imap.size);
  gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);

  if (st->format == GST_AUDIO_FORMAT_F64) {
    const gint64 *ip = (const gint64 *) imap.data;
    gint64 *op = (gint64 *) (omap.data + omap.size - 8 * st->samples_per_frame);
    guint i, n = imap.size / (8 * st->samples_per_frame);
    guint j, c = st->samples_per_frame;

    for (i = 0; i < n; i++) {
      for (j = 0; j < c; j++)
        op[j] = ip[j];
      op -= c;
      ip += c;
    }
  } else {
    const gint32 *ip = (const gint32 *) imap.data;
    gint32 *op = (gint32 *) (omap.data + omap.size - 4 * st->samples_per_frame);
    guint i, n = imap.size / (4 * st->samples_per_frame);
    guint j, c = st->samples_per_frame;

    for (i = 0; i < n; i++) {
      for (j = 0; j < c; j++)
        op[j] = ip[j];
      op -= c;
      ip += c;
    }
  }

  gst_buffer_unmap (inbuf, &imap);
  gst_buffer_unmap (outbuf, &omap);

  return outbuf;
}
470 471 472 473 474 475

/* GstBaseTransform vmethod implementations */
static GstFlowReturn
gst_scaletempo_transform (GstBaseTransform * trans,
    GstBuffer * inbuf, GstBuffer * outbuf)
{
476
  GstScaletempo *st = GST_SCALETEMPO (trans);
477 478 479
  gint8 *pout;
  guint offset_in, bytes_out;
  GstMapInfo omap;
480
  GstClockTime timestamp;
481 482 483 484
  GstBuffer *tmpbuf = NULL;

  if (st->reverse)
    tmpbuf = reverse_buffer (st, inbuf);
485 486 487

  gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);
  pout = (gint8 *) omap.data;
488 489 490
  bytes_out = omap.size;

  offset_in = fill_queue (st, tmpbuf ? tmpbuf : inbuf, 0);
491
  bytes_out = 0;
492
  while (st->bytes_queued >= st->bytes_queue_max) {
493 494 495 496
    guint bytes_off = 0;
    gdouble frames_to_slide;
    guint frames_to_stride_whole;

497
    /* output stride */
498 499 500
    if (st->output_overlap) {
      if (st->best_overlap_offset) {
        bytes_off = st->best_overlap_offset (st);
501
      }
502
      st->output_overlap (st, pout, bytes_off);
503
    }
504 505 506 507
    memcpy (pout + st->bytes_overlap,
        st->buf_queue + bytes_off + st->bytes_overlap, st->bytes_standing);
    pout += st->bytes_stride;
    bytes_out += st->bytes_stride;
508

509
    /* input stride */
510 511 512
    memcpy (st->buf_overlap,
        st->buf_queue + bytes_off + st->bytes_stride, st->bytes_overlap);
    frames_to_slide = st->frames_stride_scaled + st->frames_stride_error;
513
    frames_to_stride_whole = (gint) frames_to_slide;
514 515
    st->bytes_to_slide = frames_to_stride_whole * st->bytes_per_frame;
    st->frames_stride_error = frames_to_slide - frames_to_stride_whole;
516

517
    offset_in += fill_queue (st, tmpbuf ? tmpbuf : inbuf, offset_in);
518
  }
519 520
  gst_buffer_unmap (outbuf, &omap);

521 522 523 524 525 526 527 528 529 530 531 532 533 534
  if (st->reverse) {
    timestamp = st->in_segment.stop - GST_BUFFER_TIMESTAMP (inbuf);
    if (timestamp < st->latency)
      timestamp = 0;
    else
      timestamp -= st->latency;
  } else {
    timestamp = GST_BUFFER_TIMESTAMP (inbuf) - st->in_segment.start;
    if (timestamp < st->latency)
      timestamp = 0;
    else
      timestamp -= st->latency;
  }
  GST_BUFFER_TIMESTAMP (outbuf) = timestamp / st->scale + st->in_segment.start;
535 536
  GST_BUFFER_DURATION (outbuf) =
      gst_util_uint64_scale (bytes_out, GST_SECOND,
537
      st->bytes_per_frame * st->sample_rate);
538
  gst_buffer_set_size (outbuf, bytes_out);
539

540 541 542
  if (tmpbuf)
    gst_buffer_unref (tmpbuf);

543 544 545
  return GST_FLOW_OK;
}

546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563
static GstFlowReturn
gst_scaletempo_submit_input_buffer (GstBaseTransform * trans,
    gboolean is_discont, GstBuffer * input)
{
  GstScaletempo *scaletempo = GST_SCALETEMPO (trans);

  if (scaletempo->in_segment.format == GST_FORMAT_TIME) {
    input =
        gst_audio_buffer_clip (input, &scaletempo->in_segment,
        scaletempo->sample_rate, scaletempo->bytes_per_frame);
    if (!input)
      return GST_FLOW_OK;
  }

  return GST_BASE_TRANSFORM_CLASS (parent_class)->submit_input_buffer (trans,
      is_discont, input);
}

564 565 566
static gboolean
gst_scaletempo_transform_size (GstBaseTransform * trans,
    GstPadDirection direction,
567
    GstCaps * caps, gsize size, GstCaps * othercaps, gsize * othersize)
568 569 570 571 572
{
  if (direction == GST_PAD_SINK) {
    GstScaletempo *scaletempo = GST_SCALETEMPO (trans);
    gint bytes_to_out;

573
    if (scaletempo->reinit_buffers)
574 575
      reinit_buffers (scaletempo);

576 577
    bytes_to_out = size + scaletempo->bytes_queued - scaletempo->bytes_to_slide;
    if (bytes_to_out < (gint) scaletempo->bytes_queue_max) {
578 579 580
      *othersize = 0;
    } else {
      /* while (total_buffered - stride_length * n >= queue_max) n++ */
581 582 583 584
      *othersize = scaletempo->bytes_stride * ((guint) (
              (bytes_to_out - scaletempo->bytes_queue_max +
                  /* rounding protection */ scaletempo->bytes_per_frame)
              / scaletempo->bytes_stride_scaled) + 1);
585 586 587 588 589 590 591 592 593 594
    }

    return TRUE;
  }
  return FALSE;
}

static gboolean
gst_scaletempo_sink_event (GstBaseTransform * trans, GstEvent * event)
{
595 596
  GstScaletempo *scaletempo = GST_SCALETEMPO (trans);

597 598
  if (GST_EVENT_TYPE (event) == GST_EVENT_SEGMENT) {
    GstSegment segment;
599

600
    gst_event_copy_segment (event, &segment);
601

602 603 604 605
    if (segment.format != GST_FORMAT_TIME
        || scaletempo->scale != ABS (segment.rate)
        || ! !scaletempo->reverse != ! !(segment.rate < 0.0)) {
      if (segment.format != GST_FORMAT_TIME || ABS (segment.rate - 1.0) < 1e-10) {
606
        scaletempo->scale = 1.0;
607 608 609 610 611
        gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (scaletempo),
            TRUE);
      } else {
        gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (scaletempo),
            FALSE);
612 613
        scaletempo->scale = ABS (segment.rate);
        scaletempo->reverse = segment.rate < 0.0;
614 615 616 617 618 619 620 621 622
        scaletempo->bytes_stride_scaled =
            scaletempo->bytes_stride * scaletempo->scale;
        scaletempo->frames_stride_scaled =
            scaletempo->bytes_stride_scaled / scaletempo->bytes_per_frame;
        GST_DEBUG ("%.3f scale, %.3f stride_in, %i stride_out",
            scaletempo->scale, scaletempo->frames_stride_scaled,
            (gint) (scaletempo->bytes_stride / scaletempo->bytes_per_frame));

        scaletempo->bytes_to_slide = 0;
623 624 625
      }
    }

626 627 628 629 630 631 632
    scaletempo->in_segment = segment;
    scaletempo->out_segment = segment;

    if (scaletempo->scale != 1.0 || scaletempo->reverse) {
      guint32 seqnum;

      segment.applied_rate = segment.rate;
633
      segment.rate = 1.0;
634

635
      if (segment.stop != -1) {
636 637
        segment.stop =
            (segment.stop - segment.start) / ABS (segment.applied_rate) +
638
            segment.start;
639 640
      }

641 642 643 644 645
      scaletempo->out_segment = segment;

      seqnum = gst_event_get_seqnum (event);
      gst_event_unref (event);

646
      event = gst_event_new_segment (&segment);
647 648 649
      gst_event_set_seqnum (event, seqnum);

      return gst_pad_push_event (GST_BASE_TRANSFORM_SRC_PAD (trans), event);
650
    }
651 652 653
  } else if (GST_EVENT_TYPE (event) == GST_EVENT_FLUSH_STOP) {
    gst_segment_init (&scaletempo->in_segment, GST_FORMAT_UNDEFINED);
    gst_segment_init (&scaletempo->out_segment, GST_FORMAT_UNDEFINED);
654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
  } else if (GST_EVENT_TYPE (event) == GST_EVENT_GAP) {
    if (scaletempo->scale != 1.0) {
      GstClockTime gap_ts, gap_duration;
      gst_event_parse_gap (event, &gap_ts, &gap_duration);
      if (scaletempo->reverse) {
        gap_ts = scaletempo->in_segment.stop - gap_ts;
      } else {
        gap_ts = gap_ts - scaletempo->in_segment.start;
      }
      gap_ts = gap_ts / scaletempo->scale + scaletempo->in_segment.start;
      if (GST_CLOCK_TIME_IS_VALID (gap_duration)) {
        gap_duration = gap_duration / ABS (scaletempo->scale);
      }
      gst_event_unref (event);
      event = gst_event_new_gap (gap_ts, gap_duration);
    }
670
  }
671

672
  return GST_BASE_TRANSFORM_CLASS (parent_class)->sink_event (trans, event);
673 674 675 676 677 678 679 680 681
}

static gboolean
gst_scaletempo_set_caps (GstBaseTransform * trans,
    GstCaps * incaps, GstCaps * outcaps)
{
  GstScaletempo *scaletempo = GST_SCALETEMPO (trans);

  gint width, bps, nch, rate;
682
  GstAudioInfo info;
683
  GstAudioFormat format;
684 685

  if (!gst_audio_info_from_caps (&info, incaps))
686 687
    return FALSE;

688 689 690
  nch = GST_AUDIO_INFO_CHANNELS (&info);
  rate = GST_AUDIO_INFO_RATE (&info);
  width = GST_AUDIO_INFO_WIDTH (&info);
691
  format = GST_AUDIO_INFO_FORMAT (&info);
692

693 694 695
  bps = width / 8;

  GST_DEBUG ("caps: %" GST_PTR_FORMAT ", %d bps", incaps, bps);
696

697 698
  if (rate != scaletempo->sample_rate
      || nch != scaletempo->samples_per_frame
699
      || bps != scaletempo->bytes_per_sample || format != scaletempo->format) {
700 701 702 703
    scaletempo->sample_rate = rate;
    scaletempo->samples_per_frame = nch;
    scaletempo->bytes_per_sample = bps;
    scaletempo->bytes_per_frame = nch * bps;
704
    scaletempo->format = format;
705
    scaletempo->reinit_buffers = TRUE;
706 707 708 709 710
  }

  return TRUE;
}

711 712 713 714 715 716 717
static gboolean
gst_scaletempo_start (GstBaseTransform * trans)
{
  GstScaletempo *scaletempo = GST_SCALETEMPO (trans);

  gst_segment_init (&scaletempo->in_segment, GST_FORMAT_UNDEFINED);
  gst_segment_init (&scaletempo->out_segment, GST_FORMAT_UNDEFINED);
718
  scaletempo->reinit_buffers = TRUE;
719 720 721 722

  return TRUE;
}

723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
static gboolean
gst_scaletempo_stop (GstBaseTransform * trans)
{
  GstScaletempo *scaletempo = GST_SCALETEMPO (trans);

  g_free (scaletempo->buf_queue);
  scaletempo->buf_queue = NULL;
  g_free (scaletempo->buf_overlap);
  scaletempo->buf_overlap = NULL;
  g_free (scaletempo->table_blend);
  scaletempo->table_blend = NULL;
  g_free (scaletempo->buf_pre_corr);
  scaletempo->buf_pre_corr = NULL;
  g_free (scaletempo->table_window);
  scaletempo->table_window = NULL;
738
  scaletempo->reinit_buffers = TRUE;
739 740 741 742

  return TRUE;
}

743 744 745 746 747 748 749 750
static gboolean
gst_scaletempo_query (GstBaseTransform * trans, GstPadDirection direction,
    GstQuery * query)
{
  GstScaletempo *scaletempo = GST_SCALETEMPO (trans);

  if (direction == GST_PAD_SRC) {
    switch (GST_QUERY_TYPE (query)) {
751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
      case GST_QUERY_SEGMENT:
      {
        GstFormat format;
        gint64 start, stop;

        format = scaletempo->out_segment.format;

        start =
            gst_segment_to_stream_time (&scaletempo->out_segment, format,
            scaletempo->out_segment.start);
        if ((stop = scaletempo->out_segment.stop) == -1)
          stop = scaletempo->out_segment.duration;
        else
          stop =
              gst_segment_to_stream_time (&scaletempo->out_segment, format,
              stop);

        gst_query_set_segment (query, scaletempo->out_segment.rate, format,
            start, stop);
        return TRUE;
      }
772 773 774 775
      case GST_QUERY_LATENCY:{
        GstPad *peer;

        if ((peer = gst_pad_get_peer (GST_BASE_TRANSFORM_SINK_PAD (trans)))) {
Sanjay NM's avatar
Sanjay NM committed
776
          if ((gst_pad_query (peer, query))) {
777 778 779 780 781 782 783 784 785 786 787
            GstClockTime min, max;
            gboolean live;

            gst_query_parse_latency (query, &live, &min, &max);

            GST_DEBUG_OBJECT (scaletempo, "Peer latency: min %"
                GST_TIME_FORMAT " max %" GST_TIME_FORMAT,
                GST_TIME_ARGS (min), GST_TIME_ARGS (max));

            /* add our own latency */
            GST_DEBUG_OBJECT (scaletempo, "Our latency: %" GST_TIME_FORMAT,
788 789
                GST_TIME_ARGS (scaletempo->latency));
            min += scaletempo->latency;
790
            if (max != GST_CLOCK_TIME_NONE)
791
              max += scaletempo->latency;
792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812

            GST_DEBUG_OBJECT (scaletempo, "Calculated total latency : min %"
                GST_TIME_FORMAT " max %" GST_TIME_FORMAT,
                GST_TIME_ARGS (min), GST_TIME_ARGS (max));
            gst_query_set_latency (query, live, min, max);
          }
          gst_object_unref (peer);
        }

        return TRUE;
      }
      default:{
        return GST_BASE_TRANSFORM_CLASS (parent_class)->query (trans, direction,
            query);
      }
    }
  } else {
    return GST_BASE_TRANSFORM_CLASS (parent_class)->query (trans, direction,
        query);
  }
}
813 814 815 816 817 818 819 820 821 822

/* GObject vmethod implementations */
static void
gst_scaletempo_get_property (GObject * object,
    guint prop_id, GValue * value, GParamSpec * pspec)
{
  GstScaletempo *scaletempo = GST_SCALETEMPO (object);

  switch (prop_id) {
    case PROP_RATE:
823
      g_value_set_double (value, scaletempo->scale);
824 825
      break;
    case PROP_STRIDE:
826
      g_value_set_uint (value, scaletempo->ms_stride);
827 828
      break;
    case PROP_OVERLAP:
829
      g_value_set_double (value, scaletempo->percent_overlap);
830 831
      break;
    case PROP_SEARCH:
832
      g_value_set_uint (value, scaletempo->ms_search);
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

static void
gst_scaletempo_set_property (GObject * object,
    guint prop_id, const GValue * value, GParamSpec * pspec)
{
  GstScaletempo *scaletempo = GST_SCALETEMPO (object);

  switch (prop_id) {
    case PROP_STRIDE:{
      guint new_value = g_value_get_uint (value);
849 850 851
      if (scaletempo->ms_stride != new_value) {
        scaletempo->ms_stride = new_value;
        scaletempo->reinit_buffers = TRUE;
852 853 854 855 856
      }
      break;
    }
    case PROP_OVERLAP:{
      gdouble new_value = g_value_get_double (value);
857 858 859
      if (scaletempo->percent_overlap != new_value) {
        scaletempo->percent_overlap = new_value;
        scaletempo->reinit_buffers = TRUE;
860 861 862 863 864
      }
      break;
    }
    case PROP_SEARCH:{
      guint new_value = g_value_get_uint (value);
865 866 867
      if (scaletempo->ms_search != new_value) {
        scaletempo->ms_search = new_value;
        scaletempo->reinit_buffers = TRUE;
868 869 870 871 872 873 874 875 876 877 878 879 880
      }
      break;
    }
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

static void
gst_scaletempo_class_init (GstScaletempoClass * klass)
{
  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
881
  GstElementClass *gstelement_class = GST_ELEMENT_CLASS (klass);
882 883 884 885 886 887 888
  GstBaseTransformClass *basetransform_class = GST_BASE_TRANSFORM_CLASS (klass);

  gobject_class->get_property = GST_DEBUG_FUNCPTR (gst_scaletempo_get_property);
  gobject_class->set_property = GST_DEBUG_FUNCPTR (gst_scaletempo_set_property);

  g_object_class_install_property (gobject_class, PROP_RATE,
      g_param_spec_double ("rate", "Playback Rate", "Current playback rate",
889
          G_MININT, G_MAXINT, 1.0, G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));
890 891 892 893

  g_object_class_install_property (gobject_class, PROP_STRIDE,
      g_param_spec_uint ("stride", "Stride Length",
          "Length in milliseconds to output each stride", 1, 5000, 30,
894
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
895 896 897

  g_object_class_install_property (gobject_class, PROP_OVERLAP,
      g_param_spec_double ("overlap", "Overlap Length",
898 899
          "Percentage of stride to overlap", 0, 1, .2,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
900 901 902 903

  g_object_class_install_property (gobject_class, PROP_SEARCH,
      g_param_spec_uint ("search", "Search Length",
          "Length in milliseconds to search for best overlap position", 0, 500,
904
          14, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
905

906 907
  gst_element_class_add_static_pad_template (gstelement_class, &src_template);
  gst_element_class_add_static_pad_template (gstelement_class, &sink_template);
908
  gst_element_class_set_static_metadata (gstelement_class, "Scaletempo",
909
      "Filter/Effect/Rate/Audio",
910 911 912 913 914
      "Sync audio tempo with playback rate",
      "Rov Juvano <rovjuvano@users.sourceforge.net>");

  basetransform_class->sink_event =
      GST_DEBUG_FUNCPTR (gst_scaletempo_sink_event);
915 916 917 918
  basetransform_class->set_caps = GST_DEBUG_FUNCPTR (gst_scaletempo_set_caps);
  basetransform_class->transform_size =
      GST_DEBUG_FUNCPTR (gst_scaletempo_transform_size);
  basetransform_class->transform = GST_DEBUG_FUNCPTR (gst_scaletempo_transform);
919
  basetransform_class->query = GST_DEBUG_FUNCPTR (gst_scaletempo_query);
920
  basetransform_class->start = GST_DEBUG_FUNCPTR (gst_scaletempo_start);
921
  basetransform_class->stop = GST_DEBUG_FUNCPTR (gst_scaletempo_stop);
922 923
  basetransform_class->submit_input_buffer =
      GST_DEBUG_FUNCPTR (gst_scaletempo_submit_input_buffer);
924 925 926
}

static void
927
gst_scaletempo_init (GstScaletempo * scaletempo)
928 929
{
  /* defaults */
930 931 932
  scaletempo->ms_stride = 30;
  scaletempo->percent_overlap = .2;
  scaletempo->ms_search = 14;
933 934

  /* uninitialized */
935 936 937 938 939 940
  scaletempo->scale = 0;
  scaletempo->sample_rate = 0;
  scaletempo->frames_stride_error = 0;
  scaletempo->bytes_stride = 0;
  scaletempo->bytes_queued = 0;
  scaletempo->bytes_to_slide = 0;
941 942
  gst_segment_init (&scaletempo->in_segment, GST_FORMAT_UNDEFINED);
  gst_segment_init (&scaletempo->out_segment, GST_FORMAT_UNDEFINED);
943
}