Hung, Yun-Ning; Wichern, Gordon; Le Roux, Jonathan Transcription Is All You Need: Learning To Separate Musical Mixtures With Score As Supervision Proceedings Article In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 46–50, 2021, (ISSN: 2379-190X). Abstract | Links | BibTeX | Tags: audio source separation, Conferences, Instruments, music, music transcription, Particle separators, Source separation, Time-frequency analysis, Training, weakly-labeled data, weakly-supervised separation Wu, Chih-Wei; Dittmar, Christian; Southall, Carl; Vogl, Richard; Widmer, Gerhard; Hockman, Jason A.; Müller, Meinard; Lerch, Alexander A Review of Automatic Drum Transcription Journal Article In: IEEE/ACM Transactions on Audio, Speech, and Language Processing, vol. 26, no. 9, pp. 1457–1483, 2018, ISSN: 2329-9290. Abstract | Links | BibTeX | Tags: Automatic Music Transcription, deep learning, Instruments, Machine Learning, Matrix Factorization, Rhythm, Spectrogram, Speech processing, Task analysis, Transient analysis
2021
@inproceedings{hung_transcription_2021,
  title     = {Transcription Is All You Need: Learning To Separate Musical Mixtures With Score As Supervision},
  % Names in unambiguous "Last, First" form; "Le Roux" is a compound surname that
  % First-von-Last parsing would split incorrectly (Last="Roux", First="Jonathan Le").
  author    = {Hung, Yun-Ning and Wichern, Gordon and Le Roux, Jonathan},
  url       = {https://ieeexplore.ieee.org/abstract/document/9413358/authors#authors},
  doi       = {10.1109/ICASSP39728.2021.9413358},
  % ISSN moved from the free-form note into the dedicated issn field.
  issn      = {2379-190X},
  year      = {2021},
  date      = {2021-06-01},
  urldate   = {2024-02-08},
  % Acronyms braced so styles that recase booktitle keep their capitalisation.
  booktitle = {{ICASSP} 2021 - 2021 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  % Page range uses the conventional double hyphen, not a hard-coded \textendash.
  pages     = {46--50},
  abstract  = {Most music source separation systems require large collections of isolated sources for training, which can be difficult to obtain. In this work, we use musical scores, which are comparatively easy to obtain, as a weak label for training a source separation system. In contrast with previous score-informed separation approaches, our system does not require isolated sources, and score is used only as a training target, not required for inference. Our model consists of a separator that outputs a time-frequency mask for each instrument, and a transcriptor that acts as a critic, providing both temporal and frequency supervision to guide the learning of the separator. A harmonic mask constraint is introduced as another way of leveraging score information during training, and we propose two novel adversarial losses for additional fine-tuning of both the transcriptor and the separator. Results demonstrate that using score information outper-forms temporal weak-labels, and adversarial structures lead to further improvements in both separation and transcription performance.},
  keywords  = {audio source separation, Conferences, Instruments, music, music transcription, Particle separators, Source separation, Time-frequency analysis, Training, weakly-labeled data, weakly-supervised separation},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2018
@article{wu_review_2018,
  title     = {A Review of Automatic Drum Transcription},
  % Names in unambiguous "Last, First" form. "Muller" restored to M{\"u}ller
  % (Meinard Müller) using the BibTeX special-character form so sorting stays
  % correct under classic BibTeX; initial "A" given its period.
  author    = {Wu, Chih-Wei and Dittmar, Christian and Southall, Carl and Vogl, Richard and Widmer, Gerhard and Hockman, Jason A. and M{\"u}ller, Meinard and Lerch, Alexander},
  url       = {http://www.musicinformatics.gatech.edu/wp-content_nondefault/uploads/2018/05/Wu-et-al.-2018-A-review-of-automatic-drum-transcription.pdf},
  doi       = {10.1109/TASLP.2018.2830113},
  issn      = {2329-9290},
  year      = {2018},
  date      = {2018-01-01},
  % Full journal title; IEEE/ACM acronyms braced against recasing styles.
  journal   = {{IEEE}/{ACM} Transactions on Audio, Speech, and Language Processing},
  volume    = {26},
  number    = {9},
  pages     = {1457--1483},
  abstract  = {In Western popular music, drums and percussion are an important means to emphasize and shape the rhythm, often defining the musical style. If computers were able to analyze the drum part in recorded music, it would enable a variety of rhythm-related music processing tasks. Especially the detection and classification of drum sound events by computational methods is considered to be an important and challenging research problem in the broader field of Music Information Retrieval. Over the last two decades, several authors have attempted to tackle this problem under the umbrella term Automatic Drum Transcription (ADT). This paper presents a comprehensive review of ADT research, including a thorough discussion of the task-specific challenges, categorization of existing techniques, and evaluation of several state-of-the-art systems. To provide more insights on the practice of ADT systems, we focus on two families of ADT techniques, namely methods based on Non-negative Matrix Factorization and Recurrent Neural Networks. We explain the methods' technical details and drum-specific variations and evaluate these approaches on publicly available datasets with a consistent experimental setup. Finally, the open issues and under-explored areas in ADT research are identified and discussed, providing future directions in this field.},
  keywords  = {Automatic Music Transcription, deep learning, Instruments, Machine Learning, Matrix Factorization, Rhythm, Spectrogram, Speech processing, Task analysis, Transient analysis},
  pubstate  = {published},
  tppubtype = {article}
}
publications
Transcription Is All You Need: Learning To Separate Musical Mixtures With Score As Supervision Proceedings Article In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 46–50, 2021, (ISSN: 2379-190X). A Review of Automatic Drum Transcription Journal Article In: IEEE/ACM Transactions on Audio, Speech, and Language Processing, vol. 26, no. 9, pp. 1457–1483, 2018, ISSN: 2329-9290.
2021
2018