% Perez2019Exploiting: AAAI-19 conference paper on using synthetic data
% (convex combinations of patterns) as unlabeled input to semi-supervised SVMs.
% Notes for maintainers:
%   - The event location is stored in the biblatex field "venue"; classic
%     BibTeX styles ignore that field rather than misprinting the location
%     as a sponsoring organization.
%   - "internal-note" is an ignored field used only as an in-entry annotation.
@inproceedings{Perez2019Exploiting,
  author        = {P{\'e}rez-Ortiz, Mar{\'i}a and Tino, Peter and Mantiuk, Rafal and Herv{\'a}s-Mart{\'i}nez, C{\'e}sar},
  title         = {Exploiting synthetically generated data with semi-supervised learning for small and imbalanced datasets},
  booktitle     = {Proceedings of the Thirty-Third {AAAI} (Association for the Advancement of Artificial Intelligence) Conference on Artificial Intelligence ({AAAI}'19)},
  venue         = {Honolulu, Hawaii, USA},
  pages         = {4715--4722},
  month         = feb,
  year          = {2019},
  doi           = {10.1609/aaai.v33i01.33014715},
  url           = {https://doi.org/10.1609/aaai.v33i01.33014715},
  isbn          = {978-1-57735-809-1},
  issn          = {2159-5399},
  abstract      = {Data augmentation is rapidly gaining attention in machine learning. Synthetic data can be generated by simple transformations or through the data distribution. In the latter case, the main challenge is to estimate the label associated to new synthetic patterns. This paper studies the effect of generating synthetic data by convex combination of patterns and the use of these as unsupervised information in a semi-supervised learning framework with support vector machines, avoiding thus the need to label synthetic examples. We perform experiments on a total of 53 binary classification datasets. Our results show that this type of data over-sampling supports the well-known cluster assumption in semi-supervised learning, showing outstanding results for small high-dimensional datasets and imbalanced learning problems.},
  internal-note = {Source record gave the date as 27th February; the day was dropped when normalising the month field. TODO: verify the exact date against the proceedings front matter.},
}