% Publications by W. Woeber and co-authors on explainable / unsupervised
% machine learning for biological image data.
%
% Layout: one field per line, values in braces. Citation keys are kept exactly
% as they were (including the trailing dot) so existing citations keep working.
% The non-standard fields `subject` and `numpages` are ignored by the standard
% styles and are kept only as metadata.

% Applied Sciences article. The record previously stored the year in `volume`
% and the string "12(6), 3158" in `number`; these are split into year, volume,
% issue and article number (the article number goes into `pages`).
% NOTE(review): the former `pages` value 26 is presumably the page count; it is
% preserved in `numpages` -- confirm against the publisher record.
% NOTE(review): this author appears as "Dias Tibihika, Papius" here and as
% "Tibihika, Papius D." in the entries below -- confirm the preferred form.
@article{WoeberMehnenCurtoetal.,
  author   = {W{\"o}ber, Wilfried and Mehnen, Lars and Curto, Manuel and Dias Tibihika, Papius and Tesfaye, Genanaw and Meimberg, Harald},
  title    = {Investigating Shape Variation Using Generalized {Procrustes} Analysis and Machine Learning},
  journal  = {Applied Sciences},
  year     = {2022},
  volume   = {12},
  number   = {6},
  pages    = {3158},
  numpages = {26},
  abstract = {The biological investigation of a population's shape diversity
    using digital images is typically reliant on geometrical morphometrics,
    which is an approach based on user-defined landmarks. In contrast to this
    traditional approach, the progress in deep learning has led to numerous
    applications ranging from specimen identification to object detection.
    Typically, these models tend to become black boxes, which limits the usage
    of recent deep learning models for biological applications. However, the
    progress in explainable artificial intelligence tries to overcome this
    limitation. This study compares the explanatory power of unsupervised
    machine learning models to traditional landmark-based approaches for
    population structure investigation. We apply convolutional autoencoders as
    well as Gaussian process latent variable models to two Nile tilapia
    datasets to investigate the latent structure using consensus clustering.
    The explanatory factors of the machine learning models were extracted and
    compared to generalized Procrustes analysis. Hypotheses based on the Bayes
    factor are formulated to test the unambiguity of population diversity
    unveiled by the machine learning models. The findings show that it is
    possible to obtain biologically meaningful results relying on unsupervised
    machine learning. Furthermore we show that the machine learning models
    unveil latent structures close to the true population clusters. We found
    that 80\% of the true population clusters relying on the convolutional
    autoencoder are significantly different to the remaining clusters.
    Similarly, 60\% of the true population clusters relying on the Gaussian
    process latent variable model are significantly different. We conclude
    that the machine learning models outperform generalized Procrustes
    analysis, where 16\% of the population cluster was found to be
    significantly different. However, the applied machine learning models
    still have limited biological explainability. We recommend further
    in-depth investigations to unveil the explanatory factors in the used
    model.},
  keywords = {generalized procrustes analysis, machine learning, convolutional autoencoder, Gaussian process latent variable models},
  subject  = {generalized procrustes analysis},
  language = {en},
}

% PLOS ONE article.
% NOTE(review): the required `year` field is missing -- add it from the
% publisher record.
% NOTE(review): the spellings "Ethopian" (title) and "Meulenboek" (author) are
% kept verbatim from the original record -- confirm both against the publisher
% record.
@article{WoeberCurtoTibihikaetal.,
  author   = {W{\"o}ber, Wilfried and Curto, Manuel and Tibihika, Papius D. and Meulenboek, Paul and Alemayehu, Esayas and Mehnen, Lars and Meimberg, Harald and Sykacek, Peter},
  title    = {Identifying geographically differentiated features of {Ethopian} {Nile} tilapia ({Oreochromis} niloticus) morphology with machine learning},
  journal  = {PLOS ONE},
  volume   = {16},
  number   = {4},
  subject  = {Machine Learning},
  language = {en},
}

% Conference contribution.
% NOTE(review): the required `year` field is missing, and `series` repeats
% `booktitle`, so "3" is likely a volume rather than a number within a series
% -- confirm against the publisher record before changing these fields.
@inproceedings{WoeberTibihikaOlaverriMonrealetal.,
  author    = {W{\"o}ber, Wilfried and Tibihika, Papius D. and Olaverri-Monreal, Cristina and Mehnen, Lars and Sykacek, Peter and Meimberg, Harald},
  title     = {Comparison of Unsupervised Learning Methods for Natural Image Processing},
  booktitle = {Biodiversity Information Science and Standards},
  series    = {Biodiversity Information Science and Standards},
  number    = {3},
  subject   = {Machine Learning},
  language  = {en},
}

% Plants article. The record previously stored the year in `volume` and the
% string "10(12):2674" in `number`; these are split into year, volume, issue
% and article number (the article number goes into `pages`).
% NOTE(review): the former `pages` value 20 is presumably the page count; it is
% preserved in `numpages` -- confirm against the publisher record.
@article{WoeberMehnenSykaceketal.,
  author   = {W{\"o}ber, Wilfried and Mehnen, Lars and Sykacek, Peter and Meimberg, Harald},
  title    = {Investigating Explanatory Factors of Machine Learning Models for Plant Classification},
  journal  = {Plants},
  year     = {2021},
  volume   = {10},
  number   = {12},
  pages    = {2674},
  numpages = {20},
  abstract = {Recent progress in machine learning and deep learning has enabled
    the implementation of plant and crop detection using systematic inspection
    of the leaf shapes and other morphological characters for identification
    systems for precision farming. However, the models used for this approach
    tend to become black-box models, in the sense that it is difficult to
    trace characters that are the base for the classification. The
    interpretability is therefore limited and the explanatory factors may not
    be based on reasonable visible characters. We investigate the explanatory
    factors of recent machine learning and deep learning models for plant
    classification tasks. Based on a Daucus carota and a Beta vulgaris image
    data set, we implement plant classification models and compare those
    models by their predictive performance as well as explainability. For
    comparison we implemented a feed forward convolutional neuronal network as
    a default model. To evaluate the performance, we trained an unsupervised
    Bayesian Gaussian process latent variable model as well as a convolutional
    autoencoder for feature extraction and rely on a support vector machine
    for classification. The explanatory factors of all models were extracted
    and analyzed. The experiments show, that feed forward convolutional
    neuronal networks (98.24\% and 96.10\% mean accuracy) outperforms the
    Bayesian Gaussian process latent variable pipeline (92.08\% and 94.31\%
    mean accuracy) as well as the convolutional autoenceoder pipeline (92.38\%
    and 93.28\% mean accuracy) based approaches in terms of classification
    accuracy, even though not significant for Beta vulgaris images.
    Additionally, we found that the neuronal network used biological
    uninterpretable image regions for the plant classification task. In
    contrast to that, the unsupervised learning models rely on explainable
    visual characters. We conclude that supervised convolutional neuronal
    networks must be used carefully to ensure biological interpretability. We
    recommend unsupervised machine learning, careful feature investigation,
    and statistical feature analysis for biological applications.},
  keywords = {deep learning, machine learning, plant leaf morphometrics, explainable AI},
  subject  = {deep learning},
  language = {en},
}