Publications
2024
Fajardo, S.; Argüello, P.
Sociopolitical evolution, population clustering, and technology among early sedentary communities in northeastern Andes, Colombia Journal Article
In: Journal of Anthropological Archaeology, vol. 76, no. December, 2024.
@article{fajardo_sociopolitical_2024,
  title = {Sociopolitical evolution, population clustering, and technology among early sedentary communities in northeastern Andes, Colombia},
  author = {S. Fajardo and P. Argüello},
  url = {https://www.sciencedirect.com/science/article/pii/S027841652400059X},
  doi = {10.1016/j.jaa.2024.101628},
  year = {2024},
  date = {2024-12-01},
  urldate = {2024-02-01},
  journal = {Journal of Anthropological Archaeology},
  volume = {76},
  month = dec,
  abstract = {Several prehistoric societies did not develop robust hierarchical systems even after centuries of population clustering and advancements in constructing structural earthworks and crafting materials like ceramics and alloys. What social dynamics characterized these non-state complex societies and how did they influence technological production? Here we analyze population clustering and hierarchical structures through two regional settlement studies in the northeastern Andes of Colombia. Employing both a traditional Inverse Distance Weighting interpolation (IDW) approach and an unsupervised machine learning method, Density-Based Spatial Clustering of Applications with Noise (DBSCAN), we identify settlement clusters within the pre-Columbian sedentary settlement sequence. Analyzing rank-size distribution and A-coefficients based on identified clusters, we discern differences in hierarchical systems between the two regions. Results reveal that these early sedentary communities did not establish strong settlement hierarchies over centuries of clustering. Our findings suggest that the lack of robust hierarchical systems in Muisca societies may be attributed to slow and non-linear settlement clustering and limited site specialization. We compare this with evidence for technologies in the Muisca area, arguing that the emergence of strong and permanent settlement clustering is a threshold for early communities before developing information-storage technologies, such as standardized representations for counting or writing.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Dieles, T. J.; Mattsson, C. E. S.; Takes, F. W.
Identifying successful football teams in the European player transfer network Journal Article
In: Applied Network Science, vol. 9, iss. 65, 2024.
@article{Dieles2024,
  title = {Identifying successful football teams in the European player transfer network},
  author = {T. J. Dieles and C. E. S. Mattsson and F. W. Takes},
  doi = {10.1007/s41109-024-00675-7},
  year = {2024},
  date = {2024-10-18},
  journal = {Applied Network Science},
  volume = {9},
  issue = {65},
  abstract = {This paper considers the European transfer market for professional football players as a network to study the relation between a team’s position in this network and performance in its domestic league. Our analysis is centered on eight top European leagues. The market in each season is represented as a weighted directed network capturing the transfers of players to or from the teams in these leagues, and we also consider the cumulative network over the past 28 years. We find that the overall structure of this transfer market network has properties commonly observed in real-world networks, such as a skewed degree distribution, high clustering, and small-world characteristics. To assess football teams we first construct a measure of within-league performance that is comparable across leagues. Regression analysis is used to relate league performance with both the network position and level of engagement of the team in the transfer market, under two complimentary setups. Network position variables include, e.g., betweenness centrality, closeness centrality and node clustering coefficient, whereas market engagement variables capture a team’s activity in the transfer market, e.g., total number of player transfers and total paid for players. For the season snapshots, the number of transfers correspond to weighted in- and out-degree. Our analysis first corroborates several recent findings relating aspects of market engagement with teams’ league performance. A higher number of incoming transfers indicates worse performance and better resourced teams perform better. Then, and across specifications, we find that network position variables remain salient even when engagement variables are already considered. This substantiates the notion in the existing literature that a high degree corresponds to better team performance and suggests that network aspects of trading strategy may affect a team’s success in their respective domestic league (or vice versa). 
In this sense, the approach and findings presented in this paper may in the future guide team’s player acquisition policies.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
de Groot, A.; Fletcher, G.; Manen, G.; Saxena, A.; Serebrenik, A.; Taylor, L. E. M.
A canon is a blunt force instrument: data science, canons, and generative frictions Book Section
In: Jarke, J.; Bates, J. (Eds.): Dialogues in Data Power: Shifting Response-abilities in a Datafied World, Bristol University Press, 2024, ISBN: 1-5292-3830-7 978-1-5292-3830-3.
@incollection{de_groot_canon_2024,
  title = {A canon is a blunt force instrument: data science, canons, and generative frictions},
  author = {A. de Groot and G. Fletcher and G. Manen and A. Saxena and A. Serebrenik and L. E. M. Taylor},
  editor = {J. Jarke and J. Bates},
  isbn = {1-5292-3830-7 978-1-5292-3830-3},
  year = {2024},
  date = {2024-09-01},
  urldate = {2024-09-01},
  booktitle = {Dialogues in Data Power: Shifting Response-abilities in a Datafied World},
  publisher = {Bristol University Press},
  abstract = {Spatially close, though worlds apart. The contributors to this commentary - ‘we’; ‘us’ - conduct research and teach on data and technology-related issues at three Dutch universities. Some of us work at the same departments, and teach in the same programmes. We bump into one another during our daily commutes, and replenish our energy levels with the help of the same coffee machines after our lectures. We talk, and sometimes even discuss our research with one another. But do we also understand each other? What would that even mean? When we talk about ‘data’, do we talk about the same thing? Is that even necessary? What does ‘science’ for each of us entail? What does this mean for the education we collectively provide? What is the direction - scientifically, ethically, politically - the bachelor programmes we are all involved in head toward? National science policy in the Netherlands, as well as at the level of universities themselves, tends to prioritise in various ways computer and computational sciences over the social sciences and humanities (Taylor et al., 2023). We feel that the oppositions that are produced and reinforced through such policies are both false and unproductive, and this collective uneasiness motivated some of us to initiate a conversation about what it would mean to think and work together. How do our academic lives ‘hang together’ (Mol, 2014) beyond our encounters near coffee machines in the hallways, and our names on the timetables the students would find when logging in to their university pages? When asking these and many other questions, we realised that we lacked the language, a common vocabulary, to not only answer the questions with which we started, but also ask them. Not only did many of key concepts used in our research and education - data, algorithm, ethics, ontology, law - mean and do different things for all of us, but concepts indispensable to some - e.g. 
justice -, would be nonexistent in the disciplinary universe of others. We therefore needed to take a step back and reflect on how to have a conversation without sharing a common language. Our provisional solution was to take what we dubbed as ‘canonical objects’ as the focal points in our discussions. We borrow the notion of the canon from literary criticism, where it is used to mean a body of literature that over time comes to be taught as defining a particular culture (Bloom, 1994). For this reason, the canon has also been the focus of decolonial critics, who argue that we should critically interrogate the hegemonic discourses of Western culture (Spivak, 1990). Based on this notion, we started to analyse concepts which each of us consider conceptually stable enough in our different disciplines that they might be taught on a bachelor’s-level course. We, in other words, took our disciplinary backgrounds and educational responsibilities as conversational starting points. Our roughly defined meta-question was how our disciplinary backgrounds produced different conceptions of the same terms, how these differences could be generative or problematic, and how our disciplines become invested in a particular interpretation? What we called canonical objects is also strongly related to how some of us used and understood the notion of boundary objects. A classic definition of boundary objects is that these “have different meanings in different social worlds but their structure is common enough to more than one world to make them recognizable, a means of translation.” (Star & Griesemer, 1989). Boundary objects thus allow different ‘social worlds’ to work together without requiring them to be able to (completely) understand one another. 
If our canonical objects would indeed function like boundary objects, we would have to find out and explicate in what way we would be working together, and how these concepts help us do that. As part of our exploration we also include answers from the generative large language model ChatGPT3.5. This LLM draws on internet content, and therefore offers a generalised and social version of the canon, replicating the most common tropes about our chosen objects of study available online. Furthermore, interesting both conceptually and practically, was and still is, our attempt to create some level of mutual understanding (Gadamer, 2014), potentially with the help of boundary objects whose functioning depends on a lack of mutual understanding. How does our attempt to foster understanding about how we hang together or not, change our collaborations? What does this attempt do to the canonical objects that we used as conversational lubricants? How, to put that differently, does discussion and explicating our disciplinary divisions, change our capacities to e.g. teach together? And subsequently, what are generative but also less and non-generative ways of disagreeing with one another? In this contribution we present the results of the conversation we have had so far about two canonical concepts: ‘AI’ and ‘trust’. Together we made a list of potential canonical concepts (see the Appendix) - so concepts that would be taught in a BSc/BA program/course - and from this list picked two of those with the most multifaceted disciplinary usage to discuss here. Each of us was asked to briefly explain how from their (disciplinary) point of view the concept was understood and taught in our undergraduate programmes. These brief reflections are accompanied by statements about our own positionality (Harding, 1989; Haraway, 1991) in which each of us situates him/herself in the academic tradition in which they were educated. 
We have included these because we presumed that academic disciplines (and what have been termed signature pedagogies, Poole, 2009) were and still are the key factors that influence the types of academic social worlds most of us live in. In the discussion we present some of the themes that emerged in our conversation, and that help to understand how our academic activities hang together - or not.},
  keywords = {},
  pubstate = {published},
  tppubtype = {incollection}
}
Kazmina, Y.; Heemskerk, E. M.; Bokányi, E.; Takes, F. W.
Socio-economic segregation in a population-scale social network Journal Article
In: Social Networks, vol. 78, pp. 279–291, 2024, ISSN: 0378-8733.
@article{kazmina_socio-economic_2024,
  title = {Socio-economic segregation in a population-scale social network},
  author = {Y. Kazmina and E. M. Heemskerk and E. Bokányi and F. W. Takes},
  url = {https://www.sciencedirect.com/science/article/pii/S0378873324000157},
  doi = {10.1016/j.socnet.2024.02.005},
  issn = {0378-8733},
  year = {2024},
  date = {2024-07-01},
  urldate = {2024-05-13},
  journal = {Social Networks},
  volume = {78},
  pages = {279--291},
  abstract = {We propose a social network-aware approach to study socio-economic segregation. The key question that we address is whether patterns of segregation are more pronounced in social networks than in the common spatial neighborhood-focused manifestations of segregation. We, therefore, conduct a population-scale social network analysis to study socio-economic segregation at a comprehensive and highly granular social network level. For this, we utilize social network data from Statistics Netherlands on 17.2 million registered residents of the Netherlands that are connected through around 1.3 billion ties distributed over five distinct tie types. We take income assortativity as a measure of socio-economic segregation, compare a social network and spatial neighborhood approach, and find that the social network structure exhibits two times as much segregation. As such, this work complements the spatial perspective on segregation in both literature and policymaking. While at a widely used unit of spatial aggregation (e.g., the geographical neighborhood), patterns of socio-economic segregation may appear relatively minimal, they may in fact persist in the underlying social network structure. Furthermore, we discover higher social network segregation in larger cities, shedding a different light on the common view of cities as hubs for diverse socio-economic mixing. A population-scale social network perspective hence offers a way to uncover hitherto “hidden” segregation that extends beyond spatial neighborhoods and infiltrates multiple aspects of human life.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Fajardo, S.; Zeekaf, J.; van Andel, T.; Maombe, C.; Nyambe, T.; Mudenda, G.; Aleo, A.; Kayuni, M. N.; Langejans, G. H. J.
Traditional adhesive production systems in Zambia and their archaeological implications Journal Article
In: Journal of Anthropological Archaeology, vol. 74, pp. 101586, 2024, ISSN: 0278-4165.
@article{fajardo_traditional_2024,
  title = {Traditional adhesive production systems in Zambia and their archaeological implications},
  author = {S. Fajardo and J. Zeekaf and T. van Andel and C. Maombe and T. Nyambe and G. Mudenda and A. Aleo and M. N. Kayuni and G. H. J. Langejans},
  url = {https://www.sciencedirect.com/science/article/pii/S0278416524000175},
  doi = {10.1016/j.jaa.2024.101586},
  issn = {0278-4165},
  year = {2024},
  date = {2024-06-01},
  urldate = {2024-05-13},
  journal = {Journal of Anthropological Archaeology},
  volume = {74},
  pages = {101586},
  abstract = {This study explores traditional adhesives using an ethnobiological approach within a multisocioecological context in Zambia. Through semi-structured interviews, videotaped demonstrations, and herbarium collections, we investigated the traditional adhesives people know and use, the flexibility of production processes, resource usage, and knowledge transmission in adhesive production. Our findings reveal flexibility in adhesive production systems. People use a wide range of organic and inorganic materials in their adhesive recipes. Recipes are flexible, demonstrating the ability to adapt to changes and substitute materials as needed to achieve the desired end product. Additionally, our study reveals a variety of redundant pathways for knowledge transmission typically confined within individual population groups. These include same-sex vertical transmission and distinct learning spaces and processes. Also, we identified material procurement zones showing that people are prepared to travel 70 km for ingredients. We use our findings to review the archaeology and we discuss the identification of archaeological adhesives, the functional roles of adhesive materials, adhesive storage, and the sustained human interaction with species from families such as Euphorbiaceae and Apiade. Our findings underscore the diversity and adaptability of traditional adhesive production and suggest that further research on adhesives would reveal similar diversity within the archaeological record.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Fajri, R. M.; Saxena, A.; Pei, Y.; Pechenizkiy, M.
FAL-CUR: Fair Active Learning using Uncertainty and Representativeness on Fair Clustering Journal Article
In: Expert Systems with Applications, vol. 242, pp. 122842, 2024, ISSN: 0957-4174.
@article{fajri_fal-cur_2024,
    author     = {R. M. Fajri and A. Saxena and Y. Pei and M. Pechenizkiy},
    title      = {FAL-CUR: Fair Active Learning using Uncertainty and Representativeness on Fair Clustering},
    journal    = {Expert Systems with Applications},
    volume     = {242},
    pages      = {122842},
    year       = {2024},
    date       = {2024-05-01},
    issn       = {0957-4174},
    doi        = {10.1016/j.eswa.2023.122842},
    url        = {https://www.sciencedirect.com/science/article/pii/S0957417423033444},
    urldate    = {2024-05-13},
    abstract   = {Active Learning (AL) techniques have proven to be highly effective in reducing data labeling costs across a range of machine learning tasks. Nevertheless, one known challenge of these methods is their potential to introduce unfairness towards sensitive attributes. Although recent approaches have focused on enhancing fairness in AL, they tend to reduce the model’s accuracy. To address this issue, we propose a novel strategy, named Fair Active Learning using fair Clustering, Uncertainty, and Representativeness (FAL-CUR), to improve fairness in AL. FAL-CUR tackles the fairness problem in AL by combining fair clustering with an acquisition function that determines which samples to query based on their uncertainty and representativeness scores. We evaluate the performance of FAL-CUR on four real-world datasets, and the results demonstrate that FAL-CUR achieves a 15%–20% improvement in fairness compared to the best state-of-the-art method in terms of equalized odds while maintaining stable accuracy scores. Furthermore, an ablation study highlights the crucial roles of fair clustering in preserving fairness and the acquisition function in stabilizing the accuracy performance.},
    keywords   = {},
    pubstate   = {published},
    tppubtype  = {article},
}
Fajardo, S.; Kozowyk, P. R. B.; Langejans, G. H. J.
Reply to: Problems with two recent Petri net analyses of Neanderthal adhesive technology Journal Article
In: Scientific Reports, vol. 14, no. 1, pp. 10489, 2024, ISSN: 2045-2322, (Publisher: Nature Publishing Group).
@article{fajardo_reply_2024,
    author     = {S. Fajardo and P. R. B. Kozowyk and G. H. J. Langejans},
    title      = {Reply to: Problems with two recent Petri net analyses of Neanderthal adhesive technology},
    journal    = {Scientific Reports},
    volume     = {14},
    number     = {1},
    pages      = {10489},
    year       = {2024},
    date       = {2024-05-01},
    issn       = {2045-2322},
    doi        = {10.1038/s41598-024-60674-7},
    url        = {https://www.nature.com/articles/s41598-024-60674-7},
    urldate    = {2024-05-13},
    note       = {Publisher: Nature Publishing Group},
    keywords   = {},
    pubstate   = {published},
    tppubtype  = {article},
}
Saxena, A.; Fletcher, G.; Pechenizkiy, M.
FairSNA: Algorithmic Fairness in Social Network Analysis Journal Article
In: ACM Computing Surveys, vol. 56, no. 8, pp. 213:1–213:45, 2024, ISSN: 0360-0300.
@article{saxena_fairsna_2024,
  title = {FairSNA: Algorithmic Fairness in Social Network Analysis},
  author = {A. Saxena and G. Fletcher and M. Pechenizkiy},
  url = {https://dl.acm.org/doi/10.1145/3653711},
  doi = {10.1145/3653711},
  issn = {0360-0300},
  year = {2024},
  date = {2024-04-01},
  urldate = {2024-04-01},
  journal = {ACM Computing Surveys},
  volume = {56},
  number = {8},
  pages = {213:1--213:45},
  abstract = {In recent years, designing fairness-aware methods has received much attention in various domains, including machine learning, natural language processing, and information retrieval. However, in social network analysis (SNA), designing fairness-aware methods for various research problems by considering structural bias and inequalities of large-scale social networks has not received much attention. In this work, we highlight how the structural bias of social networks impacts the fairness of different SNA methods. We further discuss fairness aspects that should be considered while proposing network structure-based solutions for different SNA problems, such as link prediction, influence maximization, centrality ranking, and community detection. This survey-cum-vision clearly highlights that very few works have considered fairness and bias while proposing solutions; even these works are mainly focused on some research topics, such as link prediction, influence maximization, and PageRank. However, fairness has not yet been addressed for other research topics, such as influence blocking and community detection. We review the state of the art for different research topics in SNA, including the considered fairness constraints, their limitations, and our vision. This survey also covers evaluation metrics, available datasets and synthetic network generating models used in such studies. Finally, we highlight various open research directions that require researchers’ attention to bridge the gap between fairness and SNA.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Mannion, S.; MacCarron, P.; Saxena, A.; Takes, F. W.
Fast degree-preserving rewiring of complex networks Miscellaneous
2024, (arXiv:2401.12047 [physics]).
@misc{mannion_fast_2024,
  title = {Fast degree-preserving rewiring of complex networks},
  author = {S. Mannion and P. MacCarron and A. Saxena and F. W. Takes},
  url = {http://arxiv.org/abs/2401.12047},
  doi = {10.48550/arXiv.2401.12047},
  year = {2024},
  date = {2024-04-01},
  urldate = {2024-05-13},
  publisher = {arXiv},
  abstract = {In this paper we introduce a new, fast, degree-preserving rewiring algorithm for altering the assortativity of complex networks, which we call Fast total link (FTL) rewiring algorithm. Commonly used existing algorithms require a large number of iterations, in particular in the case of large dense networks. This can especially be problematic when we wish to study ensembles of networks. In this work we aim to overcome aforementioned scalability problems by performing a rewiring of all edges at once to achieve a very high assortativity value before rewiring samples of edges at once to reduce this high assortativity value to the target value. The proposed method performs better than existing methods by several orders of magnitude for a range of structurally diverse complex networks, both in terms of the number of iterations taken, and time taken to reach a given assortativity value. Here we test our proposed algorithm on networks with up to $100,000$ nodes and around $750,000$ edges and find that the relative improvements in speed remain, showing that the algorithm is both efficient and scalable.},
  note = {arXiv:2401.12047 [physics]},
  keywords = {},
  pubstate = {published},
  tppubtype = {misc}
}
Aiello, L. M.; Vybornova, A.; Juhász, S.; Szell, M.; Bokányi, E.
Urban highways are barriers to social ties Miscellaneous
2024, (arXiv:2404.11596 [physics]).
@misc{aiello_urban_2024,
    author     = {L. M. Aiello and A. Vybornova and S. Juhász and M. Szell and E. Bokányi},
    title      = {Urban highways are barriers to social ties},
    publisher  = {arXiv},
    year       = {2024},
    date       = {2024-04-01},
    doi        = {10.48550/arXiv.2404.11596},
    url        = {http://arxiv.org/abs/2404.11596},
    urldate    = {2024-04-01},
    abstract   = {Urban highways are common, especially in the US, making cities more car-centric. They promise the annihilation of distance but obstruct pedestrian mobility, thus playing a key role in limiting social interactions locally. Although this limiting role is widely acknowledged in urban studies, the quantitative relationship between urban highways and social ties is barely tested. Here we define a Barrier Score that relates massive, geolocated online social network data to highways in the 50 largest US cities. At the unprecedented granularity of individual social ties, we show that urban highways are associated with decreased social connectivity. This barrier effect is especially strong for short distances and consistent with historical cases of highways that were built to purposefully disrupt or isolate Black neighborhoods. By combining spatial infrastructure with social tie data, our method adds a new dimension to demographic studies of social segregation. Our study can inform reparative planning for an evidence-based reduction of spatial inequality, and more generally, support a better integration of the social fabric in urban planning.},
    note       = {arXiv:2404.11596 [physics]},
    keywords   = {},
    pubstate   = {published},
    tppubtype  = {misc},
}
Setia, S.; Chhabra, A.; Arjun Verma, A.; Saxena, A.
Mediating effects of NLP-based parameters on the readability of crowdsourced wikipedia articles Journal Article
In: Applied Intelligence, vol. 54, no. 5, pp. 4370–4391, 2024, ISSN: 1573-7497.
@article{setia_mediating_2024,
  title = {Mediating effects of NLP-based parameters on the readability of crowdsourced wikipedia articles},
  author = {S. Setia and A. Chhabra and A. Arjun Verma and A. Saxena},
  url = {https://doi.org/10.1007/s10489-024-05399-w},
  doi = {10.1007/s10489-024-05399-w},
  issn = {1573-7497},
  year = {2024},
  date = {2024-03-01},
  urldate = {2024-05-13},
  journal = {Applied Intelligence},
  volume = {54},
  number = {5},
  pages = {4370--4391},
  abstract = {In this era of information and communication technology, a large population relies on the Internet to gather information. One of the most popular information sources on the Internet is Wikipedia. Wikipedia is a free encyclopedia that provides a wide range of information to its users. However, there have been concerns about the readability of information on Wikipedia time and again. The readability of the text is defined as the ease of understanding the underlying text. Past studies have analyzed the readability of Wikipedia articles with the help of conventional readability metrics, such as the Flesch-Kincaid readability score and the Automatic Readability Index (ARI). Such metrics only consider the surface-level parameters, such as the number of words, sentences, and paragraphs in the text, to quantify the readability. However, the readability of the text must also take into account the quality of the text. In this study, we consider many new NLP-based parameters capturing the quality of the text, such as lexical diversity, semantic diversity, lexical complexity, and semantic complexity and analyze their impact on the readability of Wikipedia articles using artificial neural networks. Besides NLP parameters, the crowdsourced parameters also affect the readability, and therefore, we also analyze the impact of crowdsourced parameters and observe that the crowdsourced parameters not only influence the readability scores but also affect the NLP parameters of the text. Additionally, we investigate the mediating effect of NLP parameters that connect the crowdsourced parameters to the readability of the text. The results show that the impact of crowdsourced parameters on readability is partially due to the profound effect of NLP-based parameters.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Macedo, M.; Saxena, A.
Gender differences in online communication: A case study of Soccer Miscellaneous
2024, (arXiv:2403.11051 [cs]).
@misc{macedo_gender_2024,
    author     = {M. Macedo and A. Saxena},
    title      = {Gender differences in online communication: A case study of Soccer},
    publisher  = {arXiv},
    year       = {2024},
    date       = {2024-03-01},
    doi        = {10.48550/arXiv.2403.11051},
    url        = {http://arxiv.org/abs/2403.11051},
    urldate    = {2024-05-13},
    abstract   = {Social media and digital platforms allow us to express our opinions freely and easily to a vast number of people. In this study, we examine whether there are gender-based differences in how communication happens via Twitter in regard to soccer. Soccer is one of the most popular sports, and therefore, on social media, it engages a diverse audience regardless of their technical knowledge. We collected Twitter data for three months (March-June) for English and Portuguese that contains 9.5 million Tweets related to soccer, and only 18.38% tweets were identified as belonging to women, highlighting a possible gender gap already in the number of people who participated actively in this topic. We then conduct a fine-grained text-level and network-level analysis to identify the gender differences that might exist while communicating on Twitter. Our results show that women express their emotions more intensely than men, regardless of the differences in volume. The network generated from Portuguese has lower homophily than English. However, this difference in homophily does not impact how females express their emotions and sentiments, suggesting that these aspects are inherent norms or characteristics of genders. Our study unveils more gaps through qualitative and quantitative analyses, highlighting the importance of examining and reporting gender gaps in online communication to create a more inclusive space where people can openly share their opinions.},
    note       = {arXiv:2403.11051 [cs]},
    keywords   = {},
    pubstate   = {published},
    tppubtype  = {misc},
}
Pandey, P. K.; Arya, A.; Saxena, A.
X-distribution: Retraceable Power-law Exponent of Complex Networks Journal Article
In: ACM Transactions on Knowledge Discovery from Data, vol. 18, no. 5, pp. 117:1–117:12, 2024, ISSN: 1556-4681.
@article{pandey_x-distribution_2024,
  title = {X-distribution: Retraceable Power-law Exponent of Complex Networks},
  author = {P. K. Pandey and A. Arya and A. Saxena},
  url = {https://dl.acm.org/doi/10.1145/3639413},
  doi = {10.1145/3639413},
  issn = {1556-4681},
  year = {2024},
  date = {2024-02-01},
  urldate = {2024-05-13},
  journal = {ACM Transactions on Knowledge Discovery from Data},
  volume = {18},
  number = {5},
  pages = {117:1--117:12},
  abstract = {Network modeling has been explored extensively by means of theoretical analysis as well as numerical simulations for Network Reconstruction (NR). The network reconstruction problem requires the estimation of the power-law exponent (γ) of a given input network. Thus, the effectiveness of the NR solution depends on the accuracy of the calculation of γ. In this article, we re-examine the degree distribution-based estimation of γ, which is not very accurate due to approximations. We propose X-distribution, which is more accurate than degree distribution. Various state-of-the-art network models, including CPM, NRM, RefOrCite2, BA, CDPAM, and DMS, are considered for simulation purposes, and simulated results support the proposed claim. Further, we apply X-distribution over several real-world networks to calculate their power-law exponents, which differ from those calculated using respective degree distributions. It is observed that X-distributions exhibit more linearity (straight line) on the log-log scale than degree distributions. Thus, X-distribution is more suitable for the evaluation of power-law exponent using linear fitting (on the log-log scale). The MATLAB implementation of power-law exponent (γ) calculation using X-distribution for different network models and the real-world datasets used in our experiments are available at https://github.com/Aikta-Arya/X-distribution-Retraceable-Power-Law-Exponent-of-Complex-Networks.git.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
de Jong, R. G.; van der Loo, M. P. J.; Takes, F. W.
The effect of distant connections on node anonymity in complex networks Journal Article
In: Scientific Reports, vol. 14, no. 1, pp. 1156, 2024, ISSN: 2045-2322.
@article{de_jong_effect_2024,
  title     = {The effect of distant connections on node anonymity in complex networks},
  author    = {de Jong, R. G. and van der Loo, M. P. J. and Takes, F. W.},
  url       = {https://doi.org/10.1038/s41598-023-50617-z},
  doi       = {10.1038/s41598-023-50617-z},
  issn      = {2045-2322},
  year      = {2024},
  date      = {2024-01-01},
  journal   = {Scientific Reports},
  volume    = {14},
  number    = {1},
  pages     = {1156},
  abstract  = {Ensuring privacy of individuals is of paramount importance to social network analysis research. Previous work assessed anonymity in a network based on the non-uniqueness of a node’s ego network. In this work, we show that this approach does not adequately account for the strong de-anonymizing effect of distant connections. We first propose the use of d-k-anonymity, a novel measure that takes knowledge up to distance d of a considered node into account. Second, we introduce anonymity-cascade, which exploits the so-called infectiousness of uniqueness: mere information about being connected to another unique node can make a given node uniquely identifiable. These two approaches, together with relevant “twin node” processing steps in the underlying graph structure, offer practitioners flexible solutions, tunable in precision and computation time. This enables the assessment of anonymity in large-scale networks with up to millions of nodes and edges. Experiments on graph models and a wide range of real-world networks show drastic decreases in anonymity when connections at distance 2 are considered. Moreover, extending the knowledge beyond the ego network with just one extra link often already decreases overall anonymity by over 50\%. These findings have important implications for privacy-aware sharing of sensitive network data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sánchez-Olivares, E.; Boekhout, H. D.; Saxena, A.; Takes, F. W.
A Framework for Empirically Evaluating Pretrained Link Prediction Models Proceedings Article
In: Cherifi, H.; Rocha, L. M.; Cherifi, C.; Donduran, M. (Ed.): Complex Networks & Their Applications XII. Proceedings of the 12th International Conference on Complex Networks (Complex Networks 2023), pp. 150–161, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-53468-3.
@inproceedings{sanchez_olivares_framework_2024,
  title     = {A Framework for Empirically Evaluating Pretrained Link Prediction Models},
  author    = {Sánchez-Olivares, E. and Boekhout, H. D. and Saxena, A. and Takes, F. W.},
  editor    = {Cherifi, H. and Rocha, L. M. and Cherifi, C. and Donduran, M.},
  doi       = {10.1007/978-3-031-53468-3_13},
  isbn      = {978-3-031-53468-3},
  year      = {2024},
  date      = {2024-01-01},
  urldate   = {2024-01-01},
  booktitle = {Complex Networks \& Their Applications XII. Proceedings of the 12th International Conference on Complex Networks (Complex Networks 2023)},
  pages     = {150--161},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  abstract  = {This paper proposes a novel framework for empirically assessing the effect of network characteristics on the performance of pretrained link prediction models. In link prediction, the task is to predict missing or future links in a given network dataset. We focus on the pretrained setting, in which such a predictive model is trained on one dataset, and employed on another dataset. The framework allows one to overcome a number of nontrivial challenges in adequately testing the performance of such a pretrained model in a proper cross-validated setting. Experiments are performed on a corpus of 49 structurally diverse real-world complex network datasets from various domains with up to hundreds of thousands of nodes and edges. Overall results indicate that the extent to which a network is clustered is strongly related to whether this network is a suitable candidate to create a pretrained model on. Moreover, we systematically assessed the relationship between topological similarity and performance difference of pretrained models and a model trained on the same data. We find that similar network pairs in terms of clustering coefficient, and to a lesser extent degree assortativity and gini coefficient, yield minimal performance difference. The findings presented in this work pave the way for automated model selection based on topological similarity of the networks, as well as larger-scale deployment of pretrained link prediction models for transfer learning.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Liang, Z.; Li, Y.; Huang, T.; Saxena, A.; Pei, Y.; Pechenizkiy, Mykola
Heterophily-Based Graph Neural Network for Imbalanced Classification Proceedings Article
In: Cherifi, H.; Rocha, L. M.; Cherifi, C.; Donduran, M. (Ed.): Complex Networks & Their Applications XII, pp. 74–86, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-53468-3.
@inproceedings{liang_heterophily-based_2024,
  title     = {Heterophily-Based Graph Neural Network for Imbalanced Classification},
  author    = {Liang, Z. and Li, Y. and Huang, T. and Saxena, A. and Pei, Y. and Pechenizkiy, Mykola},
  editor    = {Cherifi, H. and Rocha, L. M. and Cherifi, C. and Donduran, M.},
  doi       = {10.1007/978-3-031-53468-3_7},
  isbn      = {978-3-031-53468-3},
  year      = {2024},
  date      = {2024-01-01},
  urldate   = {2024-01-01},
  booktitle = {Complex Networks \& Their Applications XII},
  pages     = {74--86},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  abstract  = {Graph neural networks (GNNs) have shown promise in addressing graph-related problems, including node classification. However, in real-world scenarios, data often exhibits an imbalanced, sometimes highly-skewed, distribution with dominant classes representing the majority, where certain classes are severely underrepresented. This leads to a suboptimal performance of standard GNNs on imbalanced graphs. In this paper, we introduce a unique approach that tackles imbalanced classification on graphs by considering graph heterophily. We investigate the intricate relationship between class imbalance and graph heterophily, revealing that minority classes not only exhibit a scarcity of samples but also manifest lower levels of homophily, facilitating the propagation of erroneous information among neighboring nodes. Drawing upon this insight, we propose an efficient method, called Fast Im-GBK, which integrates an imbalance classification strategy with heterophily-aware GNNs to effectively address the class imbalance problem while significantly reducing training time. Our experiments on real-world graphs demonstrate our model’s superiority in classification performance and efficiency for node classification tasks compared to existing baselines.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jung-Muller, M.; Ceria, A.; Wang, H.
Higher-Order Temporal Network Prediction Proceedings Article
In: Cherifi, H.; Rocha, L. M.; Cherifi, C.; Donduran, M. (Ed.): Complex Networks & Their Applications XII, pp. 461–472, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-53503-1.
@inproceedings{jung-muller_higher-order_2024,
  title     = {Higher-Order Temporal Network Prediction},
  author    = {Jung-Muller, M. and Ceria, A. and Wang, H.},
  editor    = {Cherifi, H. and Rocha, L. M. and Cherifi, C. and Donduran, M.},
  doi       = {10.1007/978-3-031-53503-1_38},
  isbn      = {978-3-031-53503-1},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Complex Networks \& Their Applications XII},
  pages     = {461--472},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  abstract  = {A social interaction (so-called higher-order event/interaction) can be regarded as the activation of the hyperlink among the corresponding individuals. Social interactions can be, thus, represented as higher-order temporal networks, that record the higher-order events occurring at each time step over time. The prediction of higher-order interactions is usually overlooked in traditional temporal network prediction methods, where a higher-order interaction is regarded as a set of pairwise interactions. We propose a memory-based model that predicts the higher-order temporal network (or events) one step ahead, based on the network observed in the past and a baseline utilizing pairwise temporal network prediction method. In eight real-world networks, we find that our model consistently outperforms the baseline. Importantly, our model reveals how past interactions of the target hyperlink and different types of hyperlinks that overlap with the target hyperlinks contribute to the prediction of the activation of the target link in the future.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
de Jong, R. G.; van der Loo, M. P. J.; Takes, F. W.
The anonymization problem in social networks Miscellaneous
2024.
@misc{dejong2024anonymizationproblemsocialnetworks,
  title      = {The anonymization problem in social networks},
  author     = {de Jong, R. G. and van der Loo, M. P. J. and Takes, F. W.},
  url        = {https://arxiv.org/abs/2409.16163},
  eprint     = {2409.16163},
  eprinttype = {arXiv},
  year       = {2024},
  date       = {2024-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc}
}
2023
Arya, A.; Pandey, P. K.; Saxena, A.
Balanced and Unbalanced Triangle Count in Signed Networks Journal Article
In: IEEE Transactions on Knowledge and Data Engineering, vol. 35, no. 12, pp. 12491–12496, 2023, ISSN: 1558-2191.
@article{arya_balanced_2023,
  title     = {Balanced and Unbalanced Triangle Count in Signed Networks},
  author    = {Arya, A. and Pandey, P. K. and Saxena, A.},
  url       = {https://ieeexplore.ieee.org/abstract/document/10115002},
  doi       = {10.1109/TKDE.2023.3272657},
  issn      = {1558-2191},
  year      = {2023},
  date      = {2023-12-01},
  urldate   = {2024-05-13},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {35},
  number    = {12},
  pages     = {12491--12496},
  abstract  = {Triangle count is a frequently used network statistic, possessing high computational cost. Moreover, this task gets even more complex in the case of signed networks which consist of unbalanced and balanced triangles. In this work, we propose a fast Incremental Triangle Counting (ITC) algorithm for counting all types of triangles, including balanced and unbalanced. The proposed algorithm updates the count of different types of triangles for newly added nodes and edges only instead of recalculating the same triangle multiple times for the entire network repeatedly. Thus, the proposed ITC algorithm also works for dynamic networks. The experimental results show that the proposed method is practically efficient having run time complexity of $O(m k_{\max})$, where $m$ represents the number of edges and $k_{\max}$ represents the maximum degree of the given signed network.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Bokányi, E.; Vizi, Z.; Koltai, J.; Röst, G.; Karsai, M.
Real-time estimation of the effective reproduction number of COVID-19 from behavioral data Journal Article
In: Scientific Reports, vol. 13, no. 1, pp. 21452, 2023, ISSN: 2045-2322, (Publisher: Nature Publishing Group).
@article{bokanyi_real-time_2023,
  title     = {Real-time estimation of the effective reproduction number of {COVID-19} from behavioral data},
  author    = {Bokányi, E. and Vizi, Z. and Koltai, J. and Röst, G. and Karsai, M.},
  url       = {https://www.nature.com/articles/s41598-023-46418-z},
  doi       = {10.1038/s41598-023-46418-z},
  issn      = {2045-2322},
  year      = {2023},
  date      = {2023-12-01},
  urldate   = {2024-05-13},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {21452},
  publisher = {Nature Publishing Group},
  abstract  = {Monitoring the effective reproduction number $R_t$ of a rapidly unfolding pandemic in real-time is key to successful mitigation and prevention strategies. However, existing methods based on case numbers, hospital admissions or fatalities suffer from multiple measurement biases and temporal lags due to high test positivity rates or delays in symptom development or administrative reporting. Alternative methods such as web search and social media tracking are less directly indicating epidemic prevalence over time. We instead record age-stratified anonymous contact matrices at a daily resolution using a longitudinal online-offline survey in Hungary during the first two waves of the COVID-19 pandemic. This approach is innovative, cheap, and provides information in near real-time for estimating $R_t$ at a daily resolution. Moreover, it allows to complement traditional surveillance systems by signaling periods when official monitoring infrastructures are unreliable due to observational biases.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Saxena, A.; Gutiérrez Bierbooms, C.; Pechenizkiy, M.
Fairness-aware fake news mitigation using counter information propagation Journal Article
In: Applied Intelligence, vol. 53, no. 22, pp. 27483–27504, 2023, ISSN: 1573-7497.
@article{saxena_fairness-aware_2023,
  title     = {Fairness-aware fake news mitigation using counter information propagation},
  author    = {Saxena, A. and Gutiérrez Bierbooms, C. and Pechenizkiy, M.},
  url       = {https://doi.org/10.1007/s10489-023-04928-3},
  doi       = {10.1007/s10489-023-04928-3},
  issn      = {1573-7497},
  year      = {2023},
  date      = {2023-11-01},
  urldate   = {2024-05-13},
  journal   = {Applied Intelligence},
  volume    = {53},
  number    = {22},
  pages     = {27483--27504},
  abstract  = {Given the adverse impact of fake news propagation on Social media, fake news mitigation has been one of the main research directions. However, existing approaches neglect fairness towards each community while minimizing the adverse impact of fake news propagation. This results in the exclusion of some minor and underrepresented communities from the benefits of the intervention, which can have important societal repercussions. This research proposes a fairness-aware truth-campaigning method, called FWRRS (Fairness-aware Weighted Reversible Reachable System), which focuses on blocking the influence propagation of a competing entity, in this case, with the use case of fake news mitigation. The proposed method employs weighted reversible reachable trees and maximin fairness to achieve its goals. Experimental analysis shows that FWRRS outperforms fairness-oblivious and fairness-aware methods in terms of both total outreach and fairness. The results show that in the proposed approach, such fairness does not come at a cost in efficiency, and in fact, in most cases, it works as a catalyst for achieving better effectiveness in the future. In real-world networks, we observe up to $\sim$10\% improvement in the saved nodes and $\sim$57\% improvement in maximin fairness as compared to the second best-performing baseline, which varies for each network.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Zou, L.; Ceria, A.; Wang, H.
Short- and long-term temporal network prediction based on network memory Journal Article
In: Applied Network Science, vol. 8, no. 1, pp. 76, 2023, ISSN: 2364-8228.
@article{zou_short-_2023,
  title     = {Short- and long-term temporal network prediction based on network memory},
  author    = {Zou, L. and Ceria, A. and Wang, H.},
  url       = {https://doi.org/10.1007/s41109-023-00597-w},
  doi       = {10.1007/s41109-023-00597-w},
  issn      = {2364-8228},
  year      = {2023},
  date      = {2023-11-01},
  urldate   = {2024-05-13},
  journal   = {Applied Network Science},
  volume    = {8},
  number    = {1},
  pages     = {76},
  abstract  = {Temporal networks are networks whose topology changes over time. Two nodes in a temporal network are connected at a discrete time step only if they have a contact/interaction at that time. The classic temporal network prediction problem aims to predict the temporal network one time step ahead based on the network observed in the past of a given duration. This problem has been addressed mostly via machine learning algorithms, at the expense of high computational costs and limited interpretation of the underlying mechanisms that form the networks. Hence, we propose to predict the connection of each node pair one step ahead based on the connections of this node pair itself and of node pairs that share a common node with this target node pair in the past. The concrete design of our two prediction models is based on the analysis of the memory property of real-world physical networks, i.e., to what extent two snapshots of a network at different times are similar in topology (or overlap). State-of-the-art prediction methods that allow interpretation are considered as baseline models. In seven real-world physical contact networks, our methods are shown to outperform the baselines in both prediction accuracy and computational complexity. They perform better in networks with stronger memory. Importantly, our models reveal how the connections of different types of node pairs in the past contribute to the connection estimation of a target node pair. Predicting temporal networks like physical contact networks in the long-term future beyond short-term i.e., one step ahead is crucial to forecast and mitigate the spread of epidemics and misinformation on the network. This long-term prediction problem has been seldom explored. Therefore, we propose basic methods that adapt each aforementioned prediction model to address classic short-term network prediction problem for long-term network prediction task. The prediction quality of all adapted models is evaluated via the accuracy in predicting each network snapshot and in reproducing key network properties. The prediction based on one of our models tends to have the highest accuracy and lowest computational complexity.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Fajardo, S.; Kozowyk, P. R. B.; Langejans, G. H. J.
Measuring ancient technological complexity and its cognitive implications using Petri nets Journal Article
In: Scientific Reports, vol. 13, no. 1, pp. 14961, 2023, ISSN: 2045-2322, (Publisher: Nature Publishing Group).
@article{fajardo_measuring_2023,
  title     = {Measuring ancient technological complexity and its cognitive implications using {Petri} nets},
  author    = {Fajardo, S. and Kozowyk, P. R. B. and Langejans, G. H. J.},
  url       = {https://www.nature.com/articles/s41598-023-42078-1},
  doi       = {10.1038/s41598-023-42078-1},
  issn      = {2045-2322},
  year      = {2023},
  date      = {2023-09-01},
  urldate   = {2024-05-13},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {14961},
  publisher = {Nature Publishing Group},
  abstract  = {We implement a method from computer sciences to address a challenge in Paleolithic archaeology: how to infer cognition differences from material culture. Archaeological material culture is linked to cognition, and more complex ancient technologies are assumed to have required complex cognition. We present an application of Petri net analysis to compare Neanderthal tar production technologies and tie the results to cognitive requirements. We applied three complexity metrics, each relying on their own unique definitions of complexity, to the modeled production processes. Based on the results, we propose that Neanderthal technical cognition may have been analogous to that of contemporary modern humans. This method also enables us to distinguish the high-order cognitive functions combining traits like planning, inhibitory control, and learning that were likely required by different ancient technological processes. The Petri net approach can contribute to our understanding of technology and cognitive evolution as it can be used on different materials and technologies, across time and species.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Kozowyk, P. R. B.; Fajardo, S.; Langejans, G. H. J.
Scaling Palaeolithic tar production processes exponentially increases behavioural complexity Journal Article
In: Scientific Reports, vol. 13, no. 1, pp. 14709, 2023, ISSN: 2045-2322, (Publisher: Nature Publishing Group).
@article{kozowyk_scaling_2023,
  title     = {Scaling {Palaeolithic} tar production processes exponentially increases behavioural complexity},
  author    = {Kozowyk, P. R. B. and Fajardo, S. and Langejans, G. H. J.},
  url       = {https://www.nature.com/articles/s41598-023-41963-z},
  doi       = {10.1038/s41598-023-41963-z},
  issn      = {2045-2322},
  year      = {2023},
  date      = {2023-09-01},
  urldate   = {2024-05-13},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {14709},
  publisher = {Nature Publishing Group},
  abstract  = {Technological processes, reconstructed from the archaeological record, are used to study the evolution of behaviour and cognition of Neanderthals and early modern humans. In comparisons, technologies that are more complex infer more complex behaviour and cognition. The manufacture of birch bark tar adhesives is regarded as particularly telling and often features in debates about Neanderthal cognition. One method of tar production, the ‘condensation technique’, demonstrates a pathway for Neanderthals to have discovered birch bark tar. However, to improve on the relatively low yield, and to turn tar into a perennial innovation, this method likely needed to be scaled up. Yet, it is currently unknown how scaling Palaeolithic technological processes influences their complexity. We used Petri net models and the Extended Cyclomatic Metric to measure system complexity of birch tar production with a single and three concurrent condensation assemblies. Our results show that changing the number of concurrent tar production assemblies substantially increases the measured complexity. This has potential implications on the behavioural and cognitive capacities required by Neanderthals, such as an increase in cooperation or inhibition control.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Pereira Barata, A.; Takes, F. W.; van den Herik, H. J.; Veenman, C. J.
Fair tree classifier using strong demographic parity Journal Article
In: Machine Learning, 2023, ISSN: 1573-0565.
@article{pereira_barata_fair_2023,
  title     = {Fair tree classifier using strong demographic parity},
  author    = {Pereira Barata, A. and Takes, F. W. and van den Herik, H. J. and Veenman, C. J.},
  url       = {https://doi.org/10.1007/s10994-023-06376-z},
  doi       = {10.1007/s10994-023-06376-z},
  issn      = {1573-0565},
  year      = {2023},
  date      = {2023-08-01},
  urldate   = {2023-08-01},
  journal   = {Machine Learning},
  abstract  = {When dealing with sensitive data in automated data-driven decision-making, an important concern is to learn predictors with high performance towards a class label, whilst minimising for the discrimination towards any sensitive attribute, like gender or race, induced from biased data. Hybrid tree optimisation criteria have been proposed which combine classification performance and fairness. Although the threshold-free ROC-AUC is the standard for measuring classification model performance, current fair tree classification methods mainly optimise for a fixed threshold on the fairness metric. In this paper, we propose SCAFF—splitting criterion AUC for Fairness—a compound decision tree splitting criterion which combines the threshold-free strong demographic parity with ROC-AUC termed, easily applicable as an ensemble. Our method simultaneously leverages multiple sensitive attributes of which the values may be multicategorical, and is tunable with respect to the unavoidable performance-fairness trade-off. In our experiments, we demonstrate how SCAFF generates effective models with competitive performance and fairness with respect to binary, multicategorical, and multiple sensitive attributes.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
de Jong, R. G.; van der Loo, M. P. J.; Takes, F. W.
Algorithms for Efficiently Computing Structural Anonymity in Complex Networks Journal Article
In: ACM Journal of Experimental Algorithmics, vol. 28, pp. 1.7:1–1.7:22, 2023, ISSN: 1084-6654.
@article{de_jong_algorithms_2023,
  title     = {Algorithms for Efficiently Computing Structural Anonymity in Complex Networks},
  author    = {de Jong, R. G. and van der Loo, M. P. J. and Takes, F. W.},
  url       = {https://dl.acm.org/doi/10.1145/3604908},
  doi       = {10.1145/3604908},
  issn      = {1084-6654},
  year      = {2023},
  date      = {2023-08-01},
  urldate   = {2024-04-08},
  journal   = {ACM Journal of Experimental Algorithmics},
  volume    = {28},
  pages     = {1.7:1--1.7:22},
  abstract  = {This article proposes methods for efficiently computing the anonymity of entities in networks. We do so by partitioning nodes into equivalence classes where a node is k-anonymous if it is equivalent to k-1 other nodes. This assessment of anonymity is crucial when one wants to share data and must ensure the anonymity of entities represented is compliant with privacy laws. Additionally, in such an assessment, it is necessary to account for a realistic amount of information in the hands of a possible attacker that attempts to de-anonymize entities in the network. However, measures introduced in earlier work often assume a fixed amount of attacker knowledge. Therefore, in this work, we use a new parameterized measure for anonymity called d-k-anonymity. This measure can be used to model the scenario where an attacker has perfect knowledge of a node’s surroundings up to a given distance d. This poses nontrivial computational challenges, as naive approaches would employ large numbers of possibly computationally expensive graph isomorphism checks. This article proposes novel algorithms that severely reduce this computational burden. In particular, we present an iterative approach, assisted by techniques for preprocessing nodes that are trivially automorphic and heuristics that exploit graph invariants. We evaluate our algorithms on three well-known graph models and a wide range of empirical network datasets. Results show that our approaches significantly speed up the computation by multiple orders of magnitude, which allows one to compute d-k-anonymity for a range of meaningful values of d on large empirical networks with tens of thousands of nodes and over a million edges.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Doewes, A.; Kurdhi, N.; Saxena, A.
Evaluating Quadratic Weighted Kappa as the Standard Performance Metric for Automated Essay Scoring Proceedings Article
In: Feng, M.; Käser, T.; Talukdar, P. (Ed.): Proceedings of the 16th International Conference on Educational Data Mining, pp. 103–113, 2023, (Publisher: International Educational Data Mining Society (IEDMS)).
@inproceedings{doewes_evaluating_2023,
  title     = {Evaluating Quadratic Weighted Kappa as the Standard Performance Metric for Automated Essay Scoring},
  author    = {Doewes, A. and Kurdhi, N. and Saxena, A.},
  editor    = {Feng, M. and Käser, T. and Talukdar, P.},
  doi       = {10.5281/zenodo.8115784},
  year      = {2023},
  date      = {2023-07-01},
  booktitle = {Proceedings of the 16th International Conference on Educational Data Mining},
  pages     = {103--113},
  publisher = {International Educational Data Mining Society (IEDMS)},
  abstract  = {Automated Essay Scoring (AES) tools aim to improve the efficiency and consistency of essay scoring by using machine learning algorithms. In the existing research work on this topic, most researchers agree that human-automated score agreement remains the benchmark for assessing the accuracy of machine-generated scores. To measure the performance of AES models, the Quadratic Weighted Kappa (QWK) is commonly used as the evaluation metric. However, we have identified several limitations of using QWK as the sole metric for evaluating AES model performance. These limitations include its sensitivity to the rating scale, the potential for the so-called “kappa paradox” to occur, the impact of prevalence, the impact of the position of agreements in the diagonal agreement matrix, and its limitation in handling a large number of raters. Our findings suggest that relying solely on QWK as the evaluation metric for AES performance may not be sufficient. We further discuss insights into additional metrics to comprehensively evaluate the performance and accuracy of AES models.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bokányi, E.; Heemskerk, E. M.; Takes, F. W.
The anatomy of a population-scale social network Journal Article
In: Scientific Reports, vol. 13, no. 1, pp. 9209, 2023, ISSN: 2045-2322, (Publisher: Nature Publishing Group).
@article{bokanyi_anatomy_2023,
  title     = {The anatomy of a population-scale social network},
  author    = {Bokányi, E. and Heemskerk, E. M. and Takes, F. W.},
  url       = {https://www.nature.com/articles/s41598-023-36324-9},
  doi       = {10.1038/s41598-023-36324-9},
  issn      = {2045-2322},
  year      = {2023},
  date      = {2023-06-01},
  urldate   = {2024-04-08},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {9209},
  publisher = {Nature Publishing Group},
  abstract  = {Large-scale human social network structure is typically inferred from digital trace samples of online social media platforms or mobile communication data. Instead, here we investigate the social network structure of a complete population, where people are connected by high-quality links sourced from administrative registers of family, household, work, school, and next-door neighbors. We examine this multilayer social opportunity structure through three common concepts in network analysis: degree, closure, and distance. Findings present how particular network layers contribute to presumably universal scale-free and small-world properties of networks. Furthermore, we suggest a novel measure of excess closure and apply this in a life-course perspective to show how the social opportunity structure of individuals varies along age, socio-economic status, and education level.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Mattsson, C. E. S.; Criscione, T.; Takes, F. W.
Circulation of a digital community currency Journal Article
In: Scientific Reports, vol. 13, no. 1, pp. 5864, 2023, ISSN: 2045-2322, (Publisher: Nature Publishing Group).
@article{mattsson_circulation_2023,
  title     = {Circulation of a digital community currency},
  author    = {Mattsson, C. E. S. and Criscione, T. and Takes, F. W.},
  url       = {https://www.nature.com/articles/s41598-023-33184-1},
  doi       = {10.1038/s41598-023-33184-1},
  issn      = {2045-2322},
  year      = {2023},
  date      = {2023-04-01},
  urldate   = {2024-04-08},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {5864},
  publisher = {Nature Publishing Group},
  abstract  = {Circulation is the characteristic feature of successful currency systems, from community currencies to cryptocurrencies to national currencies. In this paper, we propose a network analysis approach especially suited for studying circulation given a system’s digital transaction records. Sarafu is a digital community currency that was active in Kenya over a period that saw considerable economic disruption due to the COVID-19 pandemic. We represent its circulation as a network of monetary flow among the 40,000 Sarafu users. Network flow analysis reveals that circulation was highly modular, geographically localized, and occurring among users with diverse livelihoods. Across localized sub-populations, network cycle analysis supports the intuitive notion that circulation requires cycles. Moreover, the sub-networks underlying circulation are consistently degree disassortative and we find evidence of preferential attachment. Community-based institutions often take on the role of local hubs, and network centrality measures confirm the importance of early adopters and of women’s participation. This work demonstrates that networks of monetary flow enable the study of circulation within currency systems at a striking level of detail, and our findings can be used to inform the development of community currencies in marginalized areas.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Ceria, A.; Wang, H.
Temporal-topological properties of higher-order evolving networks Journal Article
In: Scientific Reports, vol. 13, no. 1, pp. 5885, 2023, ISSN: 2045-2322, (Publisher: Nature Publishing Group).
@article{ceria_temporal-topological_2023,
  author    = {Ceria, A. and Wang, H.},
  title     = {Temporal-topological properties of higher-order evolving networks},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {5885},
  year      = {2023},
  date      = {2023-04-01},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-023-32253-9},
  url       = {https://www.nature.com/articles/s41598-023-32253-9},
  urldate   = {2024-05-13},
  abstract  = {Human social interactions are typically recorded as time-specific dyadic interactions, and represented as evolving (temporal) networks, where links are activated/deactivated over time. However, individuals can interact in groups of more than two people. Such group interactions can be represented as higher-order events of an evolving network. Here, we propose methods to characterize the temporal-topological properties of higher-order events to compare networks and identify their (dis)similarities. We analyzed 8 real-world physical contact networks, finding the following: (a) Events of different orders close in time tend to be also close in topology; (b) Nodes participating in many different groups (events) of a given order tend to involve in many different groups (events) of another order; Thus, individuals tend to be consistently active or inactive in events across orders; (c) Local events that are close in topology are correlated in time, supporting observation (a). Differently, in 5 collaboration networks, observation (a) is almost absent; Consistently, no evident temporal correlation of local events has been observed in collaboration networks. Such differences between the two classes of networks may be explained by the fact that physical contacts are proximity based, in contrast to collaboration networks. Our methods may facilitate the investigation of how properties of higher-order events affect dynamic processes unfolding on them and possibly inspire the development of more refined models of higher-order time-varying networks.},
  note      = {Publisher: Nature Publishing Group},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Panchendrarajan, R.; Saxena, A.
Topic-based influential user detection: a survey Journal Article
In: Applied Intelligence, vol. 53, no. 5, pp. 5998–6024, 2023, ISSN: 1573-7497.
@article{panchendrarajan_topic-based_2023,
  author    = {Panchendrarajan, R. and Saxena, A.},
  title     = {Topic-based influential user detection: a survey},
  journal   = {Applied Intelligence},
  volume    = {53},
  number    = {5},
  pages     = {5998--6024},
  year      = {2023},
  date      = {2023-03-01},
  issn      = {1573-7497},
  doi       = {10.1007/s10489-022-03831-7},
  url       = {https://doi.org/10.1007/s10489-022-03831-7},
  urldate   = {2024-05-13},
  abstract  = {Online Social networks have become an easy means of communication for users to share their opinion on various topics, including breaking news, public events, and products. The content posted by a user can influence or affect other users, and the users who could influence or affect a high number of users are called influential users. Identifying such influential users has a wide range of applications in the field of marketing, including product advertisement, recommendation, and brand evaluation. However, the users’ influence varies in different topics, and hence a tremendous interest has been shown towards identifying topic-based influential users over the past few years. Topic-level information in the content posted by the users can be used in various stages of the topic-based influential user detection (IUD) problem, including data gathering, construction of influence network, quantifying the influence between two users, and analyzing the impact of the detected influential user. This has opened up a wide range of opportunities to utilize the existing techniques to model and analyze the topic-level influence in online social networks. In this paper, we perform a comprehensive study of existing techniques used to infer the topic-based influential users in online social networks. We present a detailed review of these approaches in a taxonomy while highlighting the challenges and limitations associated with each technique. Moreover, we perform a detailed study of different evaluation techniques used in the literature to overcome the challenges that arise in evaluating topic-based IUD approaches. Furthermore, closely related research topics and open research questions in topic-based IUD are discussed to provide a deep understanding of the literature and future directions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
McNeil, M.; Mattsson, C. E. S.; Takes, F. W.; Bogdanov, P.
CADENCE: Community-Aware Detection of Dynamic Network States Proceedings Article
In: Proceedings of the 2023 SIAM International Conference on Data Mining (SDM), pp. 1–9, Society for Industrial and Applied Mathematics, 2023.
@inproceedings{mcneil_cadence_2023,
  author    = {McNeil, M. and Mattsson, C. E. S. and Takes, F. W. and Bogdanov, P.},
  title     = {{CADENCE}: Community-Aware Detection of Dynamic Network States},
  booktitle = {Proceedings of the 2023 SIAM International Conference on Data Mining (SDM)},
  series    = {Proceedings},
  pages     = {1--9},
  publisher = {Society for Industrial and Applied Mathematics},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1137/1.9781611977653.ch1},
  url       = {https://epubs.siam.org/doi/abs/10.1137/1.9781611977653.ch1},
  urldate   = {2024-04-08},
  abstract  = {Dynamic interaction data is often aggregated in a sequence of network snapshots before being employed in downstream analysis. The two common ways of defining network snapshots are i) a fixed time interval or ii) fixed number of interactions per snapshot. The choice of aggregation has a significant impact on subsequent analysis, and it is not trivial to select one approach over another for a given dataset. More importantly assuming snapshot regularity is data-agnostic and may be at odds with the underlying interaction dynamics.
To address these challenges, we propose a method for community-aware detection of network states (CADENCE) based on the premise of stable interaction time-frames within network communities. We simultaneously detect network communities and partition the global interaction activity into scale-adaptive snapshots where the level of interaction within communities remains stable. We model a temporal network as a node-node-time tensor and use a structured canonical polyadic decomposition with a piece-wise constant temporal factor to iteratively identify communities and their activity levels. We demonstrate that transitions between network snapshots learned by CADENCE constitute network change points of better quality than those predicted by state-of-the-art network change point detectors. Furthermore, the network structure within individual snapshots reflects ground truth communities better than baselines for adaptive tensor granularity. Through a case study on a real-world Reddit dataset, we showcase the interpretability of CADENCE motivated snapshots as periods separated by significant events.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
To address these challenges, we propose a method for community-aware detection of network states (CADENCE) based on the premise of stable interaction time-frames within network communities. We simultaneously detect network communities and partition the global interaction activity into scale-adaptive snapshots where the level of interaction within communities remains stable. We model a temporal network as a node-node-time tensor and use a structured canonical polyadic decomposition with a piece-wise constant temporal factor to iteratively identify communities and their activity levels. We demonstrate that transitions between network snapshots learned by CADENCE constitute network change points of better quality than those predicted by state-of-the-art network change point detectors. Furthermore, the network structure within individual snapshots reflects ground truth communities better than baselines for adaptive tensor granularity. Through a case study on a real-world Reddit dataset, we showcase the interpretability of CADENCE motivated snapshots as periods separated by significant events.
Boekhout, H. D.; Blokland, A. A. J.; Takes, F. W.
A large-scale longitudinal structured dataset of the dark web cryptomarket Evolution (2014-2015) Miscellaneous
2023, (_eprint: 2311.11878).
@misc{boekhout_large-scale_2023,
  author     = {Boekhout, H. D. and Blokland, A. A. J. and Takes, F. W.},
  title      = {A large-scale longitudinal structured dataset of the dark web cryptomarket {Evolution} (2014-2015)},
  publisher  = {arXiv},
  year       = {2023},
  date       = {2023-01-01},
  eprint     = {2311.11878},
  eprinttype = {arXiv},
  doi        = {10.48550/arXiv.2311.11878},
  url        = {https://doi.org/10.48550/arXiv.2311.11878},
  urldate    = {2023-01-01},
  abstract   = {Dark Web Marketplaces (DWM) facilitate the online trade of illicit goods. Due to the illicit nature of these marketplaces, quality datasets are scarce and difficult to produce. The Dark Net Market archives (2015) presented raw scraped source files crawled from a selection of DWMs, including Evolution. Here, we present, specifically for the Evolution DWM, a structured dataset extracted from Dark Net Market archive data. Uniquely, many of the data quality issues inherent to crawled data are resolved. The dataset covers over 500 thousand forum posts and over 80 thousand listings, providing data on forums, topics, posts, forum users, market vendors, listings, and more. Additionally, we present temporal weighted communication networks extracted from this data. The presented dataset provides easy access to a high quality DWM dataset to facilitate the study of criminal behaviour and communication on such DWMs, which may provide a relevant source of knowledge for researchers across disciplines, from social science to law to network science.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc}
}
Boekhout, H. D.; Blokland, A. A. J.; Takes, F. W.
Early warning signals for predicting cryptomarket vendor success using dark net forum networks Miscellaneous
arXiv 2306.16568, 2023, (_eprint: 2306.16568).
@misc{boekhout_early_2023,
  author       = {Boekhout, H. D. and Blokland, A. A. J. and Takes, F. W.},
  title        = {Early warning signals for predicting cryptomarket vendor success using dark net forum networks},
  howpublished = {arXiv 2306.16568},
  publisher    = {arXiv},
  year         = {2023},
  date         = {2023-01-01},
  eprint       = {2306.16568},
  eprinttype   = {arXiv},
  doi          = {10.48550/arXiv.2306.16568},
  url          = {https://doi.org/10.48550/arXiv.2306.16568},
  urldate      = {2023-01-01},
  abstract     = {In this work we focus on identifying key players in dark net cryptomarkets that facilitate online trade of illegal goods. Law enforcement aims to disrupt criminal activity conducted through these markets by targeting key players vital to the market's existence and success. We particularly focus on detecting successful vendors responsible for the majority of illegal trade. Our methodology aims to uncover whether the task of key player identification should center around plainly measuring user and forum activity, or that it requires leveraging specific patterns of user communication. We focus on a large-scale dataset from the Evolution cryptomarket, which we model as an evolving communication network. While user and forum activity measures are useful for identifying the most successful vendors, we find that betweenness centrality additionally identifies those with lesser activity in the network. But more importantly, analyzing the forum data over time, we find evidence that attaining a high betweenness score comes before vendor success. This suggests that the proposed network-driven approach of modelling user communication might prove useful as an early warning signal for key player identification.},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {misc}
}
Su, Z.; Helles, R.; Al-Laith, A.; Veilahti, A.; Saxena, A.; Simonsen, J. G.
Privacy Lost in Online Education: Analysis of Web Tracking Evolution Proceedings Article
In: Yang, X.; Suhartanto, H.; Wang, G.; Wang, B.; Jiang, J.; Li, B.; Zhu, H.; Cui, N. (Ed.): Advanced Data Mining and Applications, pp. 440–455, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-46664-9.
@inproceedings{su_privacy_2023,
  author    = {Su, Z. and Helles, R. and Al-Laith, A. and Veilahti, A. and Saxena, A. and Simonsen, J. G.},
  title     = {Privacy Lost in Online Education: Analysis of Web Tracking Evolution},
  editor    = {Yang, X. and Suhartanto, H. and Wang, G. and Wang, B. and Jiang, J. and Li, B. and Zhu, H. and Cui, N.},
  booktitle = {Advanced Data Mining and Applications},
  pages     = {440--455},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2023},
  date      = {2023-01-01},
  isbn      = {978-3-031-46664-9},
  doi       = {10.1007/978-3-031-46664-9_30},
  abstract  = {Digital tracking poses a significant and multifaceted threat to personal privacy and integrity. Tracking techniques, such as the use of cookies and scripts, are widespread on the World Wide Web and have become more pervasive in the past decade. This paper focuses on the historical analysis of tracking practices specifically on educational websites, which require particular attention due to their often mandatory usage by users, including young individuals who may not adequately assess privacy implications. The paper proposes a framework for comparing tracking activities on a specific domain of websites by contrasting a sample of these sites with a control group consisting of sites with comparable traffic levels, but without a specific functional purpose. This comparative analysis allows us to evaluate the distinctive evolution of tracking on educational platforms against a standard benchmark. Our findings reveal that although educational websites initially demonstrated lower levels of tracking, their growth rate from 2012 to 2021 has exceeded that of the control group, resulting in higher levels of tracking at present. Through our investigation into the expansion of various types of trackers, we suggest that the accelerated growth of tracking on educational websites is partly attributable to the increased use of interactive features, facilitated by third-party services that enable the collection of user data. The paper concludes by proposing ways in which web developers can safeguard their design choices to mitigate user exposure to tracking.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Saxena, A.; Sethiya, N.; Saini, J. S.; Gupta, Y.; Iyengar, S. R. S.
Social Network Analysis of the Caste-Based Reservation System in India Proceedings Article
In: Dinh, T. N.; Li, M. (Ed.): Computational Data and Social Networks, pp. 203–214, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-26303-3.
@inproceedings{saxena_social_2023,
  author    = {Saxena, A. and Sethiya, N. and Saini, J. S. and Gupta, Y. and Iyengar, S. R. S.},
  title     = {Social Network Analysis of the Caste-Based Reservation System in {India}},
  editor    = {Dinh, T. N. and Li, M.},
  booktitle = {Computational Data and Social Networks},
  pages     = {203--214},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2023},
  date      = {2023-01-01},
  isbn      = {978-3-031-26303-3},
  doi       = {10.1007/978-3-031-26303-3_18},
  abstract  = {Being as old as human civilization, discrimination based on various grounds such as race, creed, gender, and caste has existed for a long time. To undo the impact of this long-enduring historical discrimination, governments worldwide have adopted various forms of affirmative action, such as positive discrimination, employment equity, and quota system. In India, people are considered to belong to Backward Class (BC) or Forward Class (FC), and the Indian government designed an affirmative action, locally known as the “Reservation” policy, to reduce the discrimination between both groups. Through this affirmative action, the government provides support to people from the backward class (BC). Although being one of the most controversial and frequently debated issues, the reservation system in India lacks rigorous scientific study and analysis. In this paper, we model the dynamics of the reservation system based on the cultural divide among the Indian population using social network analysis. The mathematical model, using the Erdös-Rényi network, shows that the addition of weak ties between the two groups leads to a logarithmic reduction in the social distance. Our experimental simulations establish the claim for the different clans of frequently studied social network models as well as real-world networks. We further show that a small number of links created by the reservation process are adequate for a society to live in harmony.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2022
Fajardo, S.; Kleijn, J.; Takes, F. W.; Langejans, G. H. J.
Modelling and measuring complexity of traditional and ancient technologies using Petri nets Journal Article
In: PLOS ONE, vol. 17, no. 11, pp. e0278310, 2022, ISSN: 1932-6203, (Publisher: Public Library of Science).
@article{fajardo_modelling_2022,
  author    = {Fajardo, S. and Kleijn, J. and Takes, F. W. and Langejans, G. H. J.},
  title     = {Modelling and measuring complexity of traditional and ancient technologies using {Petri} nets},
  journal   = {PLOS ONE},
  volume    = {17},
  number    = {11},
  pages     = {e0278310},
  year      = {2022},
  date      = {2022-11-01},
  issn      = {1932-6203},
  doi       = {10.1371/journal.pone.0278310},
  url       = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0278310},
  urldate   = {2024-04-08},
  abstract  = {Technologies and their production systems are used by archaeologists and anthropologists to study complexity of socio-technical systems. However, there are several issues that hamper agreement about what constitutes complexity and how we can systematically compare the complexity of production systems. In this work, we propose a novel approach to assess the behavioural and structural complexity of production systems using Petri nets. Petri nets are well-known formal models commonly used in, for example, biological and business process modelling, as well as software engineering. The use of Petri nets overcomes several obstacles of current approaches in archaeology and anthropology, such as the incompatibility of the intrinsic sequential logic of the available methods with inherently non-sequential processes, and the inability to explicitly model activities and resources separately. We test the proposed Petri net modelling approach on two traditional production systems of adhesives made by Ju/’hoan makers from Nyae, Namibia from Ammocharis coranica and Ozoroa schinzii plants. We run simulations in which we assess the complexity of these two adhesive production systems in detail and show how Petri net dynamics reveal the structural and behavioural complexity of different production scenarios. We show that concurrency may be prevalent in the production system of adhesive technologies and discuss how changes in location during the process may serve to control the behavioural complexity of a production system. The approach presented in this paper paves the way for future systematic visualization, analysis, and comparison of ancient production systems, accounting for the inherent complex, concurrent, and action/resource-oriented aspects of such processes.},
  note      = {Publisher: Public Library of Science},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
De Beule, F.; Elia, S.; Garcia-Bernardo, J.; Heemskerk, E. M.; Jaklič, A.; Takes, F. W.; Zdziarski, M.
Proximity at a distance: The relationship between foreign subsidiary co-location and MNC headquarters board interlock formation Journal Article
In: International Business Review, vol. 31, no. 4, pp. 101971, 2022, ISSN: 0969-5931.
@article{de_beule_proximity_2022,
  author    = {De Beule, F. and Elia, S. and Garcia-Bernardo, J. and Heemskerk, E. M. and Jaklič, A. and Takes, F. W. and Zdziarski, M.},
  title     = {Proximity at a distance: The relationship between foreign subsidiary co-location and {MNC} headquarters board interlock formation},
  journal   = {International Business Review},
  volume    = {31},
  number    = {4},
  pages     = {101971},
  year      = {2022},
  date      = {2022-08-01},
  issn      = {0969-5931},
  doi       = {10.1016/j.ibusrev.2021.101971},
  url       = {https://www.sciencedirect.com/science/article/pii/S096959312100189X},
  urldate   = {2024-04-08},
  abstract  = {Corporations seek various relationships, such as board interlocks, with other firms to reduce resource dependencies. The consistent theoretical expectation and empirical finding that physical proximity is an important driver for board interlock formation is seemingly at odds with the emerging and growing literature on transnational board interlock ties. We argue that the effect of proximity on multinational corporation (MNC) board interlock formation can also be attributed to the firms’ internationalization strategy, namely, when they have co-located subsidiaries in foreign markets. We call this “proximity at a distance”. We test our assumptions on a dataset covering almost 43,000 board interlocks among MNC headquarters and their 12 million subsidiary co-location pairs. We confirm that proximity among headquarters increases the odds of interlocking but also find robust evidence that co-located subsidiaries also increase firms’ propensity to interlock, particularly for transnational board interlocks. Our results help provide an explanation for the “paradox of distance” by showing that the interlock between two distant MNCs may be driven by proximity to their foreign subsidiaries. As such, we illustrate how MNCs’ resource-dependent strategic responses can occur at the headquarters level to address uncertainties experienced at the subsidiary level.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Nadiri, Amirhossein; Takes, F. W.
A Large-scale Temporal Analysis of User Lifespan Durability on the Reddit Social Media Platform Proceedings Article
In: Companion Proceedings of the Web Conference 2022, pp. 677–685, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 978-1-4503-9130-6.
@inproceedings{nadiri_large-scale_2022,
  author    = {Nadiri, Amirhossein and Takes, F. W.},
  title     = {A Large-scale Temporal Analysis of User Lifespan Durability on the {Reddit} Social Media Platform},
  booktitle = {Companion Proceedings of the Web Conference 2022},
  series    = {WWW '22},
  pages     = {677--685},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2022},
  date      = {2022-08-01},
  isbn      = {978-1-4503-9130-6},
  doi       = {10.1145/3487553.3524699},
  url       = {https://dl.acm.org/doi/10.1145/3487553.3524699},
  urldate   = {2024-04-08},
  abstract  = {Social media platforms thrive upon the intertwined combination of user-created content and social interaction between these users. In this paper, we aim to understand what early user activity patterns fuel an ultimately durable user lifespan. We do so by analyzing what behavior causes potentially durable contributors to abandon their “social career” at an early stage, despite a strong start. We use a uniquely processed temporal dataset of over 6 billion Reddit user interactions on covering over 14 years, which we make available together with this paper. The temporal data allows us to assess both user content creation activity and the way in which this content is perceived. We do so in three dimensions, being a user’s content a) engagement and perception, b) diversification, and c) contribution. Our experiments reveal that users who leave the platform quickly may initially receive good feedback on their posts, but in time experience a decrease in the perceived quality of their content. Concerning diversification, we find that early departing users focus on fewer content categories in total, but do “jump” between those content categories more frequently, perhaps in an (unsuccessful) search for recognition or a sense of belonging. Third, we see that users who stay with the platform for a more extended period gradually start contributing, whereas early departing users post their first comments relatively quickly. The findings from this paper may prove crucial for better understanding how social media platforms can in an early stage improve the overall user experience and feeling of belonging within the social ecosystem of the platform.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Brinkmann, G. G.; Rietveld, K. F. D.; Verbeek, F. J.; Takes, F. W.
Real-time interactive visualization of large networks on a tiled display system Journal Article
In: Displays, vol. 73, pp. 102164, 2022, ISSN: 0141-9382.
@article{brinkmann_real-time_2022,
  author    = {Brinkmann, G. G. and Rietveld, K. F. D. and Verbeek, F. J. and Takes, F. W.},
  title     = {Real-time interactive visualization of large networks on a tiled display system},
  journal   = {Displays},
  volume    = {73},
  pages     = {102164},
  year      = {2022},
  date      = {2022-07-01},
  issn      = {0141-9382},
  doi       = {10.1016/j.displa.2022.102164},
  url       = {https://www.sciencedirect.com/science/article/pii/S0141938222000130},
  urldate   = {2024-04-08},
  abstract  = {This paper introduces a methodology for visualizing large real-world (social) network data on a high-resolution tiled display system. Advances in network drawing algorithms enabled real-time visualization and interactive exploration of large real-world networks. However, visualization on a typical desktop monitor remains challenging due to the limited amount of screen space and ever increasing size of real-world datasets. To solve this problem, we propose an integrated approach that employs state-of-the-art network visualization algorithms on a tiled display system consisting of multiple screens. Key to our approach is to use the machine’s graphics processing units (GPUs) to their fullest extent, in order to ensure an interactive setting with real-time visualization. To realize this, we extended a recent GPU-based implementation of a force-directed graph layout algorithm to multiple GPUs and combined this with a distributed rendering approach in which each graphics card in the tiled display system renders precisely the part of the network to be displayed on the monitors attached to it. Our evaluation of the approach on a 12-screen 25 megapixels tiled display system with three GPUs, demonstrates interactive performance at 60 frames per second for real-world networks with tens of thousands of nodes and edges. This constitutes a performance improvement of approximately 4 times over a single GPU implementation. All the software developed to implement our tiled visualization approach, including the multi-GPU network layout, rendering, display and interaction components, are made available as open-source software.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Bellotti, E.; Czerniawska, D.; Everett, M. G.; Guadalupi, L.
Gender inequalities in research funding: Unequal network configurations, or unequal network returns? Journal Article
In: Social Networks, vol. 70, pp. 138–151, 2022, ISSN: 0378-8733.
@article{bellotti_gender_2022,
  author    = {Bellotti, E. and Czerniawska, D. and Everett, M. G. and Guadalupi, L.},
  title     = {Gender inequalities in research funding: Unequal network configurations, or unequal network returns?},
  journal   = {Social Networks},
  volume    = {70},
  pages     = {138--151},
  year      = {2022},
  date      = {2022-07-01},
  issn      = {0378-8733},
  doi       = {10.1016/j.socnet.2021.12.007},
  url       = {https://www.sciencedirect.com/science/article/pii/S0378873321001167},
  urldate   = {2024-05-13},
  abstract  = {Despite longstanding discussions and consequent improvements of gender representation in academia, the number of women working in academic research, their performance and their recognition still indicate the persistence of gender inequalities. Of the various mechanisms that may drive such inequality the one that relates the structure of research collaborative networks to the rewards that these networks facilitate is intriguing. If social networks play a role in academic performances and women are under-recognized compared to men, what are the mechanisms that relate networks to gender inequalities in academia? Do men and women obtain equal returns from similar network structure, or do women have to build different networks to obtain similar returns? This article contributes to the theoretical debate that aims to disentangle the mechanisms by which social networks may affect gendered success. We do so by looking at networks of collaborations to research projects in 10 years of public funding in Italy. We use a permutation t-test analysis to see if men and women, who are equally successful in getting funded for a high number of projects, build equal or unequal collaborative networks, and if these differences vary across disciplines with more or less tenured women. Our methodological contribution proposes an innovative way of measuring brokerage and constraint in one mode projections of bipartite networks, highly clustered and valued, and measures gender homophily by taking into account the higher or lower number of women available for collaborations in different disciplinary sectors. We find that men and women build similar collaborative networks; we also find that the more women are tenured in a discipline, the more they occupy higher ranks and lead research projects.
When women’s presence and high-rank roles increase, we see that men’s collaborative networks are significantly more heterophilous, although women do not seem to significantly increase their collaborations with other women. We then regress the individual measures of network structure and composition against the average amount of money scholars receive per project and we interact each measure with gender, to observe if women obtain similar returns to men by building equal collaborative networks. Our results show that despite the similarities in network configurations in nearly all disciplines women still receive less funding than men. This suggests that an increased representation of women in academic fields is not enough to reduce inequalities. Our results are not directly generalizable to other international contexts, but the mechanisms that favor academic returns and exacerbate inequalities, and our methodological approach, are portable to other scientific and organizational contexts where collaborations are key to produce success.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
van Kuppevelt, D. E.; Bakhshi, R.; Heemskerk, E. M.; Takes, F. W.
Community membership consistency applied to corporate board interlock networks Journal Article
In: Journal of Computational Social Science, vol. 5, no. 1, pp. 841–860, 2022, ISSN: 2432-2725.
@article{kuppevelt_community_2022,
  author    = {van Kuppevelt, D. E. and Bakhshi, R. and Heemskerk, E. M. and Takes, F. W.},
  title     = {Community membership consistency applied to corporate board interlock networks},
  journal   = {Journal of Computational Social Science},
  volume    = {5},
  number    = {1},
  pages     = {841--860},
  year      = {2022},
  date      = {2022-05-01},
  issn      = {2432-2725},
  doi       = {10.1007/s42001-021-00145-5},
  url       = {https://doi.org/10.1007/s42001-021-00145-5},
  urldate   = {2024-04-08},
  abstract  = {Community detection is a well-established method for studying the meso-scale structure of social networks. Applying a community detection algorithm results in a division of a network into communities that is often used to inspect and reason about community membership of specific nodes. This micro-level interpretation step of community structure is a crucial step in typical social science research. However, the methodological caveat in this step is that virtually all modern community detection methods are non-deterministic and based on randomization and approximated results. This needs to be explicitly taken into consideration when reasoning about community membership of individual nodes. To do so, we propose a metric of community membership consistency, that provides node-level insights in how reliable the placement of that node into a community really is. In addition, it enables us to distinguish the community core members of a community. The usefulness of the proposed metrics is demonstrated on corporate board interlock networks, in which weighted links represent shared senior level directors between firms. Results suggest that the community structure of global business groups is centered around persistent communities consisting of core countries tied by geographical and cultural proximity. In addition, we identify fringe countries that appear to associate with a number of different global business communities.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
de Bruin, G. J.; Pereira Barata, A.; van den Herik, H. J.; Takes, F. W.; Veenman, C. J.
Fair automated assessment of noncompliance in cargo ship networks Journal Article
In: EPJ Data Sci., vol. 11, no. 1, pp. 13, 2022.
@comment{Journal article: de Bruin et al. 2022, EPJ Data Science 11(1), article number 13. Author names use the von-Last-comma-First form so the particles de and van den and the compound surname Pereira Barata are parsed as part of the surname.}
@article{de_bruin_g_j_fair_2022,
  title = {Fair automated assessment of noncompliance in cargo ship networks},
  author = {de Bruin, G. J. and Pereira Barata, A. and van den Herik, H. J. and Takes, F. W. and Veenman, C. J.},
  url = {https://doi.org/10.1140/epjds/s13688-022-00326-w},
  doi = {10.1140/epjds/s13688-022-00326-w},
  year = {2022},
  date = {2022-01-01},
  urldate = {2022-01-01},
  journal = {EPJ Data Science},
  volume = {11},
  number = {1},
  pages = {13},
  pubstate = {published},
  tppubtype = {article}
}
Meertens, Q. A.; Diks, C. G. H.; van den Herik, H. J.; Takes, F. W.
Improving the Output Quality of Official Statistics Based on Machine Learning Algorithms Journal Article
In: Journal of Official Statistics, vol. 38, no. 2, pp. 485–508, 2022, (_eprint: https://doi.org/10.2478/jos-2022-0023).
@comment{Journal article: Meertens et al. 2022, Journal of Official Statistics 38(2). Author names use the von-Last-comma-First form so the particle van den stays with the surname. The exporter-generated eprint note was dropped because it only repeated the DOI.}
@article{meertens_improving_2022,
  title = {Improving the Output Quality of Official Statistics Based on Machine Learning Algorithms},
  author = {Meertens, Q. A. and Diks, C. G. H. and van den Herik, H. J. and Takes, F. W.},
  url = {https://doi.org/10.2478/jos-2022-0023},
  doi = {10.2478/jos-2022-0023},
  year = {2022},
  date = {2022-01-01},
  urldate = {2022-01-01},
  journal = {Journal of Official Statistics},
  volume = {38},
  number = {2},
  pages = {485--508},
  abstract = {National statistical institutes currently investigate how to improve the output quality of official statistics based on machine learning algorithms. A key issue is concept drift, that is, when the joint distribution of independent variables and a dependent (categorical) variable changes over time. Under concept drift, a statistical model requires regular updating to prevent it from becoming biased. However, updating a model asks for additional data, which are not always available. An alternative is to reduce the bias by means of bias correction methods. In the article, we focus on estimating the proportion (base rate) of a category of interest and we compare two popular bias correction methods: the misclassification estimator and the calibration estimator. For prior probability shift (a specific type of concept drift), we investigate the two methods analytically as well as numerically. Our analytical results are expressions for the bias and variance of both methods. As numerical result, we present a decision boundary for the relative performance of the two methods. Our results provide a better understanding of the effect of prior probability shift on output quality. Consequently, we may recommend a novel approach on how to use machine learning algorithms in the context of official statistics.},
  pubstate = {published},
  tppubtype = {article}
}
Boekhout, H. D.; Heemskerk, E. M.; Takes, F. W.
Evolution of the World Stage of Global Science from a Scientific City Network Perspective Proceedings Article
In: Benito, R. M.; Cherifi, C.; Cherifi, H.; Moro, E.; Rocha, L. M.; Sales-Pardo, M. (Ed.): Complex Networks & Their Applications X, pp. 142–154, Springer International Publishing, Cham, 2022, ISBN: 978-3-030-93409-5.
@comment{Conference paper: Boekhout, Heemskerk and Takes 2022, in Complex Networks and Their Applications X, Springer. The ampersand in booktitle is escaped because a bare one is an alignment character in LaTeX and aborts typesetting of the bibliography.}
@inproceedings{boekhout_evolution_2022,
  title = {Evolution of the World Stage of Global Science from a Scientific City Network Perspective},
  author = {Boekhout, H. D. and Heemskerk, E. M. and Takes, F. W.},
  editor = {Benito, R. M. and Cherifi, C. and Cherifi, H. and Moro, E. and Rocha, L. M. and Sales-Pardo, M.},
  doi = {10.1007/978-3-030-93409-5_13},
  isbn = {978-3-030-93409-5},
  year = {2022},
  date = {2022-01-01},
  urldate = {2022-01-01},
  booktitle = {Complex Networks \& Their Applications X},
  pages = {142--154},
  publisher = {Springer International Publishing},
  address = {Cham},
  abstract = {This paper investigates the stability and evolution of the world stage of global science at the city level by analyzing changes in co-authorship network centrality rankings over time. Driven by the problem that there exists no consensus in the literature on how the spatial unit “city” should be defined, we first propose a new approach to delineate so-called scientific cities. On a high-quality Web of Science dataset of 21.5 million publications over the period 2008–2020, we study changes in centrality rankings of subsequent 3-year time-slices of scientific city co-authorship networks at various levels of impact. We find that, over the years, the world stage of global science has become more stable. Additionally, by means of a comparison with degree respecting rewired networks we reveal how new co-authorships between authors from previously unconnected cities more often connect ‘close’ cities in the network periphery.},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Valeeva, D.; Takes, F. W.; Heemskerk, E. M.
Beaten paths towards the transnational corporate elite Journal Article
In: International Sociology, vol. 37, no. 1, pp. 97–123, 2022, ISSN: 0268-5809, (Publisher: SAGE Publications Ltd).
@comment{Journal article: Valeeva, Takes and Heemskerk 2022, International Sociology 37(1). The publisher name is stored in the publisher field rather than as free text in note, so styles decide whether to print it.}
@article{valeeva_beaten_2022,
  title = {Beaten paths towards the transnational corporate elite},
  author = {Valeeva, D. and Takes, F. W. and Heemskerk, E. M.},
  url = {https://doi.org/10.1177/02685809211051661},
  doi = {10.1177/02685809211051661},
  issn = {0268-5809},
  year = {2022},
  date = {2022-01-01},
  urldate = {2024-04-08},
  journal = {International Sociology},
  volume = {37},
  number = {1},
  pages = {97--123},
  publisher = {SAGE Publications Ltd},
  abstract = {The transnationalization of economic activities has fundamentally altered the world. One of the consequences that has intrigued scholars is the formation of a transnational corporate elite. While the literature tends to focus on the topology of the transnational board interlock network, little is known about its driving mechanisms. This article asks the question: what are the trajectories that corporate elites follow in driving the expansion of this network? To answer this, the authors employ a novel approach that models the transnationalization of elites using their board appointment sequences. The findings show that there are six transnationalization trajectories corporate elites follow to expand the network. The authors argue that while the transnational elite network appears as a global social structure, its generating mechanisms are regionally organized. This corroborates earlier findings on the fragmentation of the global network of corporate control, but also provides insights into how this network was shaped over time.},
  pubstate = {published},
  tppubtype = {article}
}
Kapadia, P.; Saxena, A.; Das, B.; Pei, Y.; Pechenizkiy, M.
Co-Attention Based Multi-contextual Fake News Detection Proceedings Article
In: Pacheco, D.; Teixeira, A. S.; Barbosa, H.; Menezes, R.; Mangioni, G. (Ed.): Complex Networks XIII, pp. 83–95, Springer International Publishing, Cham, 2022, ISBN: 978-3-031-17658-6.
@comment{Conference paper: Kapadia et al. 2022, in Complex Networks XIII, Springer. The abstract no longer begins with the run-together author list that the export had prepended to its first sentence.}
@inproceedings{kapadia_co-attention_2022,
  title = {Co-Attention Based Multi-contextual Fake News Detection},
  author = {Kapadia, P. and Saxena, A. and Das, B. and Pei, Y. and Pechenizkiy, M.},
  editor = {Pacheco, D. and Teixeira, A. S. and Barbosa, H. and Menezes, R. and Mangioni, G.},
  doi = {10.1007/978-3-031-17658-6_7},
  isbn = {978-3-031-17658-6},
  year = {2022},
  date = {2022-01-01},
  booktitle = {Complex Networks XIII},
  pages = {83--95},
  publisher = {Springer International Publishing},
  address = {Cham},
  abstract = {In recent years, the propagation of fake news on social media has emerged as a major challenge. Several approaches have been proposed to detect fake news on social media using the content of the microblogs and news-propagation network. In this work, we propose a method, named FND-NUP (Fake News Detection with News content, User profiles and Propagation networks), to detect fake news using users’ profile features, fake news content, and the propagation network. We use graph attention networks (GAT) to learn users representations using users’ profile features and news propagation networks. Next, we use co-attention technique to simultaneously learn the graph attention and the news content attention vectors, that will subsequently use to detect fake news. The derived co-attention weights allow our framework to provide the propagation graph-level and news article word-level explanations, respectively. We demonstrate that FND-NUP method outperforms state-of-the-art propagation-based and content-based fake news detection approaches.},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2021
Boekhout, H. D.; Traag, V. A.; Takes, F. W.
Investigating scientific mobility in co-authorship networks using multilayer temporal motifs Journal Article
In: Network Science, vol. 9, no. 3, pp. 354–386, 2021, ISSN: 2050-1242, 2050-1250.
@comment{Journal article: Boekhout, Traag and Takes 2021, Network Science 9(3). Ampersands in the abstract are escaped so the field is safe to typeset, and the empty trailing fragment marker was removed from the URL.}
@article{boekhout_investigating_2021,
  title = {Investigating scientific mobility in co-authorship networks using multilayer temporal motifs},
  author = {Boekhout, H. D. and Traag, V. A. and Takes, F. W.},
  url = {https://www.cambridge.org/core/journals/network-science/article/investigating-scientific-mobility-in-coauthorship-networks-using-multilayer-temporal-motifs/4A8730DC440D7BE7EF4E6306AEE6ACBD},
  doi = {10.1017/nws.2021.12},
  issn = {2050-1242, 2050-1250},
  year = {2021},
  date = {2021-09-01},
  urldate = {2024-04-08},
  journal = {Network Science},
  volume = {9},
  number = {3},
  pages = {354--386},
  abstract = {This paper introduces a framework for understanding complex temporal interaction patterns in large-scale scientific collaboration networks. In particular, we investigate how two key concepts in science studies, scientific collaboration and scientific mobility, are related and possibly differ between fields. We do so by analyzing multilayer temporal motifs: small recurring configurations of nodes and edges. Driven by the problem that many papers share the same publication year, we first provide a methodological contribution: an efficient counting algorithm for multilayer temporal motifs with concurrent edges. Next, we introduce a systematic categorization of the multilayer temporal motifs, such that each category reflects a pattern of behavior relevant to scientific collaboration and mobility. Here, a key question concerns the causal direction: does mobility lead to collaboration or vice versa? Applying this framework to scientific collaboration networks extracted from Web of Science (WoS) consisting of up to 7.7 million nodes (authors) and 94 million edges (collaborations), we find that international collaboration and international mobility reciprocally influence one another. Additionally, we find that Social sciences \& Humanities (SSH) scholars co-author to a greater extent with authors at a distance, while Mathematics \& Computer science (M\&C) scholars tend to continue to collaborate within the established knowledge network and organization.},
  pubstate = {published},
  tppubtype = {article}
}
de Bruin, G. J.; Veenman, C. J.; van den Herik, H. J.; Takes, F. W.
Supervised temporal link prediction in large-scale real-world networks Journal Article
In: Social Network Analysis and Mining, vol. 11, no. 1, pp. 80, 2021, ISSN: 1869-5469.
@comment{Journal article: de Bruin et al. 2021, Social Network Analysis and Mining 11(1), article number 80. Author names use the von-Last-comma-First form so the particles de and van den are parsed as part of the surname.}
@article{de_bruin_supervised_2021,
  title = {Supervised temporal link prediction in large-scale real-world networks},
  author = {de Bruin, G. J. and Veenman, C. J. and van den Herik, H. J. and Takes, F. W.},
  url = {https://doi.org/10.1007/s13278-021-00787-3},
  doi = {10.1007/s13278-021-00787-3},
  issn = {1869-5469},
  year = {2021},
  date = {2021-08-01},
  urldate = {2024-04-08},
  journal = {Social Network Analysis and Mining},
  volume = {11},
  number = {1},
  pages = {80},
  abstract = {Link prediction is a well-studied technique for inferring the missing edges between two nodes in some static representation of a network. In modern day social networks, the timestamps associated with each link can be used to predict future links between so-far unconnected nodes. In these so-called temporal networks, we speak of temporal link prediction. This paper presents a systematic investigation of supervised temporal link prediction on 26 temporal, structurally diverse, real-world networks ranging from thousands to a million nodes and links. We analyse the relation between global structural properties of each network and the obtained temporal link prediction performance, employing a set of well-established topological features commonly used in the link prediction literature. We report on four contributions. First, using temporal information, an improvement of prediction performance is observed. Second, our experiments show that degree disassortative networks perform better in temporal link prediction than assortative networks. Third, we present a new approach to investigate the distinction between networks modelling discrete events and networks modelling persistent relations. Unlike earlier work, our approach utilises information on all past events in a systematic way, resulting in substantially higher link prediction performance. Fourth, we report on the influence of the temporal activity of the node or the edge on the link prediction performance, and show that the performance differs depending on the considered network type. In the studied information networks, temporal information on the node appears most important. The findings in this paper demonstrate how link prediction can effectively be improved in temporal networks, explicitly taking into account the type of connectivity modelled by the temporal edge. More generally, the findings contribute to a better understanding of the mechanisms behind the evolution of networks.},
  pubstate = {published},
  tppubtype = {article}
}
Boekhout, H. D.; van der Weijden, I.; Waltman, L.
Gender differences in scientific careers: A large-scale bibliometric analysis Miscellaneous
2021, (arXiv:2106.12624 [cs]).
@comment{arXiv preprint: Boekhout, van der Weijden and Waltman 2021. The arXiv identifier is stored in the eprint fields instead of a free-text note. Percent signs in the abstract are escaped because a bare one starts a LaTeX comment and truncates the line once the field is written to the bbl file.}
@misc{boekhout_gender_2021,
  title = {Gender differences in scientific careers: A large-scale bibliometric analysis},
  author = {Boekhout, H. D. and van der Weijden, I. and Waltman, L.},
  url = {http://arxiv.org/abs/2106.12624},
  doi = {10.48550/arXiv.2106.12624},
  eprint = {2106.12624},
  eprinttype = {arXiv},
  eprintclass = {cs},
  year = {2021},
  date = {2021-06-01},
  urldate = {2024-05-14},
  publisher = {arXiv},
  abstract = {We present a large-scale bibliometric analysis of gender differences in scientific careers, covering all scientific disciplines and a large number of countries worldwide. We take a longitudinal perspective in which we trace the publication careers of almost six million male and female researchers in the period 1996-2018. Our analysis reveals an increasing trend in the percentage of women starting a career as publishing researcher, from 33\% in 2000 to about 40\% in recent years. Looking at cohorts of male and female researchers that started their publication career in the same year, we find that women seem to be somewhat less likely to continue their career as publishing researcher than men, but the difference is small. We also observe that men produce on average between 15\% and 20\% more publications than women. Moreover, in biomedical disciplines, men are about 25\% more likely than women to be last author of a publication, suggesting that men tend to have more senior roles than women. Compared with cross-sectional studies, our longitudinal analysis has the advantage of providing a more in-depth understanding of gender imbalances among authors of scientific publications.},
  pubstate = {published},
  tppubtype = {misc}
}
Publications from 2020 and earlier can be found here.