@inproceedings{sitchinava-spaa08, author = "Lars Arge and Michael T. Goodrich and Mark Nelson and Nodari Sitchinava", title = "Fundamental parallel algorithms for private-cache chip multiprocessors", booktitle = "Proceedings of the 20th ACM Symposium on Parallelism in Algorithms and Architectures", series = "SPAA '08", pages = "197-206", year = 2008, month = 6, doi = "10.1145/1378533.1378573", abstract = "In this paper, we study parallel algorithms for private-cache chip multiprocessors (CMPs), focusing on methods for foundational problems that are scalable with the number of cores. By focusing on private-cache CMPs, we show that we can design efficient algorithms that need no additional assumptions about the way cores are interconnected, for we assume that all inter-processor communication occurs through the memory hierarchy. We study several fundamental problems, including prefix sums, selection, and sorting, which often form the building blocks of other parallel algorithms. Indeed, we present two sorting algorithms, a distribution sort and a mergesort. Our algorithms are asymptotically optimal in terms of parallel cache accesses and space complexity under reasonable assumptions about the relationships between the number of processors, the size of memory, and the size of cache blocks. In addition, we study sorting lower bounds in a computational model, which we call the parallel external-memory (PEM) model, that formalizes the essential properties of our algorithms for private-cache CMPs." }