Skip to main content
Code Review

Return to Question

replaced http://codereview.stackexchange.com/ with https://codereview.stackexchange.com/
Source Link

Inspired by a comment on this question this question, I thought I'd exercise my C skills by implementing something like it using a priority queue.

Inspired by a comment on this question, I thought I'd exercise my C skills by implementing something like it using a priority queue.

Inspired by a comment on this question, I thought I'd exercise my C skills by implementing something like it using a priority queue.

Tweeted twitter.com/StackCodeReview/status/742559796260438016
Source Link
Edward
  • 67.2k
  • 4
  • 120
  • 284

Longest lines using priority queue

Inspired by a comment on this question, I thought I'd exercise my C skills by implementing something like it using a priority queue.

The idea is to be able to invoke the program like this:

./longline 10 < /usr/share/dict/linux.words

And get it to print the 10 longest lines in the linux.words file. I chose to implement the program using getline() which is a handy POSIX-2008 function that automatically allocates memory for the line it reads and can reuse the buffer it allocates. It only reallocates when a new line is longer. On my machine, using this function (defined in stdio.h) requires the #define _GNU_SOURCE that you see at the top of the file.

The goals

The goal was primarily to implement something like a priority queue using an unordered array as a heap. I write "something like" a priority queue because this has one particular feature -- it only keeps the lowest priority items if a new item is added when the queue is already full.

I also wanted to minimize memory allocations and reallocations. Using it on the linux.words file mentioned above (which has 479828 words in it, one per line), valgrind reports:

total heap usage: 108 allocs, 108 frees, 13,136 bytes allocated

which is satisfyingly parsimonious for a file that's 4953680 bytes long.

Notes

The first entry in the array items[0] is generally not used; the top of the queue is located at items[1]. In particular, items[0] is only used as a convenient scratch space within pq_insert().

Questions

I'm interested in a general review, and in particular anywhere the code could be better written or more clearly written.

I'm also not entirely happy with the creation of the output_queue to reverse the order of the items in the queue. Is there a better way to do that?

longline.c

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
typedef struct {
 int priority;
 char *data;
} item;
/*
 * Swap the contents of two items.
 */
void item_swap(item * a, item * b)
{
 // swap the two passed nodes
 int pri = a->priority;
 char *name = a->data;
 a->priority = b->priority;
 a->data = b->data;
 b->priority = pri;
 b->data = name;
}
/*
 * Item comparison functon.
 *
 * Returns true iff a is lower priority than b.
 */
bool item_less(const item * a, const item * b)
{
 return a->priority < b->priority;
}
typedef struct {
 size_t size;
 size_t last_used;
 item items[1];
} priority_queue;
/*
 * Dump the contents of a priority queue.
 *
 * Primarily intended for troubleshooting.
 */
void pq_dump(const priority_queue * pqueue)
{
 if (pqueue == NULL)
 return;
 for (size_t i = 1; i <= pqueue->last_used; ++i) {
 printf("%lu: %d, %s\n", i, pqueue->items[i].priority,
 pqueue->items[i].data);
 }
}
/*
 * Create a fixed size priority queue and initialize.
 */
priority_queue *pq_create(size_t size)
{
 priority_queue *pq = malloc(sizeof(priority_queue) + sizeof(item) * (size + 1));
 if (pq) {
 pq->size = size;
 pq->last_used = 0;
 }
 return pq;
}
/*
 * Free all allocated memory for this queue.
 */
void pq_free(priority_queue * pqueue)
{
 if (pqueue) {
 for (size_t i = pqueue->last_used; i; --i) {
 free(pqueue->items[i].data);
 }
 free(pqueue);
 pqueue = NULL;
 }
}
/*
 * Readjust this priority queue's top three nodes,
 * returning false if no adjustment was needed.
 */
bool pq_adjust(priority_queue * pqueue, size_t * i)
{
 if (*i * 2 + 1 <= pqueue->last_used) {
 // handle case of two children
 size_t smallerchild =
 item_less(&pqueue->items[*i * 2],
 &pqueue->items[*i * 2 + 1]) ? *i * 2 : *i * 2 + 1;
 if (item_less(&pqueue->items[smallerchild], &pqueue->items[*i])) {
 item_swap(&pqueue->items[smallerchild], &pqueue->items[*i]);
 *i = smallerchild;
 return true;
 }
 } else if (*i * 2 <= pqueue->last_used) {
 // handle case of only one child
 size_t smallerchild = *i * 2;
 if (item_less(&pqueue->items[smallerchild], &pqueue->items[*i])) {
 item_swap(&pqueue->items[smallerchild], &pqueue->items[*i]);
 *i = smallerchild;
 return true;
 }
 }
 return false;
}
/*
 * returns true iff item was added successfully.
 *
 * Note that if a new item is a LOWER priority
 * than the current head of queue, the new item
 * will be added, replacing the current head of queue.
 * This is opposite the usual convention for a priority queue,
 * but intentional behavior here.
 */
bool pq_insert(priority_queue * pqueue, int pri, char *name)
{
 if (pqueue == NULL)
 return false;
 if (pqueue->size == pqueue->last_used) {
 // handle full queue
 // only insert if current top priority is LESS THAN new node
 pqueue->items[0].priority = pri;
 if (item_less(&pqueue->items[1], &pqueue->items[0])) {
 pqueue->items[1].priority = pri;
 free(pqueue->items[1].data);
 pqueue->items[1].data = name;
 for (size_t i = 1; pq_adjust(pqueue, &i);) {
 }
 } else {
 return false;
 }
 } else {
 size_t i = ++pqueue->last_used;
 pqueue->items[i].priority = pri;
 pqueue->items[i].data = name;
 // assure heap order property holds
 while (i / 2 && item_less(&pqueue->items[i], &pqueue->items[i / 2])) {
 item_swap(&pqueue->items[i], &pqueue->items[i / 2]);
 i /= 2; // go to parent
 }
 }
 return true;
}
/*
 * peek at the highest priority item in queue
 */
const item *pq_peek(const priority_queue * pqueue)
{
 if (pqueue == NULL || pqueue->last_used == 0)
 return NULL;
 return &pqueue->items[1];
}
/*
 * remove the top node
 */
void pq_pop(priority_queue * pqueue)
{
 if (pqueue == NULL || pqueue->last_used == 0)
 return;
 // move last node to top
 pqueue->items[1].priority = pqueue->items[pqueue->last_used].priority;
 free(pqueue->items[1].data);
 pqueue->items[1].data = pqueue->items[pqueue->last_used].data;
 --pqueue->last_used;
 for (size_t i = 1; pq_adjust(pqueue, &i);) {
 }
}
/*
 * remove top node, but return rather than freeing the data.
 */
char *pq_pop_no_delete(priority_queue * pqueue)
{
 if (pqueue == NULL || pqueue->last_used == 0)
 return NULL;
 // move last node to top
 pqueue->items[1].priority = pqueue->items[pqueue->last_used].priority;
 char *retval = pqueue->items[1].data;
 pqueue->items[1].data = pqueue->items[pqueue->last_used].data;
 --pqueue->last_used;
 for (size_t i = 1; pq_adjust(pqueue, &i);) {
 }
 return retval;
}
/*
 * Read the input stream, saving and printing the n longest lines
 * in descending size.
 */
int main(int argc, char *argv[])
{
 if (argc != 2) {
 printf
 ("Usage: %s n\nwill show the n longest lines in the input stream\n",
 argv[0]);
 return 0;
 }
 int n = atoi(argv[1]);
 if (n < 0) {
 return 0;
 }
 priority_queue *pqueue = pq_create(n);
 char *line = NULL;
 size_t len = 0;
 ssize_t read;
 while ((read = getline(&line, &len, stdin)) != -1) {
 if (pq_insert(pqueue, read, line)) {
 line = NULL;
 len = 0;
 }
 }
 free(line);
 priority_queue *output_queue = pq_create(n);
 const item *it;
 while ((it = pq_peek(pqueue)) != NULL) {
 pq_insert(output_queue, -it->priority, it->data);
 pq_pop_no_delete(pqueue);
 }
 pq_free(pqueue);
 while ((it = pq_peek(output_queue)) != NULL) {
 printf("%s", it->data);
 pq_pop(output_queue);
 }
 pq_free(output_queue);
}
lang-c

AltStyle によって変換されたページ (->オリジナル) /