[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 09/16] sorting and filtering working


MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: arf20 <aruizfernandez05@xxxxxxxxx>

---
 config.c       |   2 +-
 config.h       |   2 +-
 index.c        |  79 +++++++++++++++++++++--
 index.h        |  21 +++++-
 index.htm.tmpl |  69 ++++++++++----------
 main.c         | 171 +++++++++++++++++++++++++++++++++++++++++++++----
 6 files changed, 290 insertions(+), 54 deletions(-)

diff --git a/config.c b/config.c
index d99cc2c..8d05eab 100644
--- a/config.c
+++ b/config.c
@@ -1,7 +1,7 @@
 /*
 
     arfnet2-search: Fast file indexer and search
-    Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+    Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
diff --git a/config.h b/config.h
index b12f226..aeb74d5 100644
--- a/config.h
+++ b/config.h
@@ -1,7 +1,7 @@
 /*
 
     arfnet2-search: Fast file indexer and search
-    Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+    Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
diff --git a/index.c b/index.c
index 3430178..096fe6e 100644
--- a/index.c
+++ b/index.c
@@ -1,7 +1,7 @@
 /*
 
     arfnet2-search: Fast file indexer and search
-    Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+    Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -20,6 +20,7 @@
 
 */
 
+#define _GNU_SOURCE
 #include "index.h"
 
 #include <sys/types.h>
@@ -115,6 +116,67 @@ results_insert(results_t *results, const node_data_t *result)
     results->results[results->size++] = result;
 }
 
+/* qsort_r() comparator for arrays of node_data_t*. arg points to two ints,
+ * { sort_type, desc }; a nonzero desc reverses the resulting order. */
+static int
+cmp_results(const void *_r1, const void *_r2, void *arg)
+{
+    const node_data_t *r1 = *(node_data_t**)_r1, *r2 = *(node_data_t**)_r2;
+    sort_type_t sort_type = ((int*)arg)[0];
+    int desc = ((int*)arg)[1], cmp = 0;
+
+    switch (sort_type) {
+    case SORT_NAME:
+        cmp = strcmp(r1->name, r2->name);
+    break;
+    case SORT_PATH:
+        cmp = strcmp(r1->path, r2->path);
+    break;
+    case SORT_MIME: /* a missing (NULL) mime sorts before any mime string */
+        cmp = (r1->mime && r2->mime) ? strcmp(r1->mime, r2->mime)
+            : (r1->mime != NULL) - (r2->mime != NULL);
+    break;
+    case SORT_SIZE: /* compare: the difference may not fit in an int */
+        cmp = (r1->stat.st_size > r2->stat.st_size)
+            - (r1->stat.st_size < r2->stat.st_size);
+    break;
+    case SORT_TIME:
+        cmp = (r1->stat.st_mtime > r2->stat.st_mtime)
+            - (r1->stat.st_mtime < r2->stat.st_mtime);
+    break;
+    }
+    return !desc ? cmp : -cmp;
+}
+
+void
+results_sort(results_t *results, sort_type_t sort_type, int desc)
+{
+    /* sorts in place; cmp_results() gets { sort_type, desc } through arg */
+    qsort_r(results->results, results->size, sizeof(*results->results),
+        cmp_results, (int[2]){ sort_type, desc });
+}
+
+results_t *
+results_filter(results_t *results, const filter_t *filter)
+{
+    /* Builds a new list holding only the entries inside every active bound
+     * (a bound of 0 is skipped). The input list is destroyed, so callers
+     * must continue with the returned list only. */
+    results_t *kept = results_new();
+    for (size_t idx = 0; idx < results->size; idx++) {
+        const node_data_t *node = results->results[idx];
+        if ((filter->time_low && node->stat.st_mtime < filter->time_low)
+            || (filter->time_high && node->stat.st_mtime > filter->time_high)
+            || (filter->size_low && node->stat.st_size < filter->size_low)
+            || (filter->size_high && node->stat.st_size > filter->size_high))
+            continue;
+        results_insert(kept, node);
+    }
+
+    results_destroy(results);
+    return kept;
+}
+
 void
 results_destroy(results_t *results)
 {
@@ -226,12 +288,18 @@ index_lookup_substr(map_t *index, const char *query,
 }
 
 void
-index_lookup_substr_nocase(map_t *index, const char *query,
+index_lookup_substr_caseinsensitive(map_t *index, const char *query,
     results_t *results)
 {
 
 }
 
+void
+index_lookup_exact(map_t *index, const char *query, results_t *results)
+{
+    /* stub: empty body, so LOOKUP_EXACT queries add nothing to results */
+}
+
 void
 index_lookup_regex(map_t *index, const char *query,
     results_t *results)
@@ -248,8 +316,11 @@ index_lookup(map_t *index, lookup_type_t type, const char *query)
     case LOOKUP_SUBSTR:
         index_lookup_substr(index, query, results);
     break;
-    case LOOKUP_SUBSTR_NOCASE:
-        index_lookup_substr_nocase(index, query, results);
+    case LOOKUP_SUBSTR_CASEINSENSITIVE:
+        index_lookup_substr_caseinsensitive(index, query, results);
+    break;
+    case LOOKUP_EXACT:
+        index_lookup_exact(index, query, results);
     break;
     case LOOKUP_REGEX:
         index_lookup_regex(index, query, results);
diff --git a/index.h b/index.h
index 187de94..26ac586 100644
--- a/index.h
+++ b/index.h
@@ -1,7 +1,7 @@
 /*
 
     arfnet2-search: Fast file indexer and search
-    Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+    Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -28,7 +28,8 @@
 
 typedef enum {
     LOOKUP_SUBSTR,
-    LOOKUP_SUBSTR_NOCASE,
+    LOOKUP_SUBSTR_CASEINSENSITIVE,
+    LOOKUP_EXACT,
     LOOKUP_REGEX
 } lookup_type_t;
 
@@ -40,6 +41,19 @@ typedef struct {
 
 typedef struct map_s *index_t;
 
+typedef enum {  /* key that results_sort() orders by */
+    SORT_NAME,  /* node name, strcmp() order */
+    SORT_MIME,  /* mime string, strcmp() order */
+    SORT_PATH,  /* path, strcmp() order */
+    SORT_SIZE,  /* stat.st_size */
+    SORT_TIME   /* stat.st_mtime */
+} sort_type_t;
+
+typedef struct {
+    time_t time_low, time_high; /* st_mtime bounds; 0 disables a bound */
+    size_t size_low, size_high; /* st_size bounds; 0 disables a bound */
+} filter_t;
+
 typedef struct {
     const node_data_t **results;
     size_t size, capacity;
@@ -50,6 +64,9 @@ void index_deinit();
 index_t index_new(size_t icapacity, const char *root, int examine);
 results_t *index_lookup(index_t index, lookup_type_t type, const char *query);
 void index_destroy(index_t index);
+
+void results_sort(results_t *results, sort_type_t sort_type, int desc);
+results_t *results_filter(results_t *results, const filter_t *filter);
 void results_destroy(results_t *results);
 
 #endif /* _INDEX_H */
diff --git a/index.htm.tmpl b/index.htm.tmpl
index fdb1b58..759625b 100644
--- a/index.htm.tmpl
+++ b/index.htm.tmpl
@@ -10,6 +10,7 @@
 }
 
 .box {
+    margin-bottom: 1em;
     width: 50%;
     min-width: 400px;
     display: block;
@@ -41,6 +42,10 @@
     width: 200px;
 }
 
+.advanced {
+    margin-left: 1em;
+}
+
 .collapse-title {
     font-weight: bold;
 }
@@ -53,6 +58,10 @@
     display: inline-block;
 }
 
+.result-header {
+    margin-bottom: 1em;
+}
+
 .result {
     margin-left: 1em;
     margin-bottom: 1em;
@@ -97,53 +106,45 @@
             <p>Search all of the ARFNET content fast</p>
             <form class="searchform" action="/query" method="get">
                 <div class="box form-inline">
-                    <input class="input" type="text" name="query" value="%s">
+                    <input class="input" type="text" name="q" value="%s">
                     <button type="submit">Search</button><br>
                 </div>
                 <div>
-                    <details>
+                    <details class="advanced">
                         <summary class="collapse-title">Advanced</summary>
-                        <input type="radio" id="substr" name="type" value="substr">
-                        <label for="substr">substring</label>
-                        <input type="radio" id="substr_nocase" name="type" value="substr_nocase">
-                        <label for="substr_nocase">case insensitive substring</label>
-                        <input type="radio" id="exact" name="type" value="exact">
-                        <label for="exact">exact</label>
-                        <input type="radio" id="regex" name="type" value="regex">
-                        <label for="regex">regex</label>
-                    </details>
-                    <details>
-                        <summary class="collapse-title">Filtering</summary>
+                        <p>
+                            <label class="label">Search type</label>
+                            <input type="radio" id="substr" name="t" value="s" checked="checked">
+                            <label for="substr">substring</label>
+                            <input type="radio" id="substr_nocase" name="t" value="i">
+                            <label for="substr_nocase">case insensitive substring</label>
+                            <input type="radio" id="exact" name="t" value="e">
+                            <label for="exact">exact</label>
+                            <input type="radio" id="regex" name="t" value="r">
+                            <label for="regex">regex</label>
+                        </p>
+                        <p>
                             <label class="label" for="mtime_start">Timeframe start</label>
-                            <input type="date" id="mtime_start" name="filter_mtime_start"><br>
+                            <input type="date" id="mtime_start" name="ftl" value="%s"><br>
+                        </p>
+                        <p>
                             <label class="label" for="mtime_end">Timeframe end</label>
-                            <input type="date" id="mtime_end" name="filter_mtime_end"><br>
+                            <input type="date" id="mtime_end" name="fth" value="%s"><br>
+                        </p>
+                        <p>
                             <label class="label" for="size_start">Size lower bound</label>
-                            <input type="text" id="size_start" name="filter_size_start"><br>
+                            <input type="text" id="size_start" name="fsl" value="%s"><br>
+                        </p>
+                        <p>
                             <label class="label" for="size_end">Size upper bound</label>
-                            <input type="text" id="size_end" name="filter_size_end"><br>
-                    </details>
-                    <details>
-                        <summary class="collapse-title">Sorting</summary>
-                        <div class="sort-left">
-                            <input type="radio" name="sort" id="name" value="name">
-                            <label for="name">name</label><br>
-                            <input type="radio" name="sort" id="time" value="time">
-                            <label for="time">time</label><br>
-                            <input type="radio" name="sort" id="size" value="size">
-                            <label for="size">size</label><br>
-                        </div>
-                        <div class="sort-right">
-                            <input type="radio" name="sort_dir" id="asc" value="asc">
-                            <label for="asc">ascending</label><br>
-                            <input type="radio" name="sort_dir" id="desc" value="desc">
-                            <label for="desc">descending</label><br>
-                        </div>
+                            <input type="text" id="size_end" name="fsh" value="%s"><br>
+                        </p>
                     </details>
                 </div>
             </form>
             <hr>
             %s
+            %s
         </main>
     </body>
 </html>
diff --git a/main.c b/main.c
index f3492aa..d7603fc 100644
--- a/main.c
+++ b/main.c
@@ -1,7 +1,7 @@
 /*
 
     arfnet2-search: Fast file indexer and search
-    Copyright (C) 2023 arf20 (Ángel Ruiz Fernandez)
+    Copyright (C) 2025 arf20 (Ángel Ruiz Fernandez)
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -20,6 +20,8 @@
 
 */
 
+#define _XOPEN_SOURCE 700 /* strptime() without destroying clock_gettime() */
+
 #include <sys/types.h>
 #include <sys/select.h>
 #include <sys/socket.h>
@@ -41,12 +43,57 @@ static char *index_format_template = NULL;
 
 static index_t g_index = NULL;
 
+static const char *result_html_header = /* count, seconds, 5x (href, arrow) */
+    "<p>%ld results in %f seconds</p>\n"
+    "<div class=\"result-header\">\n"
+        "<a class=\"name\" href=\"%s\">Name %s</a><a class=\"mime\" href=\"%s\">mime-type %s</a><br>\n"
+        "<a class=\"path\" href=\"%s\">path %s</a><div class=\"attrib\">"
+            "<a class=\"size\" href=\"%s\">Size %s</a>"
+            "<a class=\"time\" href=\"%s\">Time %s</a></div><br>\n"
+    "</div>\n";
+
 static const char *result_html_template = 
     "<div class=\"result\">\n"
-        "<span class=\"name\">%s</span>""<super class=\"mime\">%s</super><br>\n"
-        "<a class=\"path\" href=\"%s\">%s</a><div class=\"attrib\"><span class=\"size\">%s</span><span class=\"time\">%s</span></div><br>\n"
+        "<span class=\"name\">%s</span><super class=\"mime\">%s</super><br>\n"
+        "<a class=\"path\" href=\"%s\">%s</a><div class=\"attrib\">"
+            "<span class=\"size\">%s</span>"
+            "<span class=\"time\">%s</span></div><br>\n"
     "</div>\n";
 
+/* Renders the "N results in T seconds" line and the per-column sort links
+ * into a static buffer that the next call overwrites. baseurl is the query
+ * URL without sort parameters; a nonzero sort_order means descending. */
+static const char *
+generate_results_header_html(struct MHD_Connection *connection, const char *baseurl,
+    sort_type_t sort_type, int sort_order, size_t nresults, float lookup_time)
+{
+    /* link buffers must hold baseurl (built in 1024 bytes) plus "&s=?&o=?" */
+    static char buff[65535], name_url[1040], mime_url[1040], path_url[1040],
+        size_url[1040], time_url[1040];
+    const char *arrows[] = { "&#8593;", "&#8595;" }; /* [0] asc, [1] desc */
+
+    /* the active column toggles its order, every other one links to 'd' */
+    char name_order = (sort_type == SORT_NAME) && sort_order ? 'a' : 'd';
+    char mime_order = (sort_type == SORT_MIME) && sort_order ? 'a' : 'd';
+    char path_order = (sort_type == SORT_PATH) && sort_order ? 'a' : 'd';
+    char size_order = (sort_type == SORT_SIZE) && sort_order ? 'a' : 'd';
+    char time_order = (sort_type == SORT_TIME) && sort_order ? 'a' : 'd';
+    snprintf(name_url, sizeof name_url, "%s&s=n&o=%c", baseurl, name_order);
+    snprintf(mime_url, sizeof mime_url, "%s&s=m&o=%c", baseurl, mime_order);
+    snprintf(path_url, sizeof path_url, "%s&s=p&o=%c", baseurl, path_order);
+    snprintf(size_url, sizeof size_url, "%s&s=s&o=%c", baseurl, size_order);
+    snprintf(time_url, sizeof time_url, "%s&s=t&o=%c", baseurl, time_order);
+    /* compare against 'd': !'a' and !'d' are both 0; the template uses %ld */
+    snprintf(buff, sizeof buff, result_html_header, (long)nresults, lookup_time,
+        name_url, arrows[name_order == 'd'],
+        mime_url, arrows[mime_order == 'd'],
+        path_url, arrows[path_order == 'd'],
+        size_url, arrows[size_order == 'd'],
+        time_url, arrows[time_order == 'd']
+    );
+    return buff;
+}
+
 static const char *
 sizestr(size_t size)
 {
@@ -68,13 +115,12 @@ static const char *
 generate_results_html(results_t *results)
 {
     static char buff[65535], timebuf[256], urlbuf[4096];
-
     char *pos = buff;
   
     for (int i = 0; i < results->size; i++) {
         const node_data_t *data = results->results[i];
-        struct tm *tm_mtim = gmtime(&data->stat.st_mtim.tv_sec);
-        strftime(timebuf, 256, "%Y-%m-%d %H:%M:%S", tm_mtim);
+        struct tm *tm_mtim = gmtime(&data->stat.st_mtime);
+        strftime(timebuf, 256, "%b %d %Y", tm_mtim);
 
         snprintf(urlbuf, 4096, "%s%s", subdir, data->path);
 
@@ -117,7 +163,7 @@ enum MHD_Result answer_to_connection(
     int ret;
 
     if (strcmp(method, "GET") == 0 && strcmp(url, "/") == 0) {
-        snprintf(buff, BUFF_SIZE, index_format_template, "", "");
+        snprintf(buff, BUFF_SIZE, index_format_template, "", "", "");
 
         response = MHD_create_response_from_buffer(strlen(buff), (void*)buff,
             MHD_RESPMEM_PERSISTENT);
@@ -127,23 +173,124 @@ enum MHD_Result answer_to_connection(
         MHD_destroy_response(response);
     }
     else if (strcmp(method, "GET") == 0 && strcmp(url, "/query") == 0) {
+        /* get query */
         const char *query = MHD_lookup_connection_value(connection,
-            MHD_GET_ARGUMENT_KIND, "query");
+            MHD_GET_ARGUMENT_KIND, "q");
+
+        /* get and parse query type */
+        lookup_type_t query_type = -1;
+        const char *query_type_str = MHD_lookup_connection_value(connection,
+            MHD_GET_ARGUMENT_KIND, "t");
+        if (!query_type_str)
+            query_type_str = "s";
+
+        if (query_type_str) {
+            switch (query_type_str[0]) {
+            case 's': query_type = LOOKUP_SUBSTR; break;
+            case 'i': query_type = LOOKUP_SUBSTR_CASEINSENSITIVE; break;
+            case 'e': query_type = LOOKUP_EXACT; break;
+            case 'r': query_type = LOOKUP_REGEX; break;
+            }
+        } else query_type = LOOKUP_SUBSTR;
+
+        /* get and parse sorting */
+        sort_type_t sort_type = SORT_NAME;
+        int sort_order = 0;
+        const char *sort_type_str = MHD_lookup_connection_value(connection,
+            MHD_GET_ARGUMENT_KIND, "s");
+        const char *sort_order_str = MHD_lookup_connection_value(connection,
+            MHD_GET_ARGUMENT_KIND, "o");
+        if (sort_type_str) {
+            switch (sort_type_str[0]) {
+            case 'n': sort_type = SORT_NAME; break;
+            case 'm': sort_type = SORT_MIME; break;
+            case 'p': sort_type = SORT_PATH; break;
+            case 's': sort_type = SORT_SIZE; break;
+            case 't': sort_type = SORT_TIME; break;
+            }
+        }
+        if (sort_order_str)
+            sort_order = sort_order_str[0] == 'd';
+
+        /* get and parse filters */
+        const char *filter_time_low = MHD_lookup_connection_value(connection,
+            MHD_GET_ARGUMENT_KIND, "ftl");
+        const char *filter_time_high = MHD_lookup_connection_value(connection,
+            MHD_GET_ARGUMENT_KIND, "fth");
+        const char *filter_size_low = MHD_lookup_connection_value(connection,
+            MHD_GET_ARGUMENT_KIND, "fsl");
+        const char *filter_size_high = MHD_lookup_connection_value(connection,
+            MHD_GET_ARGUMENT_KIND, "fsh");
+
+        filter_t filter = { 0 };
+
+        struct tm filter_tm;
+        if (strptime(filter_time_low, "%Y-%m-%d", &filter_tm))
+            filter.time_low = mktime(&filter_tm);
+        else
+            filter.time_low = 0;
+
+        if (strptime(filter_time_high, "%Y-%m-%d", &filter_tm))
+            filter.time_high = mktime(&filter_tm);
+        else
+            filter.time_high = 0;
+
+        filter.size_low = atoi(filter_size_low);
+        filter.size_high = atoi(filter_size_high);
+
+
+        /* build baseurl with query and filters (no sort) for sort links */
+        char baseurl[1024];
+        snprintf(baseurl, 1024, "/query?q=%s&t=%s&ftl=%s&fth=%s&fsl=%s&fsh=%s",
+            query,
+            query_type_str,
+            filter_time_low ? filter_time_low : "",
+            filter_time_high ? filter_time_high : "",
+            filter_size_low ? filter_size_low : "",
+            filter_size_high ? filter_size_high : ""
+        );
+
+
+        /* lookup query in index with type, measuring time */
+        struct timespec start, finish;
+        clock_gettime(CLOCK_REALTIME, &start);
 
         results_t *results = NULL;
-        if (g_index)
-            results = index_lookup(g_index, LOOKUP_SUBSTR, query);
+        if (query && g_index)
+            results = index_lookup(g_index, query_type, query);
+
+        clock_gettime(CLOCK_REALTIME, &finish);
+
+        /* sort results */
+        if (results)
+            results_sort(results, sort_type, sort_order);
 
+        /* filter results */
         if (results)
+            results = results_filter(results, &filter);
+
+        /* generate response with header, results, and time */
+        float lookup_time = (finish.tv_sec + (0.000000001 * finish.tv_nsec)) - 
+            (start.tv_sec + (0.000000001 * start.tv_nsec));
+
+        if (query && results)
             snprintf(buff, BUFF_SIZE, index_format_template, query,
+                filter_time_low ? filter_time_low : "",
+                filter_time_high ? filter_time_high : "",
+                filter_size_low ? filter_size_low : "",
+                filter_size_high ? filter_size_high : "",
+                generate_results_header_html(connection, baseurl, sort_type,
+                    sort_order, results->size, lookup_time),
                 generate_results_html(results));
         else
-            snprintf(buff, BUFF_SIZE, index_format_template, query,
-                "indexing in progress... try again later");
+            snprintf(buff, BUFF_SIZE, index_format_template, query ? query : "",
+                "", "indexing in progress... try again later");
 
+        /* send it */
         response = MHD_create_response_from_buffer(strlen(buff), (void*)buff,
             MHD_RESPMEM_PERSISTENT);
 
+        /* cleanup */
         if (results)
             results_destroy(results);
 
-- 
2.47.3


References:
[arfnet2-search PATCH 00/16] Pre-release deployment, arf20 <arf20@xxxxxxxxx>