/* PIKApp/plug-ins/help-browser/uri.c */

/* PIKA - Photo and Image Kooker Application
 * a rebranding of The GNU Image Manipulation Program (created with heckimp)
 * A derived work which may be trivial. However, any changes may be (C)2023 by Aldercone Studio
 *
 * Original copyright, applying to most contents (license remains unchanged): 
 * Copyright (C) 1995 Spencer Kimball and Peter Mattis
 *
 * The PIKA Help Browser - URI functions
 * Copyright (C) 2001  Jacob Schroeder  <jacob@convergence.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include "config.h"

#include <string.h>

#include <glib.h>

#include "uri.h"

/*  #define URI_DEBUG 1  */

typedef enum
{
  URI_UNKNOWN,
  URI_ABSURI,
  URI_NETPATH,
  URI_ABSPATH,
  URI_RELPATH,
  URI_QUERY,
  URI_EMPTY,
  URI_FRAGMENT,
  URI_INVALID
} UriType;


static UriType
uri_get_type (const gchar *uri)
{
  gchar        c;
  const gchar *cptr;
  UriType      type = URI_UNKNOWN;

  if (!uri)
    return type;

  cptr = uri;
  c = *cptr++;

  if (g_ascii_isalpha (c))
    {
      type = URI_RELPATH;  /* assume relative path */

      while ((c = *cptr++))
        {
          if (g_ascii_isalnum (c) || c == '+' || c == '-' || c == '.')
            continue;

          if (c == ':')
            {
              /* it was a scheme */
              type = URI_ABSURI;
            }
          break;
        }
    }
  else
    {
      switch (c)
        {
        case '/':
          if (*cptr == '/')
            {
              cptr++;
              type = URI_NETPATH;
            }
          else
            {
              type = URI_ABSPATH;
            }
          break;
        case '?':
          type = URI_QUERY;
          break;
        case '#':
          type = URI_FRAGMENT;
          break;
        case '\0':
          type = URI_EMPTY;
          break;
        default:
          type = URI_RELPATH;
          break;
        }
    }

#ifdef URI_DEBUG
  g_print ("uri_get_type (\"%s\") -> ", uri);
  switch (type)
    {
    case URI_UNKNOWN:  g_print ("unknown");  break;
    case URI_ABSURI:   g_print ("absuri");   break;
    case URI_NETPATH:  g_print ("netpath");  break;
    case URI_ABSPATH:  g_print ("abspath");  break;
    case URI_RELPATH:  g_print ("relpath");  break;
    case URI_QUERY:    g_print ("query");    break;
    case URI_EMPTY:    g_print ("empty");    break;
    case URI_FRAGMENT: g_print ("fragment"); break;
    case URI_INVALID:  g_print ("invalid");  break;
    }
  g_print ("\n");
#endif

  return type;
}

gchar *
uri_to_abs (const gchar *uri,
            const gchar *base_uri)
{
  gchar        c;
  const gchar *cptr;
  gchar       *retval    = NULL;
  UriType      uri_type  = URI_UNKNOWN;
  UriType      base_type = URI_UNKNOWN;

  gint base_cnt    =  0;  /* no of chars to be copied from base URI  */
  gint uri_cnt     =  0;  /* no of chars to be copied from URI       */
  gint sep_cnt     =  0;  /* no of chars to be inserted between them */

  const gchar *sep_str = ""; /* string to insert between base and uri */
  const gchar *part;
  const gchar *last_segment = NULL;

#ifdef URI_DEBUG
  g_print ("uri_to_abs (\"%s\", \"%s\")\n", uri, base_uri);
#endif

  /* this function does not use the algorithm that is being proposed
   * in RFC 2396. Instead it analyses the first characters of each
   * URI to determine its kind (abs, net, path, ...).
   * After that it locates the missing parts in the base URI and then
   * concats everything into a newly allocated string.
   */

  /* determine the kind of the URIs */
  uri_type = uri_get_type (uri);

  if (uri_type != URI_ABSURI)
    {
      base_type = uri_get_type (base_uri);

      if (base_type != URI_ABSURI)
        return NULL;  /*  neither uri nor base uri are absolute  */
    }

  /* find missing parts in base URI */
  switch (uri_type)
    {
    case URI_ABSURI:
      /* base uri not needed */
      break;

    case URI_QUERY:
      /* ??? last segment? */
      uri_type = URI_RELPATH;
    case URI_NETPATH:  /* base scheme */
    case URI_ABSPATH:  /* base scheme and authority */
    case URI_RELPATH:  /* base scheme, authority and path */
      cptr = base_uri;

      /* skip scheme */
      while ((c = *cptr++) && c != ':')
        ; /* nada */

      base_cnt = cptr - base_uri; /* incl : */

      if (*cptr != '/')
        {
          /* completion not possible */
          return NULL;
        }

      if (uri_type == URI_NETPATH)
        break;

      /* skip authority */
      if (cptr[0] == '/' && cptr[1] == '/')
        {
          part = cptr;
          cptr += 2;

          while ((c = *cptr++) && c != '/' && c != '?' && c != '#')
            ; /* nada */

          cptr--;
          base_cnt += cptr - part;
        }

      if (uri_type == URI_ABSPATH)
        break;

      /* skip path */
      if (*cptr != '/')
        {
          sep_cnt = 1;
          sep_str = "/";
          break;
        }

      part = cptr;

      g_assert (*cptr == '/');

      while ((c = *cptr++) && c != '?' && c != '#')
        {
          if (c == '/')
            last_segment = cptr - 1;
        };

      g_assert (last_segment);

      cptr = last_segment;

      while ((c = *uri) && c == '.' && cptr > part)
        {
          gint shift_segment = 0;

          c = uri[1];

          if (c == '.' )
            {
              c = uri[2];
              shift_segment = 1;
            }

          if (c == '/')
            {
              uri += 2;
            }
          else if (c == 0 || c == '?' || c == '#')
            {
              uri += 1;
            }
          else
            {
              break;
            }

          g_assert (*cptr == '/');

          if (shift_segment)
            {
              uri += 1;
              while (cptr > part && *--cptr != '/')
                ; /* nada */
            }
        }

      base_cnt += cptr - part + 1;
      break;

    case URI_EMPTY:
    case URI_FRAGMENT:
      /* use whole base uri */
      base_cnt = strlen (base_uri);
      break;

    case URI_UNKNOWN:
    case URI_INVALID:
      return NULL;
    }

  /* do not include fragment part from the URI reference */
  for (cptr = uri; (c = *cptr) && c != '#'; cptr++)
    ; /* nada */

  uri_cnt = cptr - uri;

  /* allocate string and copy characters */

  retval = g_new (gchar, base_cnt + sep_cnt + uri_cnt + 1);

  if (base_cnt)
    strncpy (retval, base_uri, base_cnt);

  if (sep_cnt)
    strncpy (retval + base_cnt, sep_str, sep_cnt);

  if (uri_cnt)
    strncpy (retval + base_cnt + sep_cnt, uri, uri_cnt);

  retval[base_cnt + sep_cnt + uri_cnt] = '\0';

#ifdef URI_DEBUG
  g_print ("  ->  \"%s\"\n", retval);
#endif

  return retval;
}

#if 0
RFC 2396                   URI Generic Syntax                August 1998


A. Collected BNF for URI

      URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      absoluteURI   = scheme ":" ( hier_part | opaque_part )
      relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]

      hier_part     = ( net_path | abs_path ) [ "?" query ]
      opaque_part   = uric_no_slash *uric

      uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
                      "&" | "=" | "+" | "$" | ","

      net_path      = "//" authority [ abs_path ]
      abs_path      = "/"  path_segments
      rel_path      = rel_segment [ abs_path ]

      rel_segment   = 1*( unreserved | escaped |
                          ";" | "@" | "&" | "=" | "+" | "$" | "," )

      scheme        = alpha *( alpha | digit | "+" | "-" | "." )

      authority     = server | reg_name

      reg_name      = 1*( unreserved | escaped | "$" | "," |
                          ";" | ":" | "@" | "&" | "=" | "+" )

      server        = [ [ userinfo "@" ] hostport ]
      userinfo      = *( unreserved | escaped |
                         ";" | ":" | "&" | "=" | "+" | "$" | "," )

      hostport      = host [ ":" port ]
      host          = hostname | IPv4address
      hostname      = *( domainlabel "." ) toplabel [ "." ]
      domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
      toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
      IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
      port          = *digit

      path          = [ abs_path | opaque_part ]
      path_segments = segment *( "/" segment )
      segment       = *pchar *( ";" param )
      param         = *pchar
      pchar         = unreserved | escaped |
                      ":" | "@" | "&" | "=" | "+" | "$" | ","

      query         = *uric

      fragment      = *uric

      uric          = reserved | unreserved | escaped
      reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                      "$" | ","
      unreserved    = alphanum | mark
      mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
                      "(" | ")"

      escaped       = "%" hex hex
      hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
                              "a" | "b" | "c" | "d" | "e" | "f"

      alphanum      = alpha | digit
      alpha         = lowalpha | upalpha

      lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
                 "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
                 "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
      upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
                 "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
                 "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
      digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
                 "8" | "9"

#endif
Initial checkin of Pika from heckimp 2023-09-26 00:35:21 +02:00			`/* PIKA - Photo and Image Kooker Application`
			`* a rebranding of The GNU Image Manipulation Program (created with heckimp)`
			`* A derived work which may be trivial. However, any changes may be (C)2023 by Aldercone Studio`
			`*`
			`* Original copyright, applying to most contents (license remains unchanged):`
			`* Copyright (C) 1995 Spencer Kimball and Peter Mattis`
			`*`
			`* The PIKA Help Browser - URI functions`
			`* Copyright (C) 2001 Jacob Schroeder <jacob@convergence.de>`
			`*`
			`* This program is free software: you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 3 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program. If not, see <https://www.gnu.org/licenses/>.`
			`*/`

			`#include "config.h"`

			`#include <string.h>`

			`#include <glib.h>`

			`#include "uri.h"`

			`/* #define URI_DEBUG 1 */`

			`typedef enum`
			`{`
			`URI_UNKNOWN,`
			`URI_ABSURI,`
			`URI_NETPATH,`
			`URI_ABSPATH,`
			`URI_RELPATH,`
			`URI_QUERY,`
			`URI_EMPTY,`
			`URI_FRAGMENT,`
			`URI_INVALID`
			`} UriType;`


			`static UriType`
			`uri_get_type (const gchar *uri)`
			`{`
			`gchar c;`
			`const gchar *cptr;`
			`UriType type = URI_UNKNOWN;`

			`if (!uri)`
			`return type;`

			`cptr = uri;`
			`c = *cptr++;`

			`if (g_ascii_isalpha (c))`
			`{`
			`type = URI_RELPATH; /* assume relative path */`

			`while ((c = *cptr++))`
			`{`
			`if (g_ascii_isalnum (c) \|\| c == '+' \|\| c == '-' \|\| c == '.')`
			`continue;`

			`if (c == ':')`
			`{`
			`/* it was a scheme */`
			`type = URI_ABSURI;`
			`}`
			`break;`
			`}`
			`}`
			`else`
			`{`
			`switch (c)`
			`{`
			`case '/':`
			`if (*cptr == '/')`
			`{`
			`cptr++;`
			`type = URI_NETPATH;`
			`}`
			`else`
			`{`
			`type = URI_ABSPATH;`
			`}`
			`break;`
			`case '?':`
			`type = URI_QUERY;`
			`break;`
			`case '#':`
			`type = URI_FRAGMENT;`
			`break;`
			`case '\0':`
			`type = URI_EMPTY;`
			`break;`
			`default:`
			`type = URI_RELPATH;`
			`break;`
			`}`
			`}`

			`#ifdef URI_DEBUG`
			`g_print ("uri_get_type (\"%s\") -> ", uri);`
			`switch (type)`
			`{`
			`case URI_UNKNOWN: g_print ("unknown"); break;`
			`case URI_ABSURI: g_print ("absuri"); break;`
			`case URI_NETPATH: g_print ("netpath"); break;`
			`case URI_ABSPATH: g_print ("abspath"); break;`
			`case URI_RELPATH: g_print ("relpath"); break;`
			`case URI_QUERY: g_print ("query"); break;`
			`case URI_EMPTY: g_print ("empty"); break;`
			`case URI_FRAGMENT: g_print ("fragment"); break;`
			`case URI_INVALID: g_print ("invalid"); break;`
			`}`
			`g_print ("\n");`
			`#endif`

			`return type;`
			`}`

			`gchar *`
			`uri_to_abs (const gchar *uri,`
			`const gchar *base_uri)`
			`{`
			`gchar c;`
			`const gchar *cptr;`
			`gchar *retval = NULL;`
			`UriType uri_type = URI_UNKNOWN;`
			`UriType base_type = URI_UNKNOWN;`

			`gint base_cnt = 0; /* no of chars to be copied from base URI */`
			`gint uri_cnt = 0; /* no of chars to be copied from URI */`
			`gint sep_cnt = 0; /* no of chars to be inserted between them */`

			`const gchar *sep_str = ""; /* string to insert between base and uri */`
			`const gchar *part;`
			`const gchar *last_segment = NULL;`

			`#ifdef URI_DEBUG`
			`g_print ("uri_to_abs (\"%s\", \"%s\")\n", uri, base_uri);`
			`#endif`

			`/* this function does not use the algorithm that is being proposed`
			`* in RFC 2396. Instead it analyses the first characters of each`
			`* URI to determine its kind (abs, net, path, ...).`
			`* After that it locates the missing parts in the base URI and then`
			`* concats everything into a newly allocated string.`
			`*/`

			`/* determine the kind of the URIs */`
			`uri_type = uri_get_type (uri);`

			`if (uri_type != URI_ABSURI)`
			`{`
			`base_type = uri_get_type (base_uri);`

			`if (base_type != URI_ABSURI)`
			`return NULL; /* neither uri nor base uri are absolute */`
			`}`

			`/* find missing parts in base URI */`
			`switch (uri_type)`
			`{`
			`case URI_ABSURI:`
			`/* base uri not needed */`
			`break;`

			`case URI_QUERY:`
			`/* ??? last segment? */`
			`uri_type = URI_RELPATH;`
			`case URI_NETPATH: /* base scheme */`
			`case URI_ABSPATH: /* base scheme and authority */`
			`case URI_RELPATH: /* base scheme, authority and path */`
			`cptr = base_uri;`

			`/* skip scheme */`
			`while ((c = *cptr++) && c != ':')`
			`; /* nada */`

			`base_cnt = cptr - base_uri; /* incl : */`

			`if (*cptr != '/')`
			`{`
			`/* completion not possible */`
			`return NULL;`
			`}`

			`if (uri_type == URI_NETPATH)`
			`break;`

			`/* skip authority */`
			`if (cptr[0] == '/' && cptr[1] == '/')`
			`{`
			`part = cptr;`
			`cptr += 2;`

			`while ((c = *cptr++) && c != '/' && c != '?' && c != '#')`
			`; /* nada */`

			`cptr--;`
			`base_cnt += cptr - part;`
			`}`

			`if (uri_type == URI_ABSPATH)`
			`break;`

			`/* skip path */`
			`if (*cptr != '/')`
			`{`
			`sep_cnt = 1;`
			`sep_str = "/";`
			`break;`
			`}`

			`part = cptr;`

			`g_assert (*cptr == '/');`

			`while ((c = *cptr++) && c != '?' && c != '#')`
			`{`
			`if (c == '/')`
			`last_segment = cptr - 1;`
			`};`

			`g_assert (last_segment);`

			`cptr = last_segment;`

			`while ((c = *uri) && c == '.' && cptr > part)`
			`{`
			`gint shift_segment = 0;`

			`c = uri[1];`

			`if (c == '.' )`
			`{`
			`c = uri[2];`
			`shift_segment = 1;`
			`}`

			`if (c == '/')`
			`{`
			`uri += 2;`
			`}`
			`else if (c == 0 \|\| c == '?' \|\| c == '#')`
			`{`
			`uri += 1;`
			`}`
			`else`
			`{`
			`break;`
			`}`

			`g_assert (*cptr == '/');`

			`if (shift_segment)`
			`{`
			`uri += 1;`
			`while (cptr > part && *--cptr != '/')`
			`; /* nada */`
			`}`
			`}`

			`base_cnt += cptr - part + 1;`
			`break;`

			`case URI_EMPTY:`
			`case URI_FRAGMENT:`
			`/* use whole base uri */`
			`base_cnt = strlen (base_uri);`
			`break;`

			`case URI_UNKNOWN:`
			`case URI_INVALID:`
			`return NULL;`
			`}`

			`/* do not include fragment part from the URI reference */`
			`for (cptr = uri; (c = *cptr) && c != '#'; cptr++)`
			`; /* nada */`

			`uri_cnt = cptr - uri;`

			`/* allocate string and copy characters */`

			`retval = g_new (gchar, base_cnt + sep_cnt + uri_cnt + 1);`

			`if (base_cnt)`
			`strncpy (retval, base_uri, base_cnt);`

			`if (sep_cnt)`
			`strncpy (retval + base_cnt, sep_str, sep_cnt);`

			`if (uri_cnt)`
			`strncpy (retval + base_cnt + sep_cnt, uri, uri_cnt);`

			`retval[base_cnt + sep_cnt + uri_cnt] = '\0';`

			`#ifdef URI_DEBUG`
			`g_print (" -> \"%s\"\n", retval);`
			`#endif`

			`return retval;`
			`}`

			`#if 0`
			`RFC 2396 URI Generic Syntax August 1998`


			`A. Collected BNF for URI`

			`URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]`
			`absoluteURI = scheme ":" ( hier_part \| opaque_part )`
			`relativeURI = ( net_path \| abs_path \| rel_path ) [ "?" query ]`

			`hier_part = ( net_path \| abs_path ) [ "?" query ]`
			`opaque_part = uric_no_slash *uric`

			`uric_no_slash = unreserved \| escaped \| ";" \| "?" \| ":" \| "@" \|`
			`"&" \| "=" \| "+" \| "$" \| ","`

			`net_path = "//" authority [ abs_path ]`
			`abs_path = "/" path_segments`
			`rel_path = rel_segment [ abs_path ]`

			`rel_segment = 1*( unreserved \| escaped \|`
			`";" \| "@" \| "&" \| "=" \| "+" \| "$" \| "," )`

			`scheme = alpha *( alpha \| digit \| "+" \| "-" \| "." )`

			`authority = server \| reg_name`

			`reg_name = 1*( unreserved \| escaped \| "$" \| "," \|`
			`";" \| ":" \| "@" \| "&" \| "=" \| "+" )`

			`server = [ [ userinfo "@" ] hostport ]`
			`userinfo = *( unreserved \| escaped \|`
			`";" \| ":" \| "&" \| "=" \| "+" \| "$" \| "," )`

			`hostport = host [ ":" port ]`
			`host = hostname \| IPv4address`
			`hostname = *( domainlabel "." ) toplabel [ "." ]`
			`domainlabel = alphanum \| alphanum *( alphanum \| "-" ) alphanum`
			`toplabel = alpha \| alpha *( alphanum \| "-" ) alphanum`
			`IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit`
			`port = *digit`

			`path = [ abs_path \| opaque_part ]`
			`path_segments = segment *( "/" segment )`
			`segment = *pchar *( ";" param )`
			`param = *pchar`
			`pchar = unreserved \| escaped \|`
			`":" \| "@" \| "&" \| "=" \| "+" \| "$" \| ","`

			`query = *uric`

			`fragment = *uric`

			`uric = reserved \| unreserved \| escaped`
			`reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \|`
			`"$" \| ","`
			`unreserved = alphanum \| mark`
			`mark = "-" \| "_" \| "." \| "!" \| "~" \| "*" \| "'" \|`
			`"(" \| ")"`

			`escaped = "%" hex hex`
			`hex = digit \| "A" \| "B" \| "C" \| "D" \| "E" \| "F" \|`
			`"a" \| "b" \| "c" \| "d" \| "e" \| "f"`

			`alphanum = alpha \| digit`
			`alpha = lowalpha \| upalpha`

			`lowalpha = "a" \| "b" \| "c" \| "d" \| "e" \| "f" \| "g" \| "h" \| "i" \|`
			`"j" \| "k" \| "l" \| "m" \| "n" \| "o" \| "p" \| "q" \| "r" \|`
			`"s" \| "t" \| "u" \| "v" \| "w" \| "x" \| "y" \| "z"`
			`upalpha = "A" \| "B" \| "C" \| "D" \| "E" \| "F" \| "G" \| "H" \| "I" \|`
			`"J" \| "K" \| "L" \| "M" \| "N" \| "O" \| "P" \| "Q" \| "R" \|`
			`"S" \| "T" \| "U" \| "V" \| "W" \| "X" \| "Y" \| "Z"`
			`digit = "0" \| "1" \| "2" \| "3" \| "4" \| "5" \| "6" \| "7" \|`
			`"8" \| "9"`

			`#endif`