summaryrefslogtreecommitdiff
path: root/manual/string.texi
diff options
context:
space:
mode:
Diffstat (limited to 'manual/string.texi')
-rw-r--r--manual/string.texi118
1 files changed, 105 insertions, 13 deletions
diff --git a/manual/string.texi b/manual/string.texi
index af95925a14..cb9f5412f8 100644
--- a/manual/string.texi
+++ b/manual/string.texi
@@ -890,6 +890,9 @@ specifying a null character as the value of the @var{c} argument.
@comment BSD
@deftypefun {char *} index (const char *@var{string}, int @var{c})
@code{index} is another name for @code{strchr}; they are exactly the same.
+New code should always use @code{strchr} since this name is defined in
+@w{ISO C} while @code{index} is a BSD invention which was never available
+on @w{System V} derived systems.
@end deftypefun
@comment string.h
@@ -910,6 +913,9 @@ strrchr ("hello, world", 'l')
@comment BSD
@deftypefun {char *} rindex (const char *@var{string}, int @var{c})
@code{rindex} is another name for @code{strrchr}; they are exactly the same.
+New code should always use @code{strrchr} since this name is defined in
+@w{ISO C} while @code{rindex} is a BSD invention which was never available
+on @w{System V} derived systems.
@end deftypefun
@comment string.h
@@ -1067,13 +1073,14 @@ Here is a simple example showing the use of @code{strtok}.
@dots{}
-char string[] = "words separated by spaces -- and, punctuation!";
+const char string[] = "words separated by spaces -- and, punctuation!";
const char delimiters[] = " .,;:!-";
-char *token;
+char *token, *cp;
@dots{}
-token = strtok (string, delimiters); /* token => "words" */
+cp = strdupa (string); /* Make writable copy. */
+token = strtok (cp, delimiters); /* token => "words" */
token = strtok (NULL, delimiters); /* token => "separated" */
token = strtok (NULL, delimiters); /* token => "by" */
token = strtok (NULL, delimiters); /* token => "spaces" */
@@ -1097,7 +1104,7 @@ pointer. Calling @code{strtok_r} with a null pointer for
@var{newstring} and leaving @var{save_ptr} between the calls unchanged
does the job without limiting reentrancy.
-This function was proposed for POSIX.1b and can be found on many systems
+This function is defined in POSIX.1 and can be found on many systems
which support multi-threading.
@end deftypefun
@@ -1123,14 +1130,14 @@ Here is how the above example looks like when @code{strsep} is used.
@dots{}
-char string[] = "words separated by spaces -- and, punctuation!";
+const char string[] = "words separated by spaces -- and, punctuation!";
const char delimiters[] = " .,;:!-";
char *running;
char *token;
@dots{}
-running = string;
+running = strdupa (string);
token = strsep (&running, delimiters); /* token => "words" */
token = strsep (&running, delimiters); /* token => "separated" */
token = strsep (&running, delimiters); /* token => "by" */
@@ -1180,13 +1187,66 @@ The function returns a pointer to a static buffer which contains the
string representing the encoding of @var{n}. To encode a series of
bytes the user should append the new string to the destination buffer.
@emph{Warning:} Since a static buffer is used this function should not
-be used in multi-threaded programs. There is no thread-safe alternatice
+be used in multi-threaded programs. There is no thread-safe alternative
to this function in the C library.
@end deftypefun
+The @code{l64a} function is not usable on its own. To encode arbitrary
+sequences of bytes one needs some more code, which could look like
+this:
+
+@smallexample
+char *
+encode (const void *buf, size_t len)
+@{
+ /* @r{We know in advance how long the buffer has to be.} */
+ unsigned char *in = (unsigned char *) buf;
+ char *out = malloc (6 + ((len + 3) / 4) * 6 + 1);
+ char *cp = out;
+
+ /* @r{Encode the length.} */
+ memcpy (cp, l64a (len), 6);
+ cp += 6;
+
+ while (len > 3)
+ @{
+ unsigned long int n = *in++;
+ n = (n << 8) | *in++;
+ n = (n << 8) | *in++;
+ n = (n << 8) | *in++;
+ len -= 4;
+ /* @r{Using `htonl' is necessary so that the data can be}
+ @r{decoded even on machines with different byte order.} */
+ memcpy (cp, l64a (htonl (n)), 6);
+ cp += 6;
+ @}
+ if (len > 0)
+ @{
+ unsigned long int n = *in++;
+ if (--len > 0)
+ @{
+ n = (n << 8) | *in++;
+ if (--len > 0)
+ n = (n << 8) | *in;
+ @}
+ memcpy (cp, l64a (htonl (n)), 6);
+ cp += 6;
+ @}
+ *cp = '\0';
+ return out;
+@}
+@end smallexample
+
+It is strange that the library does not provide the complete
+functionality needed but so be it. There are some other encoding
+methods which are much more widely used (UU encoding, Base64 encoding).
+Generally, it is better to use one of these encodings.
+
To decode data produced with @code{l64a} the following function should be
used.
+@comment stdlib.h
+@comment XPG
@deftypefun {long int} a64l (const char *@var{string})
The parameter @var{string} should contain a string which was produced by
a call to @code{l64a}. The function processes the next 6 characters and
@@ -1205,17 +1265,17 @@ characters.
@node Argz and Envz Vectors
@section Argz and Envz Vectors
-@cindex argz vectors
+@cindex argz vectors (string vectors)
@cindex string vectors, null-character separated
@cindex argument vectors, null-character separated
@dfn{Argz vectors} are vectors of strings in a contiguous block of
memory, each element separated from its neighbors by null characters
(@code{'\0'}).
-@cindex envz vectors
+@cindex envz vectors (environment vectors)
@cindex environment vectors, null-character separated
@dfn{Envz vectors} are an extension of argz vectors where each element is a
-name-value pair, separated by a @code{'='} character (as in a unix
+name-value pair, separated by a @code{'='} character (as in a Unix
environment).
@menu
@@ -1247,13 +1307,17 @@ allocation error occurs.
@pindex argz.h
These functions are declared in the standard include file @file{argz.h}.
+@comment argz.h
+@comment GNU
@deftypefun {error_t} argz_create (char *const @var{argv}[], char **@var{argz}, size_t *@var{argz_len})
-The @code{argz_create} function converts the unix-style argument vector
+The @code{argz_create} function converts the Unix-style argument vector
@var{argv} (a vector of pointers to normal C strings, terminated by
@code{(char *)0}; @pxref{Program Arguments}) into an argz vector with
the same elements, which is returned in @var{argz} and @var{argz_len}.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {error_t} argz_create_sep (const char *@var{string}, int @var{sep}, char **@var{argz}, size_t *@var{argz_len})
The @code{argz_create_sep} function converts the null-terminated string
@var{string} into an argz vector (returned in @var{argz} and
@@ -1261,14 +1325,18 @@ The @code{argz_create_sep} function converts the null-terminated string
character @var{sep}.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {size_t} argz_count (const char *@var{argz}, size_t @var{argz_len})
Returns the number of elements in the argz vector @var{argz} and
@var{argz_len}.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {void} argz_extract (char *@var{argz}, size_t @var{argz_len}, char **@var{argv})
The @code{argz_extract} function converts the argz vector @var{argz} and
-@var{argz_len} into a unix-style argument vector stored in @var{argv},
+@var{argz_len} into a Unix-style argument vector stored in @var{argv},
by putting pointers to every element in @var{argz} into successive
positions in @var{argv}, followed by a terminator of @code{0}.
@var{Argv} must be pre-allocated with enough space to hold all the
@@ -1281,6 +1349,8 @@ still active. This function is useful for passing the elements in
@var{argz} to an exec function (@pxref{Executing a File}).
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {void} argz_stringify (char *@var{argz}, size_t @var{len}, int @var{sep})
The @code{argz_stringify} converts @var{argz} into a normal string with
the elements separated by the character @var{sep}, by replacing each
@@ -1289,20 +1359,26 @@ string) with @var{sep}. This is handy for printing @var{argz} in a
readable manner.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {error_t} argz_add (char **@var{argz}, size_t *@var{argz_len}, const char *@var{str})
The @code{argz_add} function adds the string @var{str} to the end of the
argz vector @code{*@var{argz}}, and updates @code{*@var{argz}} and
@code{*@var{argz_len}} accordingly.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {error_t} argz_add_sep (char **@var{argz}, size_t *@var{argz_len}, const char *@var{str}, int @var{delim})
The @code{argz_add_sep} function is similar to @code{argz_add}, but
@var{str} is split into separate elements in the result at occurrences of
the character @var{delim}. This is useful, for instance, for
-adding the components of a unix search path to an argz vector, by using
+adding the components of a Unix search path to an argz vector, by using
a value of @code{':'} for @var{delim}.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {error_t} argz_append (char **@var{argz}, size_t *@var{argz_len}, const char *@var{buf}, size_t @var{buf_len})
The @code{argz_append} function appends @var{buf_len} bytes starting at
@var{buf} to the argz vector @code{*@var{argz}}, reallocating
@@ -1310,6 +1386,8 @@ The @code{argz_append} function appends @var{buf_len} bytes starting at
@code{*@var{argz_len}}.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {error_t} argz_delete (char **@var{argz}, size_t *@var{argz_len}, char *@var{entry})
If @var{entry} points to the beginning of one of the elements in the
argz vector @code{*@var{argz}}, the @code{argz_delete} function will
@@ -1319,6 +1397,8 @@ destructive argz functions usually reallocate their argz argument,
pointers into argz vectors such as @var{entry} will then become invalid.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {error_t} argz_insert (char **@var{argz}, size_t *@var{argz_len}, char *@var{before}, const char *@var{entry})
The @code{argz_insert} function inserts the string @var{entry} into the
argz vector @code{*@var{argz}} at a point just before the existing
@@ -1330,6 +1410,8 @@ is @code{0}, @var{entry} is added to the end instead (as if by
@var{before} will result in @var{entry} being inserted at the beginning.
@end deftypefun
+@comment argz.h
+@comment GNU
@deftypefun {char *} argz_next (char *@var{argz}, size_t @var{argz_len}, const char *@var{entry})
The @code{argz_next} function provides a convenient way of iterating
over the elements in the argz vector @var{argz}. It returns a pointer
@@ -1387,6 +1469,8 @@ fail) have a return type of @code{error_t}, and return either @code{0} or
@pindex envz.h
These functions are declared in the standard include file @file{envz.h}.
+@comment envz.h
+@comment GNU
@deftypefun {char *} envz_entry (const char *@var{envz}, size_t @var{envz_len}, const char *@var{name})
The @code{envz_entry} function finds the entry in @var{envz} with the name
@var{name}, and returns a pointer to the whole entry---that is, the argz
@@ -1394,6 +1478,8 @@ element which begins with @var{name} followed by a @code{'='} character. If
there is no entry with that name, @code{0} is returned.
@end deftypefun
+@comment envz.h
+@comment GNU
@deftypefun {char *} envz_get (const char *@var{envz}, size_t @var{envz_len}, const char *@var{name})
The @code{envz_get} function finds the entry in @var{envz} with the name
@var{name} (like @code{envz_entry}), and returns a pointer to the value
@@ -1401,6 +1487,8 @@ portion of that entry (following the @code{'='}). If there is no entry with
that name (or only a null entry), @code{0} is returned.
@end deftypefun
+@comment envz.h
+@comment GNU
@deftypefun {error_t} envz_add (char **@var{envz}, size_t *@var{envz_len}, const char *@var{name}, const char *@var{value})
The @code{envz_add} function adds an entry to @code{*@var{envz}}
(updating @code{*@var{envz}} and @code{*@var{envz_len}}) with the name
@@ -1410,6 +1498,8 @@ already exists in @var{envz}, it is removed first. If @var{value} is
(mentioned above).
@end deftypefun
+@comment envz.h
+@comment GNU
@deftypefun {error_t} envz_merge (char **@var{envz}, size_t *@var{envz_len}, const char *@var{envz2}, size_t @var{envz2_len}, int @var{override})
The @code{envz_merge} function adds each entry in @var{envz2} to @var{envz},
as if with @code{envz_add}, updating @code{*@var{envz}} and
@@ -1421,6 +1511,8 @@ entry in @var{envz} can prevent an entry of the same name in @var{envz2} from
being added to @var{envz}, if @var{override} is false.
@end deftypefun
+@comment envz.h
+@comment GNU
@deftypefun {void} envz_strip (char **@var{envz}, size_t *@var{envz_len})
The @code{envz_strip} function removes any null entries from @var{envz},
updating @code{*@var{envz}} and @code{*@var{envz_len}}.