/** 
 * @file gnome-cmd-convert.cc
 * @copyright (C) 2001-2006 Marcus Bjurman\n
 * @copyright (C) 2007-2012 Piotr Eljasiak\n
 * @copyright (C) 2013-2023 Uwe Scholz\n
 *
 * @copyright This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * @copyright This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * @copyright You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <config.h>

#include "gnome-cmd-includes.h"
#include "utils.h"
#include "gnome-cmd-convert.h"

using namespace std;


/*
 * Quick roman numeral check (non-roman numerals may also return true)
 * Patch from Slash Bunny (2007.08.12)
 * (http://home.hiwaay.net/~lkseitz/math/roman/numerals.shtml)
 *    I = 1    (one)
 *    V = 5    (five)
 *    X = 10   (ten)
 *    L = 50   (fifty)
 *    C = 100  (one hundred)
 *    D = 500  (five hundred)
 *    M = 1000 (one thousand)
 *
 * Only the lowercase letters i, v, x, l, c, d and m are recognised.
 * The word starts at 'text' and must end at a ' ', a '_' or the end of the string.
 *
 * Returns the number of numeral letters in that word, or 0 when the word is
 * empty or contains any other character. The order of the letters is not
 * validated, so e.g. "mix" is reported as a numeral of length 3.
 */
inline gint word_is_roman_numeral (gchar *text)
{
    gint count = 0;

    for (const gchar *p = text; *p != '\0'; ++p)
    {
        switch (*p)
        {
            case 'i':
            case 'v':
            case 'x':
            case 'l':
            case 'c':
            case 'd':
            case 'm':
                ++count;
                break;

            case ' ':
            case '_':
                // A separator closes the word: everything before it was a numeral letter
                return count;

            default:
                // Any other character means this word is not a roman numeral
                return 0;
        }
    }

    // Reached the end of the string without meeting a foreign character
    return count;
}


/**
 * Identity conversion: hands back the string exactly as it was received.
 *
 * @param string  String to "convert"; may be NULL.
 * @return The very same pointer that was passed in; nothing is modified or freed.
 */
gchar *gcmd_convert_unchanged (gchar *string)
{
    return string;
}


/**
 * Strips leading whitespace from a string via g_strchug().
 *
 * @param string  NUL-terminated buffer to trim; may be NULL or empty.
 * @return The result of g_strchug() for non-empty input; NULL or empty input
 *         is returned untouched.
 */
gchar *gcmd_convert_ltrim (gchar *string)
{
    // A NULL pointer or an empty string has nothing to trim
    const bool has_content = string && *string;

    return has_content ? g_strchug (string) : string;
}


/**
 * Strips trailing whitespace from a string via g_strchomp().
 *
 * @param string  NUL-terminated buffer to trim; may be NULL or empty.
 * @return \a string (NULL or empty input is returned untouched).
 */
gchar *gcmd_convert_rtrim (gchar *string)
{
    // g_strchomp() edits the buffer in place and returns its own argument,
    // so its return value does not need to be captured
    if (string && *string)
        g_strchomp (string);

    return string;
}


/**
 * Strips both leading and trailing whitespace from a string via g_strstrip().
 *
 * @param string  NUL-terminated buffer to trim; may be NULL or empty.
 * @return The result of g_strstrip() for non-empty input; NULL or empty input
 *         is returned untouched.
 */
gchar *gcmd_convert_strip (gchar *string)
{
    // NULL and "" are passed through without calling into GLib
    return (!string || !*string) ? string : g_strstrip (string);
}


/**
 * Converts a UTF-8 string to lowercase.
 *
 * Takes ownership of \a string: for non-empty input the buffer is released
 * with g_free() and replaced by the string produced by g_utf8_strdown().
 * Callers must therefore continue with the returned pointer only.
 *
 * @param string  String to convert; may be NULL or empty.
 * @return The lowercase string, or \a string itself when it is NULL or empty.
 */
gchar *gcmd_convert_lowercase (gchar *string)
{
    if (string && *string)
    {
        gchar *lowered = g_utf8_strdown (string, -1);

        // The input buffer is consumed; from here on only the new copy is valid
        g_free (string);
        string = lowered;
    }

    return string;
}


/**
 * Converts a UTF-8 string to uppercase.
 *
 * Takes ownership of \a string: for non-empty input the buffer is released
 * with g_free() and replaced by the string produced by g_utf8_strup().
 * Callers must therefore continue with the returned pointer only.
 *
 * @param string  String to convert; may be NULL or empty.
 * @return The uppercase string, or \a string itself when it is NULL or empty.
 */
gchar *gcmd_convert_uppercase (gchar *string)
{
    const bool nothing_to_do = !string || !*string;

    if (nothing_to_do)
        return string;

    gchar *original = string;

    string = g_utf8_strup (original, -1);

    // The input buffer is consumed; only the uppercase copy is handed back
    g_free (original);

    return string;
}


// Function to set the first letter of each word to uppercase, according the "Chicago Manual of Style" (http://www.chicagomanualofstyle.org/)
// No needed to reallocate
gchar *gcmd_convert_sentence_case (gchar *string)
{
    if (!string || !*string)
        return string;

    // Bariē Įiįek version
    gint len;
    gchar utf8_character[6];
    gunichar c;
    // There have to be space at the end of words to seperate them from prefix
    // Chicago Manual of Style "Heading caps" Capitalization Rules (CMS 1993, 282) (http://www.docstyles.com/cmscrib.htm#Note2)
    static const gchar *exempt[] =
    {
        "a ",       "a_",
        "against ", "against_",
        "an ",      "an_",
        "and ",     "and_",
        "at ",      "at_",
        "between ", "between_",
        "but ",     "but_",
        "for ",     "for_",
        "in ",      "in_",
        "nor ",     "nor_",
        "of ",      "of_",
        "on ",      "on_",
        "or ",      "or_",
        "so ",      "so_",
        "the ",     "the_",
        "to ",      "to_",
        "with ",    "with_",
        "yet ",     "yet_",
        NULL
    };

    gcmd_convert_lowercase (string);

    // Removes trailing whitespace
    gchar *i = string = g_strchomp (string);

    // If the word is a roman numeral, capitalize all of it
    if ((len = word_is_roman_numeral (i)))
        strncpy (string, g_utf8_strup (i, len), len);
    else
    {
        // Set first character to uppercase
        c = g_utf8_get_char (i);
        strncpy (string, utf8_character, g_unichar_to_utf8 (g_unichar_toupper (c), utf8_character));
    }

    // Uppercase first character of each word, except for 'exempt[]' words lists
    while (i)
    {
        gchar *word = i; // Needed if there is only one word
        gchar *word1 = g_utf8_strchr (i,-1,' ');
        gchar *word2 = g_utf8_strchr (i,-1,'_');

        // Take the first string found (near beginning of string)
        if (word1 && word2)
            word = MIN (word1, word2);
        else if (word1)
            word = word1;
        else if (word2)
            word = word2;
        else
        {
            // Last word of the string: the first letter is always uppercase,
            // even if it's in the exempt list. This is a Chicago Manual of Style rule.
            // Last Word In String - Should Capitalize Regardless of Word (Chicago Manual of Style)
            c = g_utf8_get_char (word);
            strncpy (word, utf8_character, g_unichar_to_utf8 (g_unichar_toupper (c), utf8_character));
            break;
        }

        // Go to first character of the word (char. after ' ' or '_')
        ++word;

        // If the word is a roman numeral, capitalize all of it
        if ((len = word_is_roman_numeral (word)))
            strncpy (word, g_utf8_strup (word, len), len);
        else
        {
            // Set uppercase the first character of this word
            c = g_utf8_get_char (word);
            strncpy (word, utf8_character, g_unichar_to_utf8 (g_unichar_toupper (c), utf8_character));

            // Set lowercase the first character of this word if found in the exempt words list
            for (gint ii=0; exempt[ii]!=NULL; ++ii)
                if (g_ascii_strncasecmp (exempt[ii], word, strlen (exempt[ii])) == 0)
                {
                    c = g_utf8_get_char (word);
                    strncpy (word, utf8_character, g_unichar_to_utf8 (g_unichar_tolower (c), utf8_character));
                    break;
            }
        }

        i = word;
    }

    // Uppercase letter placed after some characters like ' (', '[', '{'
    gboolean set_to_upper_case = FALSE;
    for (i = string; *i; i = g_utf8_next_char (i))
    {
        c = g_utf8_get_char (i);

        if (set_to_upper_case && g_unichar_islower (c))
            strncpy (i, utf8_character, g_unichar_to_utf8 (g_unichar_toupper (c), utf8_character));

        set_to_upper_case = c == (gunichar) '(' || c == (gunichar) '[' || c == (gunichar) '{' ||
                            c == (gunichar) '"' || c == (gunichar) ':' || c == (gunichar) '.' ||
                            c == (gunichar) '`' || c == (gunichar) '-';
    }

    return string;
}


/**
 * Uppercases the first character of the string and lowercases all the others,
 * then restores the English pronoun "I" to uppercase.
 *
 * The conversion is done in place. A character is only rewritten when its
 * converted form has the same UTF-8 byte length as the original; case mappings
 * that change the encoded length are left untouched, because writing them in
 * place would corrupt the neighbouring character or overwrite the terminating
 * NUL.
 *
 * @param string  NUL-terminated UTF-8 buffer to modify; may be NULL or empty.
 * @return \a string itself.
 */
gchar *gcmd_convert_initial_caps (gchar *string)
{
    if (!string || !*string)
        return string;

    gchar utf8_character[6];
    gboolean first_character = TRUE;

    for (gchar *i = string; *i; i = g_utf8_next_char (i))
    {
        const gunichar c = g_utf8_get_char (i);
        gunichar converted = c;

        if (first_character)
        {
            if (g_unichar_islower (c))
                converted = g_unichar_toupper (c);
            first_character = FALSE; // After the first time, all will be lower case
        }
        else if (g_unichar_isupper (c))
            converted = g_unichar_tolower (c);

        if (converted != c)
        {
            const gint converted_len = g_unichar_to_utf8 (converted, utf8_character);

            // Only overwrite when the replacement occupies exactly the same bytes
            if (converted_len == g_utf8_next_char (i) - i)
                memcpy (i, utf8_character, converted_len);
        }
    }

    // Uppercase again the word 'I' in english.
    // Words are separated by ' ' or '_'; both are ASCII, so a byte-wise search
    // cannot hit the middle of a multi-byte character. A leading "i" needs no
    // treatment here: the first character was already uppercased above.
    for (gchar *separator = strpbrk (string, " _"); separator; separator = strpbrk (separator + 1, " _"))
    {
        // Go to first character of the word (char after ' ' or '_')
        gchar *word = separator + 1;

        // The word is "i" when it is followed by a separator or the end of the string
        const gboolean is_i = word[0] == 'i' || word[0] == 'I';

        if (is_i && (word[1] == ' ' || word[1] == '_' || word[1] == '\0'))
            word[0] = 'I';
    }

    return string;
}


/**
 * Currently a pass-through: the string is returned without any modification.
 *
 * NOTE(review): the name suggests that the case of each character should be
 * toggled, but no such conversion is implemented here - confirm whether this
 * is intentional.
 *
 * @param string  String to "convert"; may be NULL.
 * @return The very same pointer that was passed in.
 */
gchar *gcmd_convert_toggle_case (gchar *string)
{
    return string;
}