/*
 * Logserver
 * Copyright (C) 2017-2025 Joel Reardon
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __UTF8__H__
#define __UTF8__H__

#include <string>

using namespace std;

/* static method to perform utf8 simplifications when using logserver to view
 * man pages */
class UTF8 {
public:
	// Maps a small set of three-byte UTF-8 punctuation marks onto plain
	// ASCII stand-ins. Only sequences of the form e2 80 xx are recognised:
	//
	//   e2 80 90  U+2010 hyphen                -> "-"
	//   e2 80 93  U+2013 en dash               -> "-"
	//   e2 80 94  U+2014 em dash               -> "-"
	//   e2 80 98  U+2018 left single quote     -> "'"
	//   e2 80 99  U+2019 right single quote    -> "'"
	//   e2 80 9c  U+201C left double quote     -> "\""
	//   e2 80 9d  U+201D right double quote    -> "\""
	//   e2 80 a2  U+2022 bullet                -> "*"
	//   e2 80 a6  U+2026 horizontal ellipsis   -> "..."
	//
	// sv: the bytes being scanned.
	// i:  in/out index into sv; must not be null.
	//
	// Returns the ASCII replacement when one of the sequences above starts
	// at sv[*i], and nullopt otherwise. On a match *i is advanced by 2, so
	// it is left on the LAST byte of the sequence rather than one past it
	// (presumably the caller's own loop increment steps over that byte --
	// confirm against the call site). When nullopt is returned *i is left
	// untouched.
	static std::optional<std::string> simplify(const std::string_view& sv,
						    std::size_t* i) {
		const std::size_t pos = *i;

		// Need three bytes starting at pos. Written as a subtraction so
		// that a huge pos cannot wrap around and slip past the check the
		// way `pos + 2 >= size` would.
		if (pos >= sv.size() || sv.size() - pos < 3) [[unlikely]]
			return std::nullopt;

		const auto byte_at = [&sv](std::size_t k) {
			return static_cast<std::uint8_t>(sv[k]);
		};

		// Nearly every byte of ordinary text fails this first test.
		if (byte_at(pos) != 0xe2) [[likely]]
			return std::nullopt;
		if (byte_at(pos + 1) != 0x80)
			return std::nullopt;

		const char* ascii = nullptr;
		switch (byte_at(pos + 2)) {
		case 0x90:
		case 0x93:
		case 0x94:
			ascii = "-";
			break;
		case 0x98:
		case 0x99:
			ascii = "'";
			break;
		case 0x9c:
		case 0x9d:
			ascii = "\"";
			break;
		case 0xa2:
			ascii = "*";
			break;
		case 0xa6:
			ascii = "...";
			break;
		default:
			// some other e2 80 xx symbol we do not simplify
			return std::nullopt;
		}

		// we have a match: consume the sequence and hand back the value
		*i = pos + 2;
		return std::string(ascii);
	}
};

#endif  // __UTF8__H__
