view libstdc++-v3/include/bits/locale_conv.h @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
line wrap: on
line source

// wstring_convert implementation -*- C++ -*-

// Copyright (C) 2015-2020 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/locale_conv.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{locale}
 */

#ifndef _LOCALE_CONV_H
#define _LOCALE_CONV_H 1

#if __cplusplus < 201103L
# include <bits/c++0x_warning.h>
#else

#include <streambuf>
#include <bits/stringfwd.h>
#include <bits/allocator.h>
#include <bits/codecvt.h>
#include <bits/unique_ptr.h>

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  /**
   * @addtogroup locales
   * @{
   */

  /**
   * Conversion loop shared by the __str_codecvt_* helpers.
   *
   * Converts [__first, __last) by calling the member function __fn of
   * the facet __cvt.  The converted characters replace the contents of
   * __outstr and the number of input characters consumed is stored in
   * __count.
   *
   * Returns false if the facet reports an error (in which case __outstr
   * is not trimmed to the converted length), and true otherwise,
   * including when only part of the input was converted.
   */
  template<typename _OutStr, typename _InChar, typename _Codecvt,
	   typename _State, typename _Fn>
    bool
    __do_str_codecvt(const _InChar* __first, const _InChar* __last,
		     _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
		     size_t& __count, _Fn __fn)
    {
      if (__first == __last)
	{
	  // Empty input converts to an empty string.
	  __outstr.clear();
	  __count = 0;
	  return true;
	}

      // Output space added per remaining input character.
      const auto __maxlen = __cvt.max_length() + 1;
      auto __from = __first;
      size_t __written = 0;
      codecvt_base::result __res;

      for (;;)
	{
	  // Grow the output, then convert into the space that follows
	  // what has already been written.
	  __outstr.resize(__outstr.size() + (__last - __from) * __maxlen);
	  auto const __obegin = &__outstr.front();
	  auto const __oend = &__outstr.back() + 1;
	  auto __onext = __obegin + __written;
	  __res = (__cvt.*__fn)(__state, __from, __last, __from,
				__onext, __oend, __onext);
	  __written = __onext - __obegin;

	  // Go round again only if the facet stopped early with input
	  // left over and fewer than __maxlen free output positions.
	  const bool __retry = __res == codecvt_base::partial
	    && __from != __last
	    && (__outstr.size() - __written) < __maxlen;
	  if (!__retry)
	    break;
	}

      if (__res == codecvt_base::error)
	{
	  __count = __from - __first;
	  return false;
	}

      // The codecvt facet will only return noconv when the types are
      // the same, so avoid instantiating basic_string::assign otherwise
      if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
				       typename _Codecvt::extern_type>())
	if (__res == codecvt_base::noconv)
	  {
	    // Nothing needed converting: the output is a copy of the input.
	    __outstr.assign(__first, __last);
	    __count = __last - __first;
	    return true;
	  }

      // Drop the unused tail of the output buffer.
      __outstr.resize(__written);
      __count = __from - __first;
      return true;
    }

  // Convert the narrow character string [__first, __last) to wide
  // characters using __cvt.in(), storing the result in __outstr and the
  // number of narrow characters consumed in __count.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in(const char* __first, const char* __last,
		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
		     const codecvt<_CharT, char, _State>& __cvt,
		     _State& __state, size_t& __count)
    {
      typedef codecvt<_CharT, char, _State> _Codecvt;
      // Pointer to the facet's public in() member function.
      typedef codecvt_base::result
	(_Codecvt::*_ConvFn)(_State&, const char*, const char*, const char*&,
			     _CharT*, _CharT*, _CharT*&) const;
      const _ConvFn __in = &_Codecvt::in;
      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
			      __count, __in);
    }

  // As above, but starting from a value-initialized conversion state
  // and discarding the count of consumed characters.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in(const char* __first, const char* __last,
		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
		     const codecvt<_CharT, char, _State>& __cvt)
    {
      size_t __ignored;
      _State __state = {};
      return __str_codecvt_in(__first, __last, __outstr, __cvt, __state,
			      __ignored);
    }

  // As above, but also returns false unless the whole of
  // [__first, __last) was consumed by the conversion.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in_all(const char* __first, const char* __last,
			 basic_string<_CharT, _Traits, _Alloc>& __outstr,
			 const codecvt<_CharT, char, _State>& __cvt)
    {
      _State __state = {};
      size_t __n;
      if (!__str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n))
	return false;
      // A partial conversion leaves some input unconsumed.
      return __n == size_t(__last - __first);
    }

  // Convert the wide character string [__first, __last) to narrow
  // characters using __cvt.out(), storing the result in __outstr and the
  // number of wide characters consumed in __count.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
		      basic_string<char, _Traits, _Alloc>& __outstr,
		      const codecvt<_CharT, char, _State>& __cvt,
		      _State& __state, size_t& __count)
    {
      typedef codecvt<_CharT, char, _State> _Codecvt;
      // Pointer to the facet's public out() member function.
      typedef codecvt_base::result
	(_Codecvt::*_ConvFn)(_State&, const _CharT*, const _CharT*,
			     const _CharT*&, char*, char*, char*&) const;
      const _ConvFn __out = &_Codecvt::out;
      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
			      __count, __out);
    }

  // As above, but starting from a value-initialized conversion state
  // and discarding the count of consumed characters.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
		      basic_string<char, _Traits, _Alloc>& __outstr,
		      const codecvt<_CharT, char, _State>& __cvt)
    {
      size_t __ignored;
      _State __state = {};
      return __str_codecvt_out(__first, __last, __outstr, __cvt, __state,
			       __ignored);
    }

  // As above, but also returns false unless the whole of
  // [__first, __last) was consumed by the conversion.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
			  basic_string<char, _Traits, _Alloc>& __outstr,
			  const codecvt<_CharT, char, _State>& __cvt)
    {
      _State __state = {};
      size_t __n;
      if (!__str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n))
	return false;
      // A partial conversion leaves some input unconsumed.
      return __n == size_t(__last - __first);
    }

#ifdef _GLIBCXX_USE_CHAR8_T

  // Convert wide character string to narrow.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
		      basic_string<char8_t, _Traits, _Alloc>& __outstr,
		      const codecvt<_CharT, char8_t, _State>& __cvt,
		      _State& __state, size_t& __count)
    {
      using _Codecvt = codecvt<_CharT, char8_t, _State>;
      using _ConvFn
	= codecvt_base::result
	  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
			char8_t*, char8_t*, char8_t*&) const;
      _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
			      __count, __fn);
    }

  // As above, but starting from a value-initialized conversion state
  // and discarding the count of consumed characters.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
		      basic_string<char8_t, _Traits, _Alloc>& __outstr,
		      const codecvt<_CharT, char8_t, _State>& __cvt)
    {
      size_t __ignored;
      _State __state = {};
      return __str_codecvt_out(__first, __last, __outstr, __cvt, __state,
			       __ignored);
    }

#endif  // _GLIBCXX_USE_CHAR8_T

#ifdef _GLIBCXX_USE_WCHAR_T

_GLIBCXX_BEGIN_NAMESPACE_CXX11

  /// String conversions
  ///
  /// Converts between byte strings and wide strings using a code
  /// conversion facet of type @c _Codecvt that is owned by this object.
  template<typename _Codecvt, typename _Elem = wchar_t,
	   typename _Wide_alloc = allocator<_Elem>,
	   typename _Byte_alloc = allocator<char>>
    class wstring_convert
    {
    public:
      typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
      typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
      typedef typename _Codecvt::state_type 			   state_type;
      typedef typename wide_string::traits_type::int_type	   int_type;

      /// Default constructor.  Creates and owns a new @c _Codecvt.
      wstring_convert() : _M_cvt(new _Codecvt()) { }

      /** Constructor.
       *
       * @param  __pcvt The facet to use for conversions.
       * @throw  std::logic_error if @p __pcvt is null.
       *
       * Takes ownership of @p __pcvt and will delete it in the destructor.
       */
      explicit
      wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
      {
	if (!_M_cvt)
	  __throw_logic_error("wstring_convert");
      }

      /** Construct with an initial conversion state.
       *
       * @param  __pcvt The facet to use for conversions.
       * @param  __state Initial conversion state.
       * @throw  std::logic_error if @p __pcvt is null.
       *
       * Takes ownership of @p __pcvt and will delete it in the destructor.
       * The object's conversion state will persist between conversions.
       */
      wstring_convert(_Codecvt* __pcvt, state_type __state)
      : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
      {
	if (!_M_cvt)
	  __throw_logic_error("wstring_convert");
      }

      /** Construct with error strings.
       *
       * @param  __byte_err A string to return on failed conversions.
       * @param  __wide_err A wide string to return on failed conversions.
       *
       * Creates and owns a new @c _Codecvt, like the default constructor.
       */
      explicit
      wstring_convert(const byte_string& __byte_err,
		      const wide_string& __wide_err = wide_string())
      : _M_cvt(new _Codecvt),
	_M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
	_M_with_strings(true)
      { }

      ~wstring_convert() = default;

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2176. Special members for wstring_convert and wbuffer_convert
      wstring_convert(const wstring_convert&) = delete;
      wstring_convert& operator=(const wstring_convert&) = delete;

      /// @{ Convert from bytes.
      /// Convert a single byte.
      wide_string
      from_bytes(char __byte)
      {
	char __bytes[2] = { __byte };
	return from_bytes(__bytes, __bytes+1);
      }

      /// Convert a null-terminated byte string.
      wide_string
      from_bytes(const char* __ptr)
      { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }

      /// Convert the contents of a byte string.
      wide_string
      from_bytes(const byte_string& __str)
      {
	auto __ptr = __str.data();
	return from_bytes(__ptr, __ptr + __str.size());
      }

      /** Convert the bytes in [__first, __last).
       *
       * Unless an initial state was supplied on construction, the
       * conversion state is reset before converting.  If the conversion
       * fails, the wide error string is returned when the object was
       * constructed with error strings; otherwise std::range_error is
       * thrown.
       */
      wide_string
      from_bytes(const char* __first, const char* __last)
      {
	if (!_M_with_cvtstate)
	  _M_state = state_type();
	wide_string __out{ _M_wide_err_string.get_allocator() };
	if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
			     _M_count))
	  return __out;
	if (_M_with_strings)
	  return _M_wide_err_string;
	__throw_range_error("wstring_convert::from_bytes");
      }
      /// @}

      /// @{ Convert to bytes.
      /// Convert a single wide character.
      byte_string
      to_bytes(_Elem __wchar)
      {
	_Elem __wchars[2] = { __wchar };
	return to_bytes(__wchars, __wchars+1);
      }

      /// Convert a null-terminated wide string.
      byte_string
      to_bytes(const _Elem* __ptr)
      {
	return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
      }

      /// Convert the contents of a wide string.
      byte_string
      to_bytes(const wide_string& __wstr)
      {
	auto __ptr = __wstr.data();
	return to_bytes(__ptr, __ptr + __wstr.size());
      }

      /** Convert the wide characters in [__first, __last).
       *
       * Unless an initial state was supplied on construction, the
       * conversion state is reset before converting.  If the conversion
       * fails, the byte error string is returned when the object was
       * constructed with error strings; otherwise std::range_error is
       * thrown.
       */
      byte_string
      to_bytes(const _Elem* __first, const _Elem* __last)
      {
	if (!_M_with_cvtstate)
	  _M_state = state_type();
	byte_string __out{ _M_byte_err_string.get_allocator() };
	if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
			      _M_count))
	  return __out;
	if (_M_with_strings)
	  return _M_byte_err_string;
	__throw_range_error("wstring_convert::to_bytes");
      }
      /// @}

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2174. wstring_convert::converted() should be noexcept
      /// The number of elements successfully converted in the last conversion.
      size_t converted() const noexcept { return _M_count; }

      /// The final conversion state of the last conversion.
      state_type state() const { return _M_state; }

    private:
      // The owned conversion facet.
      unique_ptr<_Codecvt>	_M_cvt;
      // Returned by to_bytes on failure when _M_with_strings is set.
      byte_string		_M_byte_err_string;
      // Returned by from_bytes on failure when _M_with_strings is set.
      wide_string		_M_wide_err_string;
      // Conversion state used, and updated, by each conversion.
      state_type		_M_state = state_type();
      // Input elements consumed by the most recent conversion.
      size_t			_M_count = 0;
      // True if constructed with an initial state, which is then not
      // reset before each conversion.
      bool			_M_with_cvtstate = false;
      // True if constructed with error strings.
      bool			_M_with_strings = false;
    };

_GLIBCXX_END_NAMESPACE_CXX11

  /// Buffer conversions
  ///
  /// A stream buffer of @c _Elem that reads from and writes to an
  /// underlying byte stream buffer, converting with a facet of type
  /// @c _Codecvt that is owned by this object.
  template<typename _Codecvt, typename _Elem = wchar_t,
	   typename _Tr = char_traits<_Elem>>
    class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
    {
      typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

    public:
      typedef typename _Codecvt::state_type state_type;

      /// Default constructor.
      wbuffer_convert() : wbuffer_convert(nullptr) { }

      /** Constructor.
       *
       * @param  __bytebuf The underlying byte stream buffer.
       * @param  __pcvt    The facet to use for conversions.
       * @param  __state   Initial conversion state.
       * @throw  std::logic_error if @p __pcvt is null.
       *
       * Takes ownership of @p __pcvt and will delete it in the destructor.
       * The put and get areas are only set up if @p __bytebuf is not null.
       */
      explicit
      wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
		      state_type __state = state_type())
      : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
      {
	if (!_M_cvt)
	  __throw_logic_error("wbuffer_convert");

	_M_always_noconv = _M_cvt->always_noconv();

	if (_M_buf)
	  {
	    this->setp(_M_put_area, _M_put_area + _S_buffer_length);
	    // Start with an empty get area that leaves room for putback.
	    this->setg(_M_get_area + _S_putback_length,
		       _M_get_area + _S_putback_length,
		       _M_get_area + _S_putback_length);
	  }
      }

      ~wbuffer_convert() = default;

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 2176. Special members for wstring_convert and wbuffer_convert
      wbuffer_convert(const wbuffer_convert&) = delete;
      wbuffer_convert& operator=(const wbuffer_convert&) = delete;

      /// The underlying byte stream buffer.
      streambuf* rdbuf() const noexcept { return _M_buf; }

      /// Replace the underlying byte stream buffer, returning the old one.
      streambuf*
      rdbuf(streambuf *__bytebuf) noexcept
      {
	auto __prev = _M_buf;
	_M_buf = __bytebuf;
	return __prev;
      }

      /// The conversion state following the last conversion.
      state_type state() const noexcept { return _M_state; }

    protected:
      /// Convert and write any pending output, then sync the byte buffer.
      /// Returns 0 on success and -1 on failure.
      int
      sync()
      {
	if (!_M_buf)
	  return -1;
	// _M_conv_put() reports "no progress" for an empty put area,
	// which is not a failure when there is nothing to flush.
	if (this->pptr() != this->pbase() && !_M_conv_put())
	  return -1;
	return _M_buf->pubsync() ? -1 : 0;
      }

      /// Make room in the put area and, unless @p __out is eof, store it.
      typename _Wide_streambuf::int_type
      overflow(typename _Wide_streambuf::int_type __out)
      {
	if (!_M_buf || !_M_conv_put())
	  return _Tr::eof();
	else if (!_Tr::eq_int_type(__out, _Tr::eof()))
	  return this->sputc(__out);
	return _Tr::not_eof(__out);
      }

      /// Return the next element without consuming it, refilling the get
      /// area from the byte stream buffer if necessary.
      typename _Wide_streambuf::int_type
      underflow()
      {
	if (!_M_buf)
	  return _Tr::eof();

	// A refill can succeed without producing any element (e.g. when
	// only part of a multibyte sequence could be read), so keep
	// reading until there is something to return or reading fails.
	while (!(this->gptr() < this->egptr()))
	  if (!_M_conv_get())
	    return _Tr::eof();
	return _Tr::to_int_type(*this->gptr());
      }

      /// Copy up to @p __n elements into the put area, converting and
      /// writing whenever it fills.  Returns the number of elements taken.
      streamsize
      xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
      {
	if (!_M_buf || __n == 0)
	  return 0;
	streamsize __done = 0;
	do
	{
	  auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
					   __n - __done);
	  _Tr::copy(this->pptr(), __s + __done, __nn);
	  this->pbump(__nn);
	  __done += __nn;
	} while (__done < __n && _M_conv_put());
	return __done;
      }

    private:
      // Fill the get area from converted contents of the byte stream
      // buffer.  Returns false if no bytes could be read or the
      // conversion failed; a true result does not guarantee that the get
      // area is non-empty.
      bool
      _M_conv_get()
      {
	// Preserve up to _S_putback_length already-read elements in front
	// of the new data.
	const streamsize __pb1 = this->gptr() - this->eback();
	const streamsize __pb2 = _S_putback_length;
	const streamsize __npb = std::min(__pb1, __pb2);

	_Tr::move(_M_get_area + _S_putback_length - __npb,
		  this->gptr() - __npb, __npb);

	streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
	// The byte buffer is full of input that cannot be converted;
	// reading more would write past its end.
	if (__nbytes < 1)
	  return false;
	__nbytes = std::min(__nbytes, _M_buf->in_avail());
	// Always try to read at least one byte.
	if (__nbytes < 1)
	  __nbytes = 1;
	__nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
	if (__nbytes < 1)
	  return false;
	__nbytes += _M_unconv;

	// convert _M_get_buf into _M_get_area

	_Elem* __outbuf = _M_get_area + _S_putback_length;
	_Elem* __outnext = __outbuf;
	const char* __bnext = _M_get_buf;

	codecvt_base::result __result;
	if (_M_always_noconv)
	  __result = codecvt_base::noconv;
	else
	  {
	    _Elem* __outend = _M_get_area + _S_buffer_length;

	    __result = _M_cvt->in(_M_state,
				  __bnext, __bnext + __nbytes, __bnext,
				  __outbuf, __outend, __outnext);
	  }

	if (__result == codecvt_base::noconv)
	  {
	    // cast is safe because noconv means _Elem is same type as char
	    auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
	    _Tr::copy(__outbuf, __get_buf, __nbytes);
	    _M_unconv = 0;
	    // Expose the copied elements to readers.
	    this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
	    return true;
	  }

	// Keep any unconverted bytes at the front of the byte buffer for
	// the next call.
	if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
	  char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

	// NOTE(review): eback() is set to __outbuf, so the elements moved
	// in front of it above are not reachable for putback - confirm
	// whether that is intended.
	this->setg(__outbuf, __outbuf, __outnext);

	return __result != codecvt_base::error;
      }

      // Fallback selected when the put area does not hold char elements.
      bool
      _M_put(...)
      { return false; }

      // Write __n bytes to the byte stream buffer; false on a short write.
      bool
      _M_put(const char* __p, streamsize __n)
      {
	if (_M_buf->sputn(__p, __n) < __n)
	  return false;
	return true;
      }

      // Convert the put area and write to the byte stream buffer.
      // On the conversion path the result is true if at least one element
      // was removed from the put area; on the noconv paths it is true if
      // the elements were written.
      bool
      _M_conv_put()
      {
	_Elem* const __first = this->pbase();
	const _Elem* const __last = this->pptr();
	const streamsize __pending = __last - __first;

	if (_M_always_noconv)
	  {
	    if (!_M_put(__first, __pending))
	      return false;
	    // Everything was written, so empty the put area.
	    this->pbump(__first - __last);
	    return true;
	  }

	char __outbuf[2 * _S_buffer_length];

	const _Elem* __next = __first;
	const _Elem* __start;
	do
	  {
	    __start = __next;
	    char* __outnext = __outbuf;
	    char* const __outlast = __outbuf + sizeof(__outbuf);
	    auto __result = _M_cvt->out(_M_state, __next, __last, __next,
					__outnext, __outlast, __outnext);
	    if (__result == codecvt_base::error)
	      return false;
	    else if (__result == codecvt_base::noconv)
	      {
		// Nothing to convert: write the remaining elements as they
		// are and empty the put area.
		if (!_M_put(__next, __last - __next))
		  return false;
		this->pbump(__first - __last);
		return true;
	      }

	    if (!_M_put(__outbuf, __outnext - __outbuf))
	      return false;
	  }
	// Stop when everything is converted or no progress was made.
	while (__next != __last && __next != __start);

	// Move any unconverted elements to the start of the put area.
	if (__next != __last)
	  _Tr::move(__first, __next, __last - __next);

	this->pbump(__first - __next);
	return __next != __first;
      }

      // The underlying byte stream buffer (not owned).
      streambuf*		_M_buf;
      // The owned conversion facet.
      unique_ptr<_Codecvt>	_M_cvt;
      // Conversion state passed to every in() and out() call.
      state_type		_M_state;

      static const streamsize	_S_buffer_length = 32;
      static const streamsize	_S_putback_length = 3;
      _Elem                     _M_put_area[_S_buffer_length];
      _Elem                     _M_get_area[_S_buffer_length];
      // Number of bytes at the start of _M_get_buf not yet converted.
      streamsize		_M_unconv = 0;
      char			_M_get_buf[_S_buffer_length-_S_putback_length];
      // Cached result of _M_cvt->always_noconv().
      bool			_M_always_noconv;
    };

#endif  // _GLIBCXX_USE_WCHAR_T

  /// @} group locales

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace

#endif // __cplusplus

#endif /* _LOCALE_CONV_H */