Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles
(untagged)

utf8::ostream

0.00/5 (No votes)
27 Feb 2004 1  
Lightweight utf8 generator

Introduction

The proposed template class is a lightweight and fast UTF-8 generator implemented as an output stream. Its main purpose is to provide an easy way to generate XML/HTML "on the fly".

Example:

// html::viewer is not defined in this article; it stands for whatever
// component displays the generated markup.
html::viewer hv;
utf8::oxstream out; 
// Narrow ("...") strings are markup and are written as-is; wide (L"...")
// strings are values and get XML-escaped and converted to UTF-8.
out << "<HTML>"
    << "<TITLE>" << L"Hello world!" << "</TITLE>"
    << "<BODY>" << L"Hello world!" << "</BODY>"
    << "</HTML>"; 
hv.show(out.data());

Implementation

Main class is a template declared as

// Simplified declaration; X defaults to true (XML escaping enabled).
template <class T, bool X = true> class ostream_t : public T {};

Here T is a class that implements the output (write) buffer.

X is a boolean parameter. If it is true, the stream escapes characters that have a special meaning in XML/HTML ('<', '>', '&', '"' and '\'') when writing wide-character strings.

Class T is the output buffer and must provide implementations of two write methods:

  // Appends a single byte to the output.
  void push(unsigned char c);
  // Appends sz bytes starting at pc to the output.
  void push(const unsigned char *pc, size_t sz);

For implementation details, see the source of the byte_buffer class below.

Source Code

Source code is pretty compact and you are free to grab it from here and paste anywhere you want:

//

// This file is part of 

// Terra Informatica Lightweight Embeddable HTMEngine control SDK

// Created by Andrew Fedoniouk @ TerraInformatica.com

//

namespace aux 
{
  // byte_buffer is an in-memory, dynamically growing byte buffer.
  //
  // It owns a heap array and therefore defines copy construction and copy
  // assignment as deep copies, so two buffers never share (and later
  // double-delete) the same storage.
  class byte_buffer 
  {
    unsigned char*  _body;       // heap storage, _allocated bytes long
    size_t          _allocated;  // capacity of _body in bytes
    size_t          _size;       // number of bytes written so far

    // Makes sure there is room for `size` more bytes and returns a pointer
    // to the first free byte. _size itself is not changed.
    unsigned char *reserve(size_t size)
    {
      size_t newsize = _size + size;
      if( newsize > _allocated ) 
      {
        // Grow geometrically, but at least to the requested size. The new
        // capacity is kept in a local until the allocation has succeeded so
        // the buffer stays consistent if `new` throws.
        size_t newcap = _allocated * 2;
        if(newcap < newsize) 
           newcap = newsize;
        unsigned char *newbody = new unsigned char[newcap];
        memcpy(newbody,_body,_size);
        delete[] _body;
        _body = newbody;
        _allocated = newcap;
      }
      return _body + _size;
    }  
   public:    
    // Creates an empty buffer with an initial capacity of 256 bytes.
    byte_buffer():_body(0),_allocated(256),_size(0)
      { _body = new unsigned char[_allocated]; }

    // Deep copy: the new buffer gets its own storage.
    byte_buffer(const byte_buffer& other)
      :_body(0),_allocated(other._allocated),_size(other._size)
    {
      _body = new unsigned char[_allocated];
      memcpy(_body,other._body,_size);
    }

    // Deep copy assignment; allocates first so *this is untouched if the
    // allocation fails.
    byte_buffer& operator=(const byte_buffer& other)
    {
      if(this != &other)
      {
        unsigned char *copy = new unsigned char[other._allocated];
        memcpy(copy,other._body,other._size);
        delete[] _body;
        _body = copy;
        _allocated = other._allocated;
        _size = other._size;
      }
      return *this;
    }

    ~byte_buffer()                { delete[] _body;  }    

    // Returns the buffer contents followed by a '\0' terminator. The
    // terminator is not counted by length(). May reallocate, so pointers
    // returned by earlier calls can become invalid.
    const unsigned char * data()  {  
             if(_size == _allocated) reserve(1); 
             _body[_size] = '\0'; return _body; 
    }    

    // Number of bytes written so far.
    size_t length() const         { return _size; }    

    // Appends a single byte.
    void push(unsigned char c)    { *reserve(1) = c; ++_size; }

    // Appends sz bytes starting at pc. A zero-length push is a no-op, so pc
    // may be null in that case.
    void push(const unsigned char *pc, size_t sz) 
    {
      if(sz == 0) return;
      memcpy(reserve(sz),pc,sz); _size += sz;
    }  
  };
}
namespace utf8
{
  // UTF-8 output stream.
  //
  // T - output buffer the stream derives from. It must provide:
  //       void push(unsigned char c)
  //       void push(const unsigned char *pc, size_t sz)
  // X - true:  wide-character output escapes the XML markup characters
  //            '<', '>', '&', '"' and '\''.
  //     false: no conversion at all.
  template <class T, bool X = true>
  class ostream_t : public T
  {
  public:
    // Starts the output with the UTF-8 byte order mark.
    ostream_t()
    { 
      static const unsigned char BOM[] = { 0xEF, 0xBB, 0xBF };
      T::push(BOM, sizeof(BOM));
    }    

    // Writes the bytes of str unchanged (no escaping, no transcoding).
    // Intended for 7-bit ASCII strings only; use it for markup output.
    // A null pointer is ignored.
    ostream_t& operator << (const char* str) 
    { 
      if(str)
        T::push(reinterpret_cast<const unsigned char*>(str),strlen(str));
      return *this; 
    }    

    // Writes a wide string as UTF-8; use it for value (text) output.
    // When X is true the XML markup characters are replaced by entities.
    // A UTF-16 surrogate pair is combined into a single code point, so the
    // result is correct whether wchar_t is 16 or 32 bits wide. An unpaired
    // surrogate is encoded as-is, and values of 2^21 or more are skipped.
    // A null pointer is ignored.
    ostream_t& operator << (const wchar_t* wstr)
    {
      if(!wstr)
        return *this;
      for(const wchar_t *pc = wstr; *pc; ++pc) 
      {
        unsigned int c = *pc;
        if(X)
          switch(c) 
          {
              // `continue` advances the enclosing for loop
              case '<': *this << "&lt;"; continue;
              case '>': *this << "&gt;"; continue;
              case '&': *this << "&amp;"; continue;
              case '"': *this << "&quot;"; continue;
              case '\'': *this << "&apos;"; continue;
          }
        if (c >= 0xD800 && c <= 0xDBFF) {
         // High surrogate: pc[1] is at worst the terminating zero, so it
         // is always safe to look at.
         unsigned int lo = pc[1];
         if (lo >= 0xDC00 && lo <= 0xDFFF) {
          c = 0x10000 + ((c - 0xD800) << 10) + (lo - 0xDC00);
          ++pc;
         }
        }
        if (c < (1 << 7)) {
         T::push (static_cast<unsigned char>(c));
        } else if (c < (1 << 11)) {
         T::push (static_cast<unsigned char>((c >> 6) | 0xc0));
         T::push (static_cast<unsigned char>((c & 0x3f) | 0x80));
        } else if (c < (1 << 16)) {
         T::push (static_cast<unsigned char>((c >> 12) | 0xe0));
         T::push (static_cast<unsigned char>(((c >> 6) & 0x3f) | 0x80));
         T::push (static_cast<unsigned char>((c & 0x3f) | 0x80));
        } else if (c < (1 << 21)) {
         // 4-byte sequences start with 11110xxx (0xF0), not 0xE0
         T::push (static_cast<unsigned char>((c >> 18) | 0xf0));
         T::push (static_cast<unsigned char>(((c >> 12) & 0x3f) | 0x80));
         T::push (static_cast<unsigned char>(((c >> 6) & 0x3f) | 0x80));
         T::push (static_cast<unsigned char>((c & 0x3f) | 0x80));
        }
      }
      return *this;
    }
  };  
 // Raw ASCII/UNICODE -> UTF-8 converter backed by aux::byte_buffer;
 // wide-character text is written without XML escaping (X = false).
  typedef ostream_t<aux::byte_buffer,false> ostream;
 // ASCII/UNICODE -> UTF-8 converter backed by aux::byte_buffer with XML
 // support: markup characters in wide-character text are escaped (X = true).
  typedef ostream_t<aux::byte_buffer,true> oxstream;
}

We use this code in the HTMEngine SDK for creating HTML dialogs and popup windows. We hope it will be useful in other places where you need to create XML/HTML dynamically.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here