Convert HTML content to XML

In this write-up I will illustrate how we can convert HTML content to XML. Keep in mind that when HTML is involved, the solution will be more or less static. The reason I needed this code was a requirement to read a table from a website and store its data in an AX table. Below is the code that does this:
 
// Job: downloads a web page, strips its HTML markup, and writes the cells of
// the table that follows the 'Fecha' header line to an XML file.
static void convertHTML2XML(Args _args)
{
    WinInet         wi = new WinInet();
    TextBuffer      tb = new TextBuffer();
    XML             xmlString;
    str             page;
    int             handle;
    int             i;
    container       con;
    str             tmpString;
    boolean         firstSearch = true;
    int             fields = 0;     // 0 = table not reached yet; otherwise index of the next cell in the current row

    // Appends the XML fragment for cell number _i of the current row.
    // Cells 1 and 2 are ignored; cell 3 opens an <ExchangeRate> element and
    // cell 7 closes it and resets the cell counter for the next row.
    void formXML(int _i, str _value)
    {
        switch (_i)
        {
            case 1, 2: break;
            case 3:  xmlString += '<ExchangeRate><Valor>' + _value + '</Valor>'; break;
            case 4:  xmlString += '<Ultimor>' + _value + '</Ultimor>'; break;
            case 5:  xmlString += '<Var>' + _value + '</Var>'; break;
            case 6:  xmlString += '<VarP>' + _value + '</VarP>'; break;
            case 7:  xmlString += '<Fecha>' + _value + '</Fecha></ExchangeRate>'; break;
        }
        if (_i == 7)
        {
            // The caller increments 'fields' after this call, so the next row starts at 1.
            fields = 0;
        }
    }
    ;
    // NOTE(review): iso-8859-11 is the Latin/Thai code page; iso-8859-1 may have
    // been intended - confirm against the encoding of the file actually written.
    xmlString += '<?xml version="1.0" encoding="iso-8859-11"?><Root>';

    handle = wi.internetOpenUrl('http://finanzas.inicia.es/cotizaciones/tiposInteres.php?Sesion_BolsamaniaES=2h5opigq5iuhad7te24ip1jpq1');
    if (!handle)
    {
        // Without a valid handle there is nothing to read or to close.
        error('convertHTML2XML: unable to open the source URL.');
        return;
    }
    page = wi.internetReadFile(handle);
    //info(page);
    // The page text is now held in 'page', so the handle can be released right away.
    wi.internetCloseHandle(handle);

    // One container element per line of the stripped page text.
    con = str2con(web::stripHTML(page, true, true), '\n');

    // Containers are 1-based, so the last element is at index conlen(con).
    for (i = 1; i <= conlen(con); i++)
    {
        tmpString = strltrim(strreplace(conpeek(con, i), '\t', ''));

        if (firstSearch)
        {
            // The first line starting with 'Fecha' marks the start of the data.
            if (strstartswith(tmpString, 'Fecha'))
            {
                firstSearch = false;
                fields = 1;
            }
        }

        // This heading follows the table of interest; stop parsing here.
        if (strstartswith(tmpString, 'Tipos de interés de los bancos centrales'))
            break;

        // Only non-empty lines count as cells; lines starting with 'Fecha' are skipped.
        if (fields && tmpString && !strstartswith(tmpString, 'Fecha'))
        {
            formXML(fields, tmpString);
            fields++;
        }
    }

    tb.setText(xmlString + '</Root>');
    tb.toFile('E:\\ExchangeRates.xml'); // use xmldocument instead
}
Note that I had to hard-code the column names in order to convert the data to XML. Please leave a comment if you can suggest a better alternative.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s