﻿using System;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using ServiceStack;

/****************************************************************
*   作者：tianzh
*   创建时间：2017/8/31 14:36:28
*   描述说明：
*
*   修改历史：
*
*
*****************************************************************/
namespace Bailun.Core.Extension.Utils
{

    /// <summary>
    /// Helpers for converting between HTML markup and plain text.
    /// </summary>
    public class HtmlParser
    {
        // Punctuation and symbols stripped by RemoveSpecialCharacter. The ASCII hyphen
        // and the underscore are deliberately absent from the set.
        private static readonly Regex SpecialCharacterRegex = new Regex(
            "[\\[\\]\\^\\*×――(^)$%~!@#$…&%￥—+=<>《》!！??？:：•`·、。，；,.;\"‘’“”]",
            RegexOptions.Compiled);

        // Opening <body> tag, with or without attributes, in any letter case.
        private static readonly Regex BodyOpenTagRegex = new Regex(
            @"<body(?:\s[^>]*)?>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // A complete HTML comment; lazy so adjacent comments are removed one at a time,
        // Singleline so a comment may span lines and contain hyphens.
        private static readonly Regex HtmlCommentRegex = new Regex(
            "<!--.*?-->",
            RegexOptions.Singleline | RegexOptions.Compiled);

        // Any single tag such as <p>, </div> or <img src="x"/>.
        private static readonly Regex TagRegex = new Regex("<[^>]+>", RegexOptions.Compiled);

        // A well-formed character reference: &name; &#123; or &#x1F;.
        // Deliberately strict so that a bare '&' followed later by ';' in ordinary
        // text ("A & B; C") is not mistaken for an entity.
        private static readonly Regex EntityRegex = new Regex(
            "&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);",
            RegexOptions.Compiled);

        // Whole <style>...</style> and <script>...</script> elements including content.
        // A lazy dot with Singleline avoids the overlapping alternatives of "(.|\s)+?",
        // which can backtrack exponentially when the closing tag is missing.
        private static readonly Regex StyleBlockRegex = new Regex(
            @"<style\b.*?</style\s*>",
            RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);

        private static readonly Regex ScriptBlockRegex = new Regex(
            @"<script\b.*?</script\s*>",
            RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // <br>, <br/>, <br /> in any letter case.
        private static readonly Regex LineBreakTagRegex = new Regex(
            @"<br\s*/?>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // Closing paragraph tag in any letter case.
        private static readonly Regex ParagraphEndTagRegex = new Regex(
            @"</p\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // Tokens removed by RelpaceHtmlAndSpace, applied strictly in this order
        // (removing one token can expose a later one, so the order is significant).
        private static readonly string[] StrippedTokens =
        {
            "&#034;", "&#039;", "&amp;", "'", "(", ")", ",", "，", "-",
            "\"", "（", "）", "&nbsp;", "\n", "\t", "\r", " "
        };

        // Entity-to-character substitutions applied by ReplaceHtmlDecode after the
        // framework decode, in this order ("&amp;" first, so a value that was encoded
        // more than once is unwrapped by the entries that follow it).
        private static readonly string[][] ResidualEntities =
        {
            new[] { "&#034;", "\"" },
            new[] { "&#039;", "'" },
            new[] { "&amp;", "&" },
            new[] { "&nbsp;", " " },
            new[] { "&quot;", "\"" },
            new[] { "&lt;", "<" },
            new[] { "&gt;", ">" }
        };

        /// <summary>
        /// Removes punctuation and special symbols from a string. The hyphen (-) and
        /// underscore (_) are not removed.
        /// </summary>
        /// <param name="hexData">Text to clean.</param>
        /// <returns>
        /// The text without the special characters; a null or empty input is returned unchanged.
        /// </returns>
        public static System.String RemoveSpecialCharacter(System.String hexData)
        {
            if (string.IsNullOrEmpty(hexData))
            {
                return hexData;
            }
            return SpecialCharacterRegex.Replace(hexData, "");
        }

        /// <summary>
        /// Extracts the content of the body element from an HTML document, then removes
        /// line-feed characters and HTML comments from it.
        /// </summary>
        /// <param name="html">HTML document or fragment.</param>
        /// <returns>
        /// The text between the opening body tag and the closing body tag. If there is no
        /// opening body tag the whole input is used; if there is no closing tag everything
        /// after the opening tag is used. Returns an empty string for null or empty input.
        /// </returns>
        public static string TakeHtmlBodyContent(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return "";
            }

            var content = html;
            var openTag = BodyOpenTagRegex.Match(html);
            if (openTag.Success)
            {
                var start = openTag.Index + openTag.Length;
                // Match "</body" rather than "</body>" so "</body >" and upper-case tags are found.
                var end = html.IndexOf("</body", start, StringComparison.OrdinalIgnoreCase);
                content = end > -1
                    ? html.Substring(start, end - start)
                    : html.Substring(start);
            }

            content = content.Replace("\n", "");
            return HtmlCommentRegex.Replace(content, "");
        }

        /// <summary>
        /// Strips HTML tags and character references from a string. The content of
        /// style and script elements is kept; use <see cref="ReplaceHtml"/> to drop it.
        /// </summary>
        /// <param name="htmlstring">HTML text.</param>
        /// <returns>The text without tags and entities; an empty string for null or empty input.</returns>
        public static string DelHtml(string htmlstring)
        {
            if (string.IsNullOrEmpty(htmlstring))
            {
                return "";
            }
            var withoutTags = TagRegex.Replace(htmlstring, "");
            return EntityRegex.Replace(withoutTags, "");
        }

        /// <summary>
        /// Strips HTML from a string: style and script elements are removed together with
        /// their content, then all remaining tags and character references are removed.
        /// </summary>
        /// <param name="htmlstring">HTML text.</param>
        /// <returns>The remaining plain text; an empty string for null or empty input.</returns>
        public static string ReplaceHtml(string htmlstring)
        {
            if (string.IsNullOrEmpty(htmlstring))
            {
                return "";
            }
            var text = StyleBlockRegex.Replace(htmlstring, "");
            text = ScriptBlockRegex.Replace(text, "");
            text = TagRegex.Replace(text, "");
            return EntityRegex.Replace(text, "");
        }

        /// <summary>
        /// Converts HTML to plain text. Four consecutive non-breaking spaces become a tab,
        /// a single one becomes a space, line-break tags and closing paragraph tags become
        /// a line feed, and everything else is stripped by <see cref="ReplaceHtml"/>.
        /// </summary>
        /// <param name="result">HTML text.</param>
        /// <returns>The plain-text rendering; an empty string for null or empty input.</returns>
        public static string HtmlToTxt(string result)
        {
            if (string.IsNullOrEmpty(result))
            {
                return "";
            }

            // The four-entity run must be handled before the single entity.
            result = result.Replace("&nbsp;&nbsp;&nbsp;&nbsp;", "\t");
            result = result.Replace("&nbsp;", " ");
            // Accept every spelling of the line break, including the "<br>" that TxtToHtml emits.
            result = LineBreakTagRegex.Replace(result, "\n");
            result = ParagraphEndTagRegex.Replace(result, "\n");
            return ReplaceHtml(result);
        }

        /// <summary>
        /// Removes a fixed set of entities, quotes, brackets, commas, hyphens and all
        /// whitespace (space, tab, CR, LF) from a string.
        /// </summary>
        /// <remarks>The method name is misspelled; it is kept as-is for existing callers.</remarks>
        /// <param name="result">Text to clean.</param>
        /// <returns>The cleaned text; an empty string for null or empty input.</returns>
        public static string RelpaceHtmlAndSpace(string result)
        {
            if (string.IsNullOrEmpty(result))
            {
                return "";
            }
            foreach (var token in StrippedTokens)
            {
                result = result.Replace(token, "");
            }
            return result;
        }

        #region String conversion
        /// <summary>
        /// HTML-decodes a string, then replaces a fixed list of entities that are still
        /// present after the first decode (for example in text that was encoded twice).
        /// </summary>
        /// <param name="itemSpecifics">HTML-encoded text.</param>
        /// <returns>The decoded text; an empty string for null or empty input.</returns>
        public static string ReplaceHtmlDecode(string itemSpecifics)
        {
            if (string.IsNullOrEmpty(itemSpecifics))
            {
                return "";
            }
            var decoded = HttpUtility.HtmlDecode(itemSpecifics);
            foreach (var pair in ResidualEntities)
            {
                decoded = decoded.Replace(pair[0], pair[1]);
            }
            return decoded;
        }
        #endregion

        /// <summary>
        /// Converts plain text to HTML: a tab becomes four non-breaking spaces and a
        /// line feed becomes a br tag.
        /// </summary>
        /// <param name="s">Plain text.</param>
        /// <param name="encode">
        /// When true the text is passed through <see cref="HtmlEntitiesEncode"/> first, so
        /// markup characters in the input are escaped while the generated tags stay intact.
        /// </param>
        /// <returns>The HTML fragment; an empty string for null or empty input.</returns>
        public static string TxtToHtml(string s,bool encode=true)
        {
            if (string.IsNullOrEmpty(s))
            {
                return "";
            }

            // Encode BEFORE inserting markup: encoding afterwards would turn the generated
            // "<br>" into "&lt;br&gt;" and "&nbsp;" into "&amp;nbsp;".
            var html = encode ? HtmlEntitiesEncode(s) : s;
            html = html.Replace("\t", "&nbsp;&nbsp;&nbsp;&nbsp;");
            return html.Replace("\n", "<br>");
        }

        /// <summary>
        /// HTML-encodes text and additionally writes every character above ASCII 127 as a
        /// decimal numeric character reference (&amp;#N;), producing pure-ASCII output.
        /// </summary>
        /// <param name="text">Text to encode.</param>
        /// <returns>The ASCII-only encoded text; an empty string for null or empty input.</returns>
        public static string HtmlEntitiesEncode(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return "";
            }

            var encoded = HttpUtility.HtmlEncode(text);
            var builder = new StringBuilder(encoded.Length + encoded.Length / 10);

            for (var i = 0; i < encoded.Length; i++)
            {
                var current = encoded[i];
                if (current <= 127)
                {
                    builder.Append(current);
                    continue;
                }

                int codePoint = current;
                // A surrogate pair is one character: emit a single reference to its code
                // point instead of two references to the individual surrogate halves.
                if (char.IsHighSurrogate(current)
                    && i + 1 < encoded.Length
                    && char.IsLowSurrogate(encoded[i + 1]))
                {
                    codePoint = char.ConvertToUtf32(current, encoded[i + 1]);
                    i++;
                }

                builder.Append("&#").Append(codePoint).Append(';');
            }

            return builder.ToString();
        }
    }


}
