- A+
所属分类:.NET技术
一、什么是敏感词过滤?
敏感词过滤是一种处理网络内容的技术,可以检测和过滤出网络中的敏感/违禁词汇。它通过给定的关键字或字符串,判断网络内容是否包含某些敏感信息,从而防止违反法律法规的信息流通。
通常,可以使用两种方法来过滤敏感词:
- 黑名单过滤:即定义一个黑名单,将所有敏感词都记录在其中,然后对输入的文本进行对比,如果发现有敏感词,就将其过滤掉。
- 白名单过滤:即定义一个白名单,将所有不敏感的词汇记录在其中,然后对输入的文本进行对比,如果发现有不在白名单中的词汇,就将其过滤掉。
二、ToolGood.Words是什么?
ToolGood.Words是一款高性能非法词(敏感词)检测组件,附带繁体简体互换,支持全角半角互换,获取拼音首字母,获取拼音字母,拼音模糊搜索等功能。
ToolGood.Words的源码网站:https://github.com/toolgood/ToolGood.Words
三、在Visual Studio中安装ToolGood.Words
3.1、右键项目解决方案,选择“管理NuGet程序包”,如下图所示:
3.2、切换到“浏览”选项卡,搜索“ToolGood.Words”并安装:
安装完之后最好重新编译生成项目
四、创建“subContentCheck”类
敏感/违禁词汇因特殊内容不便上传,可自行在网站上查找
using Microsoft.AspNetCore.DataProtection.KeyManagement;
using Microsoft.AspNetCore.Http;
using Microsoft.CodeAnalysis.Text;
using Newtonsoft.Json;
using System.Collections;
using System.Text;
using ToolGood.Words;
using static System.Net.Mime.MediaTypeNames;
using IHostingEnvironment = Microsoft.AspNetCore.Hosting.IHostingEnvironment;

namespace WebApplication1 // Replace with your own namespace when copying this into a project.
{
    /// <summary>
    /// Serialization container for a list of illegal keywords (used by <see cref="subContentCheck.Write"/>).
    /// </summary>
    public class keywords
    {
        public List<string> IllegalKeywords { get; set; }
    }

    /// <summary>
    /// Serialization container for a list of illegal URLs (used by <see cref="subContentCheck.Write"/>).
    /// </summary>
    public class urlwords
    {
        public List<string> IllegalUrls { get; set; }
    }

    /// <summary>
    /// Checks submitted content against a sensitive/forbidden word dictionary loaded from
    /// two text files under the web root.
    /// </summary>
    public class subContentCheck
    {
        /// <summary>
        /// Hosting environment; only <c>WebRootPath</c> is read, to locate the dictionary files.
        /// </summary>
        private readonly IHostingEnvironment _hostingEnv;

        /// <summary>
        /// Sensitive-word dictionary file, appended to the web root path.
        /// </summary>
        private readonly string dictionaryPath = "/sensitiveWords/sensitiveWords.txt";

        /// <summary>
        /// Sensitive link / site / URL dictionary file, appended to the web root path.
        /// </summary>
        private readonly string urlsPath = "/sensitiveWords/IllegalUrls.txt";

        /// <summary>
        /// All loaded entries (words first, then URLs), in file order.
        /// </summary>
        public string[] Words { get; set; }

        /// <summary>
        /// Creates the checker and immediately loads both dictionary files.
        /// </summary>
        /// <param name="hostingEnv">Hosting environment used to resolve the web root path.</param>
        /// <remarks>
        /// Exceptions thrown while reading the files (for example when a file is missing)
        /// are not caught here and propagate to the caller.
        /// </remarks>
        public subContentCheck(IHostingEnvironment hostingEnv)
        {
            _hostingEnv = hostingEnv;
            InitDictionary();
        }

        /// <summary>
        /// (Re)loads the in-memory dictionary from the word file and the URL file.
        /// Every line of each file is split on '|' and every non-empty piece becomes one entry.
        /// </summary>
        public void InitDictionary()
        {
            // Reset first so a failed reload leaves an empty dictionary, as before.
            Words = Array.Empty<string>();

            string wordsPath = _hostingEnv.WebRootPath + dictionaryPath;
            string urlPath = _hostingEnv.WebRootPath + urlsPath;

            // Both files are read before any entry is collected.
            string[] wordLines = System.IO.File.ReadAllLines(wordsPath, System.Text.Encoding.UTF8);
            string[] urlLines = System.IO.File.ReadAllLines(urlPath, System.Text.Encoding.UTF8);

            List<string> entries = new List<string>();
            AppendEntries(entries, wordLines);
            AppendEntries(entries, urlLines);

            Words = entries.ToArray();
        }

        /// <summary>
        /// Splits each line on '|' and appends the non-empty pieces to <paramref name="target"/>.
        /// </summary>
        private static void AppendEntries(List<string> target, string[] lines)
        {
            foreach (string line in lines)
            {
                foreach (string entry in line.Split('|'))
                {
                    if (!string.IsNullOrEmpty(entry))
                    {
                        target.Add(entry);
                    }
                }
            }
        }

        /// <summary>
        /// Builds a <see cref="WordsSearch"/> over <see cref="Words"/>.
        /// </summary>
        /// <returns><c>false</c> when setting the keywords throws; the failure is reported
        /// to callers through the "0" status code rather than an exception.</returns>
        private bool TryBuildSearch(out WordsSearch search)
        {
            search = new WordsSearch();
            try
            {
                search.SetKeywords(Words);
                return true;
            }
            catch (Exception)
            {
                search = null;
                return false;
            }
        }

        /// <summary>
        /// Replaces every sensitive word found in the text with a mask character.
        /// </summary>
        /// <param name="sourceText">Text to filter.</param>
        /// <param name="replaceChar">Mask character; defaults to '*'.</param>
        /// <returns>
        /// "0": setting the keywords failed; "1": the replacement itself failed;
        /// "2": <paramref name="sourceText"/> is null or empty; otherwise the filtered text.
        /// </returns>
        /// <remarks>
        /// NOTE(review): status codes share the return channel with real results, so an input
        /// whose filtered text is literally "0", "1" or "2" cannot be told apart from an error.
        /// </remarks>
        public string FilterWithChar(string sourceText, char replaceChar = '*')
        {
            if (string.IsNullOrEmpty(sourceText))
            {
                return "2";
            }

            if (!TryBuildSearch(out WordsSearch wordsSearch))
            {
                return "0";
            }

            try
            {
                return wordsSearch.Replace(sourceText, replaceChar);
            }
            catch (Exception)
            {
                return "1";
            }
        }

        /// <summary>
        /// Reports whether the text contains any sensitive/forbidden word.
        /// </summary>
        /// <param name="sourceText">Text to inspect.</param>
        /// <returns>
        /// "0": setting the keywords failed; "1": the search itself failed;
        /// "2": <paramref name="sourceText"/> is null or empty;
        /// "3": at least one sensitive word was found; "4": none was found.
        /// </returns>
        public string FindSensitiveKey(string sourceText)
        {
            if (string.IsNullOrEmpty(sourceText))
            {
                return "2";
            }

            if (!TryBuildSearch(out WordsSearch wordsSearch))
            {
                return "0";
            }

            try
            {
                return wordsSearch.ContainsAny(sourceText) ? "3" : "4";
            }
            catch (Exception)
            {
                return "1";
            }
        }

        /// <summary>
        /// Serializes one of the two lists to JSON, echoes it to the console and writes it to
        /// "&lt;current directory&gt;/&lt;filename&gt;.json".
        /// </summary>
        /// <param name="jsonData">Keyword data; serialized when not null.</param>
        /// <param name="urlJsonData">URL data; serialized only when <paramref name="jsonData"/> is null.</param>
        /// <param name="filename">Target file name without the ".json" extension.</param>
        public static void Write(List<keywords> jsonData, List<urlwords> urlJsonData, string filename)
        {
            // Path.Combine supplies the directory separator between the two parts.
            string strFileName = System.IO.Path.Combine(Directory.GetCurrentDirectory(), filename + ".json");

            string listJson = jsonData != null
                ? JsonConvert.SerializeObject(jsonData)
                : JsonConvert.SerializeObject(urlJsonData);

            Console.WriteLine(listJson);
            writeJsonFile(strFileName, listJson);

            // Writes the serialized JSON text to the file and saves it.
            void writeJsonFile(string path, string jsonConents)
            {
                // FileMode.Create truncates an existing file, so a shorter payload cannot
                // leave stale bytes from a previous, longer write behind.
                using (FileStream fs = new FileStream(path, FileMode.Create, System.IO.FileAccess.ReadWrite, FileShare.ReadWrite))
                {
                    // GB2312 is not available by default on .NET Core; the code-pages provider adds it.
                    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
                    // NOTE(review): the file is written as GB2312, while JSON consumers usually
                    // expect UTF-8 — confirm what the readers of this file need.
                    using (StreamWriter sw = new StreamWriter(fs, Encoding.GetEncoding("GB2312")))
                    {
                        sw.WriteLine(jsonConents);
                    }
                }
            }
        }
    }
}
五、写API接口
/// <summary>
/// Masks the sensitive words in the posted text.
/// </summary>
/// <param name="sourctText">Text to be masked.</param>
/// <returns>
/// JSON object { code, msg, resultStr }. code 200: success (resultStr holds the masked text);
/// 210: setting the keywords failed; 220: unexpected exception; 230/260: the text is empty;
/// 240: the replacement failed.
/// </returns>
[HttpPost]
public IActionResult sensitive_words_replace2(string sourctText)
{
    if (string.IsNullOrEmpty(sourctText))
    {
        return Json(new { code = 230, msg = "需要替换的文本内容为空!", resultStr = "" });
    }

    try
    {
        // Constructed inside the try: the constructor reads the dictionary files and may
        // throw (e.g. missing file); that must yield the 220 response, not an unhandled error.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string resultStr = strCheck.FilterWithChar(sourctText);

        int resCode;
        string resMsg;
        // NOTE(review): FilterWithChar signals errors in-band, so a text whose filtered form is
        // literally "0", "1" or "2" is reported as an error here.
        switch (resultStr)
        {
            case "0":
                resCode = 210;
                resultStr = "";
                resMsg = "设置违禁词时发生错误,请联系管理员!";
                break;
            case "1":
                resCode = 240;
                resultStr = "";
                resMsg = "敏感内容替换时发生错误!";
                break;
            case "2":
                resCode = 260;
                resultStr = "";
                resMsg = "需要替换的文本内容为空!";
                break;
            default:
                resCode = 200;
                resMsg = "敏感词替换请求成功!";
                break;
        }

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感内容替换时发生错误!", resultStr = "" });
    }
}

/// <summary>
/// Checks whether the posted text contains sensitive/forbidden words.
/// </summary>
/// <param name="sourctText">Text to be checked.</param>
/// <returns>
/// JSON object { code, msg, resultStr }. code 200: no sensitive word found;
/// 270: a sensitive word was found; 210: setting the keywords failed;
/// 220: unexpected exception; 230/260: the text is empty; 240: the search failed.
/// </returns>
[HttpPost]
public IActionResult whether_sensitive_words(string sourctText)
{
    if (string.IsNullOrEmpty(sourctText))
    {
        // Wording aligned with this endpoint's own 260 message (check, not replace).
        return Json(new { code = 230, msg = "需要判断的文本内容为空!", resultStr = "" });
    }

    try
    {
        // Constructed inside the try so that dictionary-loading failures produce the 220 response.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string resultStr = strCheck.FindSensitiveKey(sourctText);

        int resCode;
        string resMsg;
        switch (resultStr)
        {
            case "0":
                resCode = 210;
                resultStr = "";
                resMsg = "设置违禁词时发生错误,请联系管理员!";
                break;
            case "1":
                resCode = 240;
                resultStr = "";
                resMsg = "敏感内容匹配时发生错误!";
                break;
            case "2":
                resCode = 260;
                resultStr = "";
                resMsg = "需要判断的文本内容为空!";
                break;
            case "3":
                resCode = 270;
                resultStr = "";
                resMsg = "内容中含有敏感/违禁词!";
                break;
            default:
                // The status string returned by FindSensitiveKey is passed through unchanged.
                resCode = 200;
                resMsg = "内容中不含敏感/违禁词!";
                break;
        }

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感内容匹配时发生错误!", resultStr = "" });
    }
}
六、前端封装JS方法
/**
 * Sends the text to the server to have sensitive/forbidden words masked and, on success,
 * writes the masked text into the element with the given id.
 *
 * The request is asynchronous, so the return statement runs before the response arrives:
 * in real use the returned value is always "". Rely on the value written to `#boxid`.
 *
 * @param {string} sourctText Text to be masked.
 * @param {string} boxid id of the element that receives the masked text (via `.val()`).
 * @param {object} layui Layui instance (its `layer` module is used for dialogs).
 * @returns {string} The masked text only if the response was already handled; otherwise "".
 */
function sensitive_words_replace(sourctText, boxid, layui) {
    let resultStr = "";
    const failureCodes = [210, 220, 230, 240, 260];

    $.ajax({
        url: "/Home/sensitive_words_replace2", // backend endpoint
        dataType: "JSON",
        type: "POST",
        data: { "sourctText": sourctText },
        success: function (res) {
            // The code may arrive as a number or a numeric string; normalize once.
            const resCode = Number(res.code);

            if (failureCodes.includes(resCode)) {
                // Close any loading layer before showing the message.
                layui.layer.closeAll();
                resultStr = res.resultStr;
                layui.layer.alert(res.msg, { icon: 5, title: "温馨提示", closeBtn: 0 });
            } else if (resCode === 200) {
                resultStr = res.resultStr;
                $("#" + boxid).val(resultStr);
                // Close any loading layer once the value is in place.
                layui.layer.closeAll();
            }
        },
        // jQuery passes (jqXHR, textStatus, errorThrown); show a readable string rather than
        // the jqXHR object, which would render as "[object Object]".
        error: function (jqXHR, textStatus, errorThrown) {
            layui.layer.closeAll();
            layui.layer.alert("请求失败:" + String(errorThrown || textStatus || "error"), { icon: 5, title: "温馨提示", closeBtn: 0 });
        }
    });

    return resultStr;
}

/**
 * Asks the server whether the text contains sensitive/forbidden words.
 *
 * The request is deliberately synchronous (`async: false`) so the answer can be returned
 * directly; this blocks the page until the server responds.
 *
 * @param {string} sourctText Text to be checked.
 * @param {string} boxid Not used by this function; kept for signature compatibility.
 * @param {object} layui Layui instance (its `layer` module is used for dialogs).
 * @returns {boolean} true when the server reports code 270 (sensitive word found);
 *   false otherwise, including every error case.
 */
function whether_sensitive_words(sourctText, boxid, layui) {
    let resultBool = false;
    const failureCodes = [210, 220, 230, 240, 260];

    $.ajax({
        url: "/Home/whether_sensitive_words", // backend endpoint
        dataType: "JSON",
        type: "POST",
        async: false, // must be synchronous, otherwise the result cannot be returned
        data: { "sourctText": sourctText },
        success: function (res) {
            // The code may arrive as a number or a numeric string; normalize once.
            const resCode = Number(res.code);

            if (failureCodes.includes(resCode)) {
                resultBool = false;
                layui.layer.alert(res.msg, { icon: 5, title: "温馨提示", closeBtn: 0 });
            } else if (resCode === 270) {
                resultBool = true;
            } else if (resCode === 200) {
                resultBool = false;
                // Close any loading layer once the answer is known.
                layui.layer.closeAll();
            }
        },
        // jQuery passes (jqXHR, textStatus, errorThrown); show a readable string rather than
        // the jqXHR object, which would render as "[object Object]".
        error: function (jqXHR, textStatus, errorThrown) {
            layui.layer.alert("请求失败:" + String(errorThrown || textStatus || "error"), { icon: 5, title: "温馨提示", closeBtn: 0 });
        }
    });

    return resultBool;
}