注冊(cè)|登錄

聯(lián)系電話:024-31891684  13390130939
沈陽軟件公司--沈陽軟件定制

沈陽軟件開發(fā)_沈陽軟件公司_沈陽軟件定制/軟件/最新技術(shù)

Latest technology最新技術(shù)

正文提取中用到的正則表達(dá)式

瀏覽量:2872

#region 相關(guān)正則表達(dá)式

 
/// <summary>
/// 去掉所有html標(biāo)簽
/// </summary>
private static readonly Regex FilterAll = new Regex(
@"(\[([^=]*)(=[^\]]*)?\][\s\S]*?\[/\1\])|(?<lj>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");])<a\s+[^>]*>[^<]{2,}</a>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");]))|(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)|(\&\#\d+\;)",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase); //(?<Link><a[\s\S]*?</a>)|
//(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)
 
/// <summary>
/// 找出title標(biāo)簽
/// </summary>
private static readonly Regex FindTitle = new Regex(
@"<\s*/?title\s*>",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出title標(biāo)簽內(nèi)容
/// </summary>
private static readonly Regex FindTitleContent = new Regex(
@"<\s*/?title\s*>(?<Content>[\s\S]*?)<\s*/?title\s*>",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出h 和Strong標(biāo)簽
/// </summary>
private static readonly Regex FindHStrong = new Regex(
@"<\s*/?h\s*>|<\s*/?strong\s*>",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出p 和br標(biāo)簽
/// </summary>
private static readonly Regex FindPB = new Regex(
@"<\s*/?p\s*>|<\s*br\s*/?>|<\s*/?tr\s*>",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出nbsp標(biāo)簽
/// </summary>
private static readonly Regex FindNbsp = new Regex(
@"&nbsp",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出結(jié)尾標(biāo)簽
/// </summary>
private static readonly Regex FindS = new Regex(
@"(?<Content>[\s\S]*?)\$",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出是否為標(biāo)準(zhǔn)句
/// </summary>
private static readonly Regex IsSen = new Regex(
@"[,.,。!!;;::……??《》“”""]",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出是否為垃圾句[strong][h]標(biāo)簽過多的
/// </summary>
private static readonly Regex IsWs = new Regex(
@"\[\(h\)\]",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出是否為垃圾句冒號(hào)和·-過多的
/// </summary>
private static readonly Regex IsWsM = new Regex(
@"\[·]|[-]|[::]",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出是否為BBS特征
/// </summary>
private static readonly Regex IsBbsInfo = new Regex(
@"第[^樓]{1,50}樓|Powered\s*/?by[\s\S]*?Dvbbs|Powered\s*/?by[\s\S]*?Discuz",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 取KEYWORD
/// </summary>
private static readonly Regex mKeyWord = new Regex(
@"<meta\s*name\s*=\s*['""]?keywords['""]?\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?\s*name\s*=\s*['""]?keywords['""]?\s*[^>]*>
",RegexOptions.ExplicitCapture| RegexOptions.Multiline| RegexOptions.IgnoreCase);
 
/// <summary>
/// 取DESCRIPTION
/// </summary>
private static readonly Regex mDescription = new Regex(
@"<meta\s*name\s*=\s*['""]?description['""]?\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?\s*name\s*=\s*['""]?description['""]?\s*[^>]*>
",RegexOptions.ExplicitCapture| RegexOptions.Multiline| RegexOptions.IgnoreCase);
 
/// <summary>
/// 取Tags
/// </summary>
private static readonly Regex mTag = new Regex(
@"<meta\s*name\s*=\s*['""]?tagwords['""]?\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?\s*name\s*=\s*['""]?tagwords['""]?\s*[^>]*>
", RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出是否為垃圾句:后字符號(hào)過少,:號(hào)前無“說”字,:號(hào)后無"關(guān)于"
/// </summary>
private static readonly Regex IsWsMM = new Regex(
@"^[^說\s]{0,8}?[::].{0,10}$",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出spider寫入的url標(biāo)記
/// </summary>
private static readonly Regex txtUrl = new Regex(
@"當(dāng)前URL為:http://(?<URL>.*)",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
/// <summary>
/// 找出spider寫入的錨點(diǎn)描述標(biāo)記
/// </summary>
private static readonly Regex txtDescription = new Regex(
@"當(dāng)前鏈接描述為:(?<Describe>.*)",
RegexOptions.ExplicitCapture
| RegexOptions.Multiline
| RegexOptions.IgnoreCase);
 
///// <summary>
///// 取需要a標(biāo)簽
///// </summary>
//private static readonly Regex cleanFirst = new Regex(
// @"([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])(?<Robbish1><a\s+[^>]*>)[^<]{1,6}(?<Robbish2></a>)([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])", RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
 
#endregion
 

CRM定制 辦公OA找沈陽易勢(shì)科技有限公司

沈陽團(tuán)購(gòu)網(wǎng)|營(yíng)口網(wǎng)站制作|沈陽軟件公司|軟件定制|網(wǎng)站建設(shè)|加盟易勢(shì)|提交問題

主站蜘蛛池模板: 国产午夜三级一区二区三| 少妇人妻综合久久中文字幕| 亚洲欧美日韩另类在线一| 性色欲情网站iwww| 久久综合国产乱子伦精品免费 | 亚洲一区二区精品视频| 狠狠色综合7777久夜色撩人| 四虎成人国产精品视频| 777精品成人影院| 成年女人免费视频播放体验区| 亚洲A∨无码一区二区三区| 欧美色成人综合| 免费观看的毛片| 老司机67194精品线观看| 国产午夜三级一区二区三| 久夜色精品国产一区二区三区| 国产视频福利一区| 久久丫精品国产亚洲av| 最近最好的中文字幕2019免费| 亚洲福利视频一区| 视频一区二区在线观看| 国产欧美色一区二区三区| 51久久夜色精品国产| 大学生被内谢粉嫩无套| 一区二区精品视频| 成视频年人黄网站免费视频| 久久人妻av无码中文专区| 最近中文字幕免费完整| 亚洲午夜国产精品无码老牛影视| 老子影院午夜伦手机不四虎| 国产在线资源站| 日本dhxxxxxdh14日本| 国产精品无码无卡无需播放器| 99国产欧美久久精品| 好男人神马视频在线观看| 亚洲18在线天美| 欧美军人男男同videos可播放| 亚洲福利一区二区精品秒拍| 翁熄止痒婉艳隔壁老李头| 国产乱码精品一区二区三区四川人| 黄色毛片免费在线观看|