[程序吧 www.qhttl.com]懒人图库和模板王两大站采集神器_.NET_编程开发_程序员俱乐部

中国优秀的程序员网站程序员频道CXYCLUB技术地图
热搜:
更多>>
 
您所在的位置: 程序员俱乐部 > 编程开发 > .NET > [程序吧 www.qhttl.com]懒人图库和模板王两大站采集神器

[程序吧 www.qhttl.com]懒人图库和模板王两大站采集神器

 2014/9/7 10:13:19  黑子哥  程序员俱乐部  我要评论(0)
  • 摘要:代码是由Asp.net开发的一个采集器,采集功能不是很强,是我去年采集写的,不完善,但是能够采集和下载,采集方法是用正则表达式。代码就不详细讲解了,需要学习采集的朋友自己可以研究。代码去年写的,不喜勿喷。不说废话:先上下载地址:懒人图库采集。源码好像加密了;如果有加密请输入解压密码:www.qhttl.com。主页效果:采集结果实体:public class JsModel { public string Name { get; set; } public string Remark { get; set; …
  • 标签:程序 模板

这是一个用 ASP.NET 开发的采集器,是我去年写的,采集功能不是很强,也不够完善,但是能够采集和下载,采集方法是用正则表达式。

代码就不详细讲解了,需要学习采集的朋友自己可以研究

代码去年写的,不喜勿喷

不说废话:先上下载地址:懒人图库采集

源码好像加密了;如果有加密,请输入解压密码:www.qhttl.com

主页效果:

采集结果实体:

    /// <summary>
    /// Plain data holder for one scraped item, filled in by the scraper classes
    /// (JsLr and ScLr) and queued for later processing.
    /// </summary>
    public class JsModel
    {
        /// <summary>
        /// Item title. The scrapers store it with '/' and '\' replaced by '_' and
        /// use it to build local file paths.
        /// </summary>
        public string Name { get; set; }

        /// <summary>
        /// Text taken from the page's textarea; the scrapers only set it when no
        /// usable download link was found.
        /// </summary>
        public string Remark { get; set; }

        /// <summary>
        /// NOTE(review): never assigned by the scrapers shown in this file; purpose unconfirmed.
        /// </summary>
        public string Code { get; set; }

        /// <summary>
        /// Category of the item: "js" for JsLr, or the current asset type
        /// (for example "bg", "gif", "psd") for ScLr.
        /// </summary>
        public string Type { get; set; }

        /// <summary>
        /// Presumably the downloaded file content; not assigned by the scrapers
        /// shown in this file. TODO confirm against the download code.
        /// </summary>
        public byte[] FileData { get; set; }

        /// <summary>Download link extracted from the item's page.</summary>
        public string File { get; set; }

        /// <summary>Image URL extracted from the item's page or list entry.</summary>
        public string Image { get; set; }

        /// <summary>
        /// Presumably the downloaded image content; not assigned by the scrapers
        /// shown in this file. TODO confirm against the download code.
        /// </summary>
        public byte[] ImageData { get; set; }

        /// <summary>
        /// Identifier of the item: JsLr stores the page index, ScLr stores a running counter.
        /// </summary>
        public int ID { get; set; }

        /// <summary>
        /// Set to true by the scrapers when a usable download link was found for the item.
        /// </summary>
        public bool IsDownFile { get; set; }
    }

 

js采集源码:

代码统一继承Thief基类并重写其方法即可,主要采集函数是Get()。

/// <summary>
/// Scraper for the "js" pages of lanrentuku.com. Walks the detail pages by page
/// index, extracts title, download link, image and remark with regular expressions
/// and enqueues one <c>Model.JsModel</c> per page into <c>Jses</c> and <c>MsgList</c>.
/// </summary>
/// <remarks>
/// Members used but not declared here (<c>Jses</c>, <c>MsgList</c>, <c>_dir</c>,
/// <c>IsAsync</c>, <c>IsComplate</c>, <c>GetJsAsync</c>, <c>DownAsync</c>,
/// <c>DownFile</c>, <c>GetHttpString</c>, <c>ReplaceEx</c>, <c>DelHtml</c>, ...)
/// are presumably inherited from <c>Thief</c>.
/// </remarks>
public class JsLr : Thief
    {
        /// <summary>Config value at "config/lanren/js"; used as a format string that takes the page index.</summary>
        protected string Link { get { return base.getAttr("config/lanren/js"); } }

        /// <summary>Config text at "config/lanren/js/Encoder"; passed to <c>GetHttpString</c> (presumably the page encoding name).</summary>
        protected string Encod { get { return base.GetNodeText("config/lanren/js/Encoder"); } }

        /// <summary>Comma-separated page indexes from "config/lanren/js/errorPage" that must be skipped.</summary>
        protected string[] ErrorPage { get { return base.GetNodeText("config/lanren/js/errorPage").Split(','); } }

        // Index of the page that will be fetched next.
        private int PageIndex;

        // Lazily created shared instance returned by Create().
        private static JsLr this_JsLr;

        /// <summary>
        /// Returns the shared instance, creating it on first use.
        /// The creation is not synchronized.
        /// </summary>
        public static JsLr Create()
        {
            if (this_JsLr == null)
            {
                this_JsLr = new JsLr();
            }
            return this_JsLr;
        }

        private JsLr()
        {
            Jses = new Queue<Model.JsModel>();
            _dir = Context.Server.MapPath("/Download/");
            PageIndex = 1;
            IsComplate = false;
        }

        /// <summary>Returns true when the current page index is listed in <see cref="ErrorPage"/>.</summary>
        private bool isErrorPage()
        {
            return Array.IndexOf(ErrorPage, PageIndex.ToString()) >= 0;
        }

        /// <summary>
        /// Starts the scrape: on a new thread when <c>IsAsync</c> is set, otherwise on the calling thread.
        /// </summary>
        public override void Get()
        {
            if (IsAsync)
            {
                GetJsAsync = new Thread(GetJsAll);
                GetJsAsync.Start();
            }
            else
            {
                GetJsAll();
            }
        }

        /// <summary>
        /// Scrapes pages one after another until a page without the
        /// <c> class="zuozhe"</c> marker is reached or <c>IsComplate</c> becomes true.
        /// </summary>
        /// <remarks>
        /// Implemented as a loop instead of one recursive call per page, so the stack
        /// depth no longer grows with the number of pages, and a configured error page
        /// is skipped without being fetched afterwards.
        /// </remarks>
        private void GetJsAll()
        {
            while (!IsComplate)
            {
                if (isErrorPage())
                {
                    // Skip this index entirely and re-check the next one.
                    PageIndex++;
                    continue;
                }

                string html = GetHttpString(string.Format(Link, PageIndex), Encod);
                if (string.IsNullOrEmpty(html))
                {
                    // Nothing was returned: treat it like a page without the marker.
                    break;
                }

                // Strip line breaks and tabs before the page is matched against the patterns.
                html = html.Replace("\n", "").Replace("\r", "").Replace("\t", "");
                if (!html.Contains(" class=\"zuozhe\""))
                {
                    break;
                }

                CollectPage(html);
                PageIndex++;
            }

            this.IsComplate = true;
            if (IsAsync)
            {
                // NOTE(review): kept from the original so the worker thread ends in the same
                // state as before; returning from this method would end the thread as well.
                Thread.CurrentThread.Abort();
            }
        }

        /// <summary>
        /// Extracts one item from an already normalized detail page and enqueues it,
        /// unless the title is empty or a matching file already exists under <c>_dir</c>.
        /// </summary>
        /// <param name="html">Detail page markup with line breaks and tabs removed.</param>
        private void CollectPage(string html)
        {
            Model.JsModel model = new Model.JsModel();
            model.Type = "js";
            model.Name = DelHtml(ReplaceEx(html, "<div[^<>]*title.*?<h1>(?<name>.*?)</h1>", "name").Replace("/", "_").Replace("\\", "_"));

            bool alreadyStored = File.Exists(_dir + "js/" + model.Name + ".zip")
                || File.Exists(_dir + "js/" + "Html/" + model.Name + ".html");
            if (string.IsNullOrEmpty(model.Name) || alreadyStored)
            {
                return;
            }

            // Download link of the item.
            model.File = ReplaceEx(html, "<a[^<>]*bt-green.*?href.*?[\"'](?<url>.*?)[\"'].*?>.*?</a>", "url");
            if (!string.IsNullOrEmpty(model.File) && model.File != "#")
            {
                model.IsDownFile = true;
                model.File = "http://www.lanrentuku.com" + model.File;
                model.Image = ReplaceEx(html, "<div.*?content-js.*?[^<>]*class=['\"]l['\"].*?img.*?src.*?[\"'](?<image>.*?)[\"'].*?>", "image");
            }
            else
            {
                // No usable download link: keep the textarea content instead, rebranded.
                model.Remark = ReplaceEx(html, "<div[^<>]*content-tx.*?textarea.*?>(?<remark>.*?)</textarea>", "remark").Replace("懒人图库", "程序吧").Replace("lanrentuku.com", "qhttl.cn");
            }

            model.ID = PageIndex;
            lock (Jses)
            {
                Jses.Enqueue(model);
            }
            lock (MsgList)
            {
                MsgList.Enqueue(new Model.MsgModel() { Type = "js", IsDown = false, Model = model });
            }

            // Short pause between pages.
            Thread.Sleep(50);
        }

        /// <summary>
        /// Starts <c>DownFile</c>: on a new thread when <c>IsAsync</c> is set, otherwise on the calling thread.
        /// </summary>
        public override void DownLoad()
        {
            if (IsAsync)
            {
                DownAsync = new Thread(DownFile);
                DownAsync.Start();
            }
            else
            {
                DownFile();
            }
        }
    }

素材采集:

/// <summary>
/// Scraper for the graphic-asset lists of lanrentuku.com. For every asset type in
/// <c>TypeSc</c> it walks the list pages, reads each list entry, opens the entry's
/// detail page when needed and enqueues one <c>JsModel</c> into <c>Jses</c> and <c>MsgList</c>.
/// </summary>
/// <remarks>
/// Members used but not declared here (<c>Jses</c>, <c>MsgList</c>, <c>_dir</c>,
/// <c>IsAsync</c>, <c>IsComplate</c>, <c>GetJsAsync</c>, <c>DownAsync</c>,
/// <c>DownFile</c>, <c>GetHttpString</c>, <c>ReplaceEx</c>, ...) are presumably
/// inherited from <c>Thief</c>.
/// </remarks>
public class ScLr : Thief
    {
        /// <summary>Config value at "config/lanren/sc"; used as a format string that takes the page index and the asset type.</summary>
        protected string Link { get { return base.getAttr("config/lanren/sc"); } }

        /// <summary>Config text at "config/lanren/sc/Encoder"; passed to <c>GetHttpString</c> (presumably the page encoding name).</summary>
        protected string Encod { get { return base.GetNodeText("config/lanren/sc/Encoder"); } }

        /// <summary>Comma-separated page indexes from "config/lanren/sc/errorPage" that must be skipped.</summary>
        protected string[] ErrorPage { get { return base.GetNodeText("config/lanren/sc/errorPage").Split(','); } }

        // Asset types that are scraped, in this order.
        private string[] TypeSc = new string[] { "bg", "gif", "psd", "qq", "png", "vector" };

        // Asset type currently being scraped.
        private string CurrType { get; set; }

        // Index of the list page that will be fetched next for the current type.
        private int PageIndex;

        // Running counter used as the ID of scraped items.
        private int Count;

        // Lazily created shared instance returned by Create().
        private static ScLr this_scLr;

        /// <summary>
        /// Returns the shared instance, creating it on first use.
        /// The creation is not synchronized.
        /// </summary>
        public static ScLr Create()
        {
            if (this_scLr == null)
            {
                this_scLr = new ScLr();
            }
            return this_scLr;
        }

        private ScLr()
        {
            Jses = new Queue<Model.JsModel>();
            _dir = Context.Server.MapPath("/Download/");
            PageIndex = 1;
            IsComplate = false;
        }

        /// <summary>
        /// Starts the scrape over all asset types: on a new thread when <c>IsAsync</c>
        /// is set, otherwise on the calling thread.
        /// </summary>
        /// <remarks>
        /// Both paths run <see cref="GetScByType"/>. The synchronous path used to call
        /// <see cref="GetScAll"/> directly, which ran with no asset type selected and
        /// never marked the scrape as complete.
        /// </remarks>
        public override void Get()
        {
            if (IsAsync)
            {
                GetJsAsync = new Thread(GetScByType);
                GetJsAsync.Start();
            }
            else
            {
                GetScByType();
            }
        }

        /// <summary>
        /// Starts <c>DownFile</c>: on a new thread when <c>IsAsync</c> is set, otherwise on the calling thread.
        /// </summary>
        public override void DownLoad()
        {
            if (IsAsync)
            {
                DownAsync = new Thread(DownFile);
                DownAsync.Start();
            }
            else
            {
                DownFile();
            }
        }

        /// <summary>Returns true when the current page index is listed in <see cref="ErrorPage"/>.</summary>
        private bool isErrorPage()
        {
            return Array.IndexOf(ErrorPage, PageIndex.ToString()) >= 0;
        }

        /// <summary>
        /// Scrapes every asset type in <c>TypeSc</c> from its first list page, then marks
        /// the scrape as complete.
        /// </summary>
        private void GetScByType()
        {
            if (!IsComplate)
            {
                foreach (string type in TypeSc)
                {
                    CurrType = type;
                    PageIndex = 1;
                    GetScAll();
                }
            }

            this.IsComplate = true;
            if (IsAsync)
            {
                // NOTE(review): kept from the original so the worker thread ends in the same
                // state as before; returning from this method would end the thread as well.
                Thread.CurrentThread.Abort();
            }
        }

        /// <summary>
        /// Walks the list pages of the current asset type until a page yields no entries.
        /// </summary>
        /// <remarks>
        /// Implemented as a loop instead of one recursive call per page, so the stack
        /// depth no longer grows with the number of pages, and a configured error page
        /// is skipped without being fetched afterwards.
        /// </remarks>
        private void GetScAll()
        {
            while (true)
            {
                if (isErrorPage())
                {
                    // Skip this index entirely and re-check the next one.
                    PageIndex++;
                    continue;
                }

                string listHtml = GetHttpString(string.Format(Link, PageIndex, CurrType), Encod);
                if (string.IsNullOrEmpty(listHtml))
                {
                    // Nothing was returned: treat it like a page without entries.
                    return;
                }

                // Strip line breaks and tabs before the page is matched against the patterns.
                listHtml = listHtml.Replace("\n", "").Replace("\r", "").Replace("\t", "");
                string listBlock = ReplaceEx(listHtml, "<div[^<>]*?list-pic.*?</div>", 0);
                string[] entries = ReplaceEx(listBlock, "<dd.*?/dd>");
                if (entries == null || entries.Length == 0)
                {
                    return;
                }

                foreach (string entry in entries)
                {
                    CollectEntry(entry);
                }
                PageIndex++;
            }
        }

        /// <summary>
        /// Builds one item from a list entry and enqueues it, unless its title is empty
        /// or its ".zip" file already exists under <c>_dir</c>.
        /// </summary>
        /// <param name="entry">Markup of a single list entry.</param>
        private void CollectEntry(string entry)
        {
            JsModel model = new JsModel();
            // The counter advances for every entry, including the ones skipped below.
            model.ID = ++Count;
            model.Type = CurrType;
            model.Name = ReplaceEx(entry, "<a.*>(?<name>[^<>]*)</a>", "name").Replace("/", "_").Replace("\\", "_");
            model.Image = ReplaceEx(entry, "<a.*src.*?[\"'](?<src>.*?)[\"'].*?</a>", "src");
            if (!model.Image.Contains("http://img.lanrentuku.com"))
            {
                model.Image = "http://img.lanrentuku.com" + model.Image;
            }

            if (string.IsNullOrEmpty(model.Name) || File.Exists(_dir + CurrType + "/" + model.Name + ".zip"))
            {
                return;
            }

            // The detail page is only downloaded for entries that are actually collected.
            string link = ReplaceEx(entry, "<a[^<>]*href.*?[\"'](?<href>.*?)[\"'].*?</a>", "href");
            string detailHtml = GetHttpString("http://www.lanrentuku.com" + link, Encod);

            // Download link of the item; it is stored exactly as extracted.
            model.File = ReplaceEx(detailHtml, "<a[^<>]*bt-green.*?href.*?[\"'](?<url>.*?)[\"'].*?>.*?</a>", "url");
            if (!string.IsNullOrEmpty(model.File) && model.File != "#")
            {
                model.IsDownFile = true;
            }
            else
            {
                // No usable download link: keep the textarea content instead, rebranded.
                model.Remark = ReplaceEx(detailHtml, "<div[^<>]*content-tx.*?textarea.*?>(?<remark>.*?)</textarea>", "remark").Replace("懒人图库", "程序吧").Replace("lanrentuku.com", "qhttl.cn");
            }

            lock (Jses)
            {
                Jses.Enqueue(model);
            }
            lock (MsgList)
            {
                MsgList.Enqueue(new Model.MsgModel() { Type = CurrType, IsDown = false, Model = model });
            }
        }
    }
发表评论
用户名: 匿名