init project

2022-09-10 00:10:10 +08:00
commit b68a6dd5b8
8 changed files with 1713 additions and 0 deletions
--- a/pom.xml
+++ b/pom.xml
@ -0,0 +1,88 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.hitoli.fetchPic</groupId>
+    <artifactId>fetchPic</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+        <java.version>1.8</java.version>
+
+        <maven-compiler.version>3.7.0</maven-compiler.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpmime</artifactId>
+            <version>4.5.3</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>1.9</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+            <version>1.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>fluent-hc</artifactId>
+            <version>4.5.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient-cache</artifactId>
+            <version>4.5.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient-win</artifactId>
+            <version>4.5.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpcore</artifactId>
+            <version>4.4.6</version>
+        </dependency>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.11.3</version>
+        </dependency>
+        <dependency>
+            <groupId>net.sourceforge.htmlunit</groupId>
+            <artifactId>htmlunit</artifactId>
+            <version>2.32</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.7</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <pluginManagement>
+            <plugins>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-compiler-plugin</artifactId>
+                    <version>${maven-compiler.version}</version>
+                    <configuration>
+                        <source>${java.version}</source>
+                        <target>${java.version}</target>
+                    </configuration>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+    </build>
+
+</project>
--- a/src/main/java/META-INF/MANIFEST.MF
+++ b/src/main/java/META-INF/MANIFEST.MF
@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: com.hitoli.fetchPic.Main
+
--- a/src/main/java/com/hitoli/fetchPic/DownLoad.java
+++ b/src/main/java/com/hitoli/fetchPic/DownLoad.java
@ -0,0 +1,166 @@
+package com.hitoli.fetchPic;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.SocketTimeoutException;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.http.HttpEntity;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+
+public class DownLoad {
+
+    public static RequestConfig defaultRequestConfig = RequestConfig.custom()
+            .setSocketTimeout(5000)
+            .setConnectTimeout(5000)
+            .setConnectionRequestTimeout(5000)
+            .setStaleConnectionCheckEnabled(true)
+            .setRedirectsEnabled(true)
+            .setMaxRedirects(3)
+            .build();
+
+    public static CloseableHttpClient httpClient = HttpClients.custom().
+            setDefaultRequestConfig(defaultRequestConfig).build();
+
+    public static void downloadPict(PictInfo pictInfo) {
+
+        String url = pictInfo.getUrl();
+        CloseableHttpResponse response = null;
+        OutputStream out = null;
+        InputStream in=null;
+        BufferedReader br=null;
+        byte buffer[] = new byte[1024];
+        if(StringUtils.isNotEmpty(url)){
+            try {
+                String suffix = url.substring(url.lastIndexOf("."));
+                String temp = suffix.substring(1, suffix.length()).toUpperCase();
+
+                if (!(temp.equals("BMP") || temp.equals("JPG") || temp.equals("JPEG") || temp.equals("GIF") ||
+                        temp.equals("PNG") || temp.equals("WEBP"))) { //非图片的丢弃
+                    return;
+                }
+                String name = url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."));
+                if (StringUtils.isEmpty(name)) {
+                    name = String.valueOf(System.currentTimeMillis());
+                } else {
+                    name = Utils.specialSymbolRemoval(name + "_" + System.currentTimeMillis());
+                }
+                HttpGet httpGet = new HttpGet(url);
+                httpGet.setConfig(defaultRequestConfig);
+                if (StringUtils.isNotEmpty(Main.refererUrl)) {
+                    if (Main.refererUrl.equalsIgnoreCase("self")) {
+                        httpGet.setHeader("referer", pictInfo.getHtmlUrl());
+                    } else {
+                        httpGet.setHeader("referer", Main.refererUrl);
+                    }
+                }
+                response = httpClient.execute(httpGet);
+                HttpEntity entity = response.getEntity();
+                long imgSize = entity.getContentLength();
+                if (imgSize < Main.imgMinSize*1024) { //默认图片小于1KB的丢弃
+                    throw new Exception("图片只有" + (imgSize/1024) + "KB，小于" + Main.imgMinSize + "KB");
+                }
+                in = entity.getContent();
+                String title = pictInfo.getTitle();
+                if (StringUtils.isEmpty(title)) {
+                    title = "other";
+                } else {
+                    title = Utils.specialSymbolRemoval(title);
+                }
+                File file = Utils.createImgFile(Utils.getUseFileNameShortSite(pictInfo.getSite()), title, name, suffix);
+                Utils.info("正在下载：" + url);
+                out = new FileOutputStream(file);
+                int index = 0;
+                while((index = in.read(buffer)) != -1){
+                    out.write(buffer,0,index);
+                }
+                out.flush();
+
+                Main.imgDownloaded.add(url);
+            } catch (Exception e) {
+                Utils.error("下载失败：" + url + " [" + e.getMessage() + "]");
+                if (e instanceof SocketTimeoutException) {
+                    Main.readTimeOutImgs.add(pictInfo);
+                }
+            } finally {
+                try {
+                    if (br != null){
+                        br.close();
+                    }
+                    if (out != null){
+                        out.close();
+                    }
+                    if (in != null){
+                        in.close();
+                    }
+                    if (response != null) {
+                        response.close();
+                    }
+                } catch (IOException e) {
+                    e.printStackTrace();
+                    Utils.error(e.getMessage());
+                }
+            }
+        }
+
+    }
+
+    public static void downloadPict(List<PictInfo> pictLinks){
+
+        if (null != pictLinks && !pictLinks.isEmpty()) {
+            if (Main.thread) {
+                for (int i=0; i<pictLinks.size(); i++) {
+                    PictInfo pictInfo = pictLinks.get(i);
+                    if (pictInfo != null && !Main.imgDownloaded.contains(pictInfo.getUrl())) {
+                        new Thread(new DownloadPictRunnable(pictInfo)).start();
+                    }
+                    if ((i+1)%Main.threadSize == 0) {
+                        try {
+                            Thread.sleep(Main.threadSleep*1000);
+                        } catch (InterruptedException e) {
+                            e.printStackTrace();
+                        }
+                    }
+                }
+            } else {
+                for (PictInfo pictInfo : pictLinks) {
+                    if (pictInfo != null && !Main.imgDownloaded.contains(pictInfo.getUrl())) {
+                        downloadPict(pictInfo);
+                    }
+                }
+            }
+
+            pictLinks.clear();
+        }
+
+    }
+
+    public static class DownloadPictRunnable implements Runnable {
+
+        private PictInfo pictInfo;
+
+        public DownloadPictRunnable(PictInfo pictInfo) {
+            this.pictInfo = pictInfo;
+        }
+
+        @Override
+        public void run() {
+            DownLoad.downloadPict(pictInfo);
+        }
+
+    }
+
+}
+
+
+
+
--- a/src/main/java/com/hitoli/fetchPic/FindLink.java
+++ b/src/main/java/com/hitoli/fetchPic/FindLink.java
@ -0,0 +1,255 @@
+package com.hitoli.fetchPic;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+
+/**
+ * Extracts follow-up page links and image links from a fetched HTML page.
+ */
+public class FindLink {
+
+    /**
+     * Collects the {@code <a>} links of a page that are worth crawling.
+     *
+     * <p>Links recognised as a "next page" button are inserted right behind the first
+     * entry of {@code pageLinks}; links without images, links inside a header/logo
+     * container and site-root links go to {@code lazyPageLinks}; all other links are
+     * appended to {@code pageLinks}. If no next-page button is found, the next page is
+     * guessed from the digits in the last path segment of the collected URLs. A URL
+     * already contained in {@code Main.pageSet} is not analysed again.
+     *
+     * @param url page to analyse
+     * @param pageLinks receives links to crawl next
+     * @param lazyPageLinks receives links whose scan is deferred
+     * @param connectExceptionRetryCount remaining retries when analysing the page fails
+     * @return {@code false} if the page could not be analysed after all retries,
+     *         {@code true} otherwise (including when the URL was already analysed)
+     */
+    public static boolean addPageLink(String url, List<String> pageLinks, List<String> lazyPageLinks, int connectExceptionRetryCount) {
+
+        if (Main.pageSet.contains(url)) { // already analysed, nothing to do
+            return true;
+        }
+
+        boolean result = true;
+        Utils.info("开始分析url[" + url + "]中的可用连接");
+        try {
+            Document document = HttpUtils.getInstance().getHtmlPageResponseAsDocument(url);
+            String site = Utils.getSite(url);
+            boolean findNext = false; // whether a next-page link has been found
+            Set<String> allUrls = new HashSet<String>(); // every accepted link of this page
+            for (Element element : document.select("a")) {
+                String href = element.attr("href");
+                if (!isCrawlableHref(href)) {
+                    continue;
+                }
+                href = Utils.getFullPath(site, url, href);
+                if (StringUtils.isNotEmpty(Main.fixedUrlPrefix) && !href.startsWith(Main.fixedUrlPrefix)) {
+                    continue;
+                }
+                if (href.equals(url) || Main.pageSet.contains(href)) {
+                    continue;
+                }
+                if (!Utils.checkSite(href)) {
+                    continue;
+                }
+                Elements imgs = element.select("img");
+                if (linksToOwnImage(site, url, href, imgs)) {
+                    // The anchor points at the image it wraps; that is not a page to analyse.
+                    continue;
+                }
+                allUrls.add(href);
+                String text = element.text().trim();
+                String title = element.attr("title");
+                if (Utils.isNextPageButton(text) || Utils.isNextPageButton(title)) {
+                    // The next page is always handled with priority.
+                    insertAfterCurrent(pageLinks, href);
+                    findNext = true;
+                } else if (isInsideHeaderLayout(element) || imgs.isEmpty() || Utils.isSite(href)) {
+                    // Header links and links without images are scanned later.
+                    if (!lazyPageLinks.contains(href)) {
+                        lazyPageLinks.add(href);
+                    }
+                } else if (!pageLinks.contains(href)) {
+                    pageLinks.add(href);
+                }
+            }
+            if (!findNext && !allUrls.isEmpty()) {
+                String nextPageUrl = findNextPageByNumber(url, allUrls);
+                if (!nextPageUrl.isEmpty()) {
+                    insertAfterCurrent(pageLinks, nextPageUrl);
+                }
+            }
+        } catch (Exception e) {
+            if (connectExceptionRetryCount <= 0) {
+                Utils.error("无效地址：" + url);
+                result = false;
+            } else {
+                Utils.error("重试访问地址：" + url + "第" + (5 - connectExceptionRetryCount + 1) + "次");
+                return addPageLink(url, pageLinks, lazyPageLinks, connectExceptionRetryCount - 1);
+            }
+        }
+        Utils.info("分析url[" + url + "]中的可用连接结束");
+        Main.pageSet.add(url);
+
+        return result;
+
+    }
+
+    /**
+     * Collects the image links of a page.
+     *
+     * <p>The page title (with every pattern of {@code Main.pageTitlefilters} removed) is
+     * stored with each image. Images whose file name matches {@code Main.imgNamefilters}
+     * are skipped, as are images wrapped in an anchor that points neither at the image
+     * itself nor at {@code nextPageUrl}, and images already in {@code Main.imgDownloaded}.
+     *
+     * @param url page to analyse
+     * @param pictInfos receives the images that were found
+     * @param nextPageUrl URL of the next page, may be {@code null}
+     * @param connectExceptionRetryCount remaining retries when analysing the page fails
+     * @return {@code false} if the page could not be analysed after all retries
+     */
+    public static boolean addPictLink(String url, List<PictInfo> pictInfos, String nextPageUrl, int connectExceptionRetryCount) {
+
+        boolean result = true;
+        if (null == nextPageUrl) {
+            nextPageUrl = "";
+        }
+        Utils.info("开始分析url[" + url + "]中的可用图片连接");
+        try {
+            Document document = HttpUtils.getInstance().getHtmlPageResponseAsDocument(url);
+            String site = Utils.getSite(url);
+            Elements elements = document.select("img");
+            String title = filteredPageTitle(document);
+            for (Element element : elements) {
+                String src = element.attr(imageSourceAttribute(element));
+                if (StringUtils.isEmpty(src) || src.toUpperCase().indexOf("JAVASCRIPT") != -1) {
+                    continue;
+                }
+                if (!Main.imgNamefilters.isEmpty() && isFilteredImageName(src)) {
+                    continue;
+                }
+                src = Utils.getFullPath(site, url, src);
+                Element parent = element.parent();
+                if (parent.tagName().toUpperCase().equals("A")) {
+                    // An image inside an anchor is usually a preview. It is kept only when
+                    // the anchor points at the image itself or at the next page.
+                    String parentUrl = parent.attr("href");
+                    String href = Utils.getFullPath(site, url, parentUrl);
+                    if (StringUtils.isNotEmpty(parentUrl) && !href.equals(src) && !nextPageUrl.equals(href)) {
+                        continue;
+                    }
+                }
+                if (!Main.imgDownloaded.contains(src)) {
+                    pictInfos.add(new PictInfo(site, title, src, url));
+                }
+            }
+        } catch (Exception e) {
+            if (connectExceptionRetryCount <= 0) {
+                Utils.error("无效地址：" + url);
+                result = false;
+            } else {
+                Utils.error("重试访问地址：" + url + "第" + (5 - connectExceptionRetryCount + 1) + "次");
+                // Propagate the outcome of the retry, exactly as addPageLink does.
+                return addPictLink(url, pictInfos, nextPageUrl, connectExceptionRetryCount - 1);
+            }
+        }
+        Utils.info("分析url[" + url + "]中的可用图片连接结束");
+
+        return result;
+    }
+
+    /** Returns whether a raw href is non-empty, not "#", and contains none of ".js", ".css", "javascript". */
+    private static boolean isCrawlableHref(String href) {
+        return !"".equals(href) && !"#".equals(href)
+                && href.indexOf(".js") == -1
+                && href.indexOf(".css") == -1
+                && href.indexOf("javascript") == -1;
+    }
+
+    /** Returns whether one of the images inside the anchor resolves to the anchor's own target. */
+    private static boolean linksToOwnImage(String site, String url, String href, Elements imgs) {
+        for (Element img : imgs) {
+            if (Utils.getFullPath(site, url, img.attr("src")).equals(href)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Returns whether any ancestor of the element carries the class "head", "header" or "logo" (ignoring case). */
+    private static boolean isInsideHeaderLayout(Element element) {
+        for (Element parent : element.parents()) {
+            for (String className : parent.classNames()) {
+                if (className.equalsIgnoreCase("head") ||
+                        className.equalsIgnoreCase("header") ||
+                        className.equalsIgnoreCase("logo")) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /** Inserts the link at index 1, or at the end when the list has fewer than one entry. */
+    private static void insertAfterCurrent(List<String> pageLinks, String href) {
+        pageLinks.add(Math.min(1, pageLinks.size()), href);
+    }
+
+    /**
+     * Guesses the next page from the digits between the last "/" and the last "." of each URL.
+     *
+     * <p>Example: for the current page xxx/xxx_0.html and the candidates xxx/xxx_1.html and
+     * xxx/xxx_2.html the numbers are 0, 1 and 2; the smallest number greater than the
+     * current one (1) identifies the next page.
+     *
+     * @param url current page
+     * @param candidates links found on the current page
+     * @return the guessed next-page URL, or an empty string when none qualifies
+     */
+    private static String findNextPageByNumber(String url, Set<String> candidates) {
+        Long current = pageNumberOf(url);
+        if (current == null) {
+            return "";
+        }
+        String directory = url.substring(0, url.lastIndexOf("/") + 1);
+        String best = "";
+        long bestNumber = 0L;
+        for (String candidate : candidates) {
+            if (url.equalsIgnoreCase(candidate) || candidate.indexOf(directory) == -1) {
+                continue;
+            }
+            Long number = pageNumberOf(candidate);
+            if (number == null || number <= current) {
+                continue;
+            }
+            if (best.isEmpty() || number < bestNumber) {
+                best = candidate;
+                bestNumber = number;
+            }
+        }
+        return best;
+    }
+
+    /**
+     * Concatenates the digits found between the last "/" and the last "." of the address.
+     *
+     * @return the resulting number, or {@code null} when the address has no such segment,
+     *         the segment has no digits, or the digits do not fit into a {@code long}
+     */
+    private static Long pageNumberOf(String address) {
+        int start = address.lastIndexOf("/") + 1;
+        int end = address.lastIndexOf(".");
+        if (end < start) {
+            return null;
+        }
+        StringBuilder digits = new StringBuilder();
+        for (int i = start; i < end; i++) {
+            char c = address.charAt(i);
+            if (c >= '0' && c <= '9') {
+                digits.append(c);
+            }
+        }
+        if (digits.length() == 0) {
+            return null;
+        }
+        try {
+            return Long.valueOf(digits.toString());
+        } catch (NumberFormatException tooManyDigits) {
+            return null;
+        }
+    }
+
+    /** Returns the text of the first title element in the head with every {@code Main.pageTitlefilters} pattern removed, or "". */
+    private static String filteredPageTitle(Document document) {
+        Element head = document.head();
+        Elements titles = head.getElementsByTag("title");
+        String title = "";
+        if (null != titles && !titles.isEmpty()) {
+            title = titles.get(0).text();
+            if (StringUtils.isNotEmpty(title)
+                    && null != Main.pageTitlefilters && !Main.pageTitlefilters.isEmpty()) {
+                for (String filter : Main.pageTitlefilters) {
+                    title = title.replaceAll(filter, "");
+                }
+            }
+        }
+        return title;
+    }
+
+    /** Returns the first attribute of {@code Main.imgSrcRepletTags} present on the element, or "src". */
+    private static String imageSourceAttribute(Element element) {
+        if (null != Main.imgSrcRepletTags && !Main.imgSrcRepletTags.isEmpty()) {
+            for (String tag : Main.imgSrcRepletTags) {
+                if (element.hasAttr(tag)) {
+                    return tag;
+                }
+            }
+        }
+        return "src";
+    }
+
+    /**
+     * Returns whether the upper-cased file name of the image (text between the last "/" and
+     * the last ".") equals an entry of {@code Main.imgNamefilters}. Sources without such a
+     * name are never filtered.
+     */
+    private static boolean isFilteredImageName(String src) {
+        int slash = src.lastIndexOf("/");
+        int dot = src.lastIndexOf(".");
+        if (dot <= slash) {
+            return false;
+        }
+        String name = src.substring(slash + 1, dot).toUpperCase();
+        for (String imgNamefilter : Main.imgNamefilters) {
+            if (name.equals(imgNamefilter)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
--- a/src/main/java/com/hitoli/fetchPic/HttpUtils.java
+++ b/src/main/java/com/hitoli/fetchPic/HttpUtils.java
@ -0,0 +1,115 @@
+package com.hitoli.fetchPic;
+
+import com.gargoylesoftware.htmlunit.BrowserVersion;
+import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
+import com.gargoylesoftware.htmlunit.WebClient;
+import com.gargoylesoftware.htmlunit.html.HtmlPage;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+
+/**
+ * <pre>
+ * HTTP helper that loads pages through HtmlUnit (net.sourceforge.htmlunit), so the
+ * returned HTML reflects the page after its scripts have been given time to run.
+ * </pre>
+ *
+ * <p>{@link #initWebClient(int, int, boolean, int)} must be called before any page is requested.
+ */
+public class HttpUtils {
+
+    /** The single instance; created eagerly because the constructor does no work. */
+    private static final HttpUtils INSTANCE = new HttpUtils();
+
+    private WebClient webClient;
+
+    /**
+     * Value passed to {@code WebClient.waitForBackgroundJavaScript} after a page is loaded.
+     */
+    private int waitForBackgroundJavaScript;
+
+    private HttpUtils() {
+    }
+
+    /**
+     * Returns the shared instance.
+     *
+     * @return the single {@code HttpUtils} instance
+     */
+    public static HttpUtils getInstance() {
+        return INSTANCE;
+    }
+
+    /**
+     * Creates and configures the underlying {@link WebClient}. A client created by an
+     * earlier call is closed first.
+     *
+     * @param browserTimeout request timeout of the simulated browser
+     * @param jsTimeout JavaScript execution timeout
+     * @param jsEnabled whether JavaScript is enabled
+     * @param waitForBackgroundJavaScript time to wait for background JavaScript after a page load
+     */
+    public void initWebClient(int browserTimeout, int jsTimeout, boolean jsEnabled, int waitForBackgroundJavaScript) {
+        this.waitForBackgroundJavaScript = waitForBackgroundJavaScript;
+
+        if (webClient != null) {
+            // Release the windows held by the previous client before replacing it.
+            webClient.close();
+        }
+        webClient = new WebClient(BrowserVersion.CHROME);
+
+        webClient.getOptions().setThrowExceptionOnScriptError(false); // do not fail on script errors
+        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // do not fail on non-200 status codes
+        webClient.getOptions().setActiveXNative(false);
+        webClient.getOptions().setCssEnabled(false); // CSS is not needed
+
+        webClient.getOptions().setJavaScriptEnabled(jsEnabled);
+        if (jsEnabled) {
+            // Needed for AJAX support.
+            webClient.setAjaxController(new NicelyResynchronizingAjaxController());
+        }
+
+        webClient.getOptions().setTimeout(browserTimeout); // request timeout of the "browser"
+        webClient.setJavaScriptTimeout(jsTimeout); // JavaScript execution timeout
+    }
+
+    /**
+     * Parses an HTML string into a document with every {@code &nbsp;} entity removed.
+     *
+     * @param html HTML text
+     * @return the parsed document
+     * @throws Exception declared for callers; nothing in this method throws a checked exception
+     */
+    public static Document parseHtmlToDoc(String html) throws Exception {
+        return removeHtmlSpace(html);
+    }
+
+    /** Parses the text, drops all {@code &nbsp;} entities from the serialised HTML and parses it again. */
+    private static Document removeHtmlSpace(String str) {
+        Document doc = Jsoup.parse(str);
+        String result = doc.html().replace("&nbsp;", "");
+        return Jsoup.parse(result);
+    }
+
+    /**
+     * Loads a page, waits for background JavaScript and returns the page as XML text.
+     *
+     * @param url page URL
+     * @return the page serialised with {@code HtmlPage.asXml()}
+     * @throws IllegalStateException if {@link #initWebClient(int, int, boolean, int)} has not been called
+     * @throws Exception if the page cannot be loaded
+     */
+    public String getHtmlPageResponse(String url) throws Exception {
+        if (webClient == null) {
+            throw new IllegalStateException("initWebClient must be called before requesting " + url);
+        }
+        HtmlPage page = webClient.getPage(url);
+        webClient.waitForBackgroundJavaScript(waitForBackgroundJavaScript); // blocks the calling thread
+        return page.asXml();
+    }
+
+    /**
+     * Loads a page, waits for background JavaScript and returns it as a jsoup document.
+     *
+     * @param url page URL
+     * @return the parsed document
+     * @throws Exception if the page cannot be loaded
+     */
+    public Document getHtmlPageResponseAsDocument(String url) throws Exception {
+        return parseHtmlToDoc(getHtmlPageResponse(url));
+    }
+}
--- a/src/main/java/com/hitoli/fetchPic/Main.java
+++ b/src/main/java/com/hitoli/fetchPic/Main.java
@ -0,0 +1,417 @@
+package com.hitoli.fetchPic;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+
+public class Main {
+
+    // Shared crawler state and settings. Most of the settings are overwritten from the
+    // command file by readCommandToMemory.
+    public static Set<String> sites = new HashSet<String>(); // all site roots (keeps crawled addresses in bounds)
+    public static Set<String> transboundarySites = new HashSet<String>(); // site roots that may be crossed into
+    public static Set<String> pageSet = new HashSet<String>(); // every address already crawled
+    public static Set<PictInfo> readTimeOutImgs = new HashSet<PictInfo>(); // every image whose read timed out
+    public static Set<String> imgDownloaded = new HashSet<String>(); // addresses of images already downloaded
+    public static Set<String> imgDownloadedDirName = new HashSet<String>();// names of image directories already downloaded
+    public static String home = null;
+    public static String downLoadDir = null; // download directory
+    public static String siteData = null; // record file of the site domains that may be scanned (bounds check)
+    public static String downLoadedImgData = null; // record file of downloaded image addresses
+    public static String downloadedImgDirNameData = null; // record file of downloaded image directory names
+    public static String allowDifferentCharacters = "0";// number of characters an image title may differ from a recorded directory name (used to decide whether the target directory already exists)
+    public static String readTimeOutImgData = null; // record file of image addresses whose read timed out
+    public static String allUrlData = null; // record file of every address already crawled
+    public static String currentUrlData = null; // record file of the addresses currently being processed
+    public static String lazyUrlData = null; // record file of the addresses deferred for later
+    public static String commandFile = null;// command file (checked once per pageLinks loop iteration)
+    public static long imgMinSize = 1; // minimum size of an image to download (KB)
+    public static List<String> nextPageNames = Arrays.stream(
+                new String[]{"下一页", "下一篇", "下一章", "后", "NEXT", ">", ">>", ">>>"}
+            ).collect(Collectors.toList()); // texts that identify a "next page" button
+    public static List<String> pageTitlefilters = Arrays.stream(
+            new String[]{"(\\第\\d+\\页)"}
+    ).collect(Collectors.toList()); // keywords to strip from page titles; all of them are applied
+    public static List<String> imgNamefilters = Arrays.stream(
+            new String[]{"LOGO", "FAVICON"}
+    ).collect(Collectors.toList()); // image names that cause an image to be discarded
+    public static List<String> imgSrcRepletTags  = Arrays.stream(
+            new String[]{"original"}
+    ).collect(Collectors.toList()); // attribute names to use as the download link instead of src when present
+    public static String refererUrl = "self"; // referer to send when downloading images (if needed); "self" means the html address where the image was found
+    public static String fixedUrlPrefix = ""; // only urls starting with this prefix are scanned
+    public static int browserTimeout = 5000;// request timeout; the original note says the default is 5 seconds
+    public static int jsTimeout = 1000;// JavaScript execution timeout; the original note says the default is 1 second
+    public static int waitForBackgroundJavaScript = 1;// wait for background JavaScript; NOTE(review): the original note says 1 second — confirm the unit the web client expects
+    public static boolean jsEnabled = true;// whether JavaScript is enabled
+    public static boolean thread = false; // whether to download with multiple threads
+    public static int threadSize = 5; // number of threads started at a time
+    public static int threadSleep = 1; // seconds to wait once all threads are started
+    public static boolean stop = false; // whether to exit
+    public static boolean autoSaveMemoryData = true; // automatically save in-memory data to file
+    public static int autoSaveMemoryDataInterval = 3; // save interval (minutes)
+    public static Long lastAutoSaveMemoryDataTime = null; // time of the last save
+
+    /**
+     * Entry point: initialises the web client and the persisted state, then works through
+     * the pending page links one at a time until the list is empty or a stop is requested.
+     *
+     * @param args passed unchanged to {@code init}
+     */
+    public static void main(String[] args) {
+
+        HttpUtils.getInstance().initWebClient(browserTimeout, jsTimeout, jsEnabled, waitForBackgroundJavaScript);
+        List<String> pageLinks = new ArrayList<String>();
+        List<String> lazyPageLinks = new ArrayList<>();
+        List<PictInfo> pictInfos = new ArrayList<PictInfo>();
+        init(args, pageLinks, lazyPageLinks, pictInfos);
+
+        int failuresLeft = 5; // consecutive url failures still tolerated
+        Set<String> failedUrls = new HashSet<String>();
+        while (!pageLinks.isEmpty()) {
+            final int perUrlRetries = 5; // retry budget handed to FindLink for a single url
+
+            String current = pageLinks.get(0);
+            boolean skip = StringUtils.isEmpty(current)
+                    || (StringUtils.isNotEmpty(fixedUrlPrefix) && !current.startsWith(fixedUrlPrefix));
+            if (skip) {
+                pageLinks.remove(0);
+                continue;
+            }
+
+            // Collect the links of the page first, then (only if that worked) its images.
+            boolean ok = FindLink.addPageLink(current, pageLinks, lazyPageLinks, perUrlRetries);
+            if (ok) {
+                failuresLeft = 5; // a successful visit restores the budget
+                String nextPageUrl = pageLinks.size() >= 2 ? pageLinks.get(1) : "";
+                ok = FindLink.addPictLink(current, pictInfos, nextPageUrl, perUrlRetries);
+            }
+            if (ok) {
+                failuresLeft = 5;
+            } else {
+                failedUrls.add(current);
+                failuresLeft--;
+            }
+
+            if (failuresLeft <= 0) {
+                Utils.error("网络可能出现问题，连续访问5次不同url失败");
+                // Put the failed urls back in front so they are retried on the next run.
+                pageLinks.remove(0);
+                pageLinks.addAll(0, failedUrls);
+                pageSet.removeAll(failedUrls);
+                writeMemoryDataToFile(pageLinks, lazyPageLinks);
+                writeDefaultCommand();
+                System.exit(0);
+            }
+
+            if (pictInfos.isEmpty()) {
+                Utils.info("url[" + current + "]中找到0可用图片连接");
+            } else {
+                Utils.info("url[" + current + "]中找到" + pictInfos.size() + "个可用图片连接");
+                DownLoad.downloadPict(pictInfos);
+            }
+            pageLinks.remove(0);
+
+            // Once the image links are exhausted, continue with a deferred link.
+            getLazyUrl(pageLinks, lazyPageLinks);
+
+            // Re-read the command file; exits when stop is set or no link is left.
+            checkAndExecutCommand(pageLinks, lazyPageLinks);
+
+            // Periodically persist the in-memory state.
+            checkAutoSaveMemoryData(pageLinks, lazyPageLinks);
+        }
+
+    }
+
+    /**
+     * Prepares a run: resolves the working directory, creates the record files, loads the
+     * persisted state into memory, seeds the first url if nothing is pending and retries the
+     * images that timed out during the previous run.
+     *
+     * <p>Exits the JVM with status 1 when the home path does not exist or when no url is
+     * pending and none was supplied.
+     *
+     * @param prams         program arguments: [0] home directory, [1] start url (required only
+     *                      when no pending url was restored), [2] optional minimum image size in KB
+     * @param pageLinks     caller-owned list that receives the urls to process
+     * @param lazyPageLinks caller-owned list that receives the deferred urls
+     * @param pictInfos     caller-owned picture list; not modified here
+     */
+    private static void init(String[] prams, List<String> pageLinks, List<String> lazyPageLinks, List<PictInfo> pictInfos) {
+
+        if (thread) {
+            // NOTE(review): the command file is only read further down (writeDefaultCommand),
+            // so this check sees the compiled-in value of `thread` — confirm that is intended.
+            sites = Collections.synchronizedSet(new HashSet<String>());
+            pageSet = Collections.synchronizedSet(new HashSet<String>());
+            imgDownloaded = Collections.synchronizedSet(new HashSet<String>());
+            readTimeOutImgs = Collections.synchronizedSet(new HashSet<PictInfo>());
+            // The list parameters are deliberately not replaced: assigning a new list to a
+            // parameter only rebinds the local variable, so every url loaded below would end
+            // up in a list the caller never sees.
+        }
+
+        // Working directory
+        home = "";
+        if (null != prams && prams.length > 0) {
+            home = prams[0];
+        }
+        System.out.println("HOME路径为：" + home);
+        if (!Utils.checkFilePathExists(home)) {
+            System.out.println("HOME路径不存在,请手动创建");
+            System.exit(1);
+        }
+
+        downLoadDir = home.endsWith(File.separator) ? (home + "pict") : (home + File.separator + "pict");
+        Utils.createDir(downLoadDir);
+        siteData = downLoadDir + File.separator + "siteData";
+        downLoadedImgData = downLoadDir + File.separator + "downLoadedImgData";
+        downloadedImgDirNameData = downLoadDir + File.separator + "downloadedImgDirNameData";
+        readTimeOutImgData = downLoadDir + File.separator + "readTimeOutImgData";
+        allUrlData = downLoadDir + File.separator + "allUrlData";
+        currentUrlData = downLoadDir + File.separator + "currentUrlData";
+        lazyUrlData = downLoadDir + File.separator + "lazyUrlData";
+        commandFile = downLoadDir + File.separator + "command";
+
+        // Create every record file that does not exist yet.
+        Utils.createFile(siteData);
+        Utils.createFile(downLoadedImgData);
+        Utils.createFile(downloadedImgDirNameData);
+        Utils.createFile(readTimeOutImgData);
+        Utils.createFile(allUrlData);
+        Utils.createFile(currentUrlData);
+        Utils.createFile(lazyUrlData);
+        Utils.createFile(commandFile);
+        // Reads the existing command file, then rewrites it with the effective values.
+        writeDefaultCommand();
+
+        // Load the persisted state.
+        Utils.readFileDataToCollection(siteData, sites);
+        Utils.readFileDataToCollection(downLoadedImgData, imgDownloaded);
+        Utils.readFileDataToCollection(downloadedImgDirNameData, imgDownloadedDirName);
+        Utils.readFilePictInfoDataToCollection(readTimeOutImgData, readTimeOutImgs);
+        Utils.readFileDataToCollection(allUrlData, pageSet);
+        Utils.readFileDataToCollection(currentUrlData, pageLinks);
+        Utils.readFileDataToCollection(lazyUrlData, lazyPageLinks);
+        // Fall back to a deferred url when no current url was restored.
+        getLazyUrl(pageLinks, lazyPageLinks);
+        if (pageLinks.isEmpty()) {
+            if (null == prams || prams.length < 2) {
+                Utils.error("请输入抓取地址");
+                System.exit(1);
+            } else {
+                pageLinks.add(prams[1]);
+                sites.add(Utils.getSite(prams[1]));
+                Utils.writeStringToFile(siteData, "", false);
+                Utils.writeCollectionToFile(siteData, sites);
+            }
+        }
+
+        if (null != prams && prams.length >= 3) {
+            Integer requestedMinSize = null;
+            try {
+                requestedMinSize = Integer.valueOf(prams[2]);
+            } catch (NumberFormatException ignored) {
+                // Reported once by the check below.
+            }
+            if (requestedMinSize == null || requestedMinSize < 0) {
+                Utils.error("抓取最小图片大小输入有误，必须大于等于0");
+            } else {
+                imgMinSize = requestedMinSize;
+                // Informational message, so it belongs in the info log.
+                Utils.info("抓取最小图片大小为" + imgMinSize + "KB");
+            }
+        }
+
+        if (autoSaveMemoryData) { // the start time counts as the first "last save" time
+            lastAutoSaveMemoryDataTime = System.currentTimeMillis();
+        }
+
+        if (!readTimeOutImgs.isEmpty()) {
+            Utils.info("重新下载上次超时的图片");
+            // Copy first: the set is cleared before the downloads start.
+            List<PictInfo> retries = readTimeOutImgs.stream()
+                    .map(p -> new PictInfo(p.getSite(), p.getTitle(), p.getUrl(), p.getHtmlUrl()))
+                    .collect(Collectors.toList());
+            readTimeOutImgs.clear();
+            DownLoad.downloadPict(retries);
+        }
+
+    }
+
+    /**
+     * Moves one deferred url into {@code pageLinks} when {@code pageLinks} is empty and a
+     * deferred url is available. The second deferred entry is taken instead of the first when
+     * {@code Utils.isSite} accepts the first one and a second one exists.
+     */
+    private static void getLazyUrl(List<String> pageLinks, List<String> lazyPageLinks) {
+        if (!pageLinks.isEmpty() || lazyPageLinks.isEmpty()) {
+            return;
+        }
+        int pick = (Utils.isSite(lazyPageLinks.get(0)) && lazyPageLinks.size() > 1) ? 1 : 0;
+        pageLinks.add(lazyPageLinks.get(pick));
+        lazyPageLinks.remove(pick);
+    }
+
+    /**
+     * Re-reads the {@code stop} flag from the command file and, when a stop was requested or
+     * no url is left, saves the in-memory state, rewrites the command file and exits the JVM.
+     */
+    private static void checkAndExecutCommand(List<String> pageLinks, List<String> lazyPageLinks) {
+
+        readCommandToMemory("stop");
+
+        boolean exhausted = pageLinks.isEmpty();
+        if (!stop && !exhausted) {
+            return;
+        }
+        if (exhausted) {
+            Utils.info("无法找到新的url，抓取图片结束");
+        }
+        writeMemoryDataToFile(pageLinks, lazyPageLinks);
+        writeDefaultCommand();
+        System.exit(0);
+
+    }
+
+    /**
+     * Saves the in-memory state when auto-save is enabled and either no save has happened yet
+     * or more than {@code autoSaveMemoryDataInterval} minutes have passed since the last one.
+     *
+     * @param pageLinks     urls still to process
+     * @param lazyPageLinks deferred urls
+     */
+    private static void checkAutoSaveMemoryData(List<String> pageLinks, List<String> lazyPageLinks) {
+
+        if (!autoSaveMemoryData) {
+            return;
+        }
+        // Computed as long: minutes * 60 * 1000 overflows int for large intervals.
+        long intervalMillis = autoSaveMemoryDataInterval * 60_000L;
+        boolean due = null == lastAutoSaveMemoryDataTime
+                || System.currentTimeMillis() > lastAutoSaveMemoryDataTime + intervalMillis;
+        if (due) {
+            writeMemoryDataToFile(pageLinks, lazyPageLinks);
+            writeDefaultCommand();
+            lastAutoSaveMemoryDataTime = System.currentTimeMillis();
+        }
+
+    }
+
+    /**
+     * Reads the current command file into memory, then rewrites it from the in-memory
+     * settings, one {@code key=value} line each. {@code stop} is always written as false.
+     */
+    private static void writeDefaultCommand() {
+
+        readCommandToMemory();
+
+        Utils.writeStringToFile(commandFile, "", false); // truncate before appending
+        String[] entries = {
+            "stop=false",
+            "imgMinSize=" + imgMinSize,
+            "nextPageNames=" + StringUtils.join(nextPageNames, ","),
+            "pageTitlefilters=" + StringUtils.join(pageTitlefilters, ","),
+            "imgNamefilters=" + StringUtils.join(imgNamefilters, ","),
+            "imgSrcRepletTags=" + StringUtils.join(imgSrcRepletTags, ","),
+            "allowDifferentCharacters=" + allowDifferentCharacters,
+            "browserTimeout=" + browserTimeout,
+            "jsTimeout=" + jsTimeout,
+            "jsEnabled=" + jsEnabled,
+            "waitForBackgroundJavaScript=" + waitForBackgroundJavaScript,
+            "refererUrl=" + refererUrl,
+            "transboundarySites=" + StringUtils.join(transboundarySites, ","),
+            "fixedUrlPrefix=" + fixedUrlPrefix,
+            "thread=" + thread,
+            "threadSize=" + threadSize,
+            "threadSleep=" + threadSleep,
+            "autoSaveMemoryData=" + autoSaveMemoryData,
+            "autoSaveMemoryDataInterval=" + autoSaveMemoryDataInterval
+        };
+        for (String entry : entries) {
+            Utils.writeStringToFile(commandFile, entry);
+        }
+
+    }
+
+    /** Reads every setting found in the command file into memory. */
+    private static void readCommandToMemory() {
+        // Typed factory instead of the raw EMPTY_LIST constant (avoids an unchecked conversion).
+        readCommandToMemory(Collections.<String>emptyList());
+    }
+
+    /** Reads only the setting named {@code key} from the command file into memory. */
+    private static void readCommandToMemory(String key) {
+        List<String> onlyThisKey = Collections.singletonList(key);
+        readCommandToMemory(onlyThisKey);
+    }
+
+    /**
+     * Reads {@code key=value} lines from the command file and applies them to the matching
+     * static settings.
+     *
+     * <p>A line is split at its FIRST {@code '='}, so values that themselves contain
+     * {@code '='} (for example a url with a query string) are accepted. Lines without
+     * {@code '='} or with an empty value are ignored, which keeps the current setting.
+     * A value that cannot be converted is reported and skipped.
+     *
+     * @param keys names of the settings to apply; {@code null} or empty applies every setting
+     */
+    private static void readCommandToMemory(List<String> keys) {
+
+        Set<String> command = new HashSet<String>(); // lines of the command file
+        Utils.readFileDataToCollection(commandFile, command);
+        boolean applyAll = (null == keys || keys.isEmpty());
+        for (String line : command) {
+            int sep = line.indexOf('=');
+            if (sep < 0 || sep == line.length() - 1) {
+                continue; // no separator, or nothing after it
+            }
+            String name = line.substring(0, sep);
+            String value = line.substring(sep + 1);
+            if (!applyAll && !keys.contains(name)) {
+                continue;
+            }
+            try {
+                switch (name) {
+                    case "stop":
+                        stop = Boolean.valueOf(value);
+                        break;
+                    case "imgMinSize":
+                        imgMinSize = Long.valueOf(value);
+                        break;
+                    case "nextPageNames":
+                        nextPageNames = Arrays.stream(value.split(",")).collect(Collectors.toList());
+                        break;
+                    case "pageTitlefilters":
+                        pageTitlefilters = Arrays.stream(value.split(",")).collect(Collectors.toList());
+                        break;
+                    case "imgNamefilters":
+                        imgNamefilters = Arrays.stream(value.split(",")).collect(Collectors.toList());
+                        break;
+                    case "imgSrcRepletTags":
+                        imgSrcRepletTags = Arrays.stream(value.split(",")).collect(Collectors.toList());
+                        break;
+                    case "allowDifferentCharacters":
+                        allowDifferentCharacters = value;
+                        break;
+                    case "browserTimeout":
+                        browserTimeout = Integer.valueOf(value);
+                        break;
+                    case "jsTimeout":
+                        jsTimeout = Integer.valueOf(value);
+                        break;
+                    case "jsEnabled":
+                        jsEnabled = Boolean.valueOf(value);
+                        break;
+                    case "waitForBackgroundJavaScript":
+                        waitForBackgroundJavaScript = Integer.valueOf(value);
+                        break;
+                    case "refererUrl":
+                        refererUrl = value;
+                        break;
+                    case "transboundarySites":
+                        transboundarySites = Arrays.stream(value.split(",")).collect(Collectors.toSet());
+                        break;
+                    case "fixedUrlPrefix":
+                        fixedUrlPrefix = value;
+                        break;
+                    case "thread":
+                        thread = Boolean.valueOf(value);
+                        break;
+                    case "threadSize":
+                        threadSize = Integer.valueOf(value);
+                        break;
+                    case "threadSleep":
+                        threadSleep = Integer.valueOf(value);
+                        break;
+                    case "autoSaveMemoryData":
+                        autoSaveMemoryData = Boolean.valueOf(value);
+                        break;
+                    case "autoSaveMemoryDataInterval":
+                        autoSaveMemoryDataInterval = Integer.valueOf(value);
+                        break;
+                    default:
+                        break; // unknown keys are ignored
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+                Utils.error("参数错误" + e.getMessage());
+            }
+        }
+
+    }
+
+    /**
+     * Persists the in-memory crawl state. Every record file is first overwritten with an
+     * empty string and then filled from the corresponding collection.
+     */
+    private static void writeMemoryDataToFile(List<String> pageLinks, List<String> lazyPageLinks) {
+        Utils.info("正在保存重要的分析数据，请不要强制退出");
+        replaceRecordFile(siteData, sites);
+        replaceRecordFile(allUrlData, pageSet);
+        replaceRecordFile(currentUrlData, pageLinks);
+        replaceRecordFile(lazyUrlData, lazyPageLinks);
+        replaceRecordFile(downLoadedImgData, imgDownloaded);
+        replaceRecordFile(downloadedImgDirNameData, imgDownloadedDirName);
+        // Picture records use their own line format.
+        Utils.writeStringToFile(readTimeOutImgData, "", false);
+        Utils.writePictInfoCollectionToFile(readTimeOutImgData, readTimeOutImgs);
+    }
+
+    /** Empties {@code filePath}, then appends every entry of {@code entries} to it. */
+    private static void replaceRecordFile(String filePath, java.util.Collection<String> entries) {
+        Utils.writeStringToFile(filePath, "", false);
+        Utils.writeCollectionToFile(filePath, entries);
+    }
+
+}
+
--- a/src/main/java/com/hitoli/fetchPic/PictInfo.java
+++ b/src/main/java/com/hitoli/fetchPic/PictInfo.java
@ -0,0 +1,47 @@
+package com.hitoli.fetchPic;
+
+/**
+ * Describes one picture found by the crawler: the site it belongs to, its title,
+ * its own url and the url of the html page it was found on.
+ *
+ * <p>No field is ever {@code null}: both the constructor and the setters store an
+ * empty string in place of {@code null}.
+ */
+public class PictInfo {
+    private String site;
+    private String url;
+    private String title;
+    private String htmlUrl;
+
+    /**
+     * @param site    site the picture belongs to
+     * @param title   picture title
+     * @param url     picture url
+     * @param htmlUrl url of the page the picture was found on
+     */
+    public PictInfo(String site, String title, String url, String htmlUrl) {
+        this.site = nullToEmpty(site);
+        this.title = nullToEmpty(title);
+        this.url = nullToEmpty(url);
+        this.htmlUrl = nullToEmpty(htmlUrl);
+    }
+
+    /** Maps {@code null} to the empty string so getters never return {@code null}. */
+    private static String nullToEmpty(String value) {
+        return null == value ? "" : value;
+    }
+
+    public String getSite() {
+        return site;
+    }
+
+    public void setSite(String site) {
+        this.site = nullToEmpty(site);
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = nullToEmpty(url);
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public void setTitle(String title) {
+        this.title = nullToEmpty(title);
+    }
+
+    public String getHtmlUrl() {
+        return htmlUrl;
+    }
+
+    public void setHtmlUrl(String htmlUrl) {
+        this.htmlUrl = nullToEmpty(htmlUrl);
+    }
+}
--- a/src/main/java/com/hitoli/fetchPic/Utils.java
+++ b/src/main/java/com/hitoli/fetchPic/Utils.java
@ -0,0 +1,622 @@
+package com.hitoli.fetchPic;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+public class Utils {
+
+    // Both loggers start as null and are created together by createLogger() on first use.
+    private static Logger loginfo = null;
+    private static Logger logerror = null;
+
+    static class MyLogHander extends Formatter {
+        @Override
+        public String format(LogRecord record) {
+            return LocalDateTime.ofEpochSecond(record.getMillis()/1000, 0, ZoneOffset.ofHours(8)) +
+                    " "  + record.getLevel() + " : " + record.getMessage()+"\n";
+        }
+    }
+
+    private static void createLogger() {
+        loginfo = Logger.getLogger("fetchPicLog-info");
+        loginfo.setLevel(Level.ALL);
+        logerror = Logger.getLogger("fetchPicLog-error");
+        logerror.setLevel(Level.WARNING);
+//        ConsoleHandler consoleHandler = new ConsoleHandler();
+//        consoleHandler.setLevel(Level.ALL);
+//        loginfo.addHandler(consoleHandler);
+//        logerror.addHandler(consoleHandler);
+        FileHandler logInfoFileHandler = null;
+        FileHandler logErrorFileHandler = null;
+        try {
+            logInfoFileHandler = new FileHandler(Main.downLoadDir + File.separator + "fetchPicLog-info.log");
+            logErrorFileHandler = new FileHandler(Main.downLoadDir + File.separator + "fetchPicLog-error.log");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+        if (null != logInfoFileHandler) {
+            logInfoFileHandler.setLevel(Level.INFO);
+            logInfoFileHandler.setFormatter(new MyLogHander());
+            loginfo.addHandler(logInfoFileHandler);
+        } else {
+            System.out.println("创建信息日志文件失败");
+        }
+        if (null != logErrorFileHandler) {
+            logErrorFileHandler.setLevel(Level.WARNING);
+            logErrorFileHandler.setFormatter(new MyLogHander());
+            logerror.addHandler(logErrorFileHandler);
+        } else {
+            System.out.println("创建错误日志文件失败");
+        }
+    }
+
+    /** Logs {@code msg} at INFO level on the info logger, creating the loggers on first use. */
+    public static void info(String msg) {
+        if (loginfo == null) {
+            createLogger();
+        }
+        loginfo.log(Level.INFO, msg);
+    }
+
+    public static void error(String msg) {
+        if (null == logerror) {
+            createLogger();
+        }
+        logerror.warning(msg);
+    }
+
+    /**
+     * Creates the directory {@code dir} if nothing exists at that path yet.
+     * Only the last path element is created ({@code File.mkdir}).
+     *
+     * @param dir path of the directory
+     */
+    public static void createDir(String dir){
+
+        File target = new File(dir);
+        if (target.exists()) {
+            return;
+        }
+        target.mkdir();
+
+    }
+
+    /**
+     * Creates a new image directory if nothing exists at that path yet and records it in
+     * {@code Main.imgDownloadedDirName} as {@code <name>#####<path>}.
+     *
+     * @param dir path of the image directory
+     */
+    public static void createNewImgDir(String dir){
+
+        File imgDir = new File(dir);
+        if (imgDir.exists()) {
+            return;
+        }
+        imgDir.mkdir();
+        String record = imgDir.getName() + "#####" + imgDir.getPath();
+        Main.imgDownloadedDirName.add(record);
+
+    }
+
+    /**
+     * 读取目录下的目录
+     * @param dir
+     * @return
+     */
+    public static List<File> getDirs(String dir) {
+        File file = new File(dir);
+        if(file.exists()){
+            return Arrays.asList(file.listFiles()).stream().filter(f -> !f.isDirectory()).collect(Collectors.toList());
+        }
+        return Collections.emptyList();
+    }
+
+    /**
+     * 删除目录
+     * @param dir
+     */
+    public static void delDir(String dir){
+
+        File file = new File(dir);
+        if(file.exists() && file.isDirectory()){
+            for (File f : file.listFiles()) {
+                delFile(f.getPath());
+            }
+            file.delete();
+        }
+
+    }
+
+    /**
+     * Creates an empty file at {@code fielPath} if nothing exists there yet.
+     * An {@link IOException} is printed and logged, not propagated.
+     *
+     * @param fielPath path of the file
+     */
+    public static void createFile(String fielPath){
+
+        File target = new File(fielPath);
+        if (target.exists()) {
+            return;
+        }
+        try {
+            target.createNewFile();
+        } catch (IOException e) {
+            e.printStackTrace();
+            Utils.error(e.getMessage());
+        }
+
+    }
+
+    /**
+     * Deletes {@code filePath} when it is an existing regular file; directories are left alone.
+     *
+     * @param filePath path of the file
+     */
+    public static void delFile(String filePath){
+
+        File target = new File(filePath);
+        // isFile() is already false for a path that does not exist.
+        if (target.isFile()) {
+            target.delete();
+        }
+
+    }
+
+    /**
+     * 检查指定路径是否存在
+     * @param filePath
+     * @return
+     */
+    public static boolean checkFilePathExists(String filePath) {
+        return new File(filePath).exists();
+    }
+
+    /**
+     * 读取文件信息到集合中（一行一条数据）
+     * @param filePath
+     * @param collection
+     */
+    public static void readFileDataToCollection(String filePath, Collection<String> collection) {
+
+        InputStreamReader in = null;
+        BufferedReader br = null;
+        try {
+            in = new InputStreamReader(new FileInputStream(new File(filePath)));
+            br = new BufferedReader(in);
+            String line;
+            while ((line = br.readLine()) != null) {
+                if (StringUtils.isNotEmpty(line)) {
+                    collection.add(line);
+                }
+            }
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+            Utils.error(e.getMessage());
+        } catch (IOException e) {
+            e.printStackTrace();
+            Utils.error(e.getMessage());
+        } finally {
+            closeIO(in, br, null, null);
+        }
+
+    }
+
+    /**
+     * 读取文件图片信息对象到集合中（一行一条数据）
+     * @param filePath
+     * @param collection
+     */
+    public static void readFilePictInfoDataToCollection(String filePath, Collection<PictInfo> collection) {
+
+        InputStreamReader in = null;
+        BufferedReader br = null;
+        try {
+            in = new InputStreamReader(new FileInputStream(new File(filePath)));
+            br = new BufferedReader(in);
+            String line;
+            while ((line = br.readLine()) != null) {
+                if (StringUtils.isNotEmpty(line)) {
+                    String[] datas = line.split("#####");
+                    if (datas.length >= 3) {
+                        PictInfo pictInfo = new PictInfo(datas[0], datas[1], datas[2], (datas.length == 3 ? null : datas[3]));
+                        collection.add(pictInfo);
+                    }
+                }
+            }
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+            Utils.error(e.getMessage());
+        } catch (IOException e) {
+            e.printStackTrace();
+            Utils.error(e.getMessage());
+        } finally {
+            closeIO(in, br, null, null);
+        }
+
+    }
+
+    /**
+     * Appends {@code str} to the file as one line.
+     *
+     * @param filePath file to write
+     * @param str      text of the line
+     */
+    public static void writeStringToFile(String filePath, String str) {
+        final boolean append = true;
+        writeStringToFile(filePath, str, append);
+    }
+
+    /**
+     * 把集合中的数据写到文件中（一行一条）
+     * @param filePath
+     * @param collection
+     */
+    public static void writeCollectionToFile(String filePath, Collection<String> collection) {
+
+        if (null != collection && !collection.isEmpty()) {
+            FileWriter fw = null;
+            BufferedWriter out = null;
+            try {
+                fw = new FileWriter(new File(filePath), true);
+                out = new BufferedWriter(fw);
+                for (String str : collection) {
+                    out.write(str += "\r\n");
+                }
+                out.flush();
+            } catch (FileNotFoundException e) {
+                e.printStackTrace();
+                Utils.error(e.getMessage());
+            } catch (IOException e) {
+                e.printStackTrace();
+                Utils.error(e.getMessage());
+            } finally {
+                closeIO(null, null, fw, out);
+            }
+        }
+
+    }
+
+    /**
+     * 把集合中的图片对象数据写到文件中（一行一条）
+     * @param filePath
+     * @param collection
+     */
+    public static void writePictInfoCollectionToFile(String filePath, Collection<PictInfo> collection) {
+
+        if (null != collection && !collection.isEmpty()) {
+            FileWriter fw = null;
+            BufferedWriter out = null;
+            try {
+                fw = new FileWriter(new File(filePath), true);
+                out = new BufferedWriter(fw);
+                for (PictInfo pictInfo : collection) {
+                    String str = pictInfo.getSite() + "#####" + pictInfo.getTitle() + "#####" + pictInfo.getUrl();
+                    out.write(str += "\r\n");
+                }
+                out.flush();
+            } catch (FileNotFoundException e) {
+                e.printStackTrace();
+                Utils.error(e.getMessage());
+            } catch (IOException e) {
+                e.printStackTrace();
+                Utils.error(e.getMessage());
+            } finally {
+                closeIO(null, null, fw, out);
+            }
+        }
+
+    }
+
+    /**
+     * 把字符串写入文件（一行一条）
+     * @param filePath
+     * @param str
+     * @param append 是否追加
+     */
+    public static void writeStringToFile(String filePath, String str, boolean append) {
+
+        FileWriter fw = null;
+        BufferedWriter out = null;
+        try {
+            fw = new FileWriter(new File(filePath), append);
+            out = new BufferedWriter(fw);
+            if (!(StringUtils.isEmpty(str) && !append)) { //如果字符串为空并且不追加，则只写入空的字符串（不带换行符）
+                str += "\r\n";
+            }
+            out.write(str);
+            out.flush();
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+            Utils.error(e.getMessage());
+        } catch (IOException e) {
+            e.printStackTrace();
+            Utils.error(e.getMessage());
+        } finally {
+            closeIO(null, null, fw, out);
+        }
+
+    }
+
+    /**
+     * Closes the given readers and writers; any argument may be {@code null}.
+     *
+     * <p>Each resource is closed in its own try block, so a failure while closing one of
+     * them no longer prevents the others from being closed. Failures are printed and logged.
+     *
+     * @param in  underlying reader, closed after {@code br}
+     * @param br  buffered reader
+     * @param fw  underlying writer, closed after {@code out}
+     * @param out buffered writer
+     */
+    private static void closeIO(InputStreamReader in, BufferedReader br, FileWriter fw, BufferedWriter out) {
+        // Wrappers first, then the streams they wrap.
+        java.io.Closeable[] resources = {br, in, out, fw};
+        for (java.io.Closeable resource : resources) {
+            if (resource == null) {
+                continue;
+            }
+            try {
+                resource.close();
+            } catch (IOException e) {
+                e.printStackTrace();
+                Utils.error(e.getMessage());
+            }
+        }
+    }
+
+    /**
+     * Extracts the site root from an address: the optional {@code http://} or
+     * {@code https://} scheme plus everything up to (not including) the first following
+     * {@code '/'}.
+     *
+     * <p>The scheme is recognised only when the full {@code http://} / {@code https://}
+     * prefix is present, so host names that merely begin with "http" (for example
+     * {@code httpbin.org/get}) and very short inputs are handled as scheme-less addresses.
+     *
+     * @param context address, with or without scheme
+     * @return the site root, e.g. {@code http://a.com} for {@code http://a.com/b/c}
+     */
+    public static String getSite(String context) {
+
+        String scheme = "";
+        if (context.startsWith("https://")) {
+            scheme = "https://";
+        } else if (context.startsWith("http://")) {
+            scheme = "http://";
+        }
+
+        String rest = context.substring(scheme.length());
+        int slash = rest.indexOf('/');
+        String host = (slash == -1) ? rest : rest.substring(0, slash);
+
+        return scheme + host;
+
+    }
+
+    /**
+     * Builds a full address for a scraped link.
+     * <ul>
+     *   <li>"//host/..." gets the scheme of {@code site} ("https:" if site starts with "https", else "http:");</li>
+     *   <li>"/..." is appended to {@code site};</li>
+     *   <li>"../...", "./..." and links without any "/" are appended to the directory of {@code path};</li>
+     *   <li>anything else is returned unchanged.</li>
+     * </ul>
+     * ".." and "." segments are not collapsed; they are left in the returned address.
+     *
+     * @param site site address without a trailing path
+     * @param path address of the page the link was found on
+     * @param url  the scraped link
+     * @return the resolved address
+     */
+    public static String getFullPath(String site, String path, String url) {
+        if (url.startsWith("//")) {
+            return (site.startsWith("https") ? "https:" : "http:") + url;
+        }
+        if (url.startsWith("/")) {
+            return site + url;
+        }
+        // All page-relative forms resolve against the page's directory; appending
+        // "../x" to the page address itself would yield ".../page.html../x".
+        if (url.startsWith("../") || url.startsWith("./") || url.indexOf('/') == -1) {
+            return _pageDirectory(path) + url;
+        }
+        return url;
+    }
+
+    /**
+     * Returns {@code path} up to and including its last "/". When the only slashes
+     * belong to the "scheme://" prefix (a bare host such as "http://a.com"), the
+     * host itself is the directory and a "/" is appended instead.
+     */
+    private static String _pageDirectory(String path) {
+        int lastSlash = path.lastIndexOf('/');
+        int schemeEnd = path.indexOf("://");
+        if (schemeEnd != -1 && lastSlash < schemeEnd + 3) {
+            return path + "/";
+        }
+        return path.substring(0, lastSlash + 1);
+    }
+
+    /**
+     * Checks whether a url stays inside the configured sites.
+     * The url is accepted when it contains the short domain (see
+     * {@code getShortSite}) of any entry in {@code Main.transboundarySites}
+     * or, failing that, of any entry in {@code Main.sites}. A url that matches
+     * neither list is logged through {@code info} as out of bounds.
+     *
+     * @param url the scanned url
+     * @return {@code true} if the url matched one of the lists, otherwise {@code false}
+     */
+    public static Boolean checkSite(String url) {
+        // Domains that are explicitly allowed to cross the site boundary come first.
+        for (String allowed : Main.transboundarySites) {
+            if (url.contains(getShortSite(allowed))) {
+                return Boolean.TRUE;
+            }
+        }
+        for (String own : Main.sites) {
+            if (url.contains(getShortSite(own))) {
+                return Boolean.TRUE;
+            }
+        }
+        info("扫描到的url=" + url + "越界");
+        return Boolean.FALSE;
+    }
+
+    /**
+     * Returns the short domain of a site: the "http://" or "https://" prefix is
+     * removed, and the first host label (for example "www") is dropped when the
+     * host has at least three labels. A host with fewer labels is kept whole, so
+     * "http://example.com" yields "example.com" rather than "com". Any path that
+     * follows the host is left in place.
+     *
+     * @param site site address, with or without an http(s) prefix
+     * @return the address without scheme and without its leading sub-domain label
+     */
+    public static String getShortSite(String site) {
+        // Require the complete "scheme://" prefix so hosts that merely start
+        // with "http" are not truncated.
+        if (site.startsWith("https://")) {
+            site = site.substring(8);
+        } else if (site.startsWith("http://")) {
+            site = site.substring(7);
+        }
+
+        int hostEnd = site.indexOf('/');
+        if (hostEnd == -1) {
+            hostEnd = site.length();
+        }
+        int firstDot = site.indexOf('.');
+        int secondDot = (firstDot == -1) ? -1 : site.indexOf('.', firstDot + 1);
+
+        // Drop the first label only when another dot remains inside the host part.
+        if (secondDot != -1 && secondDot < hostEnd) {
+            return site.substring(firstDot + 1);
+        }
+        return site;
+    }
+
+    /**
+     * 检查url是否是域名地址
+     * @param url
+     * @return
+     */
+    public static boolean isSite(String url) {
+        boolean result = false;
+        url = removeHttpOrHttps(url);
+        if (url.substring(url.length() - 1, url.length()).equals("/")) {
+            url = url.substring(0, url.length() - 1);
+        }
+        for (String site : Main.sites) {
+            if (url.equalsIgnoreCase(removeHttpOrHttps(site))) {
+                result = true;
+                break;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Removes a leading "http://" or "https://" from a url.
+     * Any other input, including hosts that merely begin with the letters
+     * "http", is returned unchanged.
+     *
+     * @param url the url to strip
+     * @return the url without its http(s) scheme prefix
+     */
+    public static String removeHttpOrHttps(String url) {
+        if (url.startsWith("https://")) {
+            return url.substring("https://".length());
+        }
+        if (url.startsWith("http://")) {
+            return url.substring("http://".length());
+        }
+        return url;
+    }
+
+    /**
+     * 获取可做为文件名的网站短域名（不包含www及http前缀，点用下划线替换）
+     * @param site
+     * @return
+     */
+    public static String getUseFileNameShortSite(String site) {
+        return getShortSite(site).replaceAll("\\.", "_");
+    }
+
+    /**
+     * 特殊符号去除
+     * @param str
+     * @return
+     */
+    public static String specialSymbolRemoval(String str) {
+        return str.replaceAll("[\\\\/:\\\\*\\\\?\\\\\"<>\\\\|]", "");
+    }
+
+    /**
+     * Checks whether a text is one of the configured "next page" button labels,
+     * i.e. equals an entry of {@code Main.nextPageNames} exactly.
+     *
+     * @param text the button text to test
+     * @return true if the text equals one of the configured labels
+     */
+    public static boolean isNextPageButton(String text) {
+        for (String label : Main.nextPageNames) {
+            if (text.equals(label)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Creates the {@link File} handle for an image to be downloaded.
+     * When {@code Main.imgDownloadedDirName} holds entries, the title is first
+     * compared with the titles recorded there (entries have the form
+     * {@code title#####directory}); if an equal or sufficiently similar title is
+     * found, the image is placed in that recorded directory. Otherwise the file
+     * is placed under the download directory via {@code _createImgFile}.
+     *
+     * @param site   site name used for the directory
+     * @param title  title used for the directory and for the similarity lookup
+     * @param name   file name without suffix
+     * @param suffix file suffix
+     * @return the file handle for the image
+     * @throws IOException if no file handle could be created
+     */
+    public static File createImgFile(String site, String title, String name, String suffix) throws IOException {
+        File file = null;
+        if (null == Main.imgDownloadedDirName || Main.imgDownloadedDirName.isEmpty()) {
+            file = _createImgFile(site, title, name, suffix);
+        } else {
+            String oldPath = _findDownloadedImgDir(title);
+            if (StringUtils.isNotEmpty(oldPath)) {
+                createDir(oldPath);
+                file = new File(oldPath + File.separator + name + suffix);
+            } else {
+                file = _createImgFile(site, title, name, suffix);
+            }
+        }
+        if (null == file) {
+            throw new IOException("创建图片文件失败");
+        }
+        return file;
+    }
+
+    /**
+     * Looks up the recorded directory whose title equals, or differs only within
+     * the allowed number of characters from, the given title. Returns null when
+     * no recorded title is close enough.
+     */
+    private static String _findDownloadedImgDir(String title) {
+        double configured = Double.parseDouble(Main.allowDifferentCharacters);
+        String newTitle = _normalizeImgTitle(title);
+        for (String entry : Main.imgDownloadedDirName) {
+            String[] parts = entry.split("#####");
+            if (parts.length != 2) {
+                continue;
+            }
+            String oldTitle = _normalizeImgTitle(parts[0]);
+            if (oldTitle.equalsIgnoreCase(newTitle)) {
+                return parts[1];
+            }
+
+            int maxLength = Math.max(oldTitle.length(), newTitle.length());
+            int minLength = Math.min(oldTitle.length(), newTitle.length());
+
+            // A value strictly between 0 and 1 is a ratio; convert it to a character
+            // count for THIS pair of titles. It is recomputed per entry so one
+            // entry's length never leaks into the threshold used for the next.
+            double allowed = configured;
+            if (configured > 0 && configured < 1) {
+                allowed = maxLength - (maxLength * configured);
+            }
+
+            // Characters beyond the shorter title have no counterpart and count as different.
+            int difference = maxLength - minLength;
+            if (difference > allowed) {
+                continue;
+            }
+            // Compare only the overlapping part; indexing the shorter title with the
+            // longer title's length would run past its end.
+            for (int j = 0; j < minLength; j++) {
+                if (oldTitle.charAt(j) != newTitle.charAt(j)) {
+                    difference++;
+                }
+            }
+            if (difference <= allowed) {
+                return parts[1];
+            }
+        }
+        return null;
+    }
+
+    /** Strips brackets, whitespace, dots, underscores, dashes and page markers from a title before comparison. */
+    private static String _normalizeImgTitle(String title) {
+        return title.replaceAll("(\\[+)|(\\]+)|(\\s)|(\\.+)|(\\_+)|(\\-+)|(\\——+)|(\\-+)|(\\第\\d+\\页)|(\\第)|(\\页)", "");
+    }
+
+    /**
+     * Builds the image file handle under
+     * {@code Main.downLoadDir/site/title/name+suffix}. When the title directory
+     * is not reported as existing by {@code checkFilePathExists}, the site
+     * directory is created with {@code createDir} and the title directory with
+     * {@code createNewImgDir}.
+     */
+    private static File _createImgFile(String site, String title, String name, String suffix) {
+        String siteDir = Main.downLoadDir + File.separator + site;
+        String titleDir = siteDir + File.separator + title;
+        boolean titleDirExists = checkFilePathExists(titleDir);
+        if (!titleDirExists) {
+            createDir(siteDir);
+            createNewImgDir(titleDir);
+        }
+        String imgPath = titleDir + File.separator + name + suffix;
+        return new File(imgPath);
+    }
+
+}
+
+
+
+