1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
| package com.webmagic;
import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.selector.Selectable;
import java.io.File; import java.io.FileOutputStream; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.IntStream;
import static java.util.stream.Collectors.toList;
/**
 * WebMagic {@link PageProcessor} that downloads every page of the document described by
 * {@link #pdf} and stores the raw response bytes of each page as a file on disk.
 *
 * <p>{@link #main(String[])} builds one URL per page ({@code preUrl + pageNumber}) and crawls
 * them with 10 threads; {@link #process(Page)} is invoked for each downloaded page and writes
 * the bytes below {@link #D_DEV_SOUP_SRC_MAIN_RESOURCES_W3_PDF}.
 */
public class W3PdfProcessor implements PageProcessor {

    /** Root directory (absolute Windows path, with trailing separator) that all output is written under. */
    public static final String D_DEV_SOUP_SRC_MAIN_RESOURCES_W3_PDF = "D:\\dev\\soup\\src\\main\\resources\\w3Pdf\\";

    /** Crawler settings returned by {@link #getSite()}: retry times 3, sleep time 1000. */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000);

    /** The document being downloaded: display name, page count, URL prefix and online URL. */
    static Pdf pdf = new Pdf("上****娃)",316,"******/4ff5fff9005d59bbf09fd0/" ,"******************");

    /**
     * Stores the bytes of a downloaded page as
     * {@code <name>\<lastUrlSegment>.<name>.png} under the output root.
     *
     * <p>Failures while saving are reported on stderr and do not abort the crawl.
     *
     * @param page the downloaded page; its URL's last {@code /}-separated segment is used
     *             as the file name prefix
     */
    @Override
    public void process(Page page) {
        byte[] bytes = page.getBytes();
        String url = page.getUrl().get();

        // Only the last path segment is needed; no need to reverse the whole list.
        String[] segments = url.split("/");
        if (segments.length == 0) {
            // split() yields an empty array for a URL made only of '/' characters.
            System.err.println("Cannot derive a file name from URL: " + url);
            return;
        }
        String lastSegment = segments[segments.length - 1];

        String filename = pdf.getName() + "\\" + lastSegment + "." + pdf.getName() + ".png";
        try {
            saveFile(filename, bytes);
        } catch (Exception e) {
            // Keep crawling the remaining pages, but say which one failed.
            System.err.println("Failed to save " + url + " as " + filename);
            e.printStackTrace();
        }
    }

    /**
     * @return the crawler settings used for this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Entry point: creates the output directory for {@link #pdf} and crawls pages
     * {@code 1..pdf.getPage()} using 10 threads.
     *
     * @param args unused
     * @throws IllegalStateException if the output directory cannot be created
     */
    public static void main(String[] args) {
        File outputDir = new File(D_DEV_SOUP_SRC_MAIN_RESOURCES_W3_PDF + pdf.getName());
        // mkdirs() also creates missing parents; the isDirectory() check covers "already exists".
        if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
            throw new IllegalStateException("Cannot create output directory: " + outputDir);
        }

        List<String> pageUrls = IntStream.rangeClosed(1, pdf.getPage())
                .mapToObj(pageNo -> pdf.getPreUrl() + pageNo)
                .collect(toList());

        Spider.create(new W3PdfProcessor())
                .addUrl(pageUrls.toArray(new String[0]))
                .thread(10)
                .run();
    }

    /**
     * Writes {@code data} to the file {@code filename} below
     * {@link #D_DEV_SOUP_SRC_MAIN_RESOURCES_W3_PDF}, replacing any existing file.
     * Does nothing when {@code data} is {@code null}.
     *
     * @param filename path of the target file, relative to the output root
     * @param data     bytes to write; {@code null} is silently ignored
     * @throws Exception if the parent directory cannot be created or the file cannot be written
     */
    public static void saveFile(String filename, byte[] data) throws Exception {
        if (data == null) {
            return;
        }
        File file = new File(D_DEV_SOUP_SRC_MAIN_RESOURCES_W3_PDF + filename);

        // Make the method usable on its own: ensure the target directory exists.
        // The trailing isDirectory() check tolerates another thread creating it concurrently.
        File parent = file.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new java.io.IOException("Cannot create directory: " + parent);
        }

        // The stream is opened in non-append mode, so an existing file is overwritten;
        // try-with-resources closes it even when write() throws.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(data, 0, data.length);
            fos.flush();
        }
    }
}

/**
 * Describes one document to download. Getters, setters and constructors are generated by Lombok.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
class Pdf {
    /** Display name; also used as the output sub-directory and as part of each file name. */
    String name;
    /** Number of pages; pages are requested as 1..page. */
    int page;
    /** URL prefix to which the page number is appended. */
    String preUrl;
    /** Online URL of the document (not read by the crawler code in this file). */
    String pdfOnlineUrl;
}
|