PDFBoxメモ
この方法だと画像が取れない形式のPDFがあるようだ。XObjectをすべて取得し、PDXObjectImageのインスタンスか、PDXObjectFormのインスタンスかで処理を分けるとよい。
/**
 * Extracts the images of a PDF file and writes each one out via {@code write2file}.
 *
 * <p>Every XObject of every page is inspected. An image XObject is written directly; for a
 * form XObject, the images listed in the form's own resources are written (forms nested
 * inside a form are not traversed). Each image is passed to {@code write2file} with a running
 * counter (0, 1, 2, ...) as the file name argument.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the PDF file to read
 * @throws IOException if opening or parsing the file, or writing an image, fails
 * @throws IllegalArgumentException if no input path is given
 */
public static void main(String[] args) throws IOException {
    if (args == null || args.length == 0) {
        throw new IllegalArgumentException("usage: <pdf-file>");
    }
    String readFile = args[0];

    FileInputStream pdfStream = new FileInputStream(readFile);
    PDDocument pdf = null;
    try {
        PDFParser pdfParser = new PDFParser(pdfStream);
        pdfParser.parse();
        pdf = pdfParser.getPDDocument();

        int cnt = 0;
        // getAllPages() is iterated as Object and cast so this works whether or not
        // the returned list is declared with a type parameter.
        for (Object pageObject : pdf.getDocumentCatalog().getAllPages()) {
            PDPage page = (PDPage) pageObject;
            // A page without its own resources yields null here; nothing to extract.
            if (page.getResources() == null) {
                continue;
            }
            Map<String, PDXObject> objs = page.getResources().getXObjects();
            if (objs == null) {
                continue;
            }
            for (PDXObject obj : objs.values()) {
                if (obj instanceof PDXObjectImage) {
                    ((PDXObjectImage) obj).write2file(String.valueOf(cnt));
                    cnt++;
                } else if (obj instanceof PDXObjectForm) {
                    PDXObjectForm form = (PDXObjectForm) obj;
                    // A form without a resource dictionary has no images of its own.
                    if (form.getResources() == null) {
                        continue;
                    }
                    Map<String, PDXObjectImage> images = form.getResources().getImages();
                    if (images == null) {
                        continue;
                    }
                    for (PDXObjectImage image : images.values()) {
                        image.write2file(String.valueOf(cnt));
                        cnt++;
                    }
                }
            }
        }
    } finally {
        // Release the document first, then the underlying stream; the nested finally
        // guarantees the stream is closed even if closing the document throws.
        try {
            if (pdf != null) {
                pdf.close();
            }
        } finally {
            pdfStream.close();
        }
    }
}