Maven project
Add the following dependencies to pom.xml:
<!-- Apache Tika dependencies: begin -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.4</version>
</dependency>
<!-- Apache Tika dependencies: end -->
code
package com.blogspot.na5cent.learning.tika;
import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
/**
 * Small demo that extracts the plain text of a PDF bundled on the classpath
 * using Apache Tika and writes it to the log.
 *
 * @author redcrow
 */
public class TikaTest {

    private static final Logger LOG = Logger.getLogger(TikaTest.class.getName());

    /** Classpath location of the sample PDF whose text is extracted. */
    private static final String PDF_RESOURCE = "/APress_ProJavaScriptDesignPatterns.pdf";

    /**
     * Opens the sample PDF from the classpath, extracts its text with
     * {@code Tika.parseToString(InputStream)} and logs the extracted text.
     * Read and parse failures are logged as warnings; a missing resource is
     * logged as an error and the program returns without parsing.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        InputStream pdfInputStream = TikaTest.class.getResourceAsStream(PDF_RESOURCE);
        if (pdfInputStream == null) {
            // getResourceAsStream returns null when the resource is not on the
            // classpath; report that explicitly instead of handing null to Tika.
            LOG.log(Level.SEVERE, "Resource not found on classpath: {0}", PDF_RESOURCE);
            return;
        }

        Tika tika = new Tika();
        try {
            String extractString = tika.parseToString(pdfInputStream);
            // Successful output is informational, not an error condition.
            LOG.log(Level.INFO, extractString);
        } catch (IOException ex) {
            LOG.log(Level.WARNING, "Could not read " + PDF_RESOURCE, ex);
        } catch (TikaException ex) {
            LOG.log(Level.WARNING, "Could not parse " + PDF_RESOURCE, ex);
        } finally {
            // Always release the stream, whether or not parsing succeeded.
            try {
                pdfInputStream.close();
            } catch (IOException ex) {
                LOG.log(Level.WARNING, "Could not close stream for " + PDF_RESOURCE, ex);
            }
        }
    }
}
result


ไม่มีความคิดเห็น:
แสดงความคิดเห็น