Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 6 of 6 for Microsoft (0.03 sec)

  1. fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/MsPowerPointExtractor.java

    import org.codelibs.fess.crawler.entity.ExtractData;
    import org.codelibs.fess.crawler.exception.CrawlerSystemException;
    import org.codelibs.fess.crawler.exception.ExtractException;
    
    /**
     * Extracts text content from Microsoft PowerPoint documents.
     */
    public class MsPowerPointExtractor extends AbstractExtractor {
    
        /**
         * Creates a new MsPowerPointExtractor instance.
         */
        public MsPowerPointExtractor() {
            super();
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Jul 06 02:13:03 UTC 2025
    - 2.1K bytes
    - Viewed (0)
  2. fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/TikaExtractorTest.java

            assertEquals("こめんと", extractData.getValues("w:Comments")[0]);
            assertEquals("たぐ|さぶたいとる", String.join("|", extractData.getValues("dc:subject")));
            assertEquals("Microsoft Office Word", extractData.getValues("extended-properties:Application")[0]);
            assertEquals("sugaya", extractData.getValues("meta:last-author")[0]);
            assertEquals("太郎", extractData.getValues("dc:creator")[0]);
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Aug 07 02:55:08 UTC 2025
    - 30.6K bytes
    - Viewed (0)
  3. fess-crawler/src/main/resources/org/codelibs/fess/crawler/mime/tika-mimetypes.xml

        <!-- Use DefaultDetector / org.apache.tika.parser.microsoft.POIFSContainerDetector for more reliable detection of OLE2 documents -->
        <alias type="application/vnd.ms-word"/>
        <_comment>Microsoft Word Document</_comment>
        <tika:link>http://en.wikipedia.org/wiki/.doc</tika:link>
        <tika:uti>com.microsoft.word.doc</tika:uti>
        <magic priority="50">
          <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080"/>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Thu Mar 13 08:18:01 UTC 2025
    - 320.1K bytes
    - Viewed (1)
  4. README.md

    - **FTP**: FTP server crawling with authentication
    - **SMB/CIFS**: Windows network shares
    - **Storage**: Cloud storage systems (MinIO, S3-compatible)
    
    ### Content Formats
    
    #### Office Documents
    - Microsoft Office (Word, Excel, PowerPoint)
    - OpenOffice/LibreOffice documents
    - RTF, WordPerfect
    
    #### PDFs and Images
    - PDF documents (text and metadata extraction)
    - Images (JPEG, PNG, GIF, TIFF, BMP)
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sun Aug 31 05:32:52 UTC 2025
    - 15.3K bytes
    - Viewed (0)
  5. fess-crawler/pom.xml

    			<artifactId>tika-parser-mail-module</artifactId>
    			<version>${tika.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-parser-microsoft-module</artifactId>
    			<version>${tika.version}</version>
    		</dependency>
    		<dependency>
    			<groupId>org.apache.tika</groupId>
    			<artifactId>tika-parser-miscoffice-module</artifactId>
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Sep 06 04:15:37 UTC 2025
    - 11.3K bytes
    - Viewed (0)
  6. fess-crawler-lasta/src/main/resources/crawler/extractor.xml

    				"image/vnd.fastbidsheet",
    				"image/vnd.fpx",
    				"image/vnd.fst",
    				"image/vnd.fujixerox.edmics-mmr",
    				"image/vnd.fujixerox.edmics-rlc",
    				"image/vnd.globalgraphics.pgb",
    				"image/vnd.microsoft.icon",
    				"image/vnd.mix",
    				"image/vnd.ms-modi",
    				"image/vnd.net-fpx",
    				"image/vnd.radiance",
    				"image/vnd.sealed.png",
    				"image/vnd.sealedmedia.softseal.gif",
    Registered: Sun Sep 21 03:50:09 UTC 2025
    - Last Modified: Sat Aug 01 21:40:30 UTC 2020
    - 49K bytes
    - Viewed (0)
Back to top