Extract - Code Search

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/PdfExtractor.java

import org.codelibs.fess.crawler.helper.MimeTypeHelper;

/**
 * PdfExtractor extracts text content from PDF files using Apache PDFBox.
 * It supports password-protected PDFs and can extract embedded documents and annotations.
 *
 * <p>The extractor runs text extraction in a separate thread with a configurable timeout
 * to prevent hanging on problematic PDF files. It also extracts metadata from the PDF
 * document and includes it in the extraction result.
 *

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Sun Nov 23 12:19:14 UTC 2025

- 12.8K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/AbstractXmlExtractor.java

                throw new ExtractException(e);
            }
        }

        return encoding;
    }

    /**
     * Extracts text content from the given content by removing tags and processing attributes.
     * @param content The content to extract from.
     * @return The extracted text.
     */
    protected String extractString(final String content) {

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Sun Nov 23 12:19:14 UTC 2025

- 8.6K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/LhaExtractor.java

import jp.gr.java_conf.dangan.util.lha.LhaFile;
import jp.gr.java_conf.dangan.util.lha.LhaHeader;

/**
 * Extractor implementation for LHA (LZH) archive files.
 * This extractor can extract text content from files within LHA archives
 * by using appropriate extractors for each contained file type.
 *
 * @author shinsuke
 */
public class LhaExtractor extends AbstractExtractor {

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Sun Nov 23 12:19:14 UTC 2025

- 5.9K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/main/java/org/codelibs/fess/crawler/extractor/impl/EmlExtractor.java

        } catch (final MessagingException e) {
            throw new ExtractException(e);
        }
    }

    /**
     * Puts a value into the extract data with appropriate type conversion.
     *
     * @param data the extract data to store the value in
     * @param key the key for the value
     * @param value the value to store
     */

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Sun Jul 06 02:13:03 UTC 2025

- 12.6K bytes

- Viewed (0)

github.com/minio/minio

internal/s3select/jstream/README.md

Using the below example document:
<img width="85%" src="https://bradley.codes/static/img/jstream-levels.gif" alt="jstream"/>

we can choose to extract and act only on the objects within the top-level array:
```go
f, _ := os.Open("input.json")
decoder := jstream.NewDecoder(f, 1) // extract JSON values at a depth level of 1
for mv := range decoder.Stream() {
  fmt.Printf("%v\n ", mv.Value)
}
```

output:
```

Registered: Sun Dec 28 19:28:13 UTC 2025

- Last Modified: Mon Sep 23 19:35:41 UTC 2024

- 3.2K bytes

- Viewed (0)

github.com/minio/minio

internal/s3select/sql/parser_test.go

		"sum(t)",
		"avg(s.id[1])",
		"coalesce(s.id[1], 2, 2 + 3)",

		"cast(s as string)",
		"cast(s AS INT)",
		"cast(s as DECIMAL)",
		"extract(YEAR from '2018-01-09')",
		"extract(month from '2018-01-09')",

		"extract(hour from '2018-01-09')",
		"extract(day from '2018-01-09')",
		"substring('abcd' from 2 for 2)",
		"substring('abcd' from 2)",
		"substring('abcd' , 2 , 2)",

		"substring('abcd' , 22 )",

Registered: Sun Dec 28 19:28:13 UTC 2025

- Last Modified: Thu Jan 18 07:03:17 UTC 2024

- 9.2K bytes

- Viewed (0)

github.com/codelibs/jcifs

src/main/java/jcifs/internal/smb2/persistent/HandleGuid.java

        ByteBuffer bb = ByteBuffer.wrap(result).order(java.nio.ByteOrder.LITTLE_ENDIAN);

        long mostSig = guid.getMostSignificantBits();
        long leastSig = guid.getLeastSignificantBits();

        // Extract GUID components from UUID
        int data1 = (int) (mostSig >>> 32); // first 4 bytes
        short data2 = (short) (mostSig >>> 16); // next 2 bytes
        short data3 = (short) mostSig; // next 2 bytes

Registered: Sat Dec 20 13:44:44 UTC 2025

- Last Modified: Sat Aug 23 02:21:31 UTC 2025

- 4.5K bytes

- Viewed (0)

github.com/codelibs/fess

src/main/java/org/codelibs/fess/helper/PermissionHelper.java

     */
    public void setUserPrefix(final String userPrefix) {
        this.userPrefix = userPrefix;
    }

    /**
     * Extracts role type information from SMB (Server Message Block) response data.
     * Processes both SMB and SMB1 protocols to extract allowed and denied SIDs.
     *
     * @param responseData the response data containing SMB metadata

Registered: Sat Dec 20 09:19:18 UTC 2025

- Last Modified: Thu Nov 13 05:54:52 UTC 2025

- 15.4K bytes

- Viewed (0)

github.com/tensorflow/tensorflow

ci/official/installer_wheel.sh

# 2. tf_nightly-a.b.c.devYYYYMMDD-py3-none-any.whl
pure_python_whl=$(ls "$TFCI_OUTPUT_DIR"/*py3-none-any*)
pure_python_whl=$(basename "${pure_python_whl}")

# Extract the package name from the wheel name. That is, extract every character
# before the pattern "-py3-" in the wheel name.
pkg_name=$(echo "${pure_python_whl}" | awk -F'-py3-' '{print $1}')

Registered: Tue Dec 30 12:39:10 UTC 2025

- Last Modified: Tue Mar 04 22:39:12 UTC 2025

- 3.5K bytes

- Viewed (0)

github.com/codelibs/fess-crawler

fess-crawler/src/test/java/org/codelibs/fess/crawler/extractor/impl/ArchiveExtractorErrorHandlingTest.java

            assertTrue("Error message should mention TAR archive or extraction failure",
                    e.getMessage().contains("TAR") || e.getMessage().contains("extract"));
        }
    }

    /**
     * Test that ZipExtractor successfully extracts from valid archive.
     */
    public void test_ZipExtractor_validArchive_extractsSuccessfully() {

Registered: Sat Dec 20 11:21:39 UTC 2025

- Last Modified: Mon Nov 24 03:59:47 UTC 2025

- 12.6K bytes

- Viewed (0)

Search Options