package org.tribuo.data.text;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.PrimitiveProvenance;
import com.oracle.labs.mlrg.olcut.provenance.Provenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.SkeletalConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.primitives.DateTimeProvenance;
import com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Queue;
import java.util.logging.Logger;
import org.tribuo.ConfigurableDataSource;
import org.tribuo.Example;
import org.tribuo.Output;
import org.tribuo.OutputFactory;
import org.tribuo.provenance.ConfiguredDataSourceProvenance;

/* loaded from: input_file:org/tribuo/data/text/DirectoryFileSource.class */
/**
 * A data source which reads a directory of text documents and turns each document into an
 * {@link Example}.
 * <p>
 * The expected layout is {@code dataDir/<label>/<document>}: every entry directly under
 * {@code dataDir} is opened as a directory, its file name is passed to
 * {@link OutputFactory#generateOutput} as the label, and every entry inside it is read as a
 * single UTF-8 document. Each document is run through the configured preprocessors in order
 * and then handed to the {@link TextFeatureExtractor}.
 *
 * @param <T> The output type produced by the output factory.
 */
public class DirectoryFileSource<T extends Output<T>> implements ConfigurableDataSource<T> {
    private static final Logger logger = Logger.getLogger(DirectoryFileSource.class.getName());
    private static final Charset enc = StandardCharsets.UTF_8;

    @Config(description = "The top-level directory containing the data set.")
    private Path dataDir = Paths.get(".");

    @Config(description = "The preprocessors to apply to the input documents.")
    protected List<DocumentPreprocessor> preprocessors = new ArrayList<>();

    @Config(mandatory = true, description = "The output factory to use.")
    protected OutputFactory<T> outputFactory;

    @Config(mandatory = true, description = "The feature extractor that converts text into examples.")
    protected TextFeatureExtractor<T> extractor;

    /**
     * Provenance for {@link DirectoryFileSource}.
     * <p>
     * In addition to the configured fields it records two instance values: the last-modified
     * time of the data directory and the time at which this provenance object was created.
     */
    public static class DirectoryFileSourceProvenance extends SkeletalConfiguredObjectProvenance implements ConfiguredDataSourceProvenance {
        private static final long serialVersionUID = 1L;

        // Keys under which the instance values are stored in the marshalled provenance map.
        private static final String FILE_MODIFIED_KEY = "file-modified-time";
        private static final String CREATION_TIME_KEY = "datasource-creation-time";

        private final DateTimeProvenance fileModifiedTime;
        private final DateTimeProvenance dataSourceCreationTime;

        /**
         * Builds the provenance for a live data source.
         *
         * @param directoryFileSource The source whose configuration and data directory are recorded.
         * @param <T> The output type of the source.
         */
        <T extends Output<T>> DirectoryFileSourceProvenance(DirectoryFileSource<T> directoryFileSource) {
            super(directoryFileSource, "DataSource");
            long lastModifiedMillis = directoryFileSource.dataDir.toFile().lastModified();
            this.fileModifiedTime = new DateTimeProvenance(FILE_MODIFIED_KEY,
                    OffsetDateTime.ofInstant(Instant.ofEpochMilli(lastModifiedMillis), ZoneId.systemDefault()));
            this.dataSourceCreationTime = new DateTimeProvenance(CREATION_TIME_KEY, OffsetDateTime.now());
        }

        /**
         * Rebuilds the provenance from a map of provenance values.
         *
         * @param map The provenance values keyed by name; see {@link #extractProvenanceInfo}.
         */
        public DirectoryFileSourceProvenance(Map<String, Provenance> map) {
            this(extractProvenanceInfo(map));
        }

        private DirectoryFileSourceProvenance(SkeletalConfiguredObjectProvenance.ExtractedInfo extractedInfo) {
            super(extractedInfo);
            this.dataSourceCreationTime = (DateTimeProvenance) extractedInfo.instanceValues.get(CREATION_TIME_KEY);
            this.fileModifiedTime = (DateTimeProvenance) extractedInfo.instanceValues.get(FILE_MODIFIED_KEY);
        }

        /**
         * Splits a provenance map into the class name, host short name, instance values and the
         * remaining entries (which are passed on as the configured parameters).
         * <p>
         * Both instance values written by {@link #getInstanceValues()} are extracted here, so
         * that a provenance rebuilt from a map has the same fields populated as the original.
         *
         * @param map The provenance values keyed by name. The supplied map is copied, not modified.
         * @return The extracted information.
         */
        protected static SkeletalConfiguredObjectProvenance.ExtractedInfo extractProvenanceInfo(Map<String, Provenance> map) {
            Map<String, Provenance> configuredParameters = new HashMap<>(map);
            String provenanceName = DirectoryFileSourceProvenance.class.getSimpleName();

            String className = ObjectProvenance.checkAndExtractProvenance(
                    configuredParameters, "class-name", StringProvenance.class, provenanceName).getValue();
            String hostShortName = ObjectProvenance.checkAndExtractProvenance(
                    configuredParameters, "host-short-name", StringProvenance.class, provenanceName).getValue();

            Map<String, PrimitiveProvenance<?>> instanceParameters = new HashMap<>();
            instanceParameters.put(FILE_MODIFIED_KEY, ObjectProvenance.checkAndExtractProvenance(
                    configuredParameters, FILE_MODIFIED_KEY, DateTimeProvenance.class, provenanceName));
            instanceParameters.put(CREATION_TIME_KEY, ObjectProvenance.checkAndExtractProvenance(
                    configuredParameters, CREATION_TIME_KEY, DateTimeProvenance.class, provenanceName));

            return new SkeletalConfiguredObjectProvenance.ExtractedInfo(className, hostShortName, configuredParameters, instanceParameters);
        }

        /**
         * Returns the instance values recorded by this provenance.
         *
         * @return A fresh map holding the file modified time and the data source creation time.
         */
        public Map<String, PrimitiveProvenance<?>> getInstanceValues() {
            Map<String, PrimitiveProvenance<?>> values = new HashMap<>();
            values.put(FILE_MODIFIED_KEY, this.fileModifiedTime);
            values.put(CREATION_TIME_KEY, this.dataSourceCreationTime);
            return values;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof DirectoryFileSourceProvenance) || !super.equals(obj)) {
                return false;
            }
            DirectoryFileSourceProvenance other = (DirectoryFileSourceProvenance) obj;
            return this.fileModifiedTime.equals(other.fileModifiedTime)
                    && this.dataSourceCreationTime.equals(other.dataSourceCreationTime);
        }

        @Override
        public int hashCode() {
            return Objects.hash(super.hashCode(), this.fileModifiedTime, this.dataSourceCreationTime);
        }
    }

    /**
     * Iterates over the documents under the data directory, one label directory at a time.
     * <p>
     * The label directories are listed eagerly in the constructor; the files inside a label
     * directory are listed when that directory is reached. A single example is buffered ahead
     * so that {@link #hasNext()} only reports {@code true} when {@link #next()} can actually
     * return an example: empty label directories and documents rejected by a preprocessor
     * are skipped.
     */
    private class DirectoryIterator implements Iterator<Example<T>> {
        private final Queue<Path> labelDirs = new ArrayDeque<>();
        private final Queue<Path> labelPaths = new ArrayDeque<>();
        private final StringBuilder db = new StringBuilder();
        /** Name of the label directory the entries in {@code labelPaths} came from. */
        private String label;
        /** The looked-ahead example; only meaningful while {@code buffered} is true. */
        private Example<T> bufferedExample;
        private boolean buffered;

        /**
         * Lists the label directories under the data directory.
         *
         * @throws IllegalStateException If the data directory cannot be opened.
         */
        public DirectoryIterator() {
            try (DirectoryStream<Path> stream = Files.newDirectoryStream(dataDir)) {
                for (Path dir : stream) {
                    this.labelDirs.offer(dir);
                }
            } catch (IOException e) {
                throw new IllegalStateException("Can't open directory " + dataDir, e);
            }
            logger.info(String.format("Got %d output directories in %s", this.labelDirs.size(), dataDir));
        }

        /**
         * Reports whether another example is available, reading ahead if necessary.
         *
         * @throws IllegalStateException If a directory or document cannot be read while looking ahead.
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            if (!this.buffered) {
                this.buffered = advance();
            }
            return this.buffered;
        }

        /**
         * Returns the next example.
         *
         * @throws NoSuchElementException If no further example can be produced.
         * @throws IllegalStateException If a directory or document cannot be read.
         */
        @Override // java.util.Iterator
        public Example<T> next() {
            if (!hasNext()) {
                throw new NoSuchElementException("No more files");
            }
            Example<T> example = this.bufferedExample;
            this.bufferedExample = null;
            this.buffered = false;
            return example;
        }

        /**
         * Reads documents until one survives preprocessing, then extracts and buffers it.
         * Iterative rather than recursive, so a long run of rejected documents neither
         * deepens the stack nor keeps earlier readers open.
         *
         * @return true if an example was buffered, false if the input is exhausted.
         */
        private boolean advance() {
            Path file;
            while ((file = pollNextFile()) != null) {
                String document = preprocess(readDocument(file));
                if (document != null) {
                    this.bufferedExample = extractor.extract(outputFactory.generateOutput(this.label), document);
                    return true;
                }
            }
            return false;
        }

        /**
         * Returns the next document path, moving on to further label directories (and
         * updating {@code label}) whenever the current one has no entries left.
         *
         * @return The next path, or null once every label directory has been consumed.
         */
        private Path pollNextFile() {
            while (this.labelPaths.isEmpty()) {
                Path labelPath = this.labelDirs.poll();
                if (labelPath == null) {
                    return null;
                }
                this.label = labelPath.getFileName().toString();
                try (DirectoryStream<Path> stream = Files.newDirectoryStream(labelPath)) {
                    for (Path entry : stream) {
                        this.labelPaths.offer(entry);
                    }
                } catch (IOException e) {
                    throw new IllegalStateException("Can't open directory " + labelPath, e);
                }
                logger.info(String.format("Got %d paths in %s", this.labelPaths.size(), labelPath));
            }
            return this.labelPaths.poll();
        }

        /**
         * Reads a whole file into a string. Each line is trimmed and terminated with a
         * newline; a line that is blank after trimming contributes two newlines.
         */
        private String readDocument(Path file) {
            this.db.setLength(0);
            // InputStreamReader is kept on purpose: it substitutes malformed byte sequences
            // instead of failing, which Files.newBufferedReader would not do.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file.toFile()), enc))) {
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    String trimmed = line.trim();
                    if (trimmed.isEmpty()) {
                        this.db.append('\n');
                    } else {
                        this.db.append(trimmed);
                    }
                    this.db.append('\n');
                }
            } catch (IOException e) {
                throw new IllegalStateException("Error reading path " + file, e);
            }
            return this.db.toString();
        }

        /**
         * Applies the preprocessors in order.
         *
         * @return The processed document, or null as soon as any preprocessor returns null.
         */
        private String preprocess(String document) {
            String current = document;
            for (DocumentPreprocessor preprocessor : preprocessors) {
                current = preprocessor.processDoc(current);
                if (current == null) {
                    return null;
                }
            }
            return current;
        }
    }

    /**
     * No-argument constructor; leaves the data directory at {@code "."} and the preprocessor
     * list empty. Presumably the {@code @Config} fields are populated by the configuration
     * system afterwards (not visible in this file).
     */
    protected DirectoryFileSource() {
    }

    /**
     * Creates a data source over the supplied directory.
     *
     * @param path The top-level directory containing one sub-directory per label.
     * @param outputFactory The factory used to turn a label directory name into an output.
     * @param textFeatureExtractor The extractor used to turn a document into an example.
     * @param documentPreprocessorArr Preprocessors applied, in order, to each document before extraction.
     */
    public DirectoryFileSource(Path path, OutputFactory<T> outputFactory, TextFeatureExtractor<T> textFeatureExtractor, DocumentPreprocessor... documentPreprocessorArr) {
        this.dataDir = path;
        this.outputFactory = outputFactory;
        this.extractor = textFeatureExtractor;
        this.preprocessors.addAll(Arrays.asList(documentPreprocessorArr));
    }

    @Override
    public String toString() {
        return "DirectoryDataSource(directory=" + this.dataDir + ",extractor=" + this.extractor + ",preprocessors=" + this.preprocessors + ")";
    }

    /**
     * Returns the output factory used to build the label for each example.
     *
     * @return The output factory.
     */
    public OutputFactory<T> getOutputFactory() {
        return this.outputFactory;
    }

    /**
     * Creates a fresh iterator over the examples; the data directory is listed on each call.
     *
     * @return An iterator over the examples in the data directory.
     * @throws IllegalStateException If the data directory cannot be opened.
     */
    public Iterator<Example<T>> iterator() {
        return new DirectoryIterator();
    }

    /**
     * Returns the provenance of this data source.
     * <p>
     * NOTE(review): the decompiler renamed this method to {@link #m55getProvenance()} while
     * merging a bridge method; it is restored here under its original name. Confirm against
     * the interface declaration, which is not visible in this file.
     *
     * @return A newly created provenance describing this source.
     */
    public ConfiguredDataSourceProvenance getProvenance() {
        return new DirectoryFileSourceProvenance(this);
    }

    /* renamed from: getProvenance, reason: merged with bridge method [inline-methods] */
    /**
     * Decompiler-generated alias of {@link #getProvenance()}, kept so existing references
     * to the renamed method keep working.
     *
     * @return A newly created provenance describing this source.
     */
    public ConfiguredDataSourceProvenance m55getProvenance() {
        return getProvenance();
    }
}
