import React from "react";
import { useDrop } from "react-use";

import { Dataset } from "~dataset/Dataset";
import { Attribute } from "~dataset/attributes/Attribute";
import { CategoryAttribute } from "~dataset/attributes/CategoryAttribute";
import { useDataStore } from "~stores/data";
import Papa from "papaparse";
import { NumberAttribute } from "~dataset/attributes/NumberAttribute";
import { TextAttribute } from "~dataset/attributes/TextAttribute";
import { AnyAttributeObject } from "~dataset/types";

/**
 * Parses the textual form of a numeric vector into an array of numbers.
 *
 * Two notations are accepted:
 * - a JSON array, e.g. `"[1, 2.5, 3]"`;
 * - numbers separated by whitespace and/or commas, with or without
 *   surrounding square brackets, e.g. `"[1. 2.5 3.]"` or `"1,2.5,3"`.
 *
 * @param value - Text to parse.
 * @returns The parsed numbers. An input without any token yields an empty
 *   array; a token that `parseFloat` cannot read yields `NaN` at its position.
 */
function parseArrayString(value: string): number[] {
  try {
    const parsed: unknown = JSON.parse(value);
    // JSON.parse also succeeds on scalars such as "5" or "null"; only a real
    // array may be returned as-is, anything else goes through the tokenizer.
    if (Array.isArray(parsed)) {
      return parsed as number[];
    }
  } catch {
    // Not valid JSON: fall through to the lenient tokenizer below.
  }

  return value
    .replace(/\[|\]/g, "")
    .split(/[\s,]+/)
    // Leading/trailing separators (or an empty input) produce empty tokens
    // that would otherwise be parsed as NaN.
    .filter((token) => token.length > 0)
    .map((token) => parseFloat(token));
}

/**
 * Heuristic deciding whether a text column should be treated as categorical.
 *
 * @param values - All cell values of the column.
 * @returns `true` when the number of distinct values is at most half the
 *   number of entries (which includes the empty column), `false` otherwise.
 */
function isCategory(values: string[]): boolean {
  const distinctCount = new Set(values).size;
  return distinctCount * 2 <= values.length;
}

// The three strings "x", "y" and "z". Nothing in this file references this
// constant.
// NOTE(review): presumably meant as axis names for projection coordinates —
// confirm the intended use or remove it.
const XYZ = ["x", "y", "z"];

/**
 * Headless component that turns dropped CSV files into `Dataset` instances.
 *
 * A drop handler is registered through `useDrop`; every dropped file whose
 * name ends in `.csv` (case-insensitive) is parsed with Papa Parse and the
 * resulting dataset is handed to `onLoad`. The component renders nothing.
 *
 * Column handling (header names are lower-cased before matching):
 * - `embeddings`: each cell is parsed with `parseArrayString`, the vectors are
 *   packed row-major into one `Float32Array` and stored on the dataset, then
 *   projections are computed and stored when available.
 * - any other column: the type of the value in the first data row decides
 *   whether a number, category or text attribute is built; columns whose
 *   first value is neither a number nor a string are skipped.
 *
 * A CSV without data rows produces a dataset without attributes.
 *
 * @param props.onLoad - Called once per successfully parsed file.
 */
export function CSVLoader({ onLoad }: { onLoad: (dataset: Dataset) => void }) {
  /**
   * Fetches and parses the CSV behind `url`, then passes the dataset to
   * `onLoad`. Rejects when the request fails or the content cannot be
   * processed.
   */
  const handleLoadUrl = async (url: string) => {
    const response = await fetch(url);
    if (!response.ok) {
      // Without this check an HTTP error page would be parsed as CSV.
      throw new Error(
        `Failed to fetch CSV from ${url}: ${response.status} ${response.statusText}`
      );
    }

    const result = Papa.parse<Record<string, string | number>>(
      await response.text(),
      {
        header: true,
        dynamicTyping: true,
        skipEmptyLines: true,
        transformHeader: (header) => header.toLowerCase(),
      }
    );
    const rows = result.data;
    const dataset = new Dataset();

    // Builds the attribute for one column; only called when `rows` is
    // non-empty, because the column type is inferred from the first row.
    function buildAttribute(field: string): Attribute | undefined {
      const firstValue = rows[0][field];

      if (typeof firstValue === "number") {
        return NumberAttribute.fromArray(
          field,
          rows.map((row) => row[field] as number)
        );
      }

      if (typeof firstValue === "string") {
        const values = rows.map((row) => row[field] as string);
        return isCategory(values)
          ? CategoryAttribute.fromTextValues(field, values)
          : TextAttribute.fromTextValues(field, values);
      }

      return undefined;
    }

    // Parses the embeddings column and stores embeddings (and projections,
    // when they can be computed) on the dataset. Requires non-empty `rows`.
    function loadEmbeddings(field: string): void {
      const arrays = rows.map((row) => parseArrayString(row[field] as string));

      const count = arrays.length;
      // The first row defines the dimensionality used for every row.
      const dimensions = arrays[0].length;

      const values = new Float32Array(count * dimensions);
      for (let i = 0; i < count; i++) {
        for (let j = 0; j < dimensions; j++) {
          values[i * dimensions + j] = arrays[i][j];
        }
      }

      dataset.setEmbeddings(values, dimensions);
      const projections = dataset.computeProjections();
      if (projections) {
        dataset.setProjections(projections);
      }
    }

    // A header-only (or empty) CSV has no first row to infer types from and
    // no embedding vectors; skip column processing instead of throwing.
    if (rows.length > 0) {
      for (const field of result.meta.fields ?? []) {
        if (field === "embeddings") {
          loadEmbeddings(field);
          continue;
        }

        const attribute = buildAttribute(field);
        if (attribute) {
          dataset.addAttribute(attribute);
        }
      }
    }

    onLoad(dataset);
  };

  /**
   * Loads one dropped file if it is a CSV. Never rejects: failures are logged
   * so that one bad file does not surface as an unhandled rejection.
   */
  const handleLoadFile = async (file: File) => {
    if (!file.name.toLowerCase().endsWith(".csv")) {
      return;
    }

    const objectUrl = URL.createObjectURL(file);
    try {
      await handleLoadUrl(objectUrl);
    } catch (error: unknown) {
      console.error(`Failed to load CSV file "${file.name}"`, error);
    } finally {
      // Object URLs keep the underlying blob alive until they are revoked.
      URL.revokeObjectURL(objectUrl);
    }
  };

  useDrop({
    onFiles: (files) => {
      files.forEach((file) => {
        // handleLoadFile handles its own errors, so the promise can be dropped.
        void handleLoadFile(file);
      });
    },
  });

  return null;
}

// Default export: the CSVLoader component wrapped in React.memo. The plain
// component is also available as the named export `CSVLoader`.
export default React.memo(CSVLoader);
