
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;


/**
 * Finds links whose downloaded content is identical.
 *
 * <p>Each link read from the input list is downloaded (only the leading part of
 * the content is kept), the stored bytes are hashed with MD5, and the link is
 * registered under that hash. The first link seen for a hash goes into
 * {@link #linksFileHashs}; every further link with the same hash is collected
 * in {@link #HashDublicates}.
 *
 * <p>All state is held in static, unsynchronized maps.
 */
public class DublicateCheck {

	/** Directory that receives the temporary download and all log/result files. */
	private static final String OUTPUT_DIR = "D:/Projects/UMLinOpenSource/OutputXMIAnalyzerNew/";

	/** File holding the most recently downloaded content until it has been hashed. */
	private static final String TEMP_FILE = OUTPUT_DIR + "temp.txt";

	/** Column separator used in every row written by {@link #log} and read by {@link #refill}. */
	private static final String SEPARATOR = " , ";

	/** Size in bytes of the buffer used for reading downloads and the temp file. */
	private static final int BUFFER_SIZE = 1024;

	/** Maximum number of buffer reads kept per download. */
	private static final int MAX_CHUNKS = 100;

	/** Maps a file hash to the first link that produced it. */
	public static Map<String, String> linksFileHashs = new HashMap<>();

	/** Maps a file hash to all further links (beyond the first) that produced the same hash. */
	public static Map<String, ArrayList<String>> HashDublicates = new HashMap<>();

	/**
	 * Appends the content of both maps to the "LinkHashsSaved" log file, one
	 * "hash , link" row per link, in the format that {@link #refill} reads back.
	 *
	 * @return always {@code true}
	 */
	public static boolean save(){
		for (Map.Entry<String, String> entry : linksFileHashs.entrySet()) {
			log(entry.getKey() + SEPARATOR + entry.getValue(), "LinkHashsSaved");
		}

		for (Map.Entry<String, ArrayList<String>> entry : HashDublicates.entrySet()) {
			for (String link : entry.getValue()) {
				log(entry.getKey() + SEPARATOR + link, "LinkHashsSaved");
			}
		}

		return true;
	}

	/**
	 * Loads "hash , link" rows from a previously saved file into the maps.
	 *
	 * <p>Rows that do not contain the separator are reported and skipped, so a
	 * single damaged row no longer aborts the whole reload.
	 *
	 * @param path path of the file to read
	 * @return {@code true} if the file was read to the end, {@code false} if
	 *         {@code path} is {@code null} or the file could not be read
	 */
	public static boolean refill(String path){
		if (path == null) {
			System.out.println("Excpetion: no path given");
			return false;
		}

		try (BufferedReader in = new BufferedReader(new FileReader(path))) {
			String zeile;
			while ((zeile = in.readLine()) != null) {
				// Limit 2: everything after the first separator belongs to the link.
				String[] row = zeile.split(SEPARATOR, 2);
				if (row.length < 2) {
					System.out.println("Skipping malformed row in " + path + ": " + zeile);
					continue;
				}
				register(row[0], row[1]);
			}

			System.out.println("refilled : " + path + " : " + linksFileHashs.size());
			return true;
		} catch (IOException e) {
			System.out.println("Excpetion: " + e);
			return false;
		}
	}

	/**
	 * Downloads and hashes every link listed in the input file, then writes one
	 * row per duplicated hash to the "Dublicates" log and saves both maps.
	 *
	 * @param args unused
	 * @throws Exception declared for compatibility; failures while reading the
	 *         input list are caught and printed
	 */
	public static void main(String[] args)throws Exception {
		// A previous run can be reloaded first with refill(<path to a saved "hash , link" file>).

		//hard-coded: path to the file containing the links to be analyzed
		String path = "D:/Projects/UMLinOpenSource/InputXMIAnalyzer/umls.txt";

		try (BufferedReader in = new BufferedReader(new FileReader(path))) {
			String zeile;
			while ((zeile = in.readLine()) != null) {
				if (download(zeile)) {
					storeHash(zeile);
				}
			}
		} catch (Exception e) {
			System.out.println("Excpetion: " + e);
		}

		// Row layout: hash , number of duplicates , first link , duplicate link(s)...
		for (Map.Entry<String, ArrayList<String>> entry : HashDublicates.entrySet()) {
			String hash = entry.getKey();
			ArrayList<String> duplicates = entry.getValue();
			StringBuilder dubnames = new StringBuilder();
			for (String link : duplicates) {
				dubnames.append(SEPARATOR).append(link);
			}
			log(hash + SEPARATOR + duplicates.size() + SEPARATOR + linksFileHashs.get(hash) + dubnames, "Dublicates");
		}

		save();
	}

	/**
	 * Hashes the content of the temp file with MD5, registers the given link
	 * under that hash and appends a "hash , link" row to the "LinkHashs" log.
	 * Any failure is printed and otherwise ignored.
	 *
	 * <p>MD5 serves only as a content fingerprint here, not as a security measure.
	 * The hashing loop is based on
	 * http://www.mkyong.com/java/java-md5-hashing-example/
	 *
	 * @param link the link whose download currently sits in the temp file
	 * @throws Exception declared for compatibility; exceptions are caught internally
	 */
	private static void storeHash(String link)throws Exception {
		try {
			MessageDigest md = MessageDigest.getInstance("MD5");

			// try-with-resources: the stream is closed even if reading fails.
			try (FileInputStream fis = new FileInputStream(TEMP_FILE)) {
				byte[] dataBytes = new byte[BUFFER_SIZE];
				int nread;
				while ((nread = fis.read(dataBytes)) != -1) {
					md.update(dataBytes, 0, nread);
				}
			}

			String hash = toHex(md.digest());
			System.out.println("Digest(in hex format):: " + hash);

			register(hash, link);
			log(hash + SEPARATOR + link, "LinkHashs");
		} catch (Exception e) {
			System.out.println("Exception 2: " + e);
		}
	}

	/**
	 * Records a link under its hash: the first link for a hash is stored in
	 * {@link #linksFileHashs}, every later one is appended to the list kept in
	 * {@link #HashDublicates}.
	 */
	private static void register(String hash, String link) {
		if (!linksFileHashs.containsKey(hash)) {
			linksFileHashs.put(hash, link);
			return;
		}

		ArrayList<String> duplicates = HashDublicates.get(hash);
		if (duplicates == null) {
			duplicates = new ArrayList<>();
			HashDublicates.put(hash, duplicates);
		}
		duplicates.add(link);
	}

	/** Converts bytes to lower-case hexadecimal, two digits per byte. */
	private static String toHex(byte[] bytes) {
		StringBuilder sb = new StringBuilder(bytes.length * 2);
		for (byte b : bytes) {
			// Adding 0x100 forces a leading digit so that substring(1) keeps a leading zero.
			sb.append(Integer.toString((b & 0xff) + 0x100, 16).substring(1));
		}
		return sb.toString();
	}

	/**
	 * Helper method to write the log files including the resulting link lists.
	 * Appends one line to {@code <output directory>/<filename>.txt}. A write
	 * failure is reported on standard error and does not interrupt the caller.
	 *
	 * @param message  the line to append
	 * @param filename name of the log file, without directory and extension
	 */
	public static void log(String message, String filename) {
		try (PrintWriter out = new PrintWriter(new FileWriter(OUTPUT_DIR + filename + ".txt", true), true)) {
			out.println(message);
		} catch (Exception e) {
			System.err.println("Could not write to log '" + filename + "': " + e);
		}
	}

	/**
	 * Downloads the content behind a link and writes it to the temp file,
	 * replacing the previous download. At most {@code MAX_CHUNKS} buffer reads
	 * of up to {@code BUFFER_SIZE} bytes each are kept; the rest is not read.
	 *
	 * <p>Links that cannot be opened are appended to the "NotDownloaded" log,
	 * links that fail while being read or stored to the "TooBigToRead" log.
	 *
	 * <p>Partially based on a code example from stackoverflow
	 * (http://stackoverflow.com/questions/23248792/how-to-download-a-file-from-the-internet-using-java)
	 *
	 * @param linkname the URL to download
	 * @return {@code true} if the content was stored in the temp file
	 * @throws IOException declared for compatibility; failures are caught and
	 *         reported through the return value
	 */
	public static boolean download(String linkname) throws IOException {
		try (InputStream in = new BufferedInputStream(new URL(linkname).openStream())) {

			try {
				ByteArrayOutputStream out = new ByteArrayOutputStream();
				byte[] buf = new byte[BUFFER_SIZE];
				int n;
				int count = 0;
				// The cap is checked first so no chunk is fetched only to be thrown away.
				// (XMI files include their schemas at the beginning of the file.)
				while (count < MAX_CHUNKS && (n = in.read(buf)) != -1) {
					count = count + 1;
					out.write(buf, 0, n);
				}

				// The file is only overwritten once the download part succeeded.
				try (FileOutputStream fos = new FileOutputStream(TEMP_FILE)) {
					fos.write(out.toByteArray());
				}
			} catch (Exception e) {
				System.out.println("Exception 4: " + e);
				log(linkname, "TooBigToRead");
				return false;
			}

			return true;

		} catch (Exception e) {
			System.out.println("Exception 5: " + e);
			log(linkname, "NotDownloaded");
			return false;
		}
	}

}
