import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Simple script that takes a list of links from a file, downloads the ".xmi" ones one by one
 * and checks their schema references for hints whether the file contains a UML model.
 *
 * <p>Results are appended to plain text files in the output directory, one link per line:
 * <ul>
 *   <li>{@code xmilinks.txt} / {@code umllinks.txt} - links ending in ".xmi" / ".uml"</li>
 *   <li>{@code mof.txt}, {@code uml.txt}, {@code xmi.txt} - analyzed ".xmi" links by detected kind</li>
 *   <li>{@code NoOMGXMIorUML.txt} - analyzed links without any known schema reference</li>
 *   <li>{@code NotDownloaded.txt}, {@code TooBigToRead.txt} - links that could not be fetched</li>
 * </ul>
 *
 * <p>The class keeps its state in static fields and is not designed for concurrent use.
 */
public class XMIAnalyzer {

	/** Links ending in ".xmi" that were collected by the most recent call of {@link #main}. */
	public static ArrayList<String> xmilinks;

	/** Link list that is analyzed when no input file is passed on the command line. */
	private static final String DEFAULT_INPUT_FILE = "D:/Projects/UMLinOpenSource/InputXMIAnalyzer/urls-10360000-11000000.txt";

	/** Output directory that is used when none is passed on the command line. */
	private static final String DEFAULT_OUTPUT_DIR = "D:/Projects/UMLinOpenSource/OutputXMIAnalyzerNew/";

	/** Name of the file (inside the output directory) that holds the most recent download. */
	private static final String TEMP_FILE_NAME = "temp.txt";

	/** Size in bytes of the buffer used for a single read from the remote stream. */
	private static final int CHUNK_SIZE = 1024;

	/** Maximum number of reads per download, i.e. at most CHUNK_SIZE * MAX_CHUNKS bytes are kept. */
	private static final int MAX_CHUNKS = 100;

	/*
	 * Marker substrings that identify a schema reference. Markers that only differ by an
	 * additional prefix (e.g. "xmlns=\"" or "http://www.") are covered by the shorter
	 * substring and therefore listed only once.
	 */
	private static final String[] MOF_MARKERS = {
		"omg.org/spec/MOF",
		"omg.org/mof.Model"
	};

	private static final String[] UML_MARKERS = {
		"omg.org/UML",
		"org.omg/UML",
		"org.omg/standards/UML",
		"org.omg.xmi.namespace.UML",
		"omg.org/spec/UML",
		"<XMI.exporter>Novosoft UML Library</XMI.exporter>"
	};

	private static final String[] XMI_MARKERS = {
		"omg.org/XMI",
		"xmlns=\"http://www.omg.org/spec/XMI",
		"omg.org/spec/XMI/",
		"org.omg.xmi.namespace.Model",
		"<XMI.exporter>Netbeans XMI Writer</XMI.exporter>"
	};

	/** Directory (including trailing separator) that receives the log files and the temporary download. */
	private static String outputDir = DEFAULT_OUTPUT_DIR;

	/**
	 * Reads the link list, records all ".xmi" and ".uml" links and analyzes every ".xmi" link.
	 *
	 * @param args optional; {@code args[0]} is the path of the link list (one link per line) and
	 *             {@code args[1]} is the output directory, which must already exist. Missing
	 *             arguments fall back to the built-in default paths.
	 */
	public static void main(String[] args) {
		String inputFile = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_FILE;
		outputDir = (args != null && args.length > 1) ? withTrailingSeparator(args[1]) : DEFAULT_OUTPUT_DIR;
		xmilinks = new ArrayList<String>();

		try {
			// collect those links that are .xmi files (and note the .uml ones)
			try (BufferedReader in = new BufferedReader(new FileReader(inputFile))) {
				String zeile;
				while ((zeile = in.readLine()) != null) {
					if (zeile.endsWith(".xmi")) {
						xmilinks.add(zeile);
						log(zeile, "xmilinks");
					} else if (zeile.endsWith(".uml")) {
						log(zeile, "umllinks");
					}
				}
			}

			// check for every collected xmi link whether it is UML
			for (String link : xmilinks) {
				analyzeTemp(link);
			}
		} catch (Exception e) {
			// top-level boundary: report the problem instead of terminating with a stack trace
			System.out.println("Excpetion: " + e);
		}
	}

	/**
	 * Helper method to write the log files including the resulting link lists.
	 * Appends {@code message} as one line to {@code <outputDir>/<filename>.txt}.
	 *
	 * <p>Logging is best effort: a failure is reported on standard error but never thrown,
	 * so that a single unwritable file does not abort the analysis.
	 *
	 * @param message  the line to append
	 * @param filename name of the log file without directory and without the ".txt" extension
	 */
	public static void log(String message, String filename) {
		String target = outputDir + filename + ".txt";
		try (PrintWriter out = new PrintWriter(new FileWriter(target, true), true)) {
			out.println(message);
		} catch (Exception e) {
			System.err.println("Could not write to log file " + target + ": " + e);
		}
	}

	/**
	 * Downloads the beginning of the given link and scans it for XMI, UML, or MOF schema references.
	 * The link is then logged to exactly one of "mof", "uml", "xmi" (in this order of priority)
	 * or to "NoOMGXMIorUML" if no reference was found. Nothing is logged here if the download
	 * failed, because {@link #download(String)} has already recorded the link in that case.
	 *
	 * <p>NOTE: it is assumed that the files are XML - though the method also works for other
	 * textual files, since it only searches for substrings line by line.
	 *
	 * @param linkname the URL of the file to analyze
	 */
	public static void analyzeTemp(String linkname){
		try {
			if (!download(linkname)) {
				return;
			}

			String category;
			try (BufferedReader in = new BufferedReader(new FileReader(outputDir + TEMP_FILE_NAME))) {
				category = classify(in);
			}

			if (category == null) {
				// no UML schema found, save link in file "NoOMGXMIorUML"
				log(linkname, "NoOMGXMIorUML");
			} else {
				log(linkname, category);
				System.out.println("logged " + category + ": " + linkname);
			}
		} catch (Exception e) {
			// deliberately broad: one broken link must not stop the analysis of the remaining links
			System.out.println("Excpetion: " + e);
		}
	}

	/**
	 * Downloads the file from a given link and writes it to the temporary file in the output
	 * directory, replacing the previous download. Only the beginning of the file is stored
	 * (at most {@value #MAX_CHUNKS} reads of up to {@value #CHUNK_SIZE} bytes each), because
	 * XMI files declare their schemas within the first lines.
	 *
	 * <p>The method partially bases on a code example from stackoverflow
	 * (http://stackoverflow.com/questions/23248792/how-to-download-a-file-from-the-internet-using-java).
	 *
	 * @param linkname the URL to download
	 * @return {@code true} if the temporary file was written; {@code false} if the link could not
	 *         be opened (link is logged to "NotDownloaded") or reading the data / writing the
	 *         temporary file failed (link is logged to "TooBigToRead")
	 * @throws IOException declared for compatibility with existing callers; all I/O failures
	 *                     are handled inside the method and reported via the return value
	 */
	public static boolean download(String linkname) throws IOException {
		try (InputStream in = new BufferedInputStream(new URL(linkname).openStream())) {
			try {
				ByteArrayOutputStream buffer = new ByteArrayOutputStream();
				byte[] chunk = new byte[CHUNK_SIZE];
				int chunks = 0;
				int n;
				// check the limit first so that no chunk is read only to be thrown away
				while (chunks < MAX_CHUNKS && (n = in.read(chunk)) != -1) {
					buffer.write(chunk, 0, n);
					chunks++;
				}

				// the temporary file is only touched after the data was read successfully
				try (FileOutputStream fos = new FileOutputStream(outputDir + TEMP_FILE_NAME)) {
					fos.write(buffer.toByteArray());
				}
				return true;
			} catch (Exception e) {
				System.out.println("Excpetion: " + e);
				log(linkname, "TooBigToRead");
				return false;
			}
		} catch (IOException e) {
			System.out.println("Excpetion: " + e);
			log(linkname, "NotDownloaded");
			return false;
		}
	}

	/**
	 * Scans the text line by line for schema markers.
	 *
	 * @return "mof", "uml" or "xmi" (highest priority first) or {@code null} if no marker was found
	 */
	private static String classify(BufferedReader reader) throws IOException {
		boolean isUml = false;
		boolean isXmi = false;
		String zeile;
		while ((zeile = reader.readLine()) != null) {
			if (containsAny(zeile, MOF_MARKERS)) {
				// MOF has the highest priority, so the remaining lines cannot change the result
				return "mof";
			}
			isUml = isUml || containsAny(zeile, UML_MARKERS);
			isXmi = isXmi || containsAny(zeile, XMI_MARKERS);
		}
		if (isUml) {
			return "uml";
		}
		return isXmi ? "xmi" : null;
	}

	/** Returns whether {@code text} contains at least one of the given markers. */
	private static boolean containsAny(String text, String[] markers) {
		for (String marker : markers) {
			if (text.contains(marker)) {
				return true;
			}
		}
		return false;
	}

	/** Appends "/" to a non-empty directory name unless it already ends with a separator. */
	private static String withTrailingSeparator(String dir) {
		if (dir.isEmpty() || dir.endsWith("/") || dir.endsWith(java.io.File.separator)) {
			return dir;
		}
		return dir + "/";
	}

}
