|
684 | 684 | "## Cleaning and Tidying DateTime Data"
|
685 | 685 | ]
|
686 | 686 | },
|
| 687 | + { |
| 688 | + "cell_type": "code", |
| 689 | + "execution_count": 9, |
| 690 | + "metadata": {}, |
| 691 | + "outputs": [ |
| 692 | + { |
| 693 | + "data": { |
| 694 | + "text/html": [ |
| 695 | + "<div>\n", |
| 696 | + "<style scoped>\n", |
| 697 | + " .dataframe tbody tr th:only-of-type {\n", |
| 698 | + " vertical-align: middle;\n", |
| 699 | + " }\n", |
| 700 | + "\n", |
| 701 | + " .dataframe tbody tr th {\n", |
| 702 | + " vertical-align: top;\n", |
| 703 | + " }\n", |
| 704 | + "\n", |
| 705 | + " .dataframe thead th {\n", |
| 706 | + " text-align: right;\n", |
| 707 | + " }\n", |
| 708 | + "</style>\n", |
| 709 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 710 | + " <thead>\n", |
| 711 | + " <tr style=\"text-align: right;\">\n", |
| 712 | + " <th></th>\n", |
| 713 | + " <th>Wban</th>\n", |
| 714 | + " <th>date</th>\n", |
| 715 | + " <th>Time</th>\n", |
| 716 | + " <th>StationType</th>\n", |
| 717 | + " <th>sky_condition</th>\n", |
| 718 | + " <th>visibility</th>\n", |
| 719 | + " <th>dry_bulb_faren</th>\n", |
| 720 | + " <th>dry_bulb_cel</th>\n", |
| 721 | + " <th>wet_bulb_faren</th>\n", |
| 722 | + " <th>wet_bulb_cel</th>\n", |
| 723 | + " <th>dew_point_faren</th>\n", |
| 724 | + " <th>dew_point_cel</th>\n", |
| 725 | + " <th>relative_humidity</th>\n", |
| 726 | + " <th>wind_speed</th>\n", |
| 727 | + " <th>wind_direction</th>\n", |
| 728 | + " <th>station_pressure</th>\n", |
| 729 | + " <th>sea_level_pressure</th>\n", |
| 730 | + " </tr>\n", |
| 731 | + " </thead>\n", |
| 732 | + " <tbody>\n", |
| 733 | + " <tr>\n", |
| 734 | + " <th>0</th>\n", |
| 735 | + " <td>13904</td>\n", |
| 736 | + " <td>20110101</td>\n", |
| 737 | + " <td>0053</td>\n", |
| 738 | + " <td>12</td>\n", |
| 739 | + " <td>OVC045</td>\n", |
| 740 | + " <td>10.00</td>\n", |
| 741 | + " <td>51</td>\n", |
| 742 | + " <td>10.6</td>\n", |
| 743 | + " <td>38</td>\n", |
| 744 | + " <td>3.1</td>\n", |
| 745 | + " <td>15</td>\n", |
| 746 | + " <td>-9.4</td>\n", |
| 747 | + " <td>24</td>\n", |
| 748 | + " <td>15</td>\n", |
| 749 | + " <td>360</td>\n", |
| 750 | + " <td>29.42</td>\n", |
| 751 | + " <td>29.95</td>\n", |
| 752 | + " </tr>\n", |
| 753 | + " <tr>\n", |
| 754 | + " <th>1</th>\n", |
| 755 | + " <td>13904</td>\n", |
| 756 | + " <td>20110101</td>\n", |
| 757 | + " <td>0153</td>\n", |
| 758 | + " <td>12</td>\n", |
| 759 | + " <td>OVC049</td>\n", |
| 760 | + " <td>10.00</td>\n", |
| 761 | + " <td>51</td>\n", |
| 762 | + " <td>10.6</td>\n", |
| 763 | + " <td>37</td>\n", |
| 764 | + " <td>3.0</td>\n", |
| 765 | + " <td>14</td>\n", |
| 766 | + " <td>-10.0</td>\n", |
| 767 | + " <td>23</td>\n", |
| 768 | + " <td>10</td>\n", |
| 769 | + " <td>340</td>\n", |
| 770 | + " <td>29.49</td>\n", |
| 771 | + " <td>30.01</td>\n", |
| 772 | + " </tr>\n", |
| 773 | + " <tr>\n", |
| 774 | + " <th>2</th>\n", |
| 775 | + " <td>13904</td>\n", |
| 776 | + " <td>20110101</td>\n", |
| 777 | + " <td>0253</td>\n", |
| 778 | + " <td>12</td>\n", |
| 779 | + " <td>OVC060</td>\n", |
| 780 | + " <td>10.00</td>\n", |
| 781 | + " <td>51</td>\n", |
| 782 | + " <td>10.6</td>\n", |
| 783 | + " <td>37</td>\n", |
| 784 | + " <td>2.9</td>\n", |
| 785 | + " <td>13</td>\n", |
| 786 | + " <td>-10.6</td>\n", |
| 787 | + " <td>22</td>\n", |
| 788 | + " <td>15</td>\n", |
| 789 | + " <td>010</td>\n", |
| 790 | + " <td>29.49</td>\n", |
| 791 | + " <td>30.01</td>\n", |
| 792 | + " </tr>\n", |
| 793 | + " <tr>\n", |
| 794 | + " <th>3</th>\n", |
| 795 | + " <td>13904</td>\n", |
| 796 | + " <td>20110101</td>\n", |
| 797 | + " <td>0353</td>\n", |
| 798 | + " <td>12</td>\n", |
| 799 | + " <td>OVC065</td>\n", |
| 800 | + " <td>10.00</td>\n", |
| 801 | + " <td>50</td>\n", |
| 802 | + " <td>10.0</td>\n", |
| 803 | + " <td>38</td>\n", |
| 804 | + " <td>3.1</td>\n", |
| 805 | + " <td>17</td>\n", |
| 806 | + " <td>-8.3</td>\n", |
| 807 | + " <td>27</td>\n", |
| 808 | + " <td>7</td>\n", |
| 809 | + " <td>350</td>\n", |
| 810 | + " <td>29.51</td>\n", |
| 811 | + " <td>30.03</td>\n", |
| 812 | + " </tr>\n", |
| 813 | + " <tr>\n", |
| 814 | + " <th>4</th>\n", |
| 815 | + " <td>13904</td>\n", |
| 816 | + " <td>20110101</td>\n", |
| 817 | + " <td>0453</td>\n", |
| 818 | + " <td>12</td>\n", |
| 819 | + " <td>BKN070</td>\n", |
| 820 | + " <td>10.00</td>\n", |
| 821 | + " <td>50</td>\n", |
| 822 | + " <td>10.0</td>\n", |
| 823 | + " <td>37</td>\n", |
| 824 | + " <td>2.8</td>\n", |
| 825 | + " <td>15</td>\n", |
| 826 | + " <td>-9.4</td>\n", |
| 827 | + " <td>25</td>\n", |
| 828 | + " <td>11</td>\n", |
| 829 | + " <td>020</td>\n", |
| 830 | + " <td>29.51</td>\n", |
| 831 | + " <td>30.04</td>\n", |
| 832 | + " </tr>\n", |
| 833 | + " </tbody>\n", |
| 834 | + "</table>\n", |
| 835 | + "</div>" |
| 836 | + ], |
| 837 | + "text/plain": [ |
| 838 | + " Wban date Time StationType sky_condition visibility dry_bulb_faren \\\n", |
| 839 | + "0 13904 20110101 0053 12 OVC045 10.00 51 \n", |
| 840 | + "1 13904 20110101 0153 12 OVC049 10.00 51 \n", |
| 841 | + "2 13904 20110101 0253 12 OVC060 10.00 51 \n", |
| 842 | + "3 13904 20110101 0353 12 OVC065 10.00 50 \n", |
| 843 | + "4 13904 20110101 0453 12 BKN070 10.00 50 \n", |
| 844 | + "\n", |
| 845 | + " dry_bulb_cel wet_bulb_faren wet_bulb_cel dew_point_faren dew_point_cel \\\n", |
| 846 | + "0 10.6 38 3.1 15 -9.4 \n", |
| 847 | + "1 10.6 37 3.0 14 -10.0 \n", |
| 848 | + "2 10.6 37 2.9 13 -10.6 \n", |
| 849 | + "3 10.0 38 3.1 17 -8.3 \n", |
| 850 | + "4 10.0 37 2.8 15 -9.4 \n", |
| 851 | + "\n", |
| 852 | + " relative_humidity wind_speed wind_direction station_pressure \\\n", |
| 853 | + "0 24 15 360 29.42 \n", |
| 854 | + "1 23 10 340 29.49 \n", |
| 855 | + "2 22 15 010 29.49 \n", |
| 856 | + "3 27 7 350 29.51 \n", |
| 857 | + "4 25 11 020 29.51 \n", |
| 858 | + "\n", |
| 859 | + " sea_level_pressure \n", |
| 860 | + "0 29.95 \n", |
| 861 | + "1 30.01 \n", |
| 862 | + "2 30.01 \n", |
| 863 | + "3 30.03 \n", |
| 864 | + "4 30.04 " |
| 865 | + ] |
| 866 | + }, |
| 867 | + "execution_count": 9, |
| 868 | + "metadata": {}, |
| 869 | + "output_type": "execute_result" |
| 870 | + } |
| 871 | + ], |
| 872 | + "source": [ |
| 873 | + "# Convert the date column to string\n", |
| 874 | + "df_dropped['date'] = df_dropped['date'].astype(str)\n", |
| 875 | + "\n", |
| 876 | + "# Add leading zeros to the 'Time' column\n", |
| 877 | + "df_dropped['Time'] = df_dropped['Time'].apply(lambda x: '{:0>4}'.format(x))\n", |
| 878 | + "\n", |
| 879 | + "# Concatenate the new date and Time columns\n", |
| 880 | + "date_string = df_dropped['date'] + df_dropped['Time']\n", |
| 881 | + "\n", |
| 882 | + "# Convert the concatenated date_string to datetime values\n", |
| 883 | + "date_times = pd.to_datetime(date_string, format='%Y%m%d%H%M')\n", |
| 884 | + "\n", |
| 885 | + "# Set the index to be the new date_times\n", |
| 886 | + "df_clean = df_dropped.set_index(date_times)\n", |
| 887 | + "\n", |
| 888 | + "df_dropped.head()" |
| 889 | + ] |
| 890 | + }, |
687 | 891 | {
|
688 | 892 | "cell_type": "code",
|
689 | 893 | "execution_count": null,
|
|
0 commit comments